mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			125 lines
		
	
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			125 lines
		
	
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #! /usr/bin/env python
 | |
| 
 | |
| """A variant on webchecker that creates a mirror copy of a remote site."""
 | |
| 
 | |
| __version__ = "$Revision$"
 | |
| 
 | |
| import os
 | |
| import sys
 | |
| import urllib
 | |
| import getopt
 | |
| 
 | |
| import webchecker
 | |
| 
 | |
# When __version__ still holds a keyword-expanded "$Revision: N $" marker,
# keep only the middle word (the number).  Any value that does not split
# into exactly three words, such as a bare "$Revision$", is left untouched.
if __version__[0] == '$':
    _v = __version__.split()
    __version__ = _v[1] if len(_v) == 3 else __version__
 | |
| 
 | |
def main():
    """Command-line entry point: parse options, then mirror each root URL.

    Options:
        -q  set verbosity to 0
        -v  raise verbosity by one (may be repeated)

    Every remaining argument is added as a root URL before the run starts.

    Returns 2 when option parsing fails; otherwise returns None.
    """
    verbosity = webchecker.VERBOSE
    try:
        options, roots = getopt.getopt(sys.argv[1:], "qv")
    except getopt.error as err:
        print(err)
        print("usage:", sys.argv[0], "[-qv] ... [rooturl] ...")
        return 2

    # Options are applied in command-line order, so "-q -v" yields 1.
    for flag, _value in options:
        if flag == "-q":
            verbosity = 0
        elif flag == "-v":
            verbosity += 1

    sucker = Sucker()
    sucker.setflags(verbose=verbosity)
    user_agent = 'websucker/%s' % __version__
    sucker.urlopener.addheaders = [('User-agent', user_agent)]

    for root in roots:
        print("Adding root", root)
        sucker.addroot(root)

    print("Run...")
    sucker.run()
 | |
| 
 | |
class Sucker(webchecker.Checker):
    """Checker variant that keeps a local copy of every page it fetches.

    Each URL is mapped by savefilename() to a relative path below a
    directory named after the URL's host.  readhtml() serves a page from
    that path when the file already exists and otherwise fetches the page
    and stores it there.
    """

    # Overrides of Checker class-level flags; presumably they switch off
    # external-link and name-anchor checking -- confirm against webchecker.
    checkext = 0
    nonames = 1

    # SAM 11/13/99: in general, URLs are now URL pairs.
    # Since we've suppressed name anchor checking,
    # we can ignore the second dimension.

    def readhtml(self, url_pair):
        """Return ``(text, url)`` for the page named by *url_pair*.

        Only ``url_pair[0]`` is used to locate the page.  If the mirror
        file for that URL can be opened, it is read instead of fetching.
        Otherwise the page is fetched with ``self.openpage(url_pair)``,
        saved to disk, and *url* / the save path are updated when the
        response reports a different final URL.

        *text* is None when nothing could be opened or when
        ``self.checkforhtml()`` rejects the page; the fetched data is
        still saved in the latter case.
        """
        url = url_pair[0]
        text = None
        path = self.savefilename(url)
        try:
            f = open(path, "rb")
        except IOError:
            # No local copy yet: fetch it and store it.
            f = self.openpage(url_pair)
            if f:
                # try/finally so the response is closed even when reading
                # or mapping the redirected URL fails.
                try:
                    info = f.info()
                    nurl = f.geturl()
                    if nurl != url:
                        url = nurl
                        path = self.savefilename(url)
                    text = f.read()
                finally:
                    f.close()
                self.savefile(text, path)
                if not self.checkforhtml(info, url):
                    text = None
        else:
            # Local copy exists; there are no headers, so pass an empty
            # mapping to checkforhtml().
            with f:
                if self.checkforhtml({}, url):
                    text = f.read()
        return text, url

    def savefile(self, text, path):
        """Write *text* to *path*, creating parent directories as needed.

        Saving is best-effort: an OSError from creating the directories
        or writing the file is reported through ``self.message()`` and
        not raised, so one unwritable location does not abort the run.
        """
        dirname = os.path.dirname(path)
        try:
            makedirs(dirname)
            with open(path, "wb") as f:
                f.write(text)
        except OSError as msg:
            self.message("didn't save %s: %s", path, str(msg))
        else:
            self.message("saved %s", path)

    def savefilename(self, url):
        """Return the relative local path under which *url* is mirrored.

        The result is ``<host>/<rest>`` where *host* is the lower-cased
        host name without user info or port, and *rest* is everything
        after the host (path plus any query or fragment) with leading
        slashes removed.  An empty *rest*, or one ending in "/", gets
        "index.html" appended.  "/" is converted to ``os.sep``.

        NOTE(review): ".." segments in the URL are passed through
        unchanged, so a URL could map outside the host directory --
        consider validating before using this on untrusted sites.
        """
        # Imported here because the module-level ``import urllib`` does
        # not make urllib.parse available on Python 3.
        from urllib.parse import urlsplit

        parts = urlsplit(url)
        # .hostname is already lower-cased and free of user info and port;
        # it is None when the URL has no network location.
        host = parts.hostname or ""
        path = parts.path
        if parts.query:
            path = path + "?" + parts.query
        if parts.fragment:
            path = path + "#" + parts.fragment
        path = path.lstrip("/")
        if not path or path[-1] == "/":
            path = path + "index.html"
        if os.sep != "/":
            path = os.sep.join(path.split("/"))
        return os.path.join(host, path)
 | |
| 
 | |
def makedirs(dir):
    """Create directory *dir* together with any missing ancestors.

    An empty *dir* or an existing directory is a no-op.  If *dir* exists
    but is not a directory, an attempt is made to turn it into one: the
    existing entry is renamed to ``dir + ".bak"``, the directory is
    created, and the renamed entry is moved to ``dir/index.html``.  Any
    OSError during that conversion is ignored.

    A path with no final component (e.g. a filesystem root) that does not
    exist cannot be created; a message is printed and nothing is done.
    """
    if not dir:
        return

    if os.path.isdir(dir):
        return

    if os.path.exists(dir):
        # Something that is not a directory occupies the name; move it
        # inside the new directory as its index page (best effort).
        backup = dir + ".bak"
        try:
            os.rename(dir, backup)
            os.mkdir(dir)
            os.rename(backup, os.path.join(dir, "index.html"))
        except os.error:
            pass
        return

    parent, leaf = os.path.split(dir)
    if not leaf:
        print("Huh?  Don't know how to make dir", dir)
        return

    # Ensure the parent chain exists first, then create this level.
    makedirs(parent)
    os.mkdir(dir, 0o777)
 | |
| 
 | |
# Run as a script: use main()'s return value as the exit status,
# treating None (normal completion) as 0.
if __name__ == '__main__':
    exit_status = main() or 0
    sys.exit(exit_status)
 | 
