mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 a3b0b298e6
			
		
	
	
		a3b0b298e6
		
	
	
	
	
		
			
			just checked into Zope's ZCTextIndex branch. This reduces the size of the .chm file by about 100KB.
		
			
				
	
	
		
			471 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			471 lines
		
	
	
	
		
			15 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| """
 | |
|     Makes the necessary files to convert from plain html of
 | |
|     Python 1.5 and 1.5.x Documentation to
 | |
|     Microsoft HTML Help format version 1.1
 | |
|     Doesn't change the html's docs.
 | |
| 
 | |
|     by hernan.foffani@iname.com
 | |
|     no copyright and no responsibilities.
 | |
| 
 | |
|     modified by Dale Nagata for Python 1.5.2
 | |
| 
 | |
|     Renamed from make_chm.py to prechm.py, and checked into the Python
 | |
|     project, 19-Apr-2002 by Tim Peters.  Assorted modifications by Tim
 | |
|     and Fred Drake.  Obtained from Robin Dunn's .chm packaging of the
 | |
|     Python 2.2 docs, at <http://alldunn.com/python/>.
 | |
| """
 | |
| 
 | |
| import sys
 | |
| import os
 | |
| from formatter import NullWriter, AbstractFormatter
 | |
| from htmllib import HTMLParser
 | |
| import getopt
 | |
| import cgi
 | |
| 
 | |
# Help text printed by usage().  The script is called prechm.py (it was
# renamed from make_chm.py), and -v is mandatory: do_it() prints this text
# and exits when no version is given.  The accepted -v values are the keys
# of supported_libraries.
usage_mode = '''
Usage: prechm.py [-c] [-k] [-p] -v VERSION filename
    -c: does not build filename.hhc (Table of Contents)
    -k: does not build filename.hhk (Index)
    -p: does not build filename.hhp (Project File) or filename.stp
        (full text search stop list)
    -v VERSION: makes help for the Python VERSION docs (required)
        supported versions: 1.5, 1.5.1, 1.5.2, 2.0.0, 2.1.1, 2.2
'''
 | |
| 
 | |
# Template for the project file (*.hhp).  It is filled in with the
# %-operator and a mapping that supplies:
#
#    arch      the file basename (the "pythlp" in pythlp.hhp)
#    version   the doc version number (the "2.2" in "Python 2.2")
#
# The magic numbers on the window definition line under [WINDOWS] set
# most of the user-visible features (visible buttons, tabs, etc).
#
# 0x10384e selects the buttons shown by the help viewer.  The flag values
# below are taken from htmlhelp.h.  Not all of them actually work, and not
# all of those that work can be chosen from the Help Workshop GUI; in
# particular, the Zoom/Font button works but the GUI does not offer it.
# The flags in use are marked with 'x':
#
#    0x000002   Hide/Show   x
#    0x000004   Back        x
#    0x000008   Forward     x
#    0x000010   Stop
#    0x000020   Refresh
#    0x000040   Home        x
#    0x000080   Forward
#    0x000100   Back
#    0x000200   Notes
#    0x000400   Contents
#    0x000800   Locate      x
#    0x001000   Options     x
#    0x002000   Print       x
#    0x004000   Index
#    0x008000   Search
#    0x010000   History
#    0x020000   Favorites
#    0x040000   Jump 1
#    0x080000   Jump 2
#    0x100000   Zoom/Font   x
#    0x200000   TOC Next
#    0x400000   TOC Prev

project_template = (
    '\n'
    '[OPTIONS]\n'
    'Compiled file=%(arch)s.chm\n'
    'Contents file=%(arch)s.hhc\n'
    'Default Window=%(arch)s\n'
    'Default topic=index.html\n'
    'Display compile progress=No\n'
    'Full text search stop list file=%(arch)s.stp\n'
    'Full-text search=Yes\n'
    'Index file=%(arch)s.hhk\n'
    'Language=0x409\n'
    'Title=Python %(version)s Documentation\n'
    '\n'
    '[WINDOWS]\n'
    # The window definition must be one physical line in the .hhp file;
    # it is only split here to keep the source lines short.
    '%(arch)s="Python %(version)s Documentation",'
    '"%(arch)s.hhc","%(arch)s.hhk",'
    '"index.html","index.html",,,,,'
    '0x63520,220,0x10384e,[271,372,740,718],,,,,,,0\n'
    '\n'
    '[FILES]\n'
)
 | |
| 
 | |
# Fixed text written at the top of the table-of-contents (.hhc) file by
# do_content().  The registered-trademark sign is spelled as the HTML
# entity &reg; so that this source file stays pure ASCII (there is no
# source encoding declaration).  The <param> lines are indented with one
# tab each, written here as \t so the tabs survive editors that expand
# whitespace.
contents_header = '''\
<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML//EN">
<HTML>
<HEAD>
<meta name="GENERATOR" content="Microsoft&reg; HTML Help Workshop 4.1">
<!-- Sitemap 1.0 -->
</HEAD><BODY>
<OBJECT type="text/site properties">
\t<param name="Window Styles" value="0x801227">
\t<param name="ImageType" value="Folder">
</OBJECT>
<UL>
'''

# Fixed text written at the end of the table-of-contents file; closes the
# <UL>, <BODY> and <HTML> elements opened by contents_header.
contents_footer = '''\
</UL></BODY></HTML>
'''

# One sitemap entry.  Filled in with a (name, local path) pair; the name
# is the text shown to the user and the path is the page it links to.
object_sitemap = '''\
<OBJECT type="text/sitemap">
    <param name="Name" value="%s">
    <param name="Local" value="%s">
</OBJECT>
'''
 | |
| 
 | |
# Words that the full text search facility should not index; they are
# written to the file ARCH.stp.  The list has to stay pretty small:
# different versions of the MS docs give the maximum file size as 256 or
# 512 bytes (counting the \r\n at the end of each line).
# "and", "or", "not" and "near" are operators of the search language, so
# there would be no point in indexing them even if we wanted to.
stop_list = (
    '\n'
    'a  and  are  as  at\n'
    'be  but  by\n'
    'for\n'
    'if  in  into  is  it\n'
    'near  no  not\n'
    'of  on  or\n'
    'such\n'
    'that  the  their  then  there  these  they  this  to\n'
    'was  will  with\n'
)
 | |
| 
 | |
def addhtml(s):
    """Return s with '.html' appended unless it already ends that way.

    s is a string or None.  A false value (None or the empty string) is
    handed back unchanged.
    """
    if not s or s.endswith('.html'):
        return s
    return s + '.html'
 | |
| 
 | |
class Book:
    """Description of one "book" in HTMLHelp terms.

    A book corresponds to one doc directory in Python terms.  The page
    arguments may be given with or without the '.html' suffix; they are
    passed through addhtml(), so a page that is None stays None.

    Attributes:
        directory    -- directory holding the book's pages
        title        -- human-readable title of the book
        firstpage    -- first page of the book
        contentpage  -- page holding the table of contents, or None
        indexpage    -- page holding the index, or None
    """

    def __init__(self, directory, title, firstpage,
                 contentpage=None, indexpage=None):
        self.directory = directory
        self.title = title
        self.firstpage, self.contentpage, self.indexpage = [
            addhtml(page) for page in (firstpage, contentpage, indexpage)]
 | |
| 
 | |
# Books that make up the documentation set of each supported version.
# Maps the version string given with -v to a list of Book objects:
#     Book(directory, title, first page, content page, index page)
# The content page and the index page are optional.
supported_libraries = {
    '2.2': [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('whatsnew', "What's New", 'index', 'contents'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.1.1': [
        Book('.', 'Main page', 'index'),
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'index'),
        Book('dist', 'Distributing Python Modules', 'dist', 'index'),
    ],

    '2.0.0': [
        Book('.', 'Global Module Index', 'modindex'),
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
        Book('inst', 'Installing Python Modules', 'inst', 'contents'),
        Book('dist', 'Distributing Python Modules', 'dist', 'contents'),
    ],

    # <dnagata@creo.com> Apr 17/99: library for 1.5.2 version:
    # <hernan.foffani@iname.com> May 01/99: library for 1.5.2 (04/30/99):
    '1.5.2': [
        Book('tut', 'Tutorial', 'tut', 'node2'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref', 'contents', 'genindex'),
        Book('mac', 'Macintosh Reference', 'mac', 'contents', 'genindex'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
        Book('doc', 'Documenting Python', 'doc', 'contents'),
    ],

    # library for 1.5.1 version:
    '1.5.1': [
        Book('tut', 'Tutorial', 'tut', 'contents'),
        Book('lib', 'Library Reference', 'lib', 'contents', 'genindex'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'contents'),
        Book('api', 'Python/C API', 'api', 'contents', 'genindex'),
    ],

    # library for 1.5 version:
    '1.5': [
        Book('tut', 'Tutorial', 'tut', 'node1'),
        Book('lib', 'Library Reference', 'lib', 'node1', 'node268'),
        Book('ref', 'Language Reference', 'ref-1', 'ref-2', 'ref-11'),
        Book('ext', 'Extending and Embedding', 'ext', 'node1'),
        Book('api', 'Python/C API', 'api', 'node1', 'node48'),
    ],
}
 | |
| 
 | |
class AlmostNullWriter(NullWriter):
    """Writer that prints nothing but remembers flowing text.

    The text handed to send_flowing_data() is collected so that it can be
    fetched later.  This is used to capture the text between an anchor
    begin/end pair, e.g. for TOC entries.
    """

    def __init__(self):
        NullWriter.__init__(self)
        self.saved_clear()

    def send_flowing_data(self, data):
        """Remember data, stripped; runs of pure whitespace are dropped."""
        text = data.strip()
        if not text:
            return
        self.saved.append(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.saved = []

    def saved_get(self):
        """Return all saved text as one string, pieces joined by a space."""
        return ' '.join(self.saved)
 | |
| 
 | |
class HelpHtmlParser(HTMLParser):
    """HTML parser that writes a sitemap entry for each anchor it sees.

    Anchors are only processed while the parser is inside at least one
    group; here a group is a <DL> element.  For every processed anchor an
    object_sitemap entry is written to the output file, built from the
    anchor text captured by the formatter's writer and from the anchor's
    href prefixed with the relative path.

    The writer of the formatter passed in must provide saved_clear() and
    saved_get().
    """

    def __init__(self, formatter, path, output):
        HTMLParser.__init__(self, formatter)
        # Relative path that is prepended to every href.
        self.path = path
        # Output file.
        self.ft = output
        # Number of tabs used for pretty printing of the output.
        self.indent = 0
        # True when actively processing, else False (headers, footers,
        # etc).
        self.proc = False
        # Stack of hrefs from anchor begins.
        # XXX This shouldn't need to be a stack -- anchors shouldn't nest.
        # XXX See SF bug <http://www.python.org/sf/546579>.
        self.hrefstack = []

    def begin_group(self):
        """Enter a nested group and switch processing on."""
        self.proc = True
        self.indent += 1

    def finish_group(self):
        """Leave a group; processing stops once back at the top level."""
        self.indent -= 1
        self.proc = self.indent > 0

    def anchor_bgn(self, href, name, type):
        """Start capturing the text of an anchor and remember its href."""
        # XXX See SF bug <http://www.python.org/sf/546579>.
        # XXX index.html for the 2.2.1 language reference manual contains
        # XXX nested <a></a> tags in the entry for the section on blank
        # XXX lines.  We want to ignore the nested part completely, so an
        # XXX anchor that begins while another one is pending is skipped.
        if not self.proc or self.hrefstack:
            return
        self.saved_clear()
        self.hrefstack.append(href)

    def anchor_end(self):
        """Write the sitemap entry for the pending anchor, if there is one."""
        # XXX The stack is empty when the matching anchor_bgn() was
        # XXX skipped or its entry has already been written; see the XXX
        # XXX comment in anchor_bgn().
        if not (self.proc and self.hrefstack):
            return
        title = cgi.escape(self.saved_get(), True)
        target = self.path + '/' + self.hrefstack.pop()
        self.tab(object_sitemap % (title, target))

    def start_dl(self, atr_val):
        self.begin_group()

    def end_dl(self):
        self.finish_group()

    def do_dt(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")

    def write(self, text):
        """Write text to the output file."""
        self.ft.write(text)

    def tab(self, text=''):
        """Write self.indent tabs to the output file, then text (if any)."""
        self.write(self.indent * '\t')
        if not text:
            return
        self.write(text)

    def saved_clear(self):
        """Forget all saved text."""
        self.formatter.writer.saved_clear()

    def saved_get(self):
        """Return all saved text as a string."""
        return self.formatter.writer.saved_get()
 | |
| 
 | |
class IdxHlpHtmlParser(HelpHtmlParser):
    """Parser for index pages.

    Nothing special is needed here; the behaviour inherited from
    HelpHtmlParser seems to be enough.
    """
 | |
| 
 | |
class TocHlpHtmlParser(HelpHtmlParser):
    """Parser for table-of-contents pages.

    Both <DL> and <UL> elements of the input open a group and are written
    to the output as <UL> lists; <LI> items are copied as "<LI>".
    """

    def _open_list(self):
        # The group is entered first, so '<UL>' is written at the new
        # indentation depth.
        self.begin_group()
        self.tab('<UL>\n')

    def _close_list(self):
        # The group is left first, so '</UL>' is written one level
        # shallower than the '<UL>' that opened the list.
        self.finish_group()
        self.tab('</UL>\n')

    def start_dl(self, atr_val):
        self._open_list()

    def end_dl(self):
        self._close_list()

    def start_ul(self, atr_val):
        self._open_list()

    def end_ul(self):
        self._close_list()

    def do_li(self, atr_val):
        # no trailing newline on purpose!
        self.tab("<LI>")
 | |
| 
 | |
def index(path, indexpage, output):
    """Write the index entries found in one index page.

    path is the book's directory, indexpage the name of the index page
    inside that directory, and output the open index file the entries are
    written to.  The page is parsed with IdxHlpHtmlParser.

    Raises IOError if the page can't be opened.
    """
    parser = IdxHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + indexpage)
    # try/finally so that the page is closed even when parsing fails.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
 | |
| 
 | |
def content(path, contentpage, output):
    """Write the table-of-contents entries found in one contents page.

    path is the book's directory, contentpage the name of the contents
    page inside that directory, and output the open table-of-contents
    file the entries are written to.  The page is parsed with
    TocHlpHtmlParser.

    Raises IOError if the page can't be opened.
    """
    parser = TocHlpHtmlParser(AbstractFormatter(AlmostNullWriter()),
                              path, output)
    f = open(path + '/' + contentpage)
    # try/finally so that the page is closed even when parsing fails.
    try:
        parser.feed(f.read())
        parser.close()
    finally:
        f.close()
 | |
| 
 | |
| def do_index(library, output):
 | |
|     output.write('<UL>\n')
 | |
|     for book in library:
 | |
|         print '\t', book.title, '-', book.indexpage
 | |
|         if book.indexpage:
 | |
|             index(book.directory, book.indexpage, output)
 | |
|     output.write('</UL>\n')
 | |
| 
 | |
| def do_content(library, version, output):
 | |
|     output.write(contents_header)
 | |
|     for book in library:
 | |
|         print '\t', book.title, '-', book.firstpage
 | |
|         path = book.directory + "/" + book.firstpage
 | |
|         output.write('<LI>')
 | |
|         output.write(object_sitemap % (book.title, path))
 | |
|         if book.contentpage:
 | |
|             content(book.directory, book.contentpage, output)
 | |
|     output.write(contents_footer)
 | |
| 
 | |
def do_project(library, output, arch, version):
    """Write the project (.hhp) file, including its [FILES] section.

    library is the list of Book objects from supported_libraries for the
    version of the docs getting generated, output the open project file,
    arch the file basename and version the doc version number; the last
    two are substituted into project_template.

    Every .html and .css file found in a book's directory is listed once,
    as directory\\filename.
    """
    output.write(project_template % {'arch': arch, 'version': version})
    # Several books can share a directory, so remember what has already
    # been written to avoid listing a file twice.
    pathseen = {}
    for book in library:
        directory = book.directory
        for page in os.listdir(directory):
            if page.endswith('.html') or page.endswith('.css'):
                # Plain concatenation: the directory name must not be
                # used as part of a format string, since a '%' in it
                # would be taken as a conversion specifier.
                fullpath = directory + '\\' + page + '\n'
                if fullpath not in pathseen:
                    output.write(fullpath)
                    pathseen[fullpath] = True
 | |
| 
 | |
| def openfile(file):
 | |
|     try:
 | |
|         p = open(file, "w")
 | |
|     except IOError, msg:
 | |
|         print file, ":", msg
 | |
|         sys.exit(1)
 | |
|     return p
 | |
| 
 | |
| def usage():
 | |
|         print usage_mode
 | |
|         sys.exit(0)
 | |
| 
 | |
| def do_it(args = None):
 | |
|     if not args:
 | |
|         args = sys.argv[1:]
 | |
| 
 | |
|     if not args:
 | |
|         usage()
 | |
| 
 | |
|     try:
 | |
|         optlist, args = getopt.getopt(args, 'ckpv:')
 | |
|     except getopt.error, msg:
 | |
|         print msg
 | |
|         usage()
 | |
| 
 | |
|     if not args or len(args) > 1:
 | |
|         usage()
 | |
|     arch = args[0]
 | |
| 
 | |
|     version = None
 | |
|     for opt in optlist:
 | |
|         if opt[0] == '-v':
 | |
|             version = opt[1]
 | |
|             break
 | |
|     if not version:
 | |
|         usage()
 | |
| 
 | |
|     library = supported_libraries[version]
 | |
| 
 | |
|     if not (('-p','') in optlist):
 | |
|         fname = arch + '.stp'
 | |
|         f = openfile(fname)
 | |
|         print "Building stoplist", fname, "..."
 | |
|         words = stop_list.split()
 | |
|         words.sort()
 | |
|         for word in words:
 | |
|             print >> f, word
 | |
|         f.close()
 | |
| 
 | |
|         f = openfile(arch + '.hhp')
 | |
|         print "Building Project..."
 | |
|         do_project(library, f, arch, version)
 | |
|         if version == '2.0.0':
 | |
|             for image in os.listdir('icons'):
 | |
|                 f.write('icons'+ '\\' + image + '\n')
 | |
| 
 | |
|         f.close()
 | |
| 
 | |
|     if not (('-c','') in optlist):
 | |
|         f = openfile(arch + '.hhc')
 | |
|         print "Building Table of Content..."
 | |
|         do_content(library, version, f)
 | |
|         f.close()
 | |
| 
 | |
|     if not (('-k','') in optlist):
 | |
|         f = openfile(arch + '.hhk')
 | |
|         print "Building Index..."
 | |
|         do_index(library, f)
 | |
|         f.close()
 | |
| 
 | |
# Build the files only when run as a script, not when imported.
if __name__ == "__main__":
    do_it()
 |