mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 07:31:38 +00:00 
			
		
		
		
	
		
			
	
	
		
			329 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
		
		
			
		
	
	
			329 lines
		
	
	
	
		
			8.2 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| 
								 | 
							
								# Tools for info file processing.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# XXX Need to be more careful with reading ahead searching for nodes.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								import regexp
							 | 
						||
| 
								 | 
							
								import string
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Exported exceptions.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								NoSuchNode = 'no such node'	# raised by find_node()
								NoSuchFile = 'no such file'	# raised by try_open()
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# The search path for info files; this is site-specific.
							 | 
						||
| 
								 | 
							
								# Directory names should end in a partname delimiter,
							 | 
						||
| 
								 | 
							
								# so they can simply be concatenated to a relative pathname.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								#INFOPATH = ['', ':Info.Ibrowse:', ':Info:']	# Mac
							 | 
						||
| 
								 | 
							
								# X11 on UNIX: the empty prefix tries the name as given first
								INFOPATH = [ \
									'', \
									'/usr/local/emacs/info/' \
								]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Tunable constants.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# All sizes are derived from BLOCKSIZE, so changing it scales the rest.
								BLOCKSIZE = 512			# Unit that read positions are aligned to
								FUZZ = BLOCKSIZE*2		# Distance to back up before a node search
								CHUNKSIZE = BLOCKSIZE*4		# Amount fetched per read for bulk scans
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Regular expressions used.
							 | 
						||
| 
								 | 
							
								# Note that it is essential that Python leaves unrecognized backslash
							 | 
						||
| 
								 | 
							
								# escapes in a string so they can be seen by regexp.compile!
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								# Each name is the bound match method of a compiled pattern.
								#
								# Separators: \037 (^_), optionally followed by \014 (^L).
								# findheader also captures the line following the separator.
								findheader = regexp.compile('\037\014?\n(.*\n)').match
								findescape = regexp.compile('\037').match
								#
								# Fields of a header line; group 1 is the field's value.
								parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
								findfirstline = regexp.compile('^.*\n').match
								findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
								findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
								findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
								findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
								#
								# Menus and cross references; group 1 is the topic, group 2 the
								# reference (a lone ':' when the topic doubles as the reference).
								findmenu = regexp.compile('^\* [mM]enu:').match
								findmenuitem = regexp.compile('^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
								findfootnote = regexp.compile('\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
								#
								# "(FILE)NODE" references; group 1 is FILE, group 2 is NODE.
								parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Get a node and all information pertaining to it.
							 | 
						||
| 
								 | 
							
								# This doesn't work if there is an indirect tag table,
							 | 
						||
| 
								 | 
							
								# and in general you are better off using icache.get_node() instead.
							 | 
						||
| 
								 | 
							
								# Functions get_whole_file() and get_file_node() provide part of the
							 | 
						||
| 
								 | 
							
								# functionality used by icache.
							 | 
						||
| 
								 | 
							
								# Raise NoSuchFile or NoSuchNode as appropriate.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def get_node(curfile, ref):
									# Resolve ref relative to curfile and fetch what it names.
									# The result is whatever get_whole_file() or get_file_node()
									# returns; a node name of '*' selects the whole file.
									file, node = parse_ref(curfile, ref)
									if node != '*':
										# Offset 0: there is no tags table hint here
										return get_file_node(file, 0, node)
									return get_whole_file(file)
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def get_whole_file(file):
									# Return a whole info file as a single pseudo-node:
									# (file, '*', header, menu, footnotes, text), where header is
									# a triple of empty strings and menu and footnotes are empty
									# lists.  May raise NoSuchFile (from try_open()).
									f = try_open(file)
									try:
										text = f.read()
									finally:
										# Close explicitly, also when the read fails,
										# rather than waiting for the object to be reclaimed
										f.close()
									header, menu, footnotes = ('', '', ''), [], []
									return file, '*', header, menu, footnotes, text
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def get_file_node(file, offset, node):
									# Fetch one node from a file and analyze it.
									# offset is the position hint handed to find_node()
									# (zero when there is no tags table).
									# Returns (file, node, header, menu, footnotes, text), with
									# node, header, menu and footnotes as produced by
									# analyze_node().
									# May raise NoSuchFile (from try_open()) or NoSuchNode
									# (from find_node()).
									f = try_open(file)
									try:
										text = find_node(f, offset, node)
									finally:
										# Close explicitly, also when the node is not found
										f.close()
									node, header, menu, footnotes = analyze_node(text)
									return file, node, header, menu, footnotes, text
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Parse a node reference into a file (possibly default) and node name.
							 | 
						||
| 
								 | 
							
								# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
							 | 
						||
| 
								 | 
							
								# Default file is the curfile argument; default node is Top.
							 | 
						||
| 
								 | 
							
								# A node value of '*' is a special case: the whole file should
							 | 
						||
| 
								 | 
							
								# be interpreted (by the caller!) as a single node.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def parse_ref(curfile, ref):
									# Split a reference into a (file, node) pair.
									# Start from the plain "NODE" interpretation and override
									# it when ref has the "(FILE)" or "(FILE)NODE" shape.
									file, node = curfile, ref
									regs = parsenoderef(ref)
									if regs:
										(a, b), (f0, f1), (n0, n1) = regs
										file, node = ref[f0:f1], ref[n0:n1]
									# An empty part falls back to its default
									return file or curfile, node or 'Top'
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Extract node name, links, menu and footnotes from the node text.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def analyze_node(text):
									# Return (node, (prev, next, up), menu, footnotes) for a node.
									# node, prev, next and up are strings ('' when absent);
									# menu and footnotes are lists of (topic, ref) pairs in
									# order of appearance.
									#
									# Get node name and links from the header line only,
									# so that look-alike text in the node body is never
									# mistaken for a link the header does not have.
									#
									match = findfirstline(text)
									if match:
										(a, b) = match[0]
										line = text[a:b]
									else:
										# No newline: the whole text is the header line
										line = text
									node = get_it(line, findnode)
									prev = get_it(line, findprev)
									next = get_it(line, findnext)
									up = get_it(line, findup)
									#
									# Get the menu items, if there is a menu;
									# items are searched for after the menu marker
									#
									menu = []
									match = findmenu(text)
									if match:
										(a, b) = match[0]
										while 1:
											match = findmenuitem(text, b)
											if not match:
												break
											(a, b), (a1, b1), (a2, b2) = match
											topic, ref = text[a1:b1], text[a2:b2]
											if ref == ':':
												# "Topic::" -- the topic is the reference
												ref = topic
											menu.append((topic, ref))
									#
									# Get the footnotes (cross references), scanning the
									# whole text from the start
									#
									footnotes = []
									b = 0
									while 1:
										match = findfootnote(text, b)
										if not match:
											break
										(a, b), (a1, b1), (a2, b2) = match
										topic, ref = text[a1:b1], text[a2:b2]
										if ref == ':':
											ref = topic
										footnotes.append((topic, ref))
									#
									return node, (prev, next, up), menu, footnotes
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def get_it(line, matcher):
									# Apply matcher to line and return the text of its first
									# group, or '' when the matcher finds nothing.
									regs = matcher(line)
									if regs:
										(m0, m1), (g0, g1) = regs
										return line[g0:g1]
									return ''
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Find a node in an open file.
							 | 
						||
| 
								 | 
							
								# The offset (from the tags table) is a hint about the node's position.
							 | 
						||
| 
								 | 
							
								# Pass zero if there is no tags table.
							 | 
						||
| 
								 | 
							
								# Raise NoSuchNode if the node isn't found.
							 | 
						||
| 
								 | 
							
								# NB: This seeks around in the file.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def find_node(f, offset, node):
							 | 
						||
| 
								 | 
							
									node = string.lower(node) # Just to be sure
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									# Position a little before the given offset,
							 | 
						||
| 
								 | 
							
									# so we may find the node even if it has moved around
							 | 
						||
| 
								 | 
							
									# in the file a little.
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
							 | 
						||
| 
								 | 
							
									f.seek(offset)
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									# Loop, hunting for a matching node header.
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									while 1:
							 | 
						||
| 
								 | 
							
										buf = f.read(CHUNKSIZE)
							 | 
						||
| 
								 | 
							
										if not buf:
							 | 
						||
| 
								 | 
							
											break
							 | 
						||
| 
								 | 
							
										i = 0
							 | 
						||
| 
								 | 
							
										while 1:
							 | 
						||
| 
								 | 
							
											match = findheader(buf, i)
							 | 
						||
| 
								 | 
							
											if match:
							 | 
						||
| 
								 | 
							
												(a,b), (a1,b1) = match
							 | 
						||
| 
								 | 
							
												start = a1
							 | 
						||
| 
								 | 
							
												line = buf[a1:b1]
							 | 
						||
| 
								 | 
							
												i = b
							 | 
						||
| 
								 | 
							
												match = parseheader(line)
							 | 
						||
| 
								 | 
							
												if match:
							 | 
						||
| 
								 | 
							
													(a,b), (a1,b1) = match
							 | 
						||
| 
								 | 
							
													key = string.lower(line[a1:b1])
							 | 
						||
| 
								 | 
							
													if key == node:
							 | 
						||
| 
								 | 
							
														# Got it!  Now read the rest.
							 | 
						||
| 
								 | 
							
														return read_node(f, buf[start:])
							 | 
						||
| 
								 | 
							
											elif findescape(buf, i):
							 | 
						||
| 
								 | 
							
												next = f.read(CHUNKSIZE)
							 | 
						||
| 
								 | 
							
												if not next:
							 | 
						||
| 
								 | 
							
													break
							 | 
						||
| 
								 | 
							
												buf = buf + next
							 | 
						||
| 
								 | 
							
											else:
							 | 
						||
| 
								 | 
							
												break
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									# If we get here, we didn't find it.  Too bad.
							 | 
						||
| 
								 | 
							
									#
							 | 
						||
| 
								 | 
							
									raise NoSuchNode, node
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Finish off getting a node (subroutine for find_node()).
							 | 
						||
| 
								 | 
							
								# The node begins at the start of buf and may end in buf;
							 | 
						||
| 
								 | 
							
								# if it doesn't end there, read additional data from f.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def read_node(f, buf):
									# Return the node text that starts at the beginning of buf.
									# More data is read from f until a separator is seen or the
									# file ends; the text stops before the separator and has
									# its trailing newlines removed.
									esc = findescape(buf, 0)
									while not esc:
										more = f.read(CHUNKSIZE)
										if not more:
											break
										# Only the newly added data needs searching
										searchfrom = len(buf)
										buf = buf + more
										esc = findescape(buf, searchfrom)
									if esc:
										end = esc[0][0]
									else:
										# End of file: the node runs to the end of buf
										end = len(buf)
									while end > 0 and buf[end-1] == '\n':
										end = end-1
									return buf[:end]
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Read backwards starting at offset until the beginning of a node is found.
							 | 
						||
| 
								 | 
							
								# Then return a buffer containing the beginning of the node,
							 | 
						||
| 
								 | 
							
								# with f positioned just after the buffer.
							 | 
						||
| 
								 | 
							
								# The buffer will contain at least the full header line of the node;
							 | 
						||
| 
								 | 
							
								# the caller should finish off with read_node() if it is the right node.
							 | 
						||
| 
								 | 
							
								# (It is also possible that the buffer extends beyond the node!)
							 | 
						||
| 
								 | 
							
								# Return an empty string if there is no node before the given offset.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def backup_node(f, offset):
									# Search backwards from offset for the last node header that
									# starts before it.  Returns the buffer from that header
									# line onwards, or '' when no header precedes offset.
									# The window [lo, hi) moves towards the start of the file.
									lo = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
									hi = offset
									while lo < hi:
										f.seek(lo)
										buf = f.read(hi-lo)
										pos = 0
										last = -1	# start of the last header line seen
										while 1:
											hdr = findheader(buf, pos)
											if hdr:
												(h0, h1), (l0, l1) = hdr
												last = l0
												pos = h1
												continue
											# A pending separator may belong to a header
											# cut off by the window; extend the buffer,
											# but never past the original offset.
											if hi >= offset or not findescape(buf, pos):
												break
											more = f.read(min(offset-hi, BLOCKSIZE))
											if not more:
												break
											buf = buf + more
											hi = hi + len(more)
										if last >= 0:
											return buf[last:]
										# Nothing here; step one chunk further back
										hi = lo
										lo = max(0, hi - CHUNKSIZE)
									return ''
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Make a tag table for the given file by scanning the file.
							 | 
						||
| 
								 | 
							
								# The file must be open for reading, and positioned at the beginning
							 | 
						||
| 
								 | 
							
								# (or wherever the hunt for tags must begin; it is read till the end).
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def make_tags(f):
							 | 
						||
| 
								 | 
							
									tags = {}
							 | 
						||
| 
								 | 
							
									while 1:
							 | 
						||
| 
								 | 
							
										offset = f.tell()
							 | 
						||
| 
								 | 
							
										buf = f.read(CHUNKSIZE)
							 | 
						||
| 
								 | 
							
										if not buf:
							 | 
						||
| 
								 | 
							
											break
							 | 
						||
| 
								 | 
							
										i = 0
							 | 
						||
| 
								 | 
							
										while 1:
							 | 
						||
| 
								 | 
							
											match = findheader(buf, i)
							 | 
						||
| 
								 | 
							
											if match:
							 | 
						||
| 
								 | 
							
												(a,b), (a1,b1) = match
							 | 
						||
| 
								 | 
							
												start = offset+a1
							 | 
						||
| 
								 | 
							
												line = buf[a1:b1]
							 | 
						||
| 
								 | 
							
												i = b
							 | 
						||
| 
								 | 
							
												match = parseheader(line)
							 | 
						||
| 
								 | 
							
												if match:
							 | 
						||
| 
								 | 
							
													(a,b), (a1,b1) = match
							 | 
						||
| 
								 | 
							
													key = string.lower(line[a1:b1])
							 | 
						||
| 
								 | 
							
													if tags.has_key(key):
							 | 
						||
| 
								 | 
							
														print 'Duplicate node:',
							 | 
						||
| 
								 | 
							
														print key
							 | 
						||
| 
								 | 
							
													tags[key] = '', start, line
							 | 
						||
| 
								 | 
							
											elif findescape(buf, i):
							 | 
						||
| 
								 | 
							
												next = f.read(CHUNKSIZE)
							 | 
						||
| 
								 | 
							
												if not next:
							 | 
						||
| 
								 | 
							
													break
							 | 
						||
| 
								 | 
							
												buf = buf + next
							 | 
						||
| 
								 | 
							
											else:
							 | 
						||
| 
								 | 
							
												break
							 | 
						||
| 
								 | 
							
									return tags
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# Try to open a file, return a file object if succeeds.
							 | 
						||
| 
								 | 
							
								# Raise NoSuchFile if the file can't be opened.
							 | 
						||
| 
								 | 
							
								# Should treat absolute pathnames specially.
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								def try_open(file):
							 | 
						||
| 
								 | 
							
									for dir in INFOPATH:
							 | 
						||
| 
								 | 
							
										try:
							 | 
						||
| 
								 | 
							
											return open(dir + file, 'r')
							 | 
						||
| 
								 | 
							
										except IOError:
							 | 
						||
| 
								 | 
							
											pass
							 | 
						||
| 
								 | 
							
									raise NoSuchFile, file
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								# A little test for the speed of make_tags().
							 | 
						||
| 
								 | 
							
								#
							 | 
						||
| 
								 | 
							
								TESTFILE = 'texinfo-1'
							 | 
						||
| 
								 | 
							
								def test_make_tags():
							 | 
						||
| 
								 | 
							
									import time
							 | 
						||
| 
								 | 
							
									f = try_open(TESTFILE)
							 | 
						||
| 
								 | 
							
									t1 = time.time()
							 | 
						||
| 
								 | 
							
									tags = make_tags(f)
							 | 
						||
| 
								 | 
							
									t2 = time.time()
							 | 
						||
| 
								 | 
							
									print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'sec.'
							 |