mirror of
				https://github.com/python/cpython.git
				synced 2025-10-25 10:44:55 +00:00 
			
		
		
		
	added html parser and supporting cast
This commit is contained in:
		
							parent
							
								
									eb9e9d2b2a
								
							
						
					
					
						commit
						7c750e1e09
					
				
					 6 changed files with 3014 additions and 0 deletions
				
			
		
							
								
								
									
										408
									
								
								Lib/Para.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										408
									
								
								Lib/Para.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,408 @@ | |||
| # Text formatting abstractions | ||||
| 
 | ||||
| 
 | ||||
# Oft-used type object: the type of plain integers.  Integers stored in
# a paragraph's word list are anchor markers rather than words.
Int = type(0)
| 
 | ||||
| 
 | ||||
| # Represent a paragraph.  This is a list of words with associated | ||||
| # font and size information, plus indents and justification for the | ||||
| # entire paragraph. | ||||
| # Once the words have been added to a paragraph, it can be laid out | ||||
| # for different line widths.  Once laid out, it can be rendered at | ||||
| # different screen locations.  Once rendered, it can be queried | ||||
| # for mouse hits, and parts of the text can be highlighted | ||||
class Para:
    """A paragraph of words that can be laid out, rendered and queried.

    self.words holds the content.  Each word is a 7-tuple
    (font, text, width, space, stretch, ascent, descent); integers in
    the list are anchor markers (a positive id begins an anchor, 0 ends
    it).  layout() breaks the words into lines for a given width,
    render() draws them, and hitcheck(), whereis(), screenpos() and
    invert() relate screen positions to the text.
    """
    #
    def __init__(self):
        self.words = []  # The words (7-tuples) and anchor markers (ints)
        self.just = 'l'  # Justification: 'l', 'r', 'lr' or 'c'
        self.indent_left = self.indent_right = self.indent_hang = 0
        # Final lay-out parameters, may change
        self.left = self.top = self.right = self.bottom = \
            self.width = self.height = self.lines = None
    #
    # Add a word, computing size information for it.
    # Words may also be added manually by appending to self.words
    # Each word should be a 7-tuple:
    # (font, text, width, space, stretch, ascent, descent)
    def addword(self, d, font, text, space, stretch):
        """Measure text with d and append it as a word.

        space and stretch are given in units of the width of a blank
        in the current font and are stored in the units d reports.
        """
        if font is not None:
            d.setfont(font)
        width = d.textwidth(text)
        ascent = d.baseline()
        descent = d.lineheight() - ascent
        spw = d.textwidth(' ')
        self.words.append(
            (font, text, width, space * spw, stretch * spw, ascent, descent))
    #
    # Hooks to begin and end anchors -- insert numbers in the word list!
    def bgn_anchor(self, id):
        """Mark the start of anchor id at the current position."""
        self.words.append(id)
    #
    def end_anchor(self, id):
        """Mark the end of the current anchor (stored as 0; id is unused)."""
        self.words.append(0)
    #
    # Return the total length (width) of the text added so far, in pixels
    def getlength(self):
        total = 0
        for word in self.words:
            if not isinstance(word, int):
                total = total + word[2] + word[3]
        return total
    #
    # Tab to a given position (relative to the current left indent):
    # remove all stretch, add fixed space up to the new indent.
    # If the current position is already beyond the tab stop,
    # don't add any new space (but still remove the stretch)
    def tabto(self, tab):
        total = 0
        # Metrics for the filler word if the paragraph has no words yet
        ascent, descent = 1, 0
        for i, word in enumerate(self.words):
            if isinstance(word, int):
                continue
            font, text, width, space, stretch, ascent, descent = word
            self.words[i] = (font, text, width, space, 0, ascent, descent)
            total = total + width + space
        if total < tab:
            # The filler is one word, so it must be appended as one tuple
            self.words.append(
                (None, '', 0, tab - total, 0, ascent, descent))
    #
    # Make a hanging tag: tab to hang, increment indent_left by hang,
    # and reset indent_hang to -hang
    def makehangingtag(self, hang):
        self.tabto(hang)
        self.indent_left = self.indent_left + hang
        self.indent_hang = -hang
    #
    # Decide where the line breaks will be given some screen width
    def layout(self, linewidth):
        """Break the words into lines no wider than linewidth.

        Sets self.width, self.height and self.lines; each entry of
        self.lines is a tuple (wordcount, firstfont, charcount, width,
        stretch, ascent, descent).  Every line takes at least one item
        from the word list, so the loop always terminates.
        """
        self.width = linewidth
        height = 0
        self.lines = lines = []
        avail1 = self.width - self.indent_left - self.indent_right
        avail = avail1 - self.indent_hang  # only the first line hangs
        words = self.words
        i = 0
        n = len(words)
        lastfont = None
        while i < n:
            firstfont = lastfont
            charcount = 0
            width = 0
            stretch = 0
            ascent = 0
            descent = 0
            lsp = 0  # space after the last word taken on this line
            lst = 0  # stretch of the last word taken on this line
            j = i
            while i < n:
                word = words[i]
                if isinstance(word, int):
                    # A begin-anchor moves to the next line when this
                    # one is full -- but never as the first item of a
                    # line, or no progress would be made.
                    if word > 0 and width >= avail and i > j:
                        break
                    i = i + 1
                    continue
                fo, te, wi, sp, st, asc, de = word
                if width + wi > avail and width > 0 and wi > 0:
                    break
                if fo is not None:
                    lastfont = fo
                    if width == 0:
                        firstfont = fo
                charcount = charcount + len(te) + (sp > 0)
                width = width + wi + sp
                lsp = sp
                stretch = stretch + st
                lst = st
                ascent = max(ascent, asc)
                descent = max(descent, de)
                i = i + 1
            if i < n:
                # Hand trailing begin-anchors over to the next line,
                # provided something else stays on this one.  At the end
                # of the paragraph there is no next line, so they stay.
                k = i
                while k > j and isinstance(words[k - 1], int) and \
                        words[k - 1] > 0:
                    k = k - 1
                if k > j:
                    i = k
            width = width - lsp  # no space after the last word of a line
            if i < n:
                stretch = stretch - lst
            else:
                stretch = 0  # the last line is never stretched
            lines.append(
                (i - j, firstfont, charcount, width, stretch, ascent, descent))
            height = height + ascent + descent
            avail = avail1
        self.height = height
    #
    # Call a function for all words in a line
    def visit(self, wordfunc, anchorfunc):
        """Walk all laid-out lines, calling back for each item.

        wordfunc(self, line, word, h, v, h2, v2, isfirst, islast) is
        called for each word and anchorfunc(self, line, word, h, v) for
        each anchor marker.  The first callback result that is not None
        stops the walk and is returned.
        """
        avail1 = self.width - self.indent_left - self.indent_right
        avail = avail1 - self.indent_hang
        v = self.top
        i = 0
        for line in self.lines:
            wordcount, firstfont, charcount, width, stretch, \
                ascent, descent = line
            h = self.left + self.indent_left
            if i == 0:
                h = h + self.indent_hang
            extra = 0
            if self.just == 'r':
                h = h + avail - width
            elif self.just == 'c':
                h = h + (avail - width) // 2
            elif self.just == 'lr' and stretch > 0:
                extra = avail - width
            v2 = v + ascent + descent
            for j in range(i, i + wordcount):
                word = self.words[j]
                if isinstance(word, int):
                    ok = anchorfunc(self, line, word, h, v)
                    if ok is not None:
                        return ok
                    continue
                fo, te, wi, sp, st, asc, de = word
                if extra > 0 and stretch > 0:
                    # Hand out the remaining extra space in proportion
                    # to this word's share of the remaining stretch
                    ex = extra * st // stretch
                    extra = extra - ex
                    stretch = stretch - st
                else:
                    ex = 0
                h2 = h + wi + sp + ex
                ok = wordfunc(self, line, word, h, v,
                              h2, v2, (j == i), (j == i + wordcount - 1))
                if ok is not None:
                    return ok
                h = h2
            v = v2
            i = i + wordcount
            avail = avail1
    #
    # Render a paragraph in "drawing object" d, using the rectangle
    # given by (left, top, right) with an unspecified bottom.
    # Return the computed bottom of the text.
    def render(self, d, left, top, right):
        if self.width != right - left:
            self.layout(right - left)
        self.left = left
        self.top = top
        self.right = right
        self.bottom = self.top + self.height
        self.anchorid = 0
        try:
            self.d = d
            self.visit(self.__class__._renderword,
                       self.__class__._renderanchor)
        finally:
            self.d = None
        return self.bottom
    #
    def _renderword(self, line, word, h, v, h2, v2, isfirst, islast):
        if word[0] is not None:
            self.d.setfont(word[0])
        baseline = v + line[5]
        self.d.text((h, baseline - word[5]), word[1])
        if self.anchorid > 0:
            # Words inside an anchor are underlined
            self.d.line((h, baseline + 2), (h2, baseline + 2))
    #
    def _renderanchor(self, line, word, h, v):
        self.anchorid = word
    #
    # Return which anchor(s) was hit by the mouse
    def hitcheck(self, mouseh, mousev):
        self.mouseh = mouseh
        self.mousev = mousev
        self.anchorid = 0
        self.hits = []
        self.visit(self.__class__._hitcheckword,
                   self.__class__._hitcheckanchor)
        return self.hits
    #
    def _hitcheckword(self, line, word, h, v, h2, v2, isfirst, islast):
        if self.anchorid > 0 and h <= self.mouseh <= h2 and \
                v <= self.mousev <= v2:
            self.hits.append(self.anchorid)
    #
    def _hitcheckanchor(self, line, word, h, v):
        self.anchorid = word
    #
    # Return whether the given anchor id is present
    def hasanchor(self, id):
        return id in self.words or -id in self.words
    #
    # Extract the raw text from the word list, substituting one space
    # for non-empty inter-word space, and terminating with '\n'
    def extract(self):
        pieces = []
        for w in self.words:
            if not isinstance(w, int):
                pieces.append(w[1])
                if w[3]:
                    pieces.append(' ')
        pieces.append('\n')
        return ''.join(pieces)
    #
    # Return which character position was hit by the mouse, as
    # an offset in the entire text as returned by extract().
    # Return None if the mouse was not in this paragraph
    def whereis(self, d, mouseh, mousev):
        if mousev < self.top or mousev > self.bottom:
            return None
        self.mouseh = mouseh
        self.mousev = mousev
        self.lastfont = None
        self.charcount = 0
        try:
            self.d = d
            return self.visit(self.__class__._whereisword,
                              self.__class__._whereisanchor)
        finally:
            self.d = None
    #
    def _whereisword(self, line, word, h1, v1, h2, v2, isfirst, islast):
        fo, te, wi, sp, st, asc, de = word
        if fo is not None:
            self.lastfont = fo
        h = h1
        # The first and last word of a line also catch the margins
        if isfirst:
            h1 = 0
        if islast:
            h2 = 999999
        if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2):
            self.charcount = self.charcount + len(te) + (sp > 0)
            return
        if self.lastfont is not None:
            self.d.setfont(self.lastfont)
        cc = 0
        for c in te:
            cw = self.d.textwidth(c)
            if self.mouseh <= h + cw // 2:
                return self.charcount + cc
            cc = cc + 1
            h = h + cw
        self.charcount = self.charcount + cc
        if self.mouseh <= (h + h2) // 2:
            return self.charcount
        else:
            return self.charcount + 1
    #
    def _whereisanchor(self, line, word, h, v):
        pass
    #
    # Return screen position corresponding to position in paragraph.
    # Return tuple (h, vtop, vbaseline, vbottom).
    # This is more or less the inverse of whereis()
    def screenpos(self, d, pos):
        if pos < 0:
            ascent, descent = self.lines[0][5:7]
            return self.left, self.top, self.top + ascent, \
                self.top + ascent + descent
        self.pos = pos
        self.lastfont = None
        try:
            self.d = d
            ok = self.visit(self.__class__._screenposword,
                            self.__class__._screenposanchor)
        finally:
            self.d = None
        if ok is None:
            # Past the end of the text: use the end of the last line
            ascent, descent = self.lines[-1][5:7]
            ok = self.right, self.bottom - ascent - descent, \
                self.bottom - descent, self.bottom
        return ok
    #
    def _screenposword(self, line, word, h1, v1, h2, v2, isfirst, islast):
        fo, te, wi, sp, st, asc, de = word
        if fo is not None:
            self.lastfont = fo
        cc = len(te) + (sp > 0)
        if self.pos > cc:
            self.pos = self.pos - cc
            return
        if self.pos < cc:
            # Same guard as _whereisword: no font may have been set yet
            if self.lastfont is not None:
                self.d.setfont(self.lastfont)
            h = h1 + self.d.textwidth(te[:self.pos])
        else:
            h = h2
        ascent, descent = line[5:7]
        return h, v1, v1 + ascent, v2
    #
    def _screenposanchor(self, line, word, h, v):
        pass
    #
    # Invert the stretch of text between pos1 and pos2.
    # If pos1 is None, the beginning is implied;
    # if pos2 is None, the end is implied.
    # Undoes its own effect when called again with the same arguments
    def invert(self, d, pos1, pos2):
        if pos1 is None:
            pos1 = self.left, self.top, self.top, self.top
        else:
            pos1 = self.screenpos(d, pos1)
        if pos2 is None:
            pos2 = self.right, self.bottom, self.bottom, self.bottom
        else:
            pos2 = self.screenpos(d, pos2)
        h1, top1, baseline1, bottom1 = pos1
        h2, top2, baseline2, bottom2 = pos2
        if bottom1 <= top2:
            # Different lines: rest of the first line, then any whole
            # lines in between, then the start of the last line
            d.invert((h1, top1), (self.right, bottom1))
            h1 = self.left
            if bottom1 < top2:
                d.invert((h1, bottom1), (self.right, top2))
            top1, bottom1 = top2, bottom2
        d.invert((h1, top1), (h2, bottom2))
| 
 | ||||
| 
 | ||||
| # Test class Para | ||||
| # XXX This was last used on the Mac, hence the weird fonts... | ||||
def test():
    """Interactive test: show one paragraph per justification mode in a
    stdwin window and let the user select text with the mouse.

    Runs until the window is closed.
    """
    import stdwin
    # A star-import is not allowed inside a function, so the event
    # constants are referenced through the module instead.
    import stdwinevents
    words = ('The', 'quick', 'brown', 'fox', 'jumps', 'over',
             'the', 'lazy', 'dog.')
    paralist = []
    for just in ('l', 'r', 'lr', 'c'):
        p = Para()
        p.just = just
        p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1)
        for word in words[1:-1]:
            p.addword(stdwin, None, word, 1, 1)
        p.addword(stdwin, None, words[-1], 2, 4)
        p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0)
        p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0)
        paralist.append(p)
    window = stdwin.open('Para.test()')
    start = stop = selpara = None

    def invert_selection():
        # Toggle the highlight of the current selection; always release
        # the drawing object, even if inverting fails.
        d = window.begindrawing()
        try:
            selpara.invert(d, start, stop)
        finally:
            d.close()

    while 1:
        etype, win, detail = stdwin.getevent()
        if etype == stdwinevents.WE_CLOSE:
            break
        if etype == stdwinevents.WE_SIZE:
            window.change((0, 0), (1000, 1000))
        if etype == stdwinevents.WE_DRAW:
            width, height = window.getwinsize()
            d = None
            try:
                d = window.begindrawing()
                d.cliprect(detail)
                d.erase(detail)
                v = 0
                for p in paralist:
                    v = p.render(d, 0, v, width)
                    if p is selpara and \
                            start is not None and stop is not None:
                        p.invert(d, start, stop)
            finally:
                if d is not None:
                    d.close()
        if etype == stdwinevents.WE_MOUSE_DOWN:
            if selpara is not None and \
                    start is not None and stop is not None:
                # Remove the old highlight before starting a new selection
                invert_selection()
            start = stop = selpara = None
            mouseh, mousev = detail[0]
            for p in paralist:
                start = p.whereis(stdwin, mouseh, mousev)
                if start is not None:
                    selpara = p
                    break
        if etype == stdwinevents.WE_MOUSE_UP and \
                start is not None and selpara is not None:
            mouseh, mousev = detail[0]
            stop = selpara.whereis(stdwin, mouseh, mousev)
            if stop is None:
                start = selpara = None
            else:
                if start > stop:
                    start, stop = stop, start
                invert_selection()
    window.close()
							
								
								
									
										621
									
								
								Lib/fmt.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										621
									
								
								Lib/fmt.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,621 @@ | |||
| # Text formatting abstractions | ||||
| 
 | ||||
| 
 | ||||
| import string | ||||
| import Para | ||||
| 
 | ||||
| 
 | ||||
| # A formatter back-end object has one method that is called by the formatter: | ||||
| # addpara(p), where p is a paragraph object.  For example: | ||||
| 
 | ||||
| 
 | ||||
| # Formatter back-end to do nothing at all with the paragraphs | ||||
class NullBackEnd:
    """Formatter back-end that discards everything it is given.

    Also serves as the base class of the other back-ends in this file.
    """
    #
    def __init__(self):
        pass
    #
    def addpara(self, p):
        """Accept a completed paragraph p and ignore it."""
        pass
    #
    def bgn_anchor(self, id):
        pass
    #
    def end_anchor(self, id):
        pass
| 
 | ||||
| 
 | ||||
| # Formatter back-end to collect the paragraphs in a list | ||||
class SavingBackEnd(NullBackEnd):
    """Formatter back-end that collects the paragraphs in self.paralist.

    Positions in the collected text are "long" positions: tuples
    (paragraph index, character offset within that paragraph's
    extract() text).
    """
    #
    def __init__(self):
        self.paralist = []
    #
    def addpara(self, p):
        self.paralist.append(p)
    #
    def hitcheck(self, h, v):
        """Return the distinct anchor ids hit at screen position (h, v)."""
        hits = []
        for p in self.paralist:
            if p.top <= v <= p.bottom:
                for id in p.hitcheck(h, v):
                    if id not in hits:
                        hits.append(id)
        return hits
    #
    def extract(self):
        """Return the text of all paragraphs, concatenated."""
        return ''.join([p.extract() for p in self.paralist])
    #
    def extractpart(self, long1, long2):
        """Return the text between two long positions, in either order."""
        if long1 > long2:
            long1, long2 = long2, long1
        para1, pos1 = long1
        para2, pos2 = long2
        text = ''
        while para1 < para2:
            ptext = self.paralist[para1].extract()
            text = text + ptext[pos1:]
            pos1 = 0  # later paragraphs are taken from their start
            para1 = para1 + 1
        ptext = self.paralist[para2].extract()
        return text + ptext[pos1:pos2]
    #
    def whereis(self, d, h, v):
        """Return the long position at screen position (h, v), or None
        if no paragraph claims that position."""
        for i, p in enumerate(self.paralist):
            result = p.whereis(d, h, v)
            if result is not None:
                return i, result
        return None
    #
    def roundtowords(self, long1, long2):
        """Widen (long1, long2) outwards to the surrounding spaces."""
        i, offset = long1
        text = self.paralist[i].extract()
        while offset > 0 and text[offset - 1] != ' ':
            offset = offset - 1
        long1 = i, offset
        #
        i, offset = long2
        text = self.paralist[i].extract()
        n = len(text)
        # Stop before the final character (extract() ends with '\n')
        while offset < n - 1 and text[offset] != ' ':
            offset = offset + 1
        long2 = i, offset
        #
        return long1, long2
    #
    def roundtoparagraphs(self, long1, long2):
        """Widen (long1, long2) to whole paragraphs."""
        long1 = long1[0], 0
        long2 = long2[0], len(self.paralist[long2[0]].extract())
        return long1, long2
| 
 | ||||
| 
 | ||||
| # Formatter back-end to send the text directly to the drawing object | ||||
class WritingBackEnd(NullBackEnd):
    """Formatter back-end that renders each paragraph straight away on
    drawing object d, one below the other, in a column of the given
    width."""
    #
    def __init__(self, d, width):
        self.d = d
        self.width = width
        self.lineno = 0  # vertical position for the next paragraph
    #
    def addpara(self, p):
        # render() returns the bottom of p, which is where the next
        # paragraph starts
        self.lineno = p.render(self.d, 0, self.lineno, self.width)
| 
 | ||||
| 
 | ||||
| # A formatter receives a stream of formatting instructions and assembles | ||||
| # these into a stream of paragraphs passed on to a back-end.  The assembly is | ||||
| # parametrized by a text measurement object, which must match the output | ||||
| # operations of the back-end.  The back-end is responsible for splitting | ||||
| # paragraphs up in lines of a given maximum width.  (This is done because | ||||
| # in a windowing environment, when the window size changes, there is no | ||||
| # need to redo the assembly into paragraphs, but the splitting into lines | ||||
| # must be done taking the new window size into account.) | ||||
| 
 | ||||
| 
 | ||||
| # Formatter base class.  Initialize it with a text measurement object, | ||||
| # which is used for text measurements, and a back-end object, | ||||
| # which receives the completed paragraphs.  The formatting methods are: | ||||
| # setfont(font) | ||||
| # setleftindent(nspaces) | ||||
| # setjust(type) where type is 'l', 'c', 'r', or 'lr' | ||||
| # flush() | ||||
| # vspace(nlines) | ||||
| # needvspace(nlines) | ||||
| # addword(word, nspaces) | ||||
class BaseFormatter:
    """Assemble formatting instructions into paragraphs.

    d is a text measurement object (setfont, textwidth, lineheight,
    baseline) and b is a back-end whose addpara(p) receives each
    completed paragraph.
    """
    #
    def __init__(self, d, b):
        # Drawing object used for text measurements
        self.d = d
        #
        # BackEnd object receiving completed paragraphs
        self.b = b
        #
        # Parameters of the formatting model
        self.leftindent = 0
        # NOTE(review): rightindent is only applied to the paragraph in
        # progress when setrightindent() is called; new paragraphs do
        # not pick it up -- confirm whether that is intended.
        self.rightindent = 0
        self.just = 'l'
        self.font = None
        self.blanklines = 0
        #
        # Parameters derived from the current font
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent
        #
        # Parameter derived from the default font
        self.n_space = self.space
        #
        # Current paragraph being built
        self.para = None
        self.nospace = 1
        #
        # Font to set on the next word
        self.nextfont = None
    #
    def newpara(self):
        """Return a fresh paragraph object; subclasses may override."""
        return Para.Para()
    #
    def setfont(self, font):
        """Make font current and refresh the metrics derived from it."""
        if font is None:
            return
        self.font = self.nextfont = font
        d = self.d
        d.setfont(font)
        self.space = d.textwidth(' ')
        self.line = d.lineheight()
        self.ascent = d.baseline()
        self.descent = self.line - self.ascent
    #
    def setleftindent(self, nspaces):
        """Set the left indent, in blanks of the default font.

        If the paragraph in progress is short enough to fit in the
        added indent it becomes a hanging tag; otherwise it is flushed.
        """
        self.leftindent = int(self.n_space * nspaces)
        if self.para:
            hang = self.leftindent - self.para.indent_left
            if hang > 0 and self.para.getlength() <= hang:
                self.para.makehangingtag(hang)
                self.nospace = 1
            else:
                self.flush()
    #
    def setrightindent(self, nspaces):
        self.rightindent = int(self.n_space * nspaces)
        if self.para:
            self.para.indent_right = self.rightindent
            self.flush()
    #
    def setjust(self, just):
        self.just = just
        if self.para:
            self.para.just = self.just
    #
    def flush(self):
        """Hand the paragraph in progress, if any, to the back-end."""
        if self.para:
            self.b.addpara(self.para)
            self.para = None
            if self.font is not None:
                self.d.setfont(self.font)
        self.nospace = 1
    #
    def vspace(self, nlines):
        """Flush, then emit nlines of vertical space as a paragraph
        holding one empty word of that height."""
        self.flush()
        if nlines > 0:
            self.para = self.newpara()
            filler = (None, '', 0, 0, 0, int(nlines * self.line), 0)
            self.para.words.append(filler)
            self.flush()
            self.blanklines = self.blanklines + nlines
    #
    def needvspace(self, nlines):
        """Make sure at least nlines of blank space precede what follows."""
        self.flush()  # Just to be sure
        if nlines > self.blanklines:
            self.vspace(nlines - self.blanklines)
    #
    def addword(self, text, space):
        """Add a word followed by space blanks of the current font.

        Empty words at the start of a paragraph are dropped.
        """
        if self.nospace and not text:
            return
        self.nospace = 0
        self.blanklines = 0
        if not self.para:
            self.para = self.newpara()
            self.para.indent_left = self.leftindent
            self.para.just = self.just
            self.nextfont = self.font
        space = int(space * self.space)
        # One word is one 7-tuple; the space doubles as the stretch
        self.para.words.append(
            (self.nextfont, text, self.d.textwidth(text), space, space,
             self.ascent, self.descent))
        self.nextfont = None
    #
    def bgn_anchor(self, id):
        if not self.para:
            # Force an empty word so that there is a paragraph to mark
            self.nospace = 0
            self.addword('', 0)
        self.para.bgn_anchor(id)
    #
    def end_anchor(self, id):
        if not self.para:
            self.nospace = 0
            self.addword('', 0)
        self.para.end_anchor(id)
| 
 | ||||
| 
 | ||||
| # Measuring object for measuring text as viewed on a tty | ||||
class NullMeasurer:
    """Measure text as viewed on a tty: every character is one unit
    wide, every line one unit high, and fonts are ignored."""
    #
    def __init__(self):
        pass
    #
    def setfont(self, font):
        pass
    #
    def textwidth(self, text):
        return len(text)
    #
    def lineheight(self):
        return 1
    #
    def baseline(self):
        return 0
| 
 | ||||
| 
 | ||||
| # Drawing object for writing plain ASCII text to a file | ||||
class FileWriter:
    """Drawing object that writes plain ASCII text to the file fp,
    tracking the current line and column to position each string."""
    #
    def __init__(self, fp):
        self.fp = fp
        self.lineno, self.colno = 0, 0
    #
    def setfont(self, font):
        pass
    #
    def text(self, pos, str):
        """Write str at position pos, a tuple (h, v) of column and line.

        Raises ValueError if str contains a newline.  Empty strings
        are ignored.
        """
        h, v = pos
        if not str:
            return
        if '\n' in str:
            raise ValueError('can\'t write \\n')
        while self.lineno < v:
            self.fp.write('\n')
            self.colno, self.lineno = 0, self.lineno + 1
        while self.lineno > v:
            # XXX This should never happen...
            self.fp.write('\033[A')  # ANSI up arrow
            self.lineno = self.lineno - 1
        if self.colno < h:
            self.fp.write(' ' * (h - self.colno))
        elif self.colno > h:
            self.fp.write('\b' * (self.colno - h))
        self.colno = h
        self.fp.write(str)
        self.colno = h + len(str)
| 
 | ||||
| 
 | ||||
| # Formatting class to do nothing at all with the data | ||||
class NullFormatter(BaseFormatter):
    """Formatter that measures with a NullMeasurer and discards all
    paragraphs through a NullBackEnd."""
    #
    def __init__(self):
        d = NullMeasurer()
        b = NullBackEnd()
        BaseFormatter.__init__(self, d, b)
| 
 | ||||
| 
 | ||||
| # Formatting class to write directly to a file | ||||
class WritingFormatter(BaseFormatter):
    """Formatter that writes plain text to the file fp, laid out in
    lines of at most width characters."""
    #
    def __init__(self, fp, width):
        dm = NullMeasurer()
        dw = FileWriter(fp)
        b = WritingBackEnd(dw, width)
        BaseFormatter.__init__(self, dm, b)
        # Start as if one blank line was already written, so that a
        # needvspace() before any text adds nothing
        self.blanklines = 1
    #
    # Suppress multiple blank lines
    def needvspace(self, nlines):
        BaseFormatter.needvspace(self, min(1, nlines))
| 
 | ||||
| 
 | ||||
| # A "FunnyFormatter" writes ASCII text with a twist: *bold words*, | ||||
| # _italic text_ and _underlined words_, and `quoted text'. | ||||
| # It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman, | ||||
| # italic, bold, underline, quote). | ||||
| # Moreover, if the font is in upper case, the text is converted to | ||||
| # UPPER CASE. | ||||
class FunnyFormatter(WritingFormatter):
    """WritingFormatter that passes each paragraph through finalize()
    before it is flushed, so that font changes show up as markers."""
    #
    def flush(self):
        if self.para:
            finalize(self.para)
        WritingFormatter.flush(self)
| 
 | ||||
| 
 | ||||
| # Surrounds *bold words* and _italic text_ in a paragraph with | ||||
| # appropriate markers, fixing the size (assuming these characters' | ||||
| # width is 1). | ||||
# Marker put in front of the first word in a font, keyed by font name
openchar = \
    {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'}
# Marker put after the last word in a font, keyed by font name
closechar = \
    {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''}
def finalize(para):
    """Rewrite para.words in place so that font changes become markers.

    Runs of words in a font listed in openchar/closechar get the
    matching marker before their first and after their last word, with
    the word widths adjusted by the marker length.  Text in an upper
    case font is converted to upper case.  Integer anchor markers in
    the word list are left untouched.
    """
    oldfont = curfont = 'r'
    words = para.words
    # Temporary sentinel in the roman font, so that a run still open at
    # the end gets closed; deleted again below.
    words.append(('r', '', 0, 0, 0, 0, 0))
    for i in range(len(words)):
        if isinstance(words[i], int):
            continue
        fo, te, wi = words[i][:3]
        if fo is not None:
            curfont = fo
        if curfont != oldfont:
            if oldfont in closechar:
                c = closechar[oldfont]
                # Close the old run on the nearest earlier word that
                # has text, skipping empty words and anchor markers
                j = i - 1
                while j > 0 and (isinstance(words[j], int) or
                                 words[j][1] == ''):
                    j = j - 1
                if not isinstance(words[j], int):
                    fo1, te1, wi1 = words[j][:3]
                    words[j] = (fo1, te1 + c, wi1 + len(c)) + words[j][3:]
            if curfont in openchar and te:
                c = openchar[curfont]
                te = c + te
                wi = len(c) + wi
                words[i] = (fo, te, wi) + words[i][3:]
            # An empty word does not open the run yet; try again on the
            # next word
            if te:
                oldfont = curfont
            else:
                oldfont = 'r'
        if curfont in string.ascii_uppercase:
            te = te.upper()
            words[i] = (fo, te, wi) + words[i][3:]
    del words[-1]
| 
 | ||||
| 
 | ||||
| # Formatter back-end to draw the text in a window. | ||||
| # This has an option to draw while the paragraphs are being added, | ||||
| # to minimize the delay before the user sees anything. | ||||
| # This manages the entire "document" of the window. | ||||
| class StdwinBackEnd(SavingBackEnd): | ||||
| 	# | ||||
| 	def __init__(self, window, drawnow): | ||||
| 		self.window = window | ||||
| 		self.drawnow = drawnow | ||||
| 		self.width = window.getwinsize()[0] | ||||
| 		self.selection = None | ||||
| 		self.height = 0 | ||||
| 		window.setorigin(0, 0) | ||||
| 		window.setdocsize(0, 0) | ||||
| 		self.d = window.begindrawing() | ||||
| 		SavingBackEnd.__init__(self) | ||||
| 	# | ||||
| 	def finish(self): | ||||
| 		self.d.close() | ||||
| 		self.d = None | ||||
| 		self.window.setdocsize(0, self.height) | ||||
| 	# | ||||
| 	def addpara(self, p): | ||||
| 		self.paralist.append(p) | ||||
| 		if self.drawnow: | ||||
| 			self.height = \ | ||||
| 				p.render(self.d, 0, self.height, self.width) | ||||
| 		else: | ||||
| 			p.layout(self.width) | ||||
| 			p.left = 0 | ||||
| 			p.top = self.height | ||||
| 			p.right = self.width | ||||
| 			p.bottom = self.height + p.height | ||||
| 			self.height = p.bottom | ||||
| 	# | ||||
| 	def resize(self): | ||||
| 		self.window.change((0, 0), (self.width, self.height)) | ||||
| 		self.width = self.window.getwinsize()[0] | ||||
| 		self.height = 0 | ||||
| 		for p in self.paralist: | ||||
| 			p.layout(self.width) | ||||
| 			p.left = 0 | ||||
| 			p.top = self.height | ||||
| 			p.right = self.width | ||||
| 			p.bottom = self.height + p.height | ||||
| 			self.height = p.bottom | ||||
| 		self.window.change((0, 0), (self.width, self.height)) | ||||
| 		self.window.setdocsize(0, self.height) | ||||
| 	# | ||||
| 	def redraw(self, area): | ||||
| 		d = self.window.begindrawing() | ||||
| 		(left, top), (right, bottom) = area | ||||
| 		d.erase(area) | ||||
| 		d.cliprect(area) | ||||
| 		for p in self.paralist: | ||||
| 			if top < p.bottom and p.top < bottom: | ||||
| 				v = p.render(d, p.left, p.top, p.right) | ||||
| 		if self.selection: | ||||
| 			self.invert(d, self.selection) | ||||
| 		d.close() | ||||
| 	# | ||||
| 	def setselection(self, new): | ||||
| 		if new: | ||||
| 			long1, long2 = new | ||||
| 			pos1 = long1[:3] | ||||
| 			pos2 = long2[:3] | ||||
| 			new = pos1, pos2 | ||||
| 		if new <> self.selection: | ||||
| 			d = self.window.begindrawing() | ||||
| 			if self.selection: | ||||
| 				self.invert(d, self.selection) | ||||
| 			if new: | ||||
| 				self.invert(d, new) | ||||
| 			d.close() | ||||
| 			self.selection = new | ||||
| 	# | ||||
| 	def getselection(self): | ||||
| 		return self.selection | ||||
| 	# | ||||
| 	def extractselection(self): | ||||
| 		if self.selection: | ||||
| 			a, b = self.selection | ||||
| 			return self.extractpart(a, b) | ||||
| 		else: | ||||
| 			return None | ||||
| 	# | ||||
| 	def invert(self, d, region): | ||||
| 		long1, long2 = region | ||||
| 		if long1 > long2: long1, long2 = long2, long1 | ||||
| 		para1, pos1 = long1 | ||||
| 		para2, pos2 = long2 | ||||
| 		while para1 < para2: | ||||
| 			self.paralist[para1].invert(d, pos1, None) | ||||
| 			pos1 = None | ||||
| 			para1 = para1 + 1 | ||||
| 		self.paralist[para2].invert(d, pos1, pos2) | ||||
| 	# | ||||
| 	def search(self, prog): | ||||
| 		# Search the paragraphs for `prog` (a pattern string or an | ||||
| 		# already compiled regex object), select the match and | ||||
| 		# scroll it into view.  Return 1 if found, 0 otherwise. | ||||
| 		# A string pattern is lower-cased before compiling and the | ||||
| 		# paragraph text is lower-cased before matching, so string | ||||
| 		# searches ignore case. | ||||
| 		import regex, string | ||||
| 		if type(prog) == type(''): | ||||
| 			prog = regex.compile(string.lower(prog)) | ||||
| 		# iold: index of the paragraph where the current selection | ||||
| 		# starts, or -1 when there is no selection | ||||
| 		if self.selection: | ||||
| 			iold = self.selection[0][0] | ||||
| 		else: | ||||
| 			iold = -1 | ||||
| 		hit = None | ||||
| 		for i in range(len(self.paralist)): | ||||
| 			# Never look in the selected paragraph itself; before | ||||
| 			# it, only the first hit is remembered (as the | ||||
| 			# wrap-around candidate) | ||||
| 			if i == iold or i < iold and hit: | ||||
| 				continue | ||||
| 			p = self.paralist[i] | ||||
| 			text = string.lower(p.extract()) | ||||
| 			if prog.search(text) >= 0: | ||||
| 				a, b = prog.regs[0] | ||||
| 				long1 = i, a | ||||
| 				long2 = i, b | ||||
| 				hit = long1, long2 | ||||
| 				# A hit after the selection wins over any | ||||
| 				# earlier candidate and ends the search | ||||
| 				if i > iold: | ||||
| 					break | ||||
| 		if hit: | ||||
| 			self.setselection(hit) | ||||
| 			i = hit[0][0] | ||||
| 			p = self.paralist[i] | ||||
| 			self.window.show((p.left, p.top), (p.right, p.bottom)) | ||||
| 			return 1 | ||||
| 		else: | ||||
| 			return 0 | ||||
| 	# | ||||
| 	def showanchor(self, id): | ||||
| 		for i in range(len(self.paralist)): | ||||
| 			p = self.paralist[i] | ||||
| 			if p.hasanchor(id): | ||||
| 				long1 = i, 0 | ||||
| 				long2 = i, len(p.extract()) | ||||
| 				hit = long1, long2 | ||||
| 				self.setselection(hit) | ||||
| 				self.window.show( \ | ||||
| 					(p.left, p.top), (p.right, p.bottom)) | ||||
| 				break | ||||
| 
 | ||||
| 
 | ||||
| # GL extensions | ||||
| 
 | ||||
| class GLFontCache: | ||||
| 	# | ||||
| 	# Keeps track of the current font and caches font handles | ||||
| 	# obtained from the fm module, keyed by font key strings of | ||||
| 	# the form '<name> <size>'. | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		self.reset() | ||||
| 		self.setfont('') | ||||
| 	# | ||||
| 	# Forget the current font and empty the cache | ||||
| 	def reset(self): | ||||
| 		self.fontkey = None | ||||
| 		self.fonthandle = None | ||||
| 		self.fontinfo = None | ||||
| 		self.fontcache = {} | ||||
| 	# | ||||
| 	def close(self): | ||||
| 		self.reset() | ||||
| 	# | ||||
| 	# Make the font named by fontkey the current font. | ||||
| 	# An empty key means 'Times-Roman 12'; a key without a space | ||||
| 	# gets ' 12' appended.  Nothing happens when the (normalized) | ||||
| 	# key equals the current one. | ||||
| 	def setfont(self, fontkey): | ||||
| 		if fontkey == '': | ||||
| 			fontkey = 'Times-Roman 12' | ||||
| 		elif ' ' not in fontkey: | ||||
| 			fontkey = fontkey + ' 12' | ||||
| 		if fontkey == self.fontkey: | ||||
| 			return | ||||
| 		if self.fontcache.has_key(fontkey): | ||||
| 			handle = self.fontcache[fontkey] | ||||
| 		else: | ||||
| 			import string | ||||
| 			i = string.index(fontkey, ' ') | ||||
| 			name, sizestr = fontkey[:i], fontkey[i:] | ||||
| 			# NOTE(review): eval() runs the size part of the | ||||
| 			# key as an expression -- only pass trusted keys | ||||
| 			size = eval(sizestr) | ||||
| 			# key1 caches the handle returned by fm.findfont, | ||||
| 			# before scaling; key is the key with the size | ||||
| 			# written in its canonical form | ||||
| 			key1 = name + ' 1' | ||||
| 			key = name + ' ' + `size` | ||||
| 			# NB key may differ from fontkey! | ||||
| 			if self.fontcache.has_key(key): | ||||
| 				handle = self.fontcache[key] | ||||
| 			else: | ||||
| 				if self.fontcache.has_key(key1): | ||||
| 					handle = self.fontcache[key1] | ||||
| 				else: | ||||
| 					import fm | ||||
| 					handle = fm.findfont(name) | ||||
| 					self.fontcache[key1] = handle | ||||
| 				handle = handle.scalefont(size) | ||||
| 				# Remember the scaled handle under both the | ||||
| 				# key as given and the canonical key | ||||
| 				self.fontcache[fontkey] = \ | ||||
| 					self.fontcache[key] = handle | ||||
| 		self.fontkey = fontkey | ||||
| 		# Only refresh the font info and call the handle's | ||||
| 		# setfont() when the handle really changed | ||||
| 		if self.fonthandle <> handle: | ||||
| 			self.fonthandle = handle | ||||
| 			self.fontinfo = handle.getfontinfo() | ||||
| 			handle.setfont() | ||||
| 
 | ||||
| 
 | ||||
| class GLMeasurer(GLFontCache): | ||||
| 	# | ||||
| 	def textwidth(self, text): | ||||
| 		return self.fonthandle.getstrwidth(text) | ||||
| 	# | ||||
| 	def baseline(self): | ||||
| 		return self.fontinfo[6] - self.fontinfo[3] | ||||
| 	# | ||||
| 	def lineheight(self): | ||||
| 		return self.fontinfo[6] | ||||
| 
 | ||||
| 
 | ||||
| class GLWriter(GLFontCache): | ||||
| 	# | ||||
| 	# NOTES: | ||||
| 	# (1) Use gl.ortho2 to use X pixel coordinates! | ||||
| 	# | ||||
| 	def text(self, (h, v), text): | ||||
| 		import gl, fm | ||||
| 		gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3]) | ||||
| 		fm.prstr(text) | ||||
| 	# | ||||
| 	def setfont(self, fontkey): | ||||
| 		oldhandle = self.fonthandle | ||||
| 		GLFontCache.setfont(fontkey) | ||||
| 		if self.fonthandle <> oldhandle: | ||||
| 			handle.setfont() | ||||
| 
 | ||||
| 
 | ||||
| # Combined drawing object: measures text (GLMeasurer) and draws it | ||||
| # (GLWriter); this is what GLBackEnd uses as its `d` | ||||
| class GLMeasurerWriter(GLMeasurer, GLWriter): | ||||
| 	pass | ||||
| 
 | ||||
| 
 | ||||
| class GLBackEnd(SavingBackEnd): | ||||
| 	# | ||||
| 	# Back end that renders paragraphs into the GL window `wid` | ||||
| 	# as they are added, and can render them all again later. | ||||
| 	# | ||||
| 	def __init__(self, wid): | ||||
| 		import gl | ||||
| 		gl.winset(wid) | ||||
| 		self.wid = wid | ||||
| 		# NOTE(review): item 1 of gl.getsize() is used as the | ||||
| 		# width here and in redraw -- confirm that this is not | ||||
| 		# the vertical size | ||||
| 		self.width = gl.getsize()[1] | ||||
| 		self.height = 0 | ||||
| 		self.d = GLMeasurerWriter() | ||||
| 		SavingBackEnd.__init__(self) | ||||
| 	# | ||||
| 	def finish(self): | ||||
| 		pass | ||||
| 	# | ||||
| 	# Save the paragraph and render it right away below the | ||||
| 	# previous one; render returns the new bottom | ||||
| 	def addpara(self, p): | ||||
| 		self.paralist.append(p) | ||||
| 		self.height = p.render(self.d, 0, self.height, self.width) | ||||
| 	# | ||||
| 	# Render all saved paragraphs again from the top, using the | ||||
| 	# window's current width | ||||
| 	def redraw(self): | ||||
| 		import gl | ||||
| 		gl.winset(self.wid) | ||||
| 		width = gl.getsize()[1] | ||||
| 		if width <> self.width: | ||||
| 			# NOTE(review): setdocsize is assigned but never | ||||
| 			# read in this method | ||||
| 			setdocsize = 1 | ||||
| 			self.width = width | ||||
| 			for p in self.paralist: | ||||
| 				p.top = p.bottom = None | ||||
| 		d = self.d | ||||
| 		v = 0 | ||||
| 		for p in self.paralist: | ||||
| 			v = p.render(d, 0, v, width) | ||||
							
								
								
									
										635
									
								
								Lib/htmllib.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										635
									
								
								Lib/htmllib.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,635 @@ | |||
| # A parser for HTML documents | ||||
| 
 | ||||
| 
 | ||||
| # HTML: HyperText Markup Language; an SGML-like syntax used by WWW to | ||||
| # describe hypertext documents | ||||
| # | ||||
| # SGML: Standard Generalized Markup Language | ||||
| # | ||||
| # WWW: World-Wide Web; a distributed hypertext system developed at CERN | ||||
| # | ||||
| # CERN: European Particle Physics Laboratory in Geneva, Switzerland | ||||
| 
 | ||||
| 
 | ||||
| # This file is only concerned with parsing and formatting HTML | ||||
| # documents, not with the other (hypertext and networking) aspects of | ||||
| # the WWW project.  (It does support highlighting of anchors.) | ||||
| 
 | ||||
| 
 | ||||
| import os | ||||
| import sys | ||||
| import regex | ||||
| import string | ||||
| import sgmllib | ||||
| 
 | ||||
| 
 | ||||
| class HTMLParser(sgmllib.SGMLParser): | ||||
| 
 | ||||
| 	# Basic HTML parser: adds the 'bullet' entity and handlers for | ||||
| 	# the tags that introduce literal text.  Subclasses override | ||||
| 	# literal_bgn and literal_end to react to literal sections. | ||||
| 
 | ||||
| 	# Copy base class entities and add some | ||||
| 	# (a copy, so the base class's table is left unchanged) | ||||
| 	entitydefs = {} | ||||
| 	for key in sgmllib.SGMLParser.entitydefs.keys(): | ||||
| 		entitydefs[key] = sgmllib.SGMLParser.entitydefs[key] | ||||
| 	entitydefs['bullet'] = '*' | ||||
| 
 | ||||
| 	# Provided -- handlers for tags introducing literal text | ||||
| 	 | ||||
| 	def start_listing(self, attrs): | ||||
| 		self.setliteral('listing') | ||||
| 		self.literal_bgn('listing', attrs) | ||||
| 
 | ||||
| 	def end_listing(self): | ||||
| 		self.literal_end('listing') | ||||
| 
 | ||||
| 	def start_xmp(self, attrs): | ||||
| 		self.setliteral('xmp') | ||||
| 		self.literal_bgn('xmp', attrs) | ||||
| 
 | ||||
| 	def end_xmp(self): | ||||
| 		self.literal_end('xmp') | ||||
| 
 | ||||
| 	# <plaintext> has no end handler here: setnomoretags() is | ||||
| 	# called and literal_end is never invoked for it | ||||
| 	def do_plaintext(self, attrs): | ||||
| 		self.setnomoretags() | ||||
| 		self.literal_bgn('plaintext', attrs) | ||||
| 
 | ||||
| 	# To be overridden -- begin/end literal mode | ||||
| 	def literal_bgn(self, tag, attrs): pass | ||||
| 	def literal_end(self, tag): pass | ||||
| 
 | ||||
| 
 | ||||
| # Next level of sophistication -- collect anchors, title, nextid and isindex | ||||
| class CollectingParser(HTMLParser): | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		HTMLParser.__init__(self) | ||||
| 		self.savetext = None | ||||
| 		self.nextid = '' | ||||
| 		self.isindex = 0 | ||||
| 		self.title = '' | ||||
| 		self.inanchor = 0 | ||||
| 		self.anchors = [] | ||||
| 		self.anchornames = [] | ||||
| 		self.anchortypes = [] | ||||
| 	# | ||||
| 	def start_a(self, attrs): | ||||
| 		self.inanchor = 0 | ||||
| 		href = '' | ||||
| 		name = '' | ||||
| 		type = '' | ||||
| 		for attrname, value in attrs: | ||||
| 			if attrname == 'href': | ||||
| 				href = value | ||||
| 			if attrname == 'name=': | ||||
| 				name = value | ||||
| 			if attrname == 'type=': | ||||
| 				type = string.lower(value) | ||||
| 		if not (href or name): | ||||
| 			return | ||||
| 		self.anchors.append(href) | ||||
| 		self.anchornames.append(name) | ||||
| 		self.anchortypes.append(type) | ||||
| 		self.inanchor = len(self.anchors) | ||||
| 		if not href: | ||||
| 			self.inanchor = -self.inanchor | ||||
| 	# | ||||
| 	def end_a(self): | ||||
| 		if self.inanchor > 0: | ||||
| 			# Don't show anchors pointing into the current document | ||||
| 			if self.anchors[self.inanchor-1][:1] <> '#': | ||||
| 				self.handle_data('[' + `self.inanchor` + ']') | ||||
| 		self.inanchor = 0 | ||||
| 	# | ||||
| 	def start_header(self, attrs): pass | ||||
| 	def end_header(self): pass | ||||
| 	# | ||||
| 	# (head is the same as header) | ||||
| 	def start_head(self, attrs): pass | ||||
| 	def end_head(self): pass | ||||
| 	# | ||||
| 	def start_body(self, attrs): pass | ||||
| 	def end_body(self): pass | ||||
| 	# | ||||
| 	def do_nextid(self, attrs): | ||||
| 		self.nextid = attrs | ||||
| 	# | ||||
| 	def do_isindex(self, attrs): | ||||
| 		self.isindex = 1 | ||||
| 	# | ||||
| 	def start_title(self, attrs): | ||||
| 		self.savetext = '' | ||||
| 	# | ||||
| 	def end_title(self): | ||||
| 		if self.savetext <> None: | ||||
| 			self.title = self.savetext | ||||
| 			self.savetext = None | ||||
| 	# | ||||
| 	def handle_data(self, text): | ||||
| 		if self.savetext is not None: | ||||
| 			self.savetext = self.savetext + text | ||||
| 
 | ||||
| 
 | ||||
| # Formatting parser -- takes a formatter and a style sheet as arguments | ||||
| 
 | ||||
| # XXX The use of style sheets should change: for each tag and end tag | ||||
| # there should be a style definition, and a style definition should | ||||
| # encompass many more parameters: font, justification, indentation, | ||||
| # vspace before, vspace after, hanging tag... | ||||
| 
 | ||||
| # Patterns used by FormattingParser.handle_data to split text into | ||||
| # words (runs of non-whitespace) and the whitespace between them | ||||
| wordprog = regex.compile('[^ \t\n]*') | ||||
| spaceprog = regex.compile('[ \t\n]*') | ||||
| 
 | ||||
| class FormattingParser(CollectingParser): | ||||
| 
 | ||||
| 	def __init__(self, formatter, stylesheet): | ||||
| 		CollectingParser.__init__(self) | ||||
| 		self.fmt = formatter | ||||
| 		self.stl = stylesheet | ||||
| 		self.savetext = None | ||||
| 		self.compact = 0 | ||||
| 		self.nofill = 0 | ||||
| 		self.resetfont() | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def resetfont(self): | ||||
| 		self.fontstack = [] | ||||
| 		self.stylestack = [] | ||||
| 		self.fontset = self.stl.stdfontset | ||||
| 		self.style = ROMAN | ||||
| 		self.passfont() | ||||
| 
 | ||||
| 	def passfont(self): | ||||
| 		font = self.fontset[self.style] | ||||
| 		self.fmt.setfont(font) | ||||
| 
 | ||||
| 	def pushstyle(self, style): | ||||
| 		self.stylestack.append(self.style) | ||||
| 		self.style = min(style, len(self.fontset)-1) | ||||
| 		self.passfont() | ||||
| 
 | ||||
| 	def popstyle(self): | ||||
| 		self.style = self.stylestack[-1] | ||||
| 		del self.stylestack[-1] | ||||
| 		self.passfont() | ||||
| 
 | ||||
| 	def pushfontset(self, fontset, style): | ||||
| 		self.fontstack.append(self.fontset) | ||||
| 		self.fontset = fontset | ||||
| 		self.pushstyle(style) | ||||
| 
 | ||||
| 	def popfontset(self): | ||||
| 		self.fontset = self.fontstack[-1] | ||||
| 		del self.fontstack[-1] | ||||
| 		self.popstyle() | ||||
| 
 | ||||
| 	def flush(self): | ||||
| 		self.fmt.flush() | ||||
| 
 | ||||
| 	def setindent(self, n): | ||||
| 		self.fmt.setleftindent(n) | ||||
| 
 | ||||
| 	def needvspace(self, n): | ||||
| 		self.fmt.needvspace(n) | ||||
| 
 | ||||
| 	def close(self): | ||||
| 		HTMLParser.close(self) | ||||
| 		self.fmt.flush() | ||||
| 
 | ||||
| 	def handle_literal(self, text): | ||||
| 		lines = string.splitfields(text, '\n') | ||||
| 		for i in range(1, len(lines)): | ||||
| 			lines[i] = string.expandtabs(lines[i], 8) | ||||
| 		for line in lines[:-1]: | ||||
| 			self.fmt.addword(line, 0) | ||||
| 			self.fmt.flush() | ||||
| 			self.fmt.nospace = 0 | ||||
| 		for line in lines[-1:]: | ||||
| 			self.fmt.addword(line, 0) | ||||
| 
 | ||||
| 	def handle_data(self, text): | ||||
| 		if self.savetext is not None: | ||||
| 			self.savetext = self.savetext + text | ||||
| 			return | ||||
| 		if self.literal: | ||||
| 			self.handle_literal(text) | ||||
| 			return | ||||
| 		i = 0 | ||||
| 		n = len(text) | ||||
| 		while i < n: | ||||
| 			j = i + wordprog.match(text, i) | ||||
| 			word = text[i:j] | ||||
| 			i = j + spaceprog.match(text, j) | ||||
| 			self.fmt.addword(word, i-j) | ||||
| 			if self.nofill and '\n' in text[j:i]: | ||||
| 				self.fmt.flush() | ||||
| 				self.fmt.nospace = 0 | ||||
| 				i = j+1 | ||||
| 				while text[i-1] <> '\n': i = i+1 | ||||
| 
 | ||||
| 	def literal_bgn(self, tag, attrs): | ||||
| 		if tag == 'plaintext': | ||||
| 			self.flush() | ||||
| 		else: | ||||
| 			self.needvspace(1) | ||||
| 		self.pushfontset(self.stl.stdfontset, FIXED) | ||||
| 		self.setindent(self.stl.literalindent) | ||||
| 
 | ||||
| 	def literal_end(self, tag): | ||||
| 		self.needvspace(1) | ||||
| 		self.popfontset() | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def start_title(self, attrs): | ||||
| 		self.flush() | ||||
| 		self.savetext = '' | ||||
| 	# NB end_title is unchanged | ||||
| 
 | ||||
| 	def do_p(self, attrs): | ||||
| 		if self.compact: | ||||
| 			self.flush() | ||||
| 		else: | ||||
| 			self.needvspace(1) | ||||
| 
 | ||||
| 	def start_h1(self, attrs): | ||||
| 		self.needvspace(2) | ||||
| 		self.setindent(self.stl.h1indent) | ||||
| 		self.pushfontset(self.stl.h1fontset, BOLD) | ||||
| 		self.fmt.setjust('c') | ||||
| 
 | ||||
| 	def end_h1(self): | ||||
| 		self.popfontset() | ||||
| 		self.needvspace(2) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 		self.fmt.setjust('l') | ||||
| 
 | ||||
| 	def start_h2(self, attrs): | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.h2indent) | ||||
| 		self.pushfontset(self.stl.h2fontset, BOLD) | ||||
| 
 | ||||
| 	def end_h2(self): | ||||
| 		self.popfontset() | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def start_h3(self, attrs): | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 		self.pushfontset(self.stl.h3fontset, BOLD) | ||||
| 
 | ||||
| 	def end_h3(self): | ||||
| 		self.popfontset() | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def start_h4(self, attrs): | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 		self.pushfontset(self.stl.stdfontset, BOLD) | ||||
| 
 | ||||
| 	def end_h4(self): | ||||
| 		self.popfontset() | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	start_h5 = start_h4 | ||||
| 	end_h5 = end_h4 | ||||
| 
 | ||||
| 	start_h6 = start_h5 | ||||
| 	end_h6 = end_h5 | ||||
| 
 | ||||
| 	start_h7 = start_h6 | ||||
| 	end_h7 = end_h6 | ||||
| 
 | ||||
| 	def start_ul(self, attrs): | ||||
| 		self.needvspace(1) | ||||
| 		for attrname, value in attrs: | ||||
| 			if attrname == 'compact': | ||||
| 				self.compact = 1 | ||||
| 				self.setindent(0) | ||||
| 				break | ||||
| 		else: | ||||
| 			self.setindent(self.stl.ulindent) | ||||
| 
 | ||||
| 	start_dir = start_menu = start_ol = start_ul | ||||
| 
 | ||||
| 	do_li = do_p | ||||
| 
 | ||||
| 	def end_ul(self): | ||||
| 		self.compact = 0 | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	end_dir = end_menu = end_ol = end_ul | ||||
| 
 | ||||
| 	def start_dl(self, attrs): | ||||
| 		for attrname, value in attrs: | ||||
| 			if attrname == 'compact': | ||||
| 				self.compact = 1 | ||||
| 		self.needvspace(1) | ||||
| 
 | ||||
| 	def end_dl(self): | ||||
| 		self.compact = 0 | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def do_dt(self, attrs): | ||||
| 		if self.compact: | ||||
| 			self.flush() | ||||
| 		else: | ||||
| 			self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 
 | ||||
| 	def do_dd(self, attrs): | ||||
| 		self.fmt.addword('', 1) | ||||
| 		self.setindent(self.stl.ddindent) | ||||
| 
 | ||||
| 	def start_address(self, attrs): | ||||
| 		self.compact = 1 | ||||
| 		self.needvspace(1) | ||||
| 		self.fmt.setjust('r') | ||||
| 
 | ||||
| 	def end_address(self): | ||||
| 		self.compact = 0 | ||||
| 		self.needvspace(1) | ||||
| 		self.setindent(self.stl.stdindent) | ||||
| 		self.fmt.setjust('l') | ||||
| 
 | ||||
| 	def start_pre(self, attrs): | ||||
| 		self.needvspace(1) | ||||
| 		self.nofill = self.nofill + 1 | ||||
| 		self.pushstyle(FIXED) | ||||
| 
 | ||||
| 	def end_pre(self): | ||||
| 		self.popstyle() | ||||
| 		self.nofill = self.nofill - 1 | ||||
| 		self.needvspace(1) | ||||
| 
 | ||||
| 	start_typewriter = start_pre | ||||
| 	end_typewriter = end_pre | ||||
| 
 | ||||
| 	def do_img(self, attrs): | ||||
| 		self.fmt.addword('(image)', 0) | ||||
| 
 | ||||
| 	# Physical styles | ||||
| 
 | ||||
| 	def start_tt(self, attrs): self.pushstyle(FIXED) | ||||
| 	def end_tt(self): self.popstyle() | ||||
| 
 | ||||
| 	def start_b(self, attrs): self.pushstyle(BOLD) | ||||
| 	def end_b(self): self.popstyle() | ||||
| 
 | ||||
| 	def start_i(self, attrs): self.pushstyle(ITALIC) | ||||
| 	def end_i(self): self.popstyle() | ||||
| 
 | ||||
| 	def start_u(self, attrs): self.pushstyle(ITALIC) # Underline??? | ||||
| 	def end_u(self): self.popstyle() | ||||
| 
 | ||||
| 	def start_r(self, attrs): self.pushstyle(ROMAN) # Not official | ||||
| 	def end_r(self): self.popstyle() | ||||
| 
 | ||||
| 	# Logical styles | ||||
| 
 | ||||
| 	start_em = start_i | ||||
| 	end_em = end_i | ||||
| 
 | ||||
| 	start_strong = start_b | ||||
| 	end_strong = end_b | ||||
| 
 | ||||
| 	start_code = start_tt | ||||
| 	end_code = end_tt | ||||
| 
 | ||||
| 	start_samp = start_tt | ||||
| 	end_samp = end_tt | ||||
| 
 | ||||
| 	start_kbd = start_tt | ||||
| 	end_kbd = end_tt | ||||
| 
 | ||||
| 	start_file = start_tt # unofficial | ||||
| 	end_file = end_tt | ||||
| 
 | ||||
| 	start_var = start_i | ||||
| 	end_var = end_i | ||||
| 
 | ||||
| 	start_dfn = start_i | ||||
| 	end_dfn = end_i | ||||
| 
 | ||||
| 	start_cite = start_i | ||||
| 	end_cite = end_i | ||||
| 
 | ||||
| 	start_hp1 = start_i | ||||
| 	end_hp1 = start_i | ||||
| 
 | ||||
| 	start_hp2 = start_b | ||||
| 	end_hp2 = end_b | ||||
| 
 | ||||
| 	def unknown_starttag(self, tag, attrs): | ||||
| 		print '*** unknown <' + tag + '>' | ||||
| 
 | ||||
| 	def unknown_endtag(self, tag): | ||||
| 		print '*** unknown </' + tag + '>' | ||||
| 
 | ||||
| 
 | ||||
| # An extension of the formatting parser which formats anchors differently. | ||||
| class AnchoringParser(FormattingParser): | ||||
| 
 | ||||
| 	def start_a(self, attrs): | ||||
| 		FormattingParser.start_a(self, attrs) | ||||
| 		if self.inanchor: | ||||
| 			self.fmt.bgn_anchor(self.inanchor) | ||||
| 
 | ||||
| 	def end_a(self): | ||||
| 		if self.inanchor: | ||||
| 			self.fmt.end_anchor(self.inanchor) | ||||
| 			self.inanchor = 0 | ||||
| 
 | ||||
| 
 | ||||
| # Style sheet -- this is never instantiated, but the attributes | ||||
| # of the class object itself are used to specify fonts to be used | ||||
| # for various paragraph styles. | ||||
| # A font set is a non-empty list of fonts, in the order: | ||||
| # [roman, italic, bold, fixed]. | ||||
| # When a style is not available the nearest lower style is used | ||||
| 
 | ||||
| # Style numbers; FormattingParser uses them as indexes into a font set | ||||
| ROMAN = 0 | ||||
| ITALIC = 1 | ||||
| BOLD = 2 | ||||
| FIXED = 3 | ||||
| 
 | ||||
| class NullStylesheet: | ||||
| 	# Base style sheet: every font set holds the single font None; | ||||
| 	# the other style sheets inherit the indents given here. | ||||
| 	# Fonts -- none | ||||
| 	stdfontset = [None] | ||||
| 	h1fontset = [None] | ||||
| 	h2fontset = [None] | ||||
| 	h3fontset = [None] | ||||
| 	# Indents | ||||
| 	stdindent = 2 | ||||
| 	ddindent = 25 | ||||
| 	ulindent = 4 | ||||
| 	h1indent = 0 | ||||
| 	h2indent = 0 | ||||
| 	literalindent = 0 | ||||
| 
 | ||||
| 
 | ||||
| class X11Stylesheet(NullStylesheet): | ||||
| 	# Font sets given as X11 font name patterns.  The header font | ||||
| 	# sets have no fixed font; FormattingParser.pushstyle then | ||||
| 	# falls back to the last font in the set. | ||||
| 	stdfontset = [ \ | ||||
| 		'-*-helvetica-medium-r-normal-*-*-100-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-medium-o-normal-*-*-100-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-bold-r-normal-*-*-100-100-*-*-*-*-*', \ | ||||
| 		'-*-courier-medium-r-normal-*-*-100-100-*-*-*-*-*', \ | ||||
| 		] | ||||
| 	h1fontset = [ \ | ||||
| 		'-*-helvetica-medium-r-normal-*-*-180-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-medium-o-normal-*-*-180-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-bold-r-normal-*-*-180-100-*-*-*-*-*', \ | ||||
| 		] | ||||
| 	h2fontset = [ \ | ||||
| 		'-*-helvetica-medium-r-normal-*-*-140-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-medium-o-normal-*-*-140-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-bold-r-normal-*-*-140-100-*-*-*-*-*', \ | ||||
| 		] | ||||
| 	h3fontset = [ \ | ||||
| 		'-*-helvetica-medium-r-normal-*-*-120-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-medium-o-normal-*-*-120-100-*-*-*-*-*', \ | ||||
| 		'-*-helvetica-bold-r-normal-*-*-120-100-*-*-*-*-*', \ | ||||
| 		] | ||||
| 	ddindent = 40 | ||||
| 
 | ||||
| 
 | ||||
| class MacStylesheet(NullStylesheet): | ||||
| 	stdfontset = [ \ | ||||
| 		('Geneva', 'p', 10), \ | ||||
| 		('Geneva', 'i', 10), \ | ||||
| 		('Geneva', 'b', 10), \ | ||||
| 		('Monaco', 'p', 10), \ | ||||
| 		] | ||||
| 	h1fontset = [ \ | ||||
| 		('Geneva', 'p', 18), \ | ||||
| 		('Geneva', 'i', 18), \ | ||||
| 		('Geneva', 'b', 18), \ | ||||
| 		('Monaco', 'p', 18), \ | ||||
| 		] | ||||
| 	h3fontset = [ \ | ||||
| 		('Geneva', 'p', 14), \ | ||||
| 		('Geneva', 'i', 14), \ | ||||
| 		('Geneva', 'b', 14), \ | ||||
| 		('Monaco', 'p', 14), \ | ||||
| 		] | ||||
| 	h3fontset = [ \ | ||||
| 		('Geneva', 'p', 12), \ | ||||
| 		('Geneva', 'i', 12), \ | ||||
| 		('Geneva', 'b', 12), \ | ||||
| 		('Monaco', 'p', 12), \ | ||||
| 		] | ||||
| 
 | ||||
| 
 | ||||
| # Style sheet to use with stdwin: the Macintosh fonts when os.name | ||||
| # is 'mac', the X11 fonts otherwise | ||||
| if os.name == 'mac': | ||||
| 	StdwinStylesheet = MacStylesheet | ||||
| else: | ||||
| 	StdwinStylesheet = X11Stylesheet | ||||
| 
 | ||||
| 
 | ||||
| class GLStylesheet(NullStylesheet): | ||||
| 	# Font sets given as '<name> <size>' strings, each in the | ||||
| 	# order [roman, italic, bold, fixed] | ||||
| 	stdfontset = [ \ | ||||
| 		'Helvetica 10', \ | ||||
| 		'Helvetica-Italic 10', \ | ||||
| 		'Helvetica-Bold 10', \ | ||||
| 		'Courier 10', \ | ||||
| 		] | ||||
| 	h1fontset = [ \ | ||||
| 		'Helvetica 18', \ | ||||
| 		'Helvetica-Italic 18', \ | ||||
| 		'Helvetica-Bold 18', \ | ||||
| 		'Courier 18', \ | ||||
| 		] | ||||
| 	h2fontset = [ \ | ||||
| 		'Helvetica 14', \ | ||||
| 		'Helvetica-Italic 14', \ | ||||
| 		'Helvetica-Bold 14', \ | ||||
| 		'Courier 14', \ | ||||
| 		] | ||||
| 	h3fontset = [ \ | ||||
| 		'Helvetica 12', \ | ||||
| 		'Helvetica-Italic 12', \ | ||||
| 		'Helvetica-Bold 12', \ | ||||
| 		'Courier 12', \ | ||||
| 		] | ||||
| 
 | ||||
| 
 | ||||
| # Test program -- formats a document to standard output, 79 columns | ||||
| # wide, and reports how long parsing and formatting took, exclusive | ||||
| # of the time needed to read the file | ||||
| 
 | ||||
| def test(): | ||||
| 	import fmt | ||||
| 	import time | ||||
| 	if sys.argv[1:]: file = sys.argv[1] | ||||
| 	else: file = 'test.html' | ||||
| 	data = open(file, 'r').read() | ||||
| 	t0 = time.time() | ||||
| 	fmtr = fmt.WritingFormatter(sys.stdout, 79) | ||||
| 	p = FormattingParser(fmtr, NullStylesheet) | ||||
| 	p.feed(data) | ||||
| 	p.close() | ||||
| 	t1 = time.time() | ||||
| 	print | ||||
| 	print '*** Formatting time:', round(t1-t0, 3), 'seconds.' | ||||
| 
 | ||||
| 
 | ||||
| # Test program using stdwin | ||||
| 
 | ||||
| def testStdwin(): | ||||
| 	import stdwin, fmt | ||||
| 	from stdwinevents import * | ||||
| 	if sys.argv[1:]: file = sys.argv[1] | ||||
| 	else: file = 'test.html' | ||||
| 	data = open(file, 'r').read() | ||||
| 	window = stdwin.open('testStdwin') | ||||
| 	b = None | ||||
| 	while 1: | ||||
| 		etype, ewin, edetail = stdwin.getevent() | ||||
| 		if etype == WE_CLOSE: | ||||
| 			break | ||||
| 		if etype == WE_SIZE: | ||||
| 			window.setdocsize(0, 0) | ||||
| 			window.setorigin(0, 0) | ||||
| 			window.change((0, 0), (10000, 30000)) # XXX | ||||
| 		if etype == WE_DRAW: | ||||
| 			if not b: | ||||
| 				b = fmt.StdwinBackEnd(window, 1) | ||||
| 				f = fmt.BaseFormatter(b.d, b) | ||||
| 				p = FormattingParser(f, \ | ||||
| 							    MacStylesheet) | ||||
| 				p.feed(data) | ||||
| 				p.close() | ||||
| 				b.finish() | ||||
| 			else: | ||||
| 				b.redraw(edetail) | ||||
| 	window.close() | ||||
| 
 | ||||
| 
 | ||||
| # Test program using GL | ||||
| 
 | ||||
| def testGL(): | ||||
| 	# Format the file named by the first command line argument | ||||
| 	# (default 'test.html') into a 600x600 GL window, then keep | ||||
| 	# the window up for five seconds. | ||||
| 	import gl, GL, fmt | ||||
| 	if sys.argv[1:]: file = sys.argv[1] | ||||
| 	else: file = 'test.html' | ||||
| 	data = open(file, 'r').read() | ||||
| 	W, H = 600, 600 | ||||
| 	gl.foreground() | ||||
| 	gl.prefsize(W, H) | ||||
| 	wid = gl.winopen('testGL') | ||||
| 	# Bottom and top are passed swapped (H, 0), presumably so that | ||||
| 	# the vertical coordinate grows downward as the formatter | ||||
| 	# expects -- confirm against the GL documentation | ||||
| 	gl.ortho2(0, W, H, 0) | ||||
| 	# Clear to white, then draw in black | ||||
| 	gl.color(GL.WHITE) | ||||
| 	gl.clear() | ||||
| 	gl.color(GL.BLACK) | ||||
| 	b = fmt.GLBackEnd(wid) | ||||
| 	f = fmt.BaseFormatter(b.d, b) | ||||
| 	p = FormattingParser(f, GLStylesheet) | ||||
| 	p.feed(data) | ||||
| 	p.close() | ||||
| 	b.finish() | ||||
| 	# | ||||
| 	import time | ||||
| 	time.sleep(5) | ||||
| 
 | ||||
| 
 | ||||
| # When run as a script, run the test that formats to standard output | ||||
| if __name__ == '__main__': | ||||
| 	test() | ||||
							
								
								
									
										408
									
								
								Lib/lib-old/Para.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										408
									
								
								Lib/lib-old/Para.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,408 @@ | |||
| # Text formatting abstractions | ||||
| 
 | ||||
| 
 | ||||
| # Oft-used type object | ||||
| Int = type(0) | ||||
| 
 | ||||
| 
 | ||||
| # Represent a paragraph.  This is a list of words with associated | ||||
| # font and size information, plus indents and justification for the | ||||
| # entire paragraph. | ||||
| # Once the words have been added to a paragraph, it can be laid out | ||||
| # for different line widths.  Once laid out, it can be rendered at | ||||
| # different screen locations.  Once rendered, it can be queried | ||||
| # for mouse hits, and parts of the text can be highlighted | ||||
| class Para: | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		self.words = [] # The words | ||||
| 		self.just = 'l' # Justification: 'l', 'r', 'lr' or 'c' | ||||
| 		self.indent_left = self.indent_right = self.indent_hang = 0 | ||||
| 		# Final lay-out parameters, may change | ||||
| 		self.left = self.top = self.right = self.bottom = \ | ||||
| 			self.width = self.height = self.lines = None | ||||
| 	# | ||||
| 	# Add a word, computing size information for it. | ||||
| 	# Words may also be added manually by appending to self.words | ||||
| 	# Each word should be a 7-tuple: | ||||
| 	# (font, text, width, space, stretch, ascent, descent) | ||||
| 	def addword(self, d, font, text, space, stretch): | ||||
| 		if font <> None: | ||||
| 			d.setfont(font) | ||||
| 		width = d.textwidth(text) | ||||
| 		ascent = d.baseline() | ||||
| 		descent = d.lineheight() - ascent | ||||
| 		spw = d.textwidth(' ') | ||||
| 		space = space * spw | ||||
| 		stretch = stretch * spw | ||||
| 		tuple = (font, text, width, space, stretch, ascent, descent) | ||||
| 		self.words.append(tuple) | ||||
| 	# | ||||
| 	# Hooks to begin and end anchors -- insert numbers in the word list! | ||||
| 	def bgn_anchor(self, id): | ||||
| 		self.words.append(id) | ||||
| 	# | ||||
| 	def end_anchor(self, id): | ||||
| 		self.words.append(0) | ||||
| 	# | ||||
| 	# Return the total length (width) of the text added so far, in pixels | ||||
| 	def getlength(self): | ||||
| 		total = 0 | ||||
| 		for word in self.words: | ||||
| 			if type(word) <> Int: | ||||
| 				total = total + word[2] + word[3] | ||||
| 		return total | ||||
| 	# | ||||
| 	# Tab to a given position (relative to the current left indent): | ||||
| 	# remove all stretch, add fixed space up to the new indent. | ||||
| 	# If the current position is already beyond the tab stop, | ||||
| 	# don't add any new space (but still remove the stretch) | ||||
| 	def tabto(self, tab): | ||||
| 		total = 0 | ||||
| 		as, de = 1, 0 | ||||
| 		for i in range(len(self.words)): | ||||
| 			word = self.words[i] | ||||
| 			if type(word) == Int: continue | ||||
| 			fo, te, wi, sp, st, as, de = word | ||||
| 			self.words[i] = fo, te, wi, sp, 0, as, de | ||||
| 			total = total + wi + sp | ||||
| 		if total < tab: | ||||
| 			self.words.append(None, '', 0, tab-total, 0, as, de) | ||||
| 	# | ||||
| 	# Make a hanging tag: tab to hang, increment indent_left by hang, | ||||
| 	# and reset indent_hang to -hang | ||||
| 	def makehangingtag(self, hang): | ||||
| 		self.tabto(hang) | ||||
| 		self.indent_left = self.indent_left + hang | ||||
| 		self.indent_hang = -hang | ||||
| 	# | ||||
| 	# Decide where the line breaks will be given some screen width | ||||
| 	def layout(self, linewidth): | ||||
| 		self.width = linewidth | ||||
| 		height = 0 | ||||
| 		self.lines = lines = [] | ||||
| 		avail1 = self.width - self.indent_left - self.indent_right | ||||
| 		avail = avail1 - self.indent_hang | ||||
| 		words = self.words | ||||
| 		i = 0 | ||||
| 		n = len(words) | ||||
| 		lastfont = None | ||||
| 		while i < n: | ||||
| 			firstfont = lastfont | ||||
| 			charcount = 0 | ||||
| 			width = 0 | ||||
| 			stretch = 0 | ||||
| 			ascent = 0 | ||||
| 			descent = 0 | ||||
| 			lsp = 0 | ||||
| 			j = i | ||||
| 			while i < n: | ||||
| 				word = words[i] | ||||
| 				if type(word) == Int: | ||||
| 					if word > 0 and width >= avail: | ||||
| 						break | ||||
| 					i = i+1 | ||||
| 					continue | ||||
| 				fo, te, wi, sp, st, as, de = word | ||||
| 				if width + wi > avail and width > 0 and wi > 0: | ||||
| 					break | ||||
| 				if fo <> None: | ||||
| 					lastfont = fo | ||||
| 					if width == 0: | ||||
| 						firstfont = fo | ||||
| 				charcount = charcount + len(te) + (sp > 0) | ||||
| 				width = width + wi + sp | ||||
| 				lsp = sp | ||||
| 				stretch = stretch + st | ||||
| 				lst = st | ||||
| 				ascent = max(ascent, as) | ||||
| 				descent = max(descent, de) | ||||
| 				i = i+1 | ||||
| 			while i > j and type(words[i-1]) == Int and \ | ||||
| 				words[i-1] > 0: i = i-1 | ||||
| 			width = width - lsp | ||||
| 			if i < n: | ||||
| 				stretch = stretch - lst | ||||
| 			else: | ||||
| 				stretch = 0 | ||||
| 			tuple = i-j, firstfont, charcount, width, stretch, \ | ||||
| 				ascent, descent | ||||
| 			lines.append(tuple) | ||||
| 			height = height + ascent + descent | ||||
| 			avail = avail1 | ||||
| 		self.height = height | ||||
| 	# | ||||
| 	# Call a function for all words in a line | ||||
| 	def visit(self, wordfunc, anchorfunc): | ||||
| 		# Walk every laid-out line in self.lines and every entry of | ||||
| 		# self.words on it.  Integer entries (anchors) are passed to | ||||
| 		#   anchorfunc(self, tuple, word, h, v) | ||||
| 		# and word tuples to | ||||
| 		#   wordfunc(self, tuple, word, h, v, h2, v2, isfirst, islast) | ||||
| 		# where (h, v)-(h2, v2) is the cell of the word on screen. | ||||
| 		# The first callback result that is not None ends the walk and | ||||
| 		# is returned; otherwise the method falls off the end. | ||||
| 		avail1 = self.width - self.indent_left - self.indent_right | ||||
| 		# The first line is narrower by the hanging indent | ||||
| 		avail = avail1 - self.indent_hang | ||||
| 		v = self.top | ||||
| 		i = 0 | ||||
| 		for tuple in self.lines: | ||||
| 			wordcount, firstfont, charcount, width, stretch, \ | ||||
| 				ascent, descent = tuple | ||||
| 			h = self.left + self.indent_left | ||||
| 			if i == 0: h = h + self.indent_hang | ||||
| 			# extra is the slack to spread over the words for 'lr' | ||||
| 			extra = 0 | ||||
| 			if self.just == 'r': h = h + avail - width | ||||
| 			elif self.just == 'c': h = h + (avail - width) / 2 | ||||
| 			elif self.just == 'lr' and stretch > 0: | ||||
| 				extra = avail - width | ||||
| 			v2 = v + ascent + descent | ||||
| 			for j in range(i, i+wordcount): | ||||
| 				word = self.words[j] | ||||
| 				if type(word) == Int: | ||||
| 					ok = anchorfunc(self, tuple, word, \ | ||||
| 							h, v) | ||||
| 					if ok <> None: return ok | ||||
| 					continue | ||||
| 				fo, te, wi, sp, st, as, de = word | ||||
| 				if extra > 0 and stretch > 0: | ||||
| 					# Give this word its share of the slack in | ||||
| 					# proportion to its stretch, then shrink | ||||
| 					# both totals so the rest divides what is left | ||||
| 					ex = extra * st / stretch | ||||
| 					extra = extra - ex | ||||
| 					stretch = stretch - st | ||||
| 				else: | ||||
| 					ex = 0 | ||||
| 				h2 = h + wi + sp + ex | ||||
| 				ok = wordfunc(self, tuple, word, h, v, \ | ||||
| 					h2, v2, (j==i), (j==i+wordcount-1)) | ||||
| 				if ok <> None: return ok | ||||
| 				h = h2 | ||||
| 			v = v2 | ||||
| 			i = i + wordcount | ||||
| 			# All lines after the first use the full width | ||||
| 			avail = avail1 | ||||
| 	# | ||||
| 	# Render a paragraph in "drawing object" d, using the rectangle | ||||
| 	# given by (left, top, right) with an unspecified bottom. | ||||
| 	# Return the computed bottom of the text. | ||||
| 	def render(self, d, left, top, right): | ||||
| 		if self.width <> right-left: | ||||
| 			self.layout(right-left) | ||||
| 		self.left = left | ||||
| 		self.top = top | ||||
| 		self.right = right | ||||
| 		self.bottom = self.top + self.height | ||||
| 		self.anchorid = 0 | ||||
| 		try: | ||||
| 			self.d = d | ||||
| 			self.visit(self.__class__._renderword, \ | ||||
| 				   self.__class__._renderanchor) | ||||
| 		finally: | ||||
| 			self.d = None | ||||
| 		return self.bottom | ||||
| 	# | ||||
| 	def _renderword(self, tuple, word, h, v, h2, v2, isfirst, islast): | ||||
| 		if word[0] <> None: self.d.setfont(word[0]) | ||||
| 		baseline = v + tuple[5] | ||||
| 		self.d.text((h, baseline - word[5]), word[1]) | ||||
| 		if self.anchorid > 0: | ||||
| 			self.d.line((h, baseline+2), (h2, baseline+2)) | ||||
| 	# | ||||
| 	def _renderanchor(self, tuple, word, h, v): | ||||
| 		# Anchor callback for render(): remember the anchor id so that | ||||
| 		# _renderword() underlines following words while it is > 0. | ||||
| 		self.anchorid = word | ||||
| 	# | ||||
| 	# Return which anchor(s) was hit by the mouse | ||||
| 	def hitcheck(self, mouseh, mousev): | ||||
| 		self.mouseh = mouseh | ||||
| 		self.mousev = mousev | ||||
| 		self.anchorid = 0 | ||||
| 		self.hits = [] | ||||
| 		self.visit(self.__class__._hitcheckword, \ | ||||
| 			   self.__class__._hitcheckanchor) | ||||
| 		return self.hits | ||||
| 	# | ||||
| 	def _hitcheckword(self, tuple, word, h, v, h2, v2, isfirst, islast): | ||||
| 		if self.anchorid > 0 and h <= self.mouseh <= h2 and \ | ||||
| 			v <= self.mousev <= v2: | ||||
| 			self.hits.append(self.anchorid) | ||||
| 	# | ||||
| 	def _hitcheckanchor(self, tuple, word, h, v): | ||||
| 		# Anchor callback for hitcheck(): track the current anchor id | ||||
| 		# (_hitcheckword() only records ids that are > 0). | ||||
| 		self.anchorid = word | ||||
| 	# | ||||
| 	# Return whether the given anchor id is present | ||||
| 	def hasanchor(self, id): | ||||
| 		# True if either id or its negation occurs in the word list | ||||
| 		# (presumably the begin and end markers of the anchor -- | ||||
| 		# TODO confirm against bgn_anchor/end_anchor). | ||||
| 		return id in self.words or -id in self.words | ||||
| 	# | ||||
| 	# Extract the raw text from the word list, substituting one space | ||||
| 	# for non-empty inter-word space, and terminating with '\n' | ||||
| 	def extract(self): | ||||
| 		text = '' | ||||
| 		for w in self.words: | ||||
| 			if type(w) <> Int: | ||||
| 				word = w[1] | ||||
| 				if w[3]: word = word + ' ' | ||||
| 				text = text + word | ||||
| 		return text + '\n' | ||||
| 	# | ||||
| 	# Return which character position was hit by the mouse, as | ||||
| 	# an offset in the entire text as returned by extract(). | ||||
| 	# Return None if the mouse was not in this paragraph | ||||
| 	def whereis(self, d, mouseh, mousev): | ||||
| 		if mousev < self.top or mousev > self.bottom: | ||||
| 			return None | ||||
| 		self.mouseh = mouseh | ||||
| 		self.mousev = mousev | ||||
| 		self.lastfont = None | ||||
| 		self.charcount = 0 | ||||
| 		try: | ||||
| 			self.d = d | ||||
| 			return self.visit(self.__class__._whereisword, \ | ||||
| 					  self.__class__._whereisanchor) | ||||
| 		finally: | ||||
| 			self.d = None | ||||
| 	# | ||||
| 	def _whereisword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): | ||||
| 		# Word callback for whereis().  If the mouse is not in this | ||||
| 		# word's cell, add the word's length to self.charcount and | ||||
| 		# return None so the visit continues; otherwise return the | ||||
| 		# character offset nearest to the mouse. | ||||
| 		fo, te, wi, sp, st, as, de = word | ||||
| 		if fo <> None: self.lastfont = fo | ||||
| 		h = h1 | ||||
| 		# Widen the hit area so that positions left of the first word | ||||
| 		# or right of the last word on a line still select that word | ||||
| 		if isfirst: h1 = 0 | ||||
| 		if islast: h2 = 999999 | ||||
| 		if not (v1 <= self.mousev <= v2 and h1 <= self.mouseh <= h2): | ||||
| 			self.charcount = self.charcount + len(te) + (sp > 0) | ||||
| 			return | ||||
| 		if self.lastfont <> None: | ||||
| 			self.d.setfont(self.lastfont) | ||||
| 		cc = 0 | ||||
| 		for c in te: | ||||
| 			# A character is chosen when the mouse is left of its middle | ||||
| 			cw = self.d.textwidth(c) | ||||
| 			if self.mouseh <= h + cw/2: | ||||
| 				return self.charcount + cc | ||||
| 			cc = cc+1 | ||||
| 			h = h+cw | ||||
| 		self.charcount = self.charcount + cc | ||||
| 		# Past the last character: choose before or after the | ||||
| 		# inter-word space, whichever side of its middle the mouse is on | ||||
| 		if self.mouseh <= (h+h2) / 2: | ||||
| 			return self.charcount | ||||
| 		else: | ||||
| 			return self.charcount + 1 | ||||
| 	# | ||||
| 	def _whereisanchor(self, tuple, word, h, v): | ||||
| 		# Anchor callback for whereis(): anchors are ignored here | ||||
| 		pass | ||||
| 	# | ||||
| 	# Return screen position corresponding to position in paragraph. | ||||
| 	# Return tuple (h, vtop, vbaseline, vbottom). | ||||
| 	# This is more or less the inverse of whereis() | ||||
| 	def screenpos(self, d, pos): | ||||
| 		if pos < 0: | ||||
| 			ascent, descent = self.lines[0][5:7] | ||||
| 			return self.left, self.top, self.top + ascent, \ | ||||
| 				self.top + ascent + descent | ||||
| 		self.pos = pos | ||||
| 		self.lastfont = None | ||||
| 		try: | ||||
| 			self.d = d | ||||
| 			ok = self.visit(self.__class__._screenposword, \ | ||||
| 					self.__class__._screenposanchor) | ||||
| 		finally: | ||||
| 			self.d = None | ||||
| 		if ok == None: | ||||
| 			ascent, descent = self.lines[-1][5:7] | ||||
| 			ok = self.right, self.bottom - ascent - descent, \ | ||||
| 				self.bottom - descent, self.bottom | ||||
| 		return ok | ||||
| 	# | ||||
| 	def _screenposword(self, tuple, word, h1, v1, h2, v2, isfirst, islast): | ||||
| 		fo, te, wi, sp, st, as, de = word | ||||
| 		if fo <> None: self.lastfont = fo | ||||
| 		cc = len(te) + (sp > 0) | ||||
| 		if self.pos > cc: | ||||
| 			self.pos = self.pos - cc | ||||
| 			return | ||||
| 		if self.pos < cc: | ||||
| 			self.d.setfont(self.lastfont) | ||||
| 			h = h1 + self.d.textwidth(te[:self.pos]) | ||||
| 		else: | ||||
| 			h = h2 | ||||
| 		ascent, descent = tuple[5:7] | ||||
| 		return h, v1, v1+ascent, v2 | ||||
| 	# | ||||
| 	def _screenposanchor(self, tuple, word, h, v): | ||||
| 		# Anchor callback for screenpos(): anchors take up no characters | ||||
| 		pass | ||||
| 	# | ||||
| 	# Invert the stretch of text between pos1 and pos2. | ||||
| 	# If pos1 is None, the beginning is implied; | ||||
| 	# if pos2 is None, the end is implied. | ||||
| 	# Undoes its own effect when called again with the same arguments | ||||
| 	def invert(self, d, pos1, pos2): | ||||
| 		# Both ends are turned into (h, top, baseline, bottom) tuples; | ||||
| 		# a None end becomes a zero-height position at the top left | ||||
| 		# or bottom right corner of the paragraph. | ||||
| 		if pos1 == None: | ||||
| 			pos1 = self.left, self.top, self.top, self.top | ||||
| 		else: | ||||
| 			pos1 = self.screenpos(d, pos1) | ||||
| 		if pos2 == None: | ||||
| 			pos2 = self.right, self.bottom,self.bottom,self.bottom | ||||
| 		else: | ||||
| 			pos2 = self.screenpos(d, pos2) | ||||
| 		h1, top1, baseline1, bottom1 = pos1 | ||||
| 		h2, top2, baseline2, bottom2 = pos2 | ||||
| 		if bottom1 <= top2: | ||||
| 			# The ends are on different lines: invert the rest of | ||||
| 			# the first line, ... | ||||
| 			d.invert((h1, top1), (self.right, bottom1)) | ||||
| 			h1 = self.left | ||||
| 			if bottom1 < top2: | ||||
| 				# ... the full-width band of lines in between, ... | ||||
| 				d.invert((h1, bottom1), (self.right, top2)) | ||||
| 			top1, bottom1 = top2, bottom2 | ||||
| 		# ... and the part of the last (or only) line up to h2 | ||||
| 		d.invert((h1, top1), (h2, bottom2)) | ||||
| 
 | ||||
| 
 | ||||
| # Test class Para | ||||
| # XXX This was last used on the Mac, hence the weird fonts... | ||||
| def test(): | ||||
| 	# Interactive test: open a stdwin window showing the same sentence | ||||
| 	# in each of the four justifications and let the user select text | ||||
| 	# within one paragraph by pressing and releasing the mouse button. | ||||
| 	import stdwin | ||||
| 	from stdwinevents import * | ||||
| 	words = 'The', 'quick', 'brown', 'fox', 'jumps', 'over', \ | ||||
| 		'the', 'lazy', 'dog.' | ||||
| 	paralist = [] | ||||
| 	for just in 'l', 'r', 'lr', 'c': | ||||
| 		p = Para() | ||||
| 		p.just = just | ||||
| 		p.addword(stdwin, ('New York', 'p', 12), words[0], 1, 1) | ||||
| 		for word in words[1:-1]: | ||||
| 			p.addword(stdwin, None, word, 1, 1) | ||||
| 		p.addword(stdwin, None, words[-1], 2, 4) | ||||
| 		p.addword(stdwin, ('New York', 'b', 18), 'Bye!', 0, 0) | ||||
| 		p.addword(stdwin, ('New York', 'p', 10), 'Bye!', 0, 0) | ||||
| 		paralist.append(p) | ||||
| 	window = stdwin.open('Para.test()') | ||||
| 	# Current selection: character offsets start..stop in selpara | ||||
| 	start = stop = selpara = None | ||||
| 	while 1: | ||||
| 		etype, win, detail = stdwin.getevent() | ||||
| 		if etype == WE_CLOSE: | ||||
| 			break | ||||
| 		if etype == WE_SIZE: | ||||
| 			# Request a redraw of (what is presumably) the whole window | ||||
| 			window.change((0, 0), (1000, 1000)) | ||||
| 		if etype == WE_DRAW: | ||||
| 			width, height = window.getwinsize() | ||||
| 			d = None | ||||
| 			try: | ||||
| 				d = window.begindrawing() | ||||
| 				d.cliprect(detail) | ||||
| 				d.erase(detail) | ||||
| 				v = 0 | ||||
| 				# Stack the paragraphs; each render returns its bottom | ||||
| 				for p in paralist: | ||||
| 					v = p.render(d, 0, v, width) | ||||
| 					if p == selpara and \ | ||||
| 					   start <> None and stop <> None: | ||||
| 						p.invert(d, start, stop) | ||||
| 			finally: | ||||
| 				if d: d.close() | ||||
| 		if etype == WE_MOUSE_DOWN: | ||||
| 			# Undo the old highlight (invert is its own inverse) | ||||
| 			if selpara and start <> None and stop <> None: | ||||
| 				d = window.begindrawing() | ||||
| 				selpara.invert(d, start, stop) | ||||
| 				d.close() | ||||
| 			start = stop = selpara = None | ||||
| 			mouseh, mousev = detail[0] | ||||
| 			for p in paralist: | ||||
| 				start = p.whereis(stdwin, mouseh, mousev) | ||||
| 				if start <> None: | ||||
| 					selpara = p | ||||
| 					break | ||||
| 		if etype == WE_MOUSE_UP and start <> None and selpara: | ||||
| 			mouseh, mousev = detail[0] | ||||
| 			stop = selpara.whereis(stdwin, mouseh, mousev) | ||||
| 			# Releasing outside the paragraph cancels the selection | ||||
| 			if stop == None: start = selpara = None | ||||
| 			else: | ||||
| 				if start > stop: | ||||
| 					start, stop = stop, start | ||||
| 				d = window.begindrawing() | ||||
| 				selpara.invert(d, start, stop) | ||||
| 				d.close() | ||||
| 	window.close() | ||||
							
								
								
									
										621
									
								
								Lib/lib-old/fmt.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										621
									
								
								Lib/lib-old/fmt.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,621 @@ | |||
| # Text formatting abstractions | ||||
| 
 | ||||
| 
 | ||||
| import string | ||||
| import Para | ||||
| 
 | ||||
| 
 | ||||
| # A formatter back-end object has one method that is called by the formatter: | ||||
| # addpara(p), where p is a paragraph object.  For example: | ||||
| 
 | ||||
| 
 | ||||
| # Formatter back-end to do nothing at all with the paragraphs | ||||
| class NullBackEnd: | ||||
| 	# | ||||
| 	# Every method accepts its arguments and does nothing; the other | ||||
| 	# back-ends in this file derive from this class. | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		pass | ||||
| 	# | ||||
| 	# Receive a completed paragraph | ||||
| 	def addpara(self, p): | ||||
| 		pass | ||||
| 	# | ||||
| 	# Notification hooks for the start and end of anchor id | ||||
| 	def bgn_anchor(self, id): | ||||
| 		pass | ||||
| 	# | ||||
| 	def end_anchor(self, id): | ||||
| 		pass | ||||
| 
 | ||||
| 
 | ||||
| # Formatter back-end to collect the paragraphs in a list | ||||
| class SavingBackEnd(NullBackEnd): | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		self.paralist = [] | ||||
| 	# | ||||
| 	def addpara(self, p): | ||||
| 		self.paralist.append(p) | ||||
| 	# | ||||
| 	def hitcheck(self, h, v): | ||||
| 		hits = [] | ||||
| 		for p in self.paralist: | ||||
| 			if p.top <= v <= p.bottom: | ||||
| 				for id in p.hitcheck(h, v): | ||||
| 					if id not in hits: | ||||
| 						hits.append(id) | ||||
| 		return hits | ||||
| 	# | ||||
| 	def extract(self): | ||||
| 		text = '' | ||||
| 		for p in self.paralist: | ||||
| 			text = text + (p.extract()) | ||||
| 		return text | ||||
| 	# | ||||
| 	def extractpart(self, long1, long2): | ||||
| 		if long1 > long2: long1, long2 = long2, long1 | ||||
| 		para1, pos1 = long1 | ||||
| 		para2, pos2 = long2 | ||||
| 		text = '' | ||||
| 		while para1 < para2: | ||||
| 			ptext = self.paralist[para1].extract() | ||||
| 			text = text + ptext[pos1:] | ||||
| 			pos1 = 0 | ||||
| 			para1 = para1 + 1 | ||||
| 		ptext = self.paralist[para2].extract() | ||||
| 		return text + ptext[pos1:pos2] | ||||
| 	# | ||||
| 	def whereis(self, d, h, v): | ||||
| 		total = 0 | ||||
| 		for i in range(len(self.paralist)): | ||||
| 			p = self.paralist[i] | ||||
| 			result = p.whereis(d, h, v) | ||||
| 			if result <> None: | ||||
| 				return i, result | ||||
| 		return None | ||||
| 	# | ||||
| 	def roundtowords(self, long1, long2): | ||||
| 		i, offset = long1 | ||||
| 		text = self.paralist[i].extract() | ||||
| 		while offset > 0 and text[offset-1] <> ' ': offset = offset-1 | ||||
| 		long1 = i, offset | ||||
| 		# | ||||
| 		i, offset = long2 | ||||
| 		text = self.paralist[i].extract() | ||||
| 		n = len(text) | ||||
| 		while offset < n-1 and text[offset] <> ' ': offset = offset+1 | ||||
| 		long2 = i, offset | ||||
| 		# | ||||
| 		return long1, long2 | ||||
| 	# | ||||
| 	def roundtoparagraphs(self, long1, long2): | ||||
| 		long1 = long1[0], 0 | ||||
| 		long2 = long2[0], len(self.paralist[long2[0]].extract()) | ||||
| 		return long1, long2 | ||||
| 
 | ||||
| 
 | ||||
| # Formatter back-end to send the text directly to the drawing object | ||||
| class WritingBackEnd(NullBackEnd): | ||||
| 	# | ||||
| 	# d is the drawing object the paragraphs are rendered on and | ||||
| 	# width the line width they are laid out for. | ||||
| 	def __init__(self, d, width): | ||||
| 		self.d = d | ||||
| 		self.width = width | ||||
| 		self.lineno = 0 | ||||
| 	# | ||||
| 	# Render p below the previous paragraph; render() returns the | ||||
| 	# bottom of the text, which becomes the top of the next one | ||||
| 	def addpara(self, p): | ||||
| 		self.lineno = p.render(self.d, 0, self.lineno, self.width) | ||||
| 
 | ||||
| 
 | ||||
| # A formatter receives a stream of formatting instructions and assembles | ||||
| # these into a stream of paragraphs on to a back-end.  The assembly is | ||||
| # parametrized by a text measurement object, which must match the output | ||||
| # operations of the back-end.  The back-end is responsible for splitting | ||||
| # paragraphs up in lines of a given maximum width.  (This is done because | ||||
| # in a windowing environment, when the window size changes, there is no | ||||
| # need to redo the assembly into paragraphs, but the splitting into lines | ||||
| # must be done taking the new window size into account.) | ||||
| 
 | ||||
| 
 | ||||
| # Formatter base class.  Initialize it with a text measurement object, | ||||
| # which is used for text measurements, and a back-end object, | ||||
| # which receives the completed paragraphs.  The formatting methods are: | ||||
| # setfont(font) | ||||
| # setleftindent(nspaces) | ||||
| # setjust(type) where type is 'l', 'c', 'r', or 'lr' | ||||
| # flush() | ||||
| # vspace(nlines) | ||||
| # needvspace(nlines) | ||||
| # addword(word, nspaces) | ||||
| class BaseFormatter: | ||||
| 	# | ||||
| 	def __init__(self, d, b): | ||||
| 		# Drawing object used for text measurements | ||||
| 		self.d = d | ||||
| 		# | ||||
| 		# BackEnd object receiving completed paragraphs | ||||
| 		self.b = b | ||||
| 		# | ||||
| 		# Parameters of the formatting model | ||||
| 		self.leftindent = 0 | ||||
| 		self.just = 'l' | ||||
| 		self.font = None | ||||
| 		self.blanklines = 0 | ||||
| 		# | ||||
| 		# Parameters derived from the current font | ||||
| 		self.space = d.textwidth(' ') | ||||
| 		self.line = d.lineheight() | ||||
| 		self.ascent = d.baseline() | ||||
| 		self.descent = self.line - self.ascent | ||||
| 		# | ||||
| 		# Parameter derived from the default font | ||||
| 		self.n_space = self.space | ||||
| 		# | ||||
| 		# Current paragraph being built | ||||
| 		self.para = None | ||||
| 		self.nospace = 1 | ||||
| 		# | ||||
| 		# Font to set on the next word | ||||
| 		self.nextfont = None | ||||
| 	# | ||||
| 	def newpara(self): | ||||
| 		return Para.Para() | ||||
| 	# | ||||
| 	def setfont(self, font): | ||||
| 		if font == None: return | ||||
| 		self.font = self.nextfont = font | ||||
| 		d = self.d | ||||
| 		d.setfont(font) | ||||
| 		self.space = d.textwidth(' ') | ||||
| 		self.line = d.lineheight() | ||||
| 		self.ascent = d.baseline() | ||||
| 		self.descent = self.line - self.ascent | ||||
| 	# | ||||
| 	def setleftindent(self, nspaces): | ||||
| 		self.leftindent = int(self.n_space * nspaces) | ||||
| 		if self.para: | ||||
| 			hang = self.leftindent - self.para.indent_left | ||||
| 			if hang > 0 and self.para.getlength() <= hang: | ||||
| 				self.para.makehangingtag(hang) | ||||
| 				self.nospace = 1 | ||||
| 			else: | ||||
| 				self.flush() | ||||
| 	# | ||||
| 	def setrightindent(self, nspaces): | ||||
| 		self.rightindent = int(self.n_space * nspaces) | ||||
| 		if self.para: | ||||
| 			self.para.indent_right = self.rightindent | ||||
| 			self.flush() | ||||
| 	# | ||||
| 	def setjust(self, just): | ||||
| 		self.just = just | ||||
| 		if self.para: | ||||
| 			self.para.just = self.just | ||||
| 	# | ||||
| 	def flush(self): | ||||
| 		if self.para: | ||||
| 			self.b.addpara(self.para) | ||||
| 			self.para = None | ||||
| 			if self.font <> None: | ||||
| 				self.d.setfont(self.font) | ||||
| 		self.nospace = 1 | ||||
| 	# | ||||
| 	def vspace(self, nlines): | ||||
| 		self.flush() | ||||
| 		if nlines > 0: | ||||
| 			self.para = self.newpara() | ||||
| 			tuple = None, '', 0, 0, 0, int(nlines*self.line), 0 | ||||
| 			self.para.words.append(tuple) | ||||
| 			self.flush() | ||||
| 			self.blanklines = self.blanklines + nlines | ||||
| 	# | ||||
| 	def needvspace(self, nlines): | ||||
| 		self.flush() # Just to be sure | ||||
| 		if nlines > self.blanklines: | ||||
| 			self.vspace(nlines - self.blanklines) | ||||
| 	# | ||||
| 	def addword(self, text, space): | ||||
| 		if self.nospace and not text: | ||||
| 			return | ||||
| 		self.nospace = 0 | ||||
| 		self.blanklines = 0 | ||||
| 		if not self.para: | ||||
| 			self.para = self.newpara() | ||||
| 			self.para.indent_left = self.leftindent | ||||
| 			self.para.just = self.just | ||||
| 			self.nextfont = self.font | ||||
| 		space = int(space * self.space) | ||||
| 		self.para.words.append(self.nextfont, text, \ | ||||
| 			self.d.textwidth(text), space, space, \ | ||||
| 			self.ascent, self.descent) | ||||
| 		self.nextfont = None | ||||
| 	# | ||||
| 	def bgn_anchor(self, id): | ||||
| 		if not self.para: | ||||
| 			self.nospace = 0 | ||||
| 			self.addword('', 0) | ||||
| 		self.para.bgn_anchor(id) | ||||
| 	# | ||||
| 	def end_anchor(self, id): | ||||
| 		if not self.para: | ||||
| 			self.nospace = 0 | ||||
| 			self.addword('', 0) | ||||
| 		self.para.end_anchor(id) | ||||
| 
 | ||||
| 
 | ||||
| # Measuring object for measuring text as viewed on a tty | ||||
| class NullMeasurer: | ||||
| 	# | ||||
| 	# Every character is one unit wide and every line one unit high | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		pass | ||||
| 	# | ||||
| 	# Fonts make no difference to the measurements | ||||
| 	def setfont(self, font): | ||||
| 		pass | ||||
| 	# | ||||
| 	def textwidth(self, text): | ||||
| 		return len(text) | ||||
| 	# | ||||
| 	def lineheight(self): | ||||
| 		return 1 | ||||
| 	# | ||||
| 	# A baseline of 0 gives ascent 0 and descent 1 in BaseFormatter | ||||
| 	def baseline(self): | ||||
| 		return 0 | ||||
| 
 | ||||
| 
 | ||||
| # Drawing object for writing plain ASCII text to a file | ||||
| class FileWriter: | ||||
| 	# | ||||
| 	# fp is the output file; (lineno, colno) tracks where the next | ||||
| 	# character written would end up. | ||||
| 	def __init__(self, fp): | ||||
| 		self.fp = fp | ||||
| 		self.lineno, self.colno = 0, 0 | ||||
| 	# | ||||
| 	def setfont(self, font): | ||||
| 		pass | ||||
| 	# | ||||
| 	# Write str at column h of line v, moving there first. | ||||
| 	# Empty strings are ignored; a string containing a newline | ||||
| 	# raises ValueError. | ||||
| 	def text(self, (h, v), str): | ||||
| 		if not str: return | ||||
| 		if '\n' in str: | ||||
| 			raise ValueError, 'can\'t write \\n' | ||||
| 		# Move down with newlines (which also reset the column) | ||||
| 		while self.lineno < v: | ||||
| 			self.fp.write('\n') | ||||
| 			self.colno, self.lineno = 0, self.lineno + 1 | ||||
| 		while self.lineno > v: | ||||
| 			# XXX This should never happen... | ||||
| 			self.fp.write('\033[A') # ANSI up arrow | ||||
| 			self.lineno = self.lineno - 1 | ||||
| 		# Move right with blanks or left with backspaces | ||||
| 		if self.colno < h: | ||||
| 			self.fp.write(' ' * (h - self.colno)) | ||||
| 		elif self.colno > h: | ||||
| 			self.fp.write('\b' * (self.colno - h)) | ||||
| 		self.colno = h | ||||
| 		self.fp.write(str) | ||||
| 		self.colno = h + len(str) | ||||
| 
 | ||||
| 
 | ||||
| # Formatting class to do nothing at all with the data | ||||
| class NullFormatter(BaseFormatter): | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		d = NullMeasurer() | ||||
| 		b = NullBackEnd() | ||||
| 		BaseFormatter.__init__(self, d, b) | ||||
| 
 | ||||
| 
 | ||||
| # Formatting class to write directly to a file | ||||
| class WritingFormatter(BaseFormatter): | ||||
| 	# | ||||
| 	def __init__(self, fp, width): | ||||
| 		dm = NullMeasurer() | ||||
| 		dw = FileWriter(fp) | ||||
| 		b = WritingBackEnd(dw, width) | ||||
| 		BaseFormatter.__init__(self, dm, b) | ||||
| 		self.blanklines = 1 | ||||
| 	# | ||||
| 	# Suppress multiple blank lines | ||||
| 	def needvspace(self, nlines): | ||||
| 		BaseFormatter.needvspace(self, min(1, nlines)) | ||||
| 
 | ||||
| 
 | ||||
| # A "FunnyFormatter" writes ASCII text with a twist: *bold words*, | ||||
| # _italic text_ and _underlined words_, and `quoted text'. | ||||
| # It assumes that the fonts are 'r', 'i', 'b', 'u', 'q': (roman, | ||||
| # italic, bold, underline, quote). | ||||
| # Moreover, if the font is in upper case, the text is converted to | ||||
| # UPPER CASE. | ||||
| class FunnyFormatter(WritingFormatter): | ||||
| 	# | ||||
| 	# Decorate the paragraph with finalize() before it is flushed | ||||
| 	def flush(self): | ||||
| 		if self.para: finalize(self.para) | ||||
| 		WritingFormatter.flush(self) | ||||
| 
 | ||||
| 
 | ||||
| # Surrounds *bold words* and _italic text_ in a paragraph with | ||||
| # appropriate markers, fixing the size (assuming these characters' | ||||
| # width is 1). | ||||
| openchar = \ | ||||
|     {'b':'*', 'i':'_', 'u':'_', 'q':'`', 'B':'*', 'I':'_', 'U':'_', 'Q':'`'} | ||||
| closechar = \ | ||||
|     {'b':'*', 'i':'_', 'u':'_', 'q':'\'', 'B':'*', 'I':'_', 'U':'_', 'Q':'\''} | ||||
| def finalize(para): | ||||
| 	oldfont = curfont = 'r' | ||||
| 	para.words.append('r', '', 0, 0, 0, 0) # temporary, deleted at end | ||||
| 	for i in range(len(para.words)): | ||||
| 		fo, te, wi = para.words[i][:3] | ||||
| 		if fo <> None: curfont = fo | ||||
| 		if curfont <> oldfont: | ||||
| 			if closechar.has_key(oldfont): | ||||
| 				c = closechar[oldfont] | ||||
| 				j = i-1 | ||||
| 				while j > 0 and para.words[j][1] == '': j = j-1 | ||||
| 				fo1, te1, wi1 = para.words[j][:3] | ||||
| 				te1 = te1 + c | ||||
| 				wi1 = wi1 + len(c) | ||||
| 				para.words[j] = (fo1, te1, wi1) + \ | ||||
| 					para.words[j][3:] | ||||
| 			if openchar.has_key(curfont) and te: | ||||
| 				c = openchar[curfont] | ||||
| 				te = c + te | ||||
| 				wi = len(c) + wi | ||||
| 				para.words[i] = (fo, te, wi) + \ | ||||
| 					para.words[i][3:] | ||||
| 			if te: oldfont = curfont | ||||
| 			else: oldfont = 'r' | ||||
| 		if curfont in string.uppercase: | ||||
| 			te = string.upper(te) | ||||
| 			para.words[i] = (fo, te, wi) + para.words[i][3:] | ||||
| 	del para.words[-1] | ||||
| 
 | ||||
| 
 | ||||
| # Formatter back-end to draw the text in a window. | ||||
| # This has an option to draw while the paragraphs are being added, | ||||
| # to minimize the delay before the user sees anything. | ||||
| # This manages the entire "document" of the window. | ||||
| class StdwinBackEnd(SavingBackEnd): | ||||
| 	# | ||||
| 	# window is the stdwin window to draw in; if drawnow is true, | ||||
| 	# paragraphs are rendered as soon as they are added.  A drawing | ||||
| 	# object is kept open in self.d until finish() is called. | ||||
| 	def __init__(self, window, drawnow): | ||||
| 		self.window = window | ||||
| 		self.drawnow = drawnow | ||||
| 		self.width = window.getwinsize()[0] | ||||
| 		self.selection = None | ||||
| 		self.height = 0 | ||||
| 		window.setorigin(0, 0) | ||||
| 		window.setdocsize(0, 0) | ||||
| 		self.d = window.begindrawing() | ||||
| 		SavingBackEnd.__init__(self) | ||||
| 	# | ||||
| 	# Close the drawing object and publish the document height | ||||
| 	def finish(self): | ||||
| 		self.d.close() | ||||
| 		self.d = None | ||||
| 		self.window.setdocsize(0, self.height) | ||||
| 	# | ||||
| 	# Save p and place it below the paragraphs added so far, | ||||
| 	# either by rendering it or by only laying it out | ||||
| 	def addpara(self, p): | ||||
| 		self.paralist.append(p) | ||||
| 		if self.drawnow: | ||||
| 			self.height = \ | ||||
| 				p.render(self.d, 0, self.height, self.width) | ||||
| 		else: | ||||
| 			p.layout(self.width) | ||||
| 			p.left = 0 | ||||
| 			p.top = self.height | ||||
| 			p.right = self.width | ||||
| 			p.bottom = self.height + p.height | ||||
| 			self.height = p.bottom | ||||
| 	# | ||||
| 	# Lay out all paragraphs again for the current window width; | ||||
| 	# window.change() is called on both the old and the new extent | ||||
| 	def resize(self): | ||||
| 		self.window.change((0, 0), (self.width, self.height)) | ||||
| 		self.width = self.window.getwinsize()[0] | ||||
| 		self.height = 0 | ||||
| 		for p in self.paralist: | ||||
| 			p.layout(self.width) | ||||
| 			p.left = 0 | ||||
| 			p.top = self.height | ||||
| 			p.right = self.width | ||||
| 			p.bottom = self.height + p.height | ||||
| 			self.height = p.bottom | ||||
| 		self.window.change((0, 0), (self.width, self.height)) | ||||
| 		self.window.setdocsize(0, self.height) | ||||
| 	# | ||||
| 	# Redraw the paragraphs that overlap area vertically, then | ||||
| 	# draw the selection highlight again | ||||
| 	def redraw(self, area): | ||||
| 		d = self.window.begindrawing() | ||||
| 		(left, top), (right, bottom) = area | ||||
| 		d.erase(area) | ||||
| 		d.cliprect(area) | ||||
| 		for p in self.paralist: | ||||
| 			if top < p.bottom and p.top < bottom: | ||||
| 				v = p.render(d, p.left, p.top, p.right) | ||||
| 		if self.selection: | ||||
| 			self.invert(d, self.selection) | ||||
| 		d.close() | ||||
| 	# | ||||
| 	# Change the selection to new (a pair of long positions, or a | ||||
| 	# false value for none), updating the highlight on screen | ||||
| 	def setselection(self, new): | ||||
| 		if new: | ||||
| 			long1, long2 = new | ||||
| 			pos1 = long1[:3] | ||||
| 			pos2 = long2[:3] | ||||
| 			new = pos1, pos2 | ||||
| 		if new <> self.selection: | ||||
| 			d = self.window.begindrawing() | ||||
| 			# Inverting twice restores, so this removes the old | ||||
| 			# highlight before the new one is drawn | ||||
| 			if self.selection: | ||||
| 				self.invert(d, self.selection) | ||||
| 			if new: | ||||
| 				self.invert(d, new) | ||||
| 			d.close() | ||||
| 			self.selection = new | ||||
| 	# | ||||
| 	def getselection(self): | ||||
| 		return self.selection | ||||
| 	# | ||||
| 	# Return the selected text, or None if there is no selection | ||||
| 	def extractselection(self): | ||||
| 		if self.selection: | ||||
| 			a, b = self.selection | ||||
| 			return self.extractpart(a, b) | ||||
| 		else: | ||||
| 			return None | ||||
| 	# | ||||
| 	# Invert the text between the two long positions in region, | ||||
| 	# paragraph by paragraph | ||||
| 	def invert(self, d, region): | ||||
| 		long1, long2 = region | ||||
| 		if long1 > long2: long1, long2 = long2, long1 | ||||
| 		para1, pos1 = long1 | ||||
| 		para2, pos2 = long2 | ||||
| 		while para1 < para2: | ||||
| 			# None stands for the end resp. begin of a paragraph | ||||
| 			self.paralist[para1].invert(d, pos1, None) | ||||
| 			pos1 = None | ||||
| 			para1 = para1 + 1 | ||||
| 		self.paralist[para2].invert(d, pos1, pos2) | ||||
| 	# | ||||
| 	# Search the lower-cased paragraph texts for prog (a string or | ||||
| 	# a compiled regex).  The paragraph holding the current | ||||
| 	# selection is skipped; the first match after it is preferred, | ||||
| 	# else the first match before it.  On success the match is | ||||
| 	# selected and shown and 1 is returned, otherwise 0. | ||||
| 	def search(self, prog): | ||||
| 		import regex, string | ||||
| 		if type(prog) == type(''): | ||||
| 			prog = regex.compile(string.lower(prog)) | ||||
| 		if self.selection: | ||||
| 			iold = self.selection[0][0] | ||||
| 		else: | ||||
| 			iold = -1 | ||||
| 		hit = None | ||||
| 		for i in range(len(self.paralist)): | ||||
| 			# Keep only the first hit found before the selection | ||||
| 			if i == iold or i < iold and hit: | ||||
| 				continue | ||||
| 			p = self.paralist[i] | ||||
| 			text = string.lower(p.extract()) | ||||
| 			if prog.search(text) >= 0: | ||||
| 				a, b = prog.regs[0] | ||||
| 				long1 = i, a | ||||
| 				long2 = i, b | ||||
| 				hit = long1, long2 | ||||
| 				if i > iold: | ||||
| 					break | ||||
| 		if hit: | ||||
| 			self.setselection(hit) | ||||
| 			i = hit[0][0] | ||||
| 			p = self.paralist[i] | ||||
| 			self.window.show((p.left, p.top), (p.right, p.bottom)) | ||||
| 			return 1 | ||||
| 		else: | ||||
| 			return 0 | ||||
| 	# | ||||
| 	# Select and show the first paragraph that contains anchor id | ||||
| 	def showanchor(self, id): | ||||
| 		for i in range(len(self.paralist)): | ||||
| 			p = self.paralist[i] | ||||
| 			if p.hasanchor(id): | ||||
| 				long1 = i, 0 | ||||
| 				long2 = i, len(p.extract()) | ||||
| 				hit = long1, long2 | ||||
| 				self.setselection(hit) | ||||
| 				self.window.show( \ | ||||
| 					(p.left, p.top), (p.right, p.bottom)) | ||||
| 				break | ||||
| 
 | ||||
| 
 | ||||
| # GL extensions | ||||
| 
 | ||||
| class GLFontCache: | ||||
| 	# | ||||
| 	# Cache of font handles keyed by strings of the form 'name size'; | ||||
| 	# the current handle and its fontinfo are kept in self.fonthandle | ||||
| 	# and self.fontinfo. | ||||
| 	# | ||||
| 	def __init__(self): | ||||
| 		self.reset() | ||||
| 		self.setfont('') | ||||
| 	# | ||||
| 	# Forget the current font and all cached handles | ||||
| 	def reset(self): | ||||
| 		self.fontkey = None | ||||
| 		self.fonthandle = None | ||||
| 		self.fontinfo = None | ||||
| 		self.fontcache = {} | ||||
| 	# | ||||
| 	def close(self): | ||||
| 		self.reset() | ||||
| 	# | ||||
| 	# Make fontkey the current font.  An empty key selects | ||||
| 	# 'Times-Roman 12'; a key without a size gets size 12. | ||||
| 	def setfont(self, fontkey): | ||||
| 		if fontkey == '': | ||||
| 			fontkey = 'Times-Roman 12' | ||||
| 		elif ' ' not in fontkey: | ||||
| 			fontkey = fontkey + ' 12' | ||||
| 		if fontkey == self.fontkey: | ||||
| 			return | ||||
| 		if self.fontcache.has_key(fontkey): | ||||
| 			handle = self.fontcache[fontkey] | ||||
| 		else: | ||||
| 			import string | ||||
| 			i = string.index(fontkey, ' ') | ||||
| 			name, sizestr = fontkey[:i], fontkey[i:] | ||||
| 			# NOTE(review): eval() runs whatever follows the first | ||||
| 			# blank of the font key as an expression; only safe | ||||
| 			# when font keys come from trusted input | ||||
| 			size = eval(sizestr) | ||||
| 			# The unscaled font is cached under size 1 | ||||
| 			key1 = name + ' 1' | ||||
| 			key = name + ' ' + `size` | ||||
| 			# NB key may differ from fontkey! | ||||
| 			if self.fontcache.has_key(key): | ||||
| 				handle = self.fontcache[key] | ||||
| 			else: | ||||
| 				if self.fontcache.has_key(key1): | ||||
| 					handle = self.fontcache[key1] | ||||
| 				else: | ||||
| 					import fm | ||||
| 					handle = fm.findfont(name) | ||||
| 					self.fontcache[key1] = handle | ||||
| 				handle = handle.scalefont(size) | ||||
| 				self.fontcache[fontkey] = \ | ||||
| 					self.fontcache[key] = handle | ||||
| 		self.fontkey = fontkey | ||||
| 		# Only switch fonts when the handle really changes | ||||
| 		if self.fonthandle <> handle: | ||||
| 			self.fonthandle = handle | ||||
| 			self.fontinfo = handle.getfontinfo() | ||||
| 			handle.setfont() | ||||
| 
 | ||||
| 
 | ||||
| class GLMeasurer(GLFontCache): | ||||
| 	# | ||||
| 	# Measuring object based on the current font of the font cache. | ||||
| 	# NOTE(review): fontinfo[6] and fontinfo[3] are presumably the | ||||
| 	# font height and the part below the baseline -- confirm against | ||||
| 	# the fm module's getfontinfo() documentation. | ||||
| 	# | ||||
| 	def textwidth(self, text): | ||||
| 		return self.fonthandle.getstrwidth(text) | ||||
| 	# | ||||
| 	def baseline(self): | ||||
| 		return self.fontinfo[6] - self.fontinfo[3] | ||||
| 	# | ||||
| 	def lineheight(self): | ||||
| 		return self.fontinfo[6] | ||||
| 
 | ||||
| 
 | ||||
| class GLWriter(GLFontCache): | ||||
| 	# | ||||
| 	# NOTES: | ||||
| 	# (1) Use gl.ortho2 to use X pixel coordinates! | ||||
| 	# | ||||
| 	def text(self, (h, v), text): | ||||
| 		import gl, fm | ||||
| 		gl.cmov2i(h, v + self.fontinfo[6] - self.fontinfo[3]) | ||||
| 		fm.prstr(text) | ||||
| 	# | ||||
| 	def setfont(self, fontkey): | ||||
| 		oldhandle = self.fonthandle | ||||
| 		GLFontCache.setfont(fontkey) | ||||
| 		if self.fonthandle <> oldhandle: | ||||
| 			handle.setfont() | ||||
| 
 | ||||
| 
 | ||||
| class GLMeasurerWriter(GLMeasurer, GLWriter): | ||||
| 	# Combines the measuring and the writing methods in one object | ||||
| 	pass | ||||
| 
 | ||||
| 
 | ||||
| class GLBackEnd(SavingBackEnd): | ||||
| 	# | ||||
| 	def __init__(self, wid): | ||||
| 		import gl | ||||
| 		gl.winset(wid) | ||||
| 		self.wid = wid | ||||
| 		self.width = gl.getsize()[1] | ||||
| 		self.height = 0 | ||||
| 		self.d = GLMeasurerWriter() | ||||
| 		SavingBackEnd.__init__(self) | ||||
| 	# | ||||
| 	def finish(self): | ||||
| 		pass | ||||
| 	# | ||||
| 	def addpara(self, p): | ||||
| 		self.paralist.append(p) | ||||
| 		self.height = p.render(self.d, 0, self.height, self.width) | ||||
| 	# | ||||
| 	def redraw(self): | ||||
| 		import gl | ||||
| 		gl.winset(self.wid) | ||||
| 		width = gl.getsize()[1] | ||||
| 		if width <> self.width: | ||||
| 			setdocsize = 1 | ||||
| 			self.width = width | ||||
| 			for p in self.paralist: | ||||
| 				p.top = p.bottom = None | ||||
| 		d = self.d | ||||
| 		v = 0 | ||||
| 		for p in self.paralist: | ||||
| 			v = p.render(d, 0, v, width) | ||||
							
								
								
									
										321
									
								
								Lib/sgmllib.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										321
									
								
								Lib/sgmllib.py
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,321 @@ | |||
| # A parser for SGML, using the derived class as static DTD. | ||||
| 
 | ||||
| # XXX This only supports those SGML features used by HTML. | ||||
| 
 | ||||
| # XXX There should be a way to distinguish between PCDATA (parsed | ||||
| # character data -- the normal case), RCDATA (replaceable character | ||||
| # data -- only char and entity references and end tags are special) | ||||
| # and CDATA (character data -- only end tags are special). | ||||
| 
 | ||||
| 
 | ||||
| import regex | ||||
| import string | ||||
| 
 | ||||
| 
 | ||||
| # Regular expressions used for parsing | ||||
| 
 | ||||
| # Patterns use the syntax of the old regex module, where \| separates | ||||
| # alternatives. | ||||
| # incomplete: the opening part of a comment/declaration, end tag, | ||||
| # start tag, character reference or entity reference (presumably used | ||||
| # to detect markup cut off at the end of the buffer -- TODO confirm) | ||||
| incomplete = regex.compile( \ | ||||
| 	  '<!-?\|</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*\|</?\|' + \ | ||||
| 	  '&#[a-zA-Z0-9]*\|&[a-zA-Z][a-zA-Z0-9]*\|&') | ||||
| # entityref: &name terminated by ';' or '.' | ||||
| entityref = regex.compile('&[a-zA-Z][a-zA-Z0-9]*[;.]') | ||||
| # charref: &# followed by letters or digits and ';' | ||||
| charref = regex.compile('&#[a-zA-Z0-9]+;') | ||||
| # starttagopen: '<' followed by a letter | ||||
| starttagopen = regex.compile('<[a-zA-Z]') | ||||
| # endtag: a complete </name> with optional whitespace before '>' | ||||
| endtag = regex.compile('</[a-zA-Z][a-zA-Z0-9]*[ \t\n]*>') | ||||
| # commentopen: the start of a comment | ||||
| commentopen = regex.compile('<!--') | ||||
| 
 | ||||
| 
 | ||||
| # SGML parser base class -- find tags and call handler functions. | ||||
| # Usage: p = SGMLParser(); p.feed(data); ...; p.close(). | ||||
| # The dtd is defined by deriving a class which defines methods | ||||
| # with special names to handle tags: start_foo and end_foo to handle | ||||
| # <foo> and </foo>, respectively, or do_foo to handle <foo> by itself. | ||||
| # (Tags are converted to lower case for this purpose.)  The data | ||||
| # between tags is passed to the parser by calling self.handle_data() | ||||
| # with some data as argument (the data may be split up in arbitrary | ||||
| # chunks).  Entity references are passed by calling | ||||
| # self.handle_entityref() with the entity reference as argument. | ||||
| 
 | ||||
| class SGMLParser: | ||||
| 
 | ||||
| 	# Interface -- initialize and reset this instance | ||||
| 	def __init__(self): | ||||
| 		# All instance state is created by reset(), so calling reset() | ||||
| 		# later returns the parser to this same initial state. | ||||
| 		self.reset() | ||||
| 
 | ||||
| 	# Interface -- reset this instance.  Loses all unprocessed data | ||||
| 	def reset(self): | ||||
| 		# Text passed to feed() that goahead() has not consumed yet | ||||
| 		self.rawdata = '' | ||||
| 		# Open tags that have a start_<tag> method, innermost last | ||||
| 		# (pushed by parse_starttag, popped by parse_endtag) | ||||
| 		self.stack = [] | ||||
| 		# When true, goahead() passes all remaining text to handle_data | ||||
| 		self.nomoretags = 0 | ||||
| 		# When true, goahead() passes start tags and comments through | ||||
| 		# as data; it is cleared again after the next end tag | ||||
| 		self.literal = 0 | ||||
| 
 | ||||
| 	# For derived classes only -- enter literal mode (CDATA) till EOF | ||||
| 	def setnomoretags(self): | ||||
| 		# Set both flags.  goahead() tests nomoretags first and then | ||||
| 		# hands everything left in the buffer to handle_data(); among | ||||
| 		# the methods of this class only reset() clears it again. | ||||
| 		self.nomoretags = self.literal = 1 | ||||
| 
 | ||||
| 	# For derived classes only -- enter literal mode (CDATA) | ||||
| 	def setliteral(self, *args): | ||||
| 		# Any arguments are accepted and ignored.  While literal is | ||||
| 		# set, goahead() passes start tags and comments through as | ||||
| 		# data; goahead() clears the flag after the next end tag. | ||||
| 		self.literal = 1 | ||||
| 
 | ||||
| 	# Interface -- feed some data to the parser.  Call this as | ||||
| 	# often as you want, with as little or as much text as you | ||||
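| 	# want (may include '\n').  (The text is appended to the buffer | ||||
| 	# and processed as far as possible by goahead(); any incomplete | ||||
| 	# trailing construct is kept until more data arrives or close() | ||||
| 	# is called.) | ||||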
| 	def feed(self, data): | ||||
| 		# Append data to the buffer, then process as much of the | ||||
| 		# buffer as possible; goahead(0) leaves a trailing construct | ||||
| 		# that may still be incomplete in self.rawdata. | ||||
| 		self.rawdata = self.rawdata + data | ||||
| 		self.goahead(0) | ||||
| 
 | ||||
| 	# Interface -- handle the remaining data | ||||
| 	def close(self): | ||||
| 		# goahead(1) treats the buffer as final: whatever cannot be | ||||
| 		# parsed as a complete construct is passed to handle_data(). | ||||
| 		self.goahead(1) | ||||
| 
 | ||||
| 	# Internal -- handle data as far as reasonable.  May leave state | ||||
| 	# and data to be processed by a subsequent call.  If 'end' is | ||||
| 	# true, force handling all data as if followed by EOF marker. | ||||
| 	def goahead(self, end): | ||||
| 		# Scan self.rawdata from the start, dispatching plain text, | ||||
| 		# tags, comments and references to their handlers.  Whatever | ||||
| 		# is left unconsumed (from index i on) is stored back in | ||||
| 		# self.rawdata at the end.  As used here, the compiled regex | ||||
| 		# objects' search() yields a position and match() a length, | ||||
| 		# with a negative value meaning no match. | ||||
| 		rawdata = self.rawdata | ||||
| 		i = 0 | ||||
| 		n = len(rawdata) | ||||
| 		while i < n: | ||||
| 			if self.nomoretags: | ||||
| 				# Everything that remains is plain data | ||||
| 				self.handle_data(rawdata[i:n]) | ||||
| 				i = n | ||||
| 				break | ||||
| 			# Find the next '<' or '&'; the text before it is data | ||||
| 			j = incomplete.search(rawdata, i) | ||||
| 			if j < 0: j = n | ||||
| 			if i < j: self.handle_data(rawdata[i:j]) | ||||
| 			i = j | ||||
| 			if i == n: break | ||||
| 			if rawdata[i] == '<': | ||||
| 				if starttagopen.match(rawdata, i) >= 0: | ||||
| 					if self.literal: | ||||
| 						# In literal mode only the '<' itself is | ||||
| 						# passed on; the rest follows as data | ||||
| 						self.handle_data(rawdata[i]) | ||||
| 						i = i+1 | ||||
| 						continue | ||||
| 					# k is the tag length, or negative when the | ||||
| 					# closing '>' has not been buffered yet | ||||
| 					k = self.parse_starttag(i) | ||||
| 					if k < 0: break | ||||
| 					i = i + k | ||||
| 					continue | ||||
| 				k = endtag.match(rawdata, i) | ||||
| 				if k >= 0: | ||||
| 					j = i+k | ||||
| 					self.parse_endtag(rawdata[i:j]) | ||||
| 					i = j | ||||
| 					# Any end tag terminates literal mode | ||||
| 					self.literal = 0 | ||||
| 					continue | ||||
| 				if commentopen.match(rawdata, i) >= 0: | ||||
| 					if self.literal: | ||||
| 						self.handle_data(rawdata[i]) | ||||
| 						i = i+1 | ||||
| 						continue | ||||
| 					# Negative k: comment not complete yet | ||||
| 					k = self.parse_comment(i) | ||||
| 					if k < 0: break | ||||
| 					i = i+k | ||||
| 					continue | ||||
| 			elif rawdata[i] == '&': | ||||
| 				# Note: references are handled even in literal mode | ||||
| 				k = charref.match(rawdata, i) | ||||
| 				if k >= 0: | ||||
| 					j = i+k | ||||
| 					# Pass the text between '&#' and ';' | ||||
| 					self.handle_charref(rawdata[i+2:j-1]) | ||||
| 					i = j | ||||
| 					continue | ||||
| 				k = entityref.match(rawdata, i) | ||||
| 				if k >= 0: | ||||
| 					j = i+k | ||||
| 					# Pass the name without '&' and the terminator | ||||
| 					self.handle_entityref(rawdata[i+1:j-1]) | ||||
| 					i = j | ||||
| 					continue | ||||
| 			else: | ||||
| 				raise RuntimeError, 'neither < nor & ??' | ||||
| 			# We get here only if incomplete matches but | ||||
| 			# nothing else | ||||
| 			k = incomplete.match(rawdata, i) | ||||
| 			if k < 0: raise RuntimeError, 'no incomplete match ??' | ||||
| 			j = i+k | ||||
| 			# If the partial construct reaches the end of the buffer, | ||||
| 			# more input may complete it; otherwise it is just data | ||||
| 			if j == n: break # Really incomplete | ||||
| 			self.handle_data(rawdata[i:j]) | ||||
| 			i = j | ||||
| 		# end while | ||||
| 		if end and i < n: | ||||
| 			# No more input will come: flush the leftover as data | ||||
| 			self.handle_data(rawdata[i:n]) | ||||
| 			i = n | ||||
| 		self.rawdata = rawdata[i:] | ||||
| 		# XXX if end: check for empty stack | ||||
| 
 | ||||
| 	# Internal -- parse comment, return length or -1 if not terminated | ||||
| 	def parse_comment(self, i): | ||||
| 		rawdata = self.rawdata | ||||
| 		if rawdata[i:i+4] <> '<!--': | ||||
| 			raise RuntimeError, 'unexpected call to handle_comment' | ||||
| 		try: | ||||
| 			j = string.index(rawdata, '--', i+4) | ||||
| 		except string.index_error: | ||||
| 			return -1 | ||||
| 		self.handle_comment(rawdata[i+4: j]) | ||||
| 		j = j+2 | ||||
| 		n = len(rawdata) | ||||
| 		while j < n and rawdata[j] in ' \t\n': j = j+1 | ||||
| 		if j == n: return -1 # Wait for final '>' | ||||
| 		if rawdata[j] == '>': | ||||
| 			j = j+1 | ||||
| 		else: | ||||
| 			print '*** comment not terminated with >' | ||||
| 			print repr(rawdata[j-5:j]), '*!*', repr(rawdata[j:j+5]) | ||||
| 		return j-i | ||||
| 
 | ||||
| 	# Internal -- handle starttag, return length or -1 if not terminated | ||||
| 	def parse_starttag(self, i): | ||||
| 		rawdata = self.rawdata | ||||
| 		try: | ||||
| 			j = string.index(rawdata, '>', i) | ||||
| 		except string.index_error: | ||||
| 			return -1 | ||||
| 		# Now parse the data between i+1 and j into a tag and attrs | ||||
| 		attrs = [] | ||||
| 		tagfind = regex.compile('[a-zA-Z][a-zA-Z0-9]*') | ||||
| 		attrfind = regex.compile( \ | ||||
| 		  '[ \t\n]+\([a-zA-Z][a-zA-Z0-9]*\)' + \ | ||||
| 		  '\([ \t\n]*=[ \t\n]*' + \ | ||||
| 		     '\(\'[^\']*\';\|"[^"]*"\|[-a-zA-Z0-9./:+*%?!()_#]+\)\)?') | ||||
| 		k = tagfind.match(rawdata, i+1) | ||||
| 		if k < 0: | ||||
| 			raise RuntimeError, 'unexpected call to parse_starttag' | ||||
| 		k = i+1+k | ||||
| 		tag = string.lower(rawdata[i+1:k]) | ||||
| 		while k < j: | ||||
| 			l = attrfind.match(rawdata, k) | ||||
| 			if l < 0: break | ||||
| 			regs = attrfind.regs | ||||
| 			a1, b1 = regs[1] | ||||
| 			a2, b2 = regs[2] | ||||
| 			a3, b3 = regs[3] | ||||
| 			attrname = rawdata[a1:b1] | ||||
| 			if '=' in rawdata[k:k+l]: | ||||
| 				attrvalue = rawdata[a3:b3] | ||||
| 				if attrvalue[:1] == '\'' == attrvalue[-1:] or \ | ||||
| 				   attrvalue[:1] == '"' == attrvalue[-1:]: | ||||
| 					attrvalue = attrvalue[1:-1] | ||||
| 			else: | ||||
| 				attrvalue = '' | ||||
| 			attrs.append(string.lower(attrname), attrvalue) | ||||
| 			k = k + l | ||||
| 		j = j+1 | ||||
| 		try: | ||||
| 			method = getattr(self, 'start_' + tag) | ||||
| 		except AttributeError: | ||||
| 			try: | ||||
| 				method = getattr(self, 'do_' + tag) | ||||
| 			except AttributeError: | ||||
| 				self.unknown_starttag(tag, attrs) | ||||
| 				return j-i | ||||
| 			method(attrs) | ||||
| 			return j-i | ||||
| 		self.stack.append(tag) | ||||
| 		method(attrs) | ||||
| 		return j-i | ||||
| 
 | ||||
| 	# Internal -- parse endtag | ||||
| 	def parse_endtag(self, data): | ||||
| 		# data is the complete end tag text, '</' through '>'.  The | ||||
| 		# tag name is stripped and lowercased, then end_<tag> is | ||||
| 		# called if it exists; otherwise unknown_endtag(tag) is | ||||
| 		# called and the stack is left untouched. | ||||
| 		# Raises RuntimeError if data is not of the form '</...>'. | ||||
| 		if data[:2] <> '</' or data[-1:] <> '>': | ||||
| 			raise RuntimeError, 'unexpected call to parse_endtag' | ||||
| 		tag = string.lower(string.strip(data[2:-1])) | ||||
| 		try: | ||||
| 			method = getattr(self, 'end_' + tag) | ||||
| 		except AttributeError: | ||||
| 			self.unknown_endtag(tag) | ||||
| 			return | ||||
| 		if self.stack and self.stack[-1] == tag: | ||||
| 			del self.stack[-1] | ||||
| 		else: | ||||
| 			# The tag does not match the innermost open tag: report | ||||
| 			# it, then look for the tag deeper in the stack | ||||
| 			print '*** Unbalanced </' + tag + '>' | ||||
| 			print '*** Stack:', self.stack | ||||
| 			found = None | ||||
| 			# The loop does not stop at the first hit, so found ends | ||||
| 			# up as the index of the last (innermost) matching entry | ||||
| 			for i in range(len(self.stack)): | ||||
| 				if self.stack[i] == tag: found = i | ||||
| 			if found <> None: | ||||
| 				# Drop that entry and everything opened after it | ||||
| 				del self.stack[found:] | ||||
| 		# The handler is called even when the tag was unbalanced | ||||
| 		method() | ||||
| 
 | ||||
| 	# Example -- handle character reference, no need to override | ||||
| 	def handle_charref(self, name): | ||||
| 		# name is the text between '&#' and ';'.  If it converts to | ||||
| 		# an integer in the range 0..255, the corresponding character | ||||
| 		# is passed to handle_data(); anything else is reported | ||||
| 		# through unknown_charref(name). | ||||
| 		try: | ||||
| 			n = string.atoi(name) | ||||
| 		except string.atoi_error: | ||||
| 			self.unknown_charref(name) | ||||
| 			return | ||||
| 		if not 0 <= n <= 255: | ||||
| 			self.unknown_charref(name) | ||||
| 			return | ||||
| 		self.handle_data(chr(n)) | ||||
| 
 | ||||
| 	# Definition of entities -- derived classes may override | ||||
| 	entitydefs = \ | ||||
| 		{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''} | ||||
| 
 | ||||
| 	# Example -- handle entity reference, no need to override | ||||
| 	def handle_entityref(self, name): | ||||
| 		# name is the entity name without '&' and the terminator. | ||||
| 		# The table is read from the class, not the instance, so an | ||||
| 		# entitydefs attribute set on an instance is not consulted. | ||||
| 		table = self.__class__.entitydefs | ||||
| 		# The name is lowercased, so the lookup ignores case and | ||||
| 		# unknown_entityref() receives the lowercased name | ||||
| 		name = string.lower(name) | ||||
| 		if table.has_key(name): | ||||
| 			self.handle_data(table[name]) | ||||
| 		else: | ||||
| 			self.unknown_entityref(name) | ||||
| 			return | ||||
| 
 | ||||
| 	# Example -- handle data, should be overridden | ||||
| 	def handle_data(self, data): | ||||
| 		# The base class discards the data. | ||||
| 		pass | ||||
| 
 | ||||
| 	# Example -- handle comment, could be overridden | ||||
| 	def handle_comment(self, data): | ||||
| 		# data is the comment text between '<!--' and the closing | ||||
| 		# '--'.  The base class discards it. | ||||
| 		pass | ||||
| 
 | ||||
| 	# To be overridden -- handlers for unknown objects | ||||
| 	# Called by parse_starttag for a tag with neither a start_<tag> | ||||
| 	# nor a do_<tag> method; attrs is a list of (name, value) pairs | ||||
| 	def unknown_starttag(self, tag, attrs): pass | ||||
| 	# Called by parse_endtag for a tag without an end_<tag> method | ||||
| 	def unknown_endtag(self, tag): pass | ||||
| 	# Called by handle_charref when ref is not a number in 0..255 | ||||
| 	def unknown_charref(self, ref): pass | ||||
| 	# Called by handle_entityref when ref is not in entitydefs | ||||
| 	def unknown_entityref(self, ref): pass | ||||
| 
 | ||||
| 
 | ||||
| class TestSGML(SGMLParser): | ||||
| 	# Parser subclass that prints every event it receives.  It | ||||
| 	# defines no start_/end_/do_ methods, so all tags arrive via | ||||
| 	# unknown_starttag() and unknown_endtag(). | ||||
| 
 | ||||
| 	def handle_data(self, data): | ||||
| 		# Print the repr of the data; if the repr is longer than 72 | ||||
| 		# characters, show only its first and last 35 | ||||
| 		r = repr(data) | ||||
| 		if len(r) > 72: | ||||
| 			r = r[:35] + '...' + r[-35:] | ||||
| 		print 'data:', r | ||||
| 
 | ||||
| 	def handle_comment(self, data): | ||||
| 		# Same as handle_data, with a limit of 68 and 32 characters | ||||
| 		# kept at each end | ||||
| 		r = repr(data) | ||||
| 		if len(r) > 68: | ||||
| 			r = r[:32] + '...' + r[-32:] | ||||
| 		print 'comment:', r | ||||
| 
 | ||||
| 	def unknown_starttag(self, tag, attrs): | ||||
| 		# Print the tag and its attributes on one line (the trailing | ||||
| 		# commas suppress the newline until the final print) | ||||
| 		print 'start tag: <' + tag, | ||||
| 		for name, value in attrs: | ||||
| 			print name + '=' + '"' + value + '"', | ||||
| 		print '>' | ||||
| 
 | ||||
| 	def unknown_endtag(self, tag): | ||||
| 		print 'end tag: </' + tag + '>' | ||||
| 
 | ||||
| 	def unknown_entityref(self, ref): | ||||
| 		print '*** unknown entity ref: &' + ref + ';' | ||||
| 
 | ||||
| 	def unknown_charref(self, ref): | ||||
| 		print '*** unknown char ref: &#' + ref + ';' | ||||
| 
 | ||||
| 
 | ||||
| def test(): | ||||
| 	file = 'test.html' | ||||
| 	f = open(file, 'r') | ||||
| 	x = TestSGML() | ||||
| 	while 1: | ||||
| 		line = f.readline() | ||||
| 		if not line: | ||||
| 			x.close() | ||||
| 			break | ||||
| 		x.feed(line) | ||||
| 
 | ||||
| 
 | ||||
| #test() | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Guido van Rossum
						Guido van Rossum