mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	SF bug #1504333: sgmllib should allow angle brackets in quoted values
(modified patch by Sam Ruby; changed to use separate REs for start and end tags to reduce matching cost for end tags; extended tests; updated to avoid breaking previous changes to support IPv6 addresses in unquoted attribute values)
This commit is contained in:
		
							parent
							
								
									960a3f88e5
								
							
						
					
					
						commit
						a136210a9f
					
				
					 2 changed files with 25 additions and 9 deletions
				
			
		|  | @ -29,7 +29,12 @@ | |||
| shorttagopen = re.compile('<[a-zA-Z][-.a-zA-Z0-9]*/') | ||||
| shorttag = re.compile('<([a-zA-Z][-.a-zA-Z0-9]*)/([^/]*)/') | ||||
| piclose = re.compile('>') | ||||
| endbracket = re.compile('[<>]') | ||||
| starttag = re.compile(r'<[a-zA-Z][-_.:a-zA-Z0-9]*\s*(' | ||||
|         r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' | ||||
|         r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]' | ||||
|         r'[][\-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~\'"@]*(?=[\s>/<])))?' | ||||
|     r')*\s*/?\s*(?=[<>])') | ||||
| endtag = re.compile(r'</?[a-zA-Z][-_.:a-zA-Z0-9]*\s*/?\s*(?=[<>])') | ||||
| tagfind = re.compile('[a-zA-Z][-_.a-zA-Z0-9]*') | ||||
| attrfind = re.compile( | ||||
|     r'\s*([a-zA-Z_][-:.a-zA-Z_0-9]*)(\s*=\s*' | ||||
|  | @ -249,14 +254,10 @@ def parse_starttag(self, i): | |||
|             self.finish_shorttag(tag, data) | ||||
|             self.__starttag_text = rawdata[start_pos:match.end(1) + 1] | ||||
|             return k | ||||
|         # XXX The following should skip matching quotes (' or ") | ||||
|         # As a shortcut way to exit, this isn't so bad, but shouldn't | ||||
|         # be used to locate the actual end of the start tag since the | ||||
|         # < or > characters may be embedded in an attribute value. | ||||
|         match = endbracket.search(rawdata, i+1) | ||||
|         match = starttag.match(rawdata, i) | ||||
|         if not match: | ||||
|             return -1 | ||||
|         j = match.start(0) | ||||
|         j = match.end(0) | ||||
|         # Now parse the data between i+1 and j into a tag and attrs | ||||
|         attrs = [] | ||||
|         if rawdata[i:i+2] == '<>': | ||||
|  | @ -305,10 +306,10 @@ def _convert_ref(self, match): | |||
|     # Internal -- parse endtag | ||||
|     def parse_endtag(self, i): | ||||
|         rawdata = self.rawdata | ||||
|         match = endbracket.search(rawdata, i+1) | ||||
|         match = endtag.match(rawdata, i) | ||||
|         if not match: | ||||
|             return -1 | ||||
|         j = match.start(0) | ||||
|         j = match.end(0) | ||||
|         tag = rawdata[i+2:j].strip().lower() | ||||
|         if rawdata[j] == '>': | ||||
|             j = j+1 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fred Drake
						Fred Drake