| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  | """Shared support for scanning document type declarations in HTML and XHTML.""" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import re | 
					
						
							|  |  |  | 
 | 
					
						
# Matches a declaration name token (a letter followed by letters, digits,
# '-', '_' or '.') together with any whitespace that follows it.
_declname_match = re.compile(r'[a-zA-Z][-_.a-zA-Z0-9]*\s*').match
# Matches one single- or double-quoted string literal together with any
# whitespace that follows it.
_declstringlit_match = re.compile(r'(\'[^\']*\'|"[^"]*")\s*').match
# Matches the end of a comment: "--", optional whitespace, then ">".
_commentclose = re.compile(r'--\s*>')
# Matches the end of a standard marked section: "]", "]" and ">" with
# optional whitespace between them.
_markedsectionclose = re.compile(r']\s*]\s*>')

# An analysis of the MS-Word extensions is available at
# http://www.planetpublish.com/xmlarena/xap/Thursday/WordtoXML.pdf

# Matches the end of an MS-Office style marked section: "]", optional
# whitespace, then ">".
_msmarkedsectionclose = re.compile(r']\s*>')

# The patterns above are already compiled; drop the module reference so
# that "re" is not left behind as a name in this module's namespace.
del re
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class ParserBase: | 
					
						
							|  |  |  |     """Parser base class which provides some common support methods used
 | 
					
						
							|  |  |  |     by the SGML/HTML and XHTML parsers."""
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-10-26 18:02:28 +00:00
										 |  |  |     def __init__(self): | 
					
						
							|  |  |  |         if self.__class__ is ParserBase: | 
					
						
							|  |  |  |             raise RuntimeError( | 
					
						
							|  |  |  |                 "markupbase.ParserBase must be subclassed") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def error(self, message): | 
					
						
							|  |  |  |         raise NotImplementedError( | 
					
						
							|  |  |  |             "subclasses of ParserBase must override error()") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |     def reset(self): | 
					
						
							|  |  |  |         self.lineno = 1 | 
					
						
							|  |  |  |         self.offset = 0 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def getpos(self): | 
					
						
							|  |  |  |         """Return current line number and offset.""" | 
					
						
							|  |  |  |         return self.lineno, self.offset | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- update line number and offset.  This should be | 
					
						
							|  |  |  |     # called for each piece of data exactly once, in order -- in other | 
					
						
							|  |  |  |     # words the concatenation of all the input strings to this | 
					
						
							|  |  |  |     # function should be exactly the entire input. | 
					
						
							|  |  |  |     def updatepos(self, i, j): | 
					
						
							|  |  |  |         if i >= j: | 
					
						
							|  |  |  |             return j | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							| 
									
										
										
										
											2002-05-31 14:13:04 +00:00
										 |  |  |         nlines = rawdata.count("\n", i, j) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         if nlines: | 
					
						
							|  |  |  |             self.lineno = self.lineno + nlines | 
					
						
							| 
									
										
										
										
											2002-05-31 14:13:04 +00:00
										 |  |  |             pos = rawdata.rindex("\n", i, j) # Should not fail | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |             self.offset = j-(pos+1) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.offset = self.offset + j-i | 
					
						
							|  |  |  |         return j | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     _decl_otherchars = '' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- parse declaration (for use by subclasses). | 
					
						
							|  |  |  |     def parse_declaration(self, i): | 
					
						
							|  |  |  |         # This is some sort of declaration; in "HTML as | 
					
						
							|  |  |  |         # deployed," this should only be the document type | 
					
						
							|  |  |  |         # declaration ("<!DOCTYPE html...>"). | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  |         # ISO 8879:1986, however, has more complex | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |         # declaration syntax for elements in <!...>, including: | 
					
						
							|  |  |  |         # --comment-- | 
					
						
							|  |  |  |         # [marked section] | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  |         # name in the following list: ENTITY, DOCTYPE, ELEMENT, | 
					
						
							|  |  |  |         # ATTLIST, NOTATION, SHORTREF, USEMAP, | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |         # LINKTYPE, LINK, IDLINK, USELINK, SYSTEM | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         j = i + 2 | 
					
						
							|  |  |  |         assert rawdata[i:j] == "<!", "unexpected call to parse_declaration" | 
					
						
							|  |  |  |         if rawdata[j:j+1] in ("-", ""): | 
					
						
							|  |  |  |             # Start of comment followed by buffer boundary, | 
					
						
							|  |  |  |             # or just a buffer boundary. | 
					
						
							|  |  |  |             return -1 | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |         # A simple, practical version could look like: ((name|stringlit) S*) + '>' | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         n = len(rawdata) | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |         if rawdata[j:j+1] == '--': #comment | 
					
						
							|  |  |  |             # Locate --.*-- as the body of the comment | 
					
						
							|  |  |  |             return self.parse_comment(i) | 
					
						
							|  |  |  |         elif rawdata[j] == '[': #marked section | 
					
						
							|  |  |  |             # Locate [statusWord [...arbitrary SGML...]] as the body of the marked section | 
					
						
							|  |  |  |             # Where statusWord is one of TEMP, CDATA, IGNORE, INCLUDE, RCDATA | 
					
						
							|  |  |  |             # Note that this is extended by Microsoft Office "Save as Web" function | 
					
						
							|  |  |  |             # to include [if...] and [endif]. | 
					
						
							|  |  |  |             return self.parse_marked_section(i) | 
					
						
							|  |  |  |         else: #all other declaration elements | 
					
						
							|  |  |  |             decltype, j = self._scan_name(j, i) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         if j < 0: | 
					
						
							|  |  |  |             return j | 
					
						
							|  |  |  |         if decltype == "doctype": | 
					
						
							|  |  |  |             self._decl_otherchars = '' | 
					
						
							|  |  |  |         while j < n: | 
					
						
							|  |  |  |             c = rawdata[j] | 
					
						
							|  |  |  |             if c == ">": | 
					
						
							|  |  |  |                 # end of declaration syntax | 
					
						
							|  |  |  |                 data = rawdata[i+2:j] | 
					
						
							|  |  |  |                 if decltype == "doctype": | 
					
						
							|  |  |  |                     self.handle_decl(data) | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     self.unknown_decl(data) | 
					
						
							|  |  |  |                 return j + 1 | 
					
						
							|  |  |  |             if c in "\"'": | 
					
						
							|  |  |  |                 m = _declstringlit_match(rawdata, j) | 
					
						
							|  |  |  |                 if not m: | 
					
						
							|  |  |  |                     return -1 # incomplete | 
					
						
							|  |  |  |                 j = m.end() | 
					
						
							|  |  |  |             elif c in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ": | 
					
						
							|  |  |  |                 name, j = self._scan_name(j, i) | 
					
						
							|  |  |  |             elif c in self._decl_otherchars: | 
					
						
							|  |  |  |                 j = j + 1 | 
					
						
							|  |  |  |             elif c == "[": | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |                 # this could be handled in a separate doctype parser | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 if decltype == "doctype": | 
					
						
							|  |  |  |                     j = self._parse_doctype_subset(j + 1, i) | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |                 elif decltype in ("attlist", "linktype", "link", "element"): | 
					
						
							|  |  |  |                     # must tolerate []'d groups in a content model in an element declaration | 
					
						
							|  |  |  |                     # also in data attribute specifications of attlist declaration | 
					
						
							|  |  |  |                     # also link type declaration subsets in linktype declarations | 
					
						
							|  |  |  |                     # also link attribute specification lists in link declarations | 
					
						
							|  |  |  |                     self.error("unsupported '[' char in %s declaration" % decltype) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     self.error("unexpected '[' char in declaration") | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 self.error( | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |                     "unexpected %r char in declaration" % rawdata[j]) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |             if j < 0: | 
					
						
							|  |  |  |                 return j | 
					
						
							|  |  |  |         return -1 # incomplete | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |     # Internal -- parse a marked section | 
					
						
							|  |  |  |     # Override this to handle MS-word extension syntax <![if word]>content<![endif]> | 
					
						
							|  |  |  |     def parse_marked_section( self, i, report=1 ): | 
					
						
							|  |  |  |         rawdata= self.rawdata | 
					
						
							|  |  |  |         assert rawdata[i:i+3] == '<![', "unexpected call to parse_marked_section()" | 
					
						
							|  |  |  |         sectName, j = self._scan_name( i+3, i ) | 
					
						
							|  |  |  |         if j < 0: | 
					
						
							|  |  |  |             return j | 
					
						
							|  |  |  |         if sectName in ("temp", "cdata", "ignore", "include", "rcdata"): | 
					
						
							|  |  |  |             # look for standard ]]> ending | 
					
						
							|  |  |  |             match= _markedsectionclose.search(rawdata, i+3) | 
					
						
							|  |  |  |         elif sectName in ("if", "else", "endif"): | 
					
						
							|  |  |  |             # look for MS Office ]> ending | 
					
						
							|  |  |  |             match= _msmarkedsectionclose.search(rawdata, i+3) | 
					
						
							|  |  |  |         else: | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |             self.error('unknown status keyword %r in marked section' % rawdata[i+3:j]) | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |         if not match: | 
					
						
							|  |  |  |             return -1 | 
					
						
							|  |  |  |         if report: | 
					
						
							|  |  |  |             j = match.start(0) | 
					
						
							|  |  |  |             self.unknown_decl(rawdata[i+3: j]) | 
					
						
							|  |  |  |         return match.end(0) | 
					
						
							| 
									
										
										
										
											2003-04-24 16:02:54 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2003-03-30 14:25:40 +00:00
										 |  |  |     # Internal -- parse comment, return length or -1 if not terminated | 
					
						
							|  |  |  |     def parse_comment(self, i, report=1): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         if rawdata[i:i+4] != '<!--': | 
					
						
							|  |  |  |             self.error('unexpected call to parse_comment()') | 
					
						
							|  |  |  |         match = _commentclose.search(rawdata, i+4) | 
					
						
							|  |  |  |         if not match: | 
					
						
							|  |  |  |             return -1 | 
					
						
							|  |  |  |         if report: | 
					
						
							|  |  |  |             j = match.start(0) | 
					
						
							|  |  |  |             self.handle_comment(rawdata[i+4: j]) | 
					
						
							|  |  |  |         return match.end(0) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |     # Internal -- scan past the internal subset in a <!DOCTYPE declaration, | 
					
						
							|  |  |  |     # returning the index just past any whitespace following the trailing ']'. | 
					
						
							|  |  |  |     def _parse_doctype_subset(self, i, declstartpos): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         n = len(rawdata) | 
					
						
							|  |  |  |         j = i | 
					
						
							|  |  |  |         while j < n: | 
					
						
							|  |  |  |             c = rawdata[j] | 
					
						
							|  |  |  |             if c == "<": | 
					
						
							|  |  |  |                 s = rawdata[j:j+2] | 
					
						
							|  |  |  |                 if s == "<": | 
					
						
							|  |  |  |                     # end of buffer; incomplete | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 if s != "<!": | 
					
						
							|  |  |  |                     self.updatepos(declstartpos, j + 1) | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |                     self.error("unexpected char in internal subset (in %r)" % s) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 if (j + 2) == n: | 
					
						
							|  |  |  |                     # end of buffer; incomplete | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 if (j + 4) > n: | 
					
						
							|  |  |  |                     # end of buffer; incomplete | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 if rawdata[j:j+4] == "<!--": | 
					
						
							|  |  |  |                     j = self.parse_comment(j, report=0) | 
					
						
							|  |  |  |                     if j < 0: | 
					
						
							|  |  |  |                         return j | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 name, j = self._scan_name(j + 2, declstartpos) | 
					
						
							|  |  |  |                 if j == -1: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 if name not in ("attlist", "element", "entity", "notation"): | 
					
						
							|  |  |  |                     self.updatepos(declstartpos, j + 2) | 
					
						
							|  |  |  |                     self.error( | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |                         "unknown declaration %r in internal subset" % name) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 # handle the individual names | 
					
						
							|  |  |  |                 meth = getattr(self, "_parse_doctype_" + name) | 
					
						
							|  |  |  |                 j = meth(j, declstartpos) | 
					
						
							|  |  |  |                 if j < 0: | 
					
						
							|  |  |  |                     return j | 
					
						
							|  |  |  |             elif c == "%": | 
					
						
							|  |  |  |                 # parameter entity reference | 
					
						
							|  |  |  |                 if (j + 1) == n: | 
					
						
							|  |  |  |                     # end of buffer; incomplete | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 s, j = self._scan_name(j + 1, declstartpos) | 
					
						
							|  |  |  |                 if j < 0: | 
					
						
							|  |  |  |                     return j | 
					
						
							|  |  |  |                 if rawdata[j] == ";": | 
					
						
							|  |  |  |                     j = j + 1 | 
					
						
							|  |  |  |             elif c == "]": | 
					
						
							|  |  |  |                 j = j + 1 | 
					
						
							| 
									
										
										
											
												Remove uses of the string and types modules:
x in string.whitespace => x.isspace()
type(x) in types.StringTypes => isinstance(x, basestring)
isinstance(x, types.StringTypes) => isinstance(x, basestring)
type(x) is types.StringType => isinstance(x, str)
type(x) == types.StringType => isinstance(x, str)
string.split(x, ...) => x.split(...)
string.join(x, y) => y.join(x)
string.zfill(x, ...) => x.zfill(...)
string.count(x, ...) => x.count(...)
hasattr(types, "UnicodeType") => try: unicode except NameError:
type(x) != types.TupleTuple => not isinstance(x, tuple)
isinstance(x, types.TupleType) => isinstance(x, tuple)
type(x) is types.IntType => isinstance(x, int)
Do not mention the string module in the rlcompleter docstring.
This partially applies SF patch http://www.python.org/sf/562373
(with basestring instead of string). (It excludes the changes to
unittest.py and does not change the os.stat stuff.)
											
										 
											2002-06-03 15:58:32 +00:00
										 |  |  |                 while j < n and rawdata[j].isspace(): | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                     j = j + 1 | 
					
						
							|  |  |  |                 if j < n: | 
					
						
							|  |  |  |                     if rawdata[j] == ">": | 
					
						
							|  |  |  |                         return j | 
					
						
							|  |  |  |                     self.updatepos(declstartpos, j) | 
					
						
							|  |  |  |                     self.error("unexpected char after internal subset") | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							| 
									
										
										
											
												Remove uses of the string and types modules:
x in string.whitespace => x.isspace()
type(x) in types.StringTypes => isinstance(x, basestring)
isinstance(x, types.StringTypes) => isinstance(x, basestring)
type(x) is types.StringType => isinstance(x, str)
type(x) == types.StringType => isinstance(x, str)
string.split(x, ...) => x.split(...)
string.join(x, y) => y.join(x)
string.zfill(x, ...) => x.zfill(...)
string.count(x, ...) => x.count(...)
hasattr(types, "UnicodeType") => try: unicode except NameError:
type(x) != types.TupleTuple => not isinstance(x, tuple)
isinstance(x, types.TupleType) => isinstance(x, tuple)
type(x) is types.IntType => isinstance(x, int)
Do not mention the string module in the rlcompleter docstring.
This partially applies SF patch http://www.python.org/sf/562373
(with basestring instead of string). (It excludes the changes to
unittest.py and does not change the os.stat stuff.)
											
										 
											2002-06-03 15:58:32 +00:00
										 |  |  |             elif c.isspace(): | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 j = j + 1 | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 self.updatepos(declstartpos, j) | 
					
						
							| 
									
										
										
										
											2004-02-12 17:35:32 +00:00
										 |  |  |                 self.error("unexpected char %r in internal subset" % c) | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         # end of buffer reached | 
					
						
							|  |  |  |         return -1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- scan past <!ELEMENT declarations | 
					
						
							|  |  |  |     def _parse_doctype_element(self, i, declstartpos): | 
					
						
							|  |  |  |         name, j = self._scan_name(i, declstartpos) | 
					
						
							|  |  |  |         if j == -1: | 
					
						
							|  |  |  |             return -1 | 
					
						
							|  |  |  |         # style content model; just skip until '>' | 
					
						
							| 
									
										
										
										
											2001-10-26 18:02:28 +00:00
										 |  |  |         rawdata = self.rawdata | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         if '>' in rawdata[j:]: | 
					
						
							| 
									
										
										
										
											2002-05-31 14:13:04 +00:00
										 |  |  |             return rawdata.find(">", j) + 1 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         return -1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- scan past <!ATTLIST declarations | 
					
						
							|  |  |  |     def _parse_doctype_attlist(self, i, declstartpos): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         name, j = self._scan_name(i, declstartpos) | 
					
						
							|  |  |  |         c = rawdata[j:j+1] | 
					
						
							|  |  |  |         if c == "": | 
					
						
							|  |  |  |             return -1 | 
					
						
							|  |  |  |         if c == ">": | 
					
						
							|  |  |  |             return j + 1 | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             # scan a series of attribute descriptions; simplified: | 
					
						
							|  |  |  |             #   name type [value] [#constraint] | 
					
						
							|  |  |  |             name, j = self._scan_name(j, declstartpos) | 
					
						
							|  |  |  |             if j < 0: | 
					
						
							|  |  |  |                 return j | 
					
						
							|  |  |  |             c = rawdata[j:j+1] | 
					
						
							|  |  |  |             if c == "": | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             if c == "(": | 
					
						
							|  |  |  |                 # an enumerated type; look for ')' | 
					
						
							|  |  |  |                 if ")" in rawdata[j:]: | 
					
						
							| 
									
										
										
										
											2002-05-31 14:13:04 +00:00
										 |  |  |                     j = rawdata.find(")", j) + 1 | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                 else: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							| 
									
										
										
											
												Remove uses of the string and types modules:
x in string.whitespace => x.isspace()
type(x) in types.StringTypes => isinstance(x, basestring)
isinstance(x, types.StringTypes) => isinstance(x, basestring)
type(x) is types.StringType => isinstance(x, str)
type(x) == types.StringType => isinstance(x, str)
string.split(x, ...) => x.split(...)
string.join(x, y) => y.join(x)
string.zfill(x, ...) => x.zfill(...)
string.count(x, ...) => x.count(...)
hasattr(types, "UnicodeType") => try: unicode except NameError:
type(x) != types.TupleTuple => not isinstance(x, tuple)
isinstance(x, types.TupleType) => isinstance(x, tuple)
type(x) is types.IntType => isinstance(x, int)
Do not mention the string module in the rlcompleter docstring.
This partially applies SF patch http://www.python.org/sf/562373
(with basestring instead of string). (It excludes the changes to
unittest.py and does not change the os.stat stuff.)
											
										 
											2002-06-03 15:58:32 +00:00
										 |  |  |                 while rawdata[j:j+1].isspace(): | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                     j = j + 1 | 
					
						
							|  |  |  |                 if not rawdata[j:]: | 
					
						
							|  |  |  |                     # end of buffer, incomplete | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 name, j = self._scan_name(j, declstartpos) | 
					
						
							|  |  |  |             c = rawdata[j:j+1] | 
					
						
							|  |  |  |             if not c: | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             if c in "'\"": | 
					
						
							|  |  |  |                 m = _declstringlit_match(rawdata, j) | 
					
						
							|  |  |  |                 if m: | 
					
						
							|  |  |  |                     j = m.end() | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 c = rawdata[j:j+1] | 
					
						
							|  |  |  |                 if not c: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |             if c == "#": | 
					
						
							|  |  |  |                 if rawdata[j:] == "#": | 
					
						
							|  |  |  |                     # end of buffer | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 name, j = self._scan_name(j + 1, declstartpos) | 
					
						
							|  |  |  |                 if j < 0: | 
					
						
							|  |  |  |                     return j | 
					
						
							|  |  |  |                 c = rawdata[j:j+1] | 
					
						
							|  |  |  |                 if not c: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |             if c == '>': | 
					
						
							|  |  |  |                 # all done | 
					
						
							|  |  |  |                 return j + 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- scan past <!NOTATION declarations | 
					
						
							|  |  |  |     def _parse_doctype_notation(self, i, declstartpos): | 
					
						
							|  |  |  |         name, j = self._scan_name(i, declstartpos) | 
					
						
							|  |  |  |         if j < 0: | 
					
						
							|  |  |  |             return j | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             c = rawdata[j:j+1] | 
					
						
							|  |  |  |             if not c: | 
					
						
							|  |  |  |                 # end of buffer; incomplete | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             if c == '>': | 
					
						
							|  |  |  |                 return j + 1 | 
					
						
							|  |  |  |             if c in "'\"": | 
					
						
							|  |  |  |                 m = _declstringlit_match(rawdata, j) | 
					
						
							|  |  |  |                 if not m: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							|  |  |  |                 j = m.end() | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 name, j = self._scan_name(j, declstartpos) | 
					
						
							|  |  |  |                 if j < 0: | 
					
						
							|  |  |  |                     return j | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- scan past <!ENTITY declarations | 
					
						
							|  |  |  |     def _parse_doctype_entity(self, i, declstartpos): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         if rawdata[i:i+1] == "%": | 
					
						
							|  |  |  |             j = i + 1 | 
					
						
							|  |  |  |             while 1: | 
					
						
							|  |  |  |                 c = rawdata[j:j+1] | 
					
						
							|  |  |  |                 if not c: | 
					
						
							|  |  |  |                     return -1 | 
					
						
							| 
									
										
										
											
												Remove uses of the string and types modules:
x in string.whitespace => x.isspace()
type(x) in types.StringTypes => isinstance(x, basestring)
isinstance(x, types.StringTypes) => isinstance(x, basestring)
type(x) is types.StringType => isinstance(x, str)
type(x) == types.StringType => isinstance(x, str)
string.split(x, ...) => x.split(...)
string.join(x, y) => y.join(x)
string.zfill(x, ...) => x.zfill(...)
string.count(x, ...) => x.count(...)
hasattr(types, "UnicodeType") => try: unicode except NameError:
type(x) != types.TupleTuple => not isinstance(x, tuple)
isinstance(x, types.TupleType) => isinstance(x, tuple)
type(x) is types.IntType => isinstance(x, int)
Do not mention the string module in the rlcompleter docstring.
This partially applies SF patch http://www.python.org/sf/562373
(with basestring instead of string). (It excludes the changes to
unittest.py and does not change the os.stat stuff.)
											
										 
											2002-06-03 15:58:32 +00:00
										 |  |  |                 if c.isspace(): | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |                     j = j + 1 | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             j = i | 
					
						
							|  |  |  |         name, j = self._scan_name(j, declstartpos) | 
					
						
							|  |  |  |         if j < 0: | 
					
						
							|  |  |  |             return j | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             c = self.rawdata[j:j+1] | 
					
						
							|  |  |  |             if not c: | 
					
						
							|  |  |  |                 return -1 | 
					
						
							|  |  |  |             if c in "'\"": | 
					
						
							|  |  |  |                 m = _declstringlit_match(rawdata, j) | 
					
						
							|  |  |  |                 if m: | 
					
						
							|  |  |  |                     j = m.end() | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     return -1    # incomplete | 
					
						
							|  |  |  |             elif c == ">": | 
					
						
							|  |  |  |                 return j + 1 | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 name, j = self._scan_name(j, declstartpos) | 
					
						
							|  |  |  |                 if j < 0: | 
					
						
							|  |  |  |                     return j | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Internal -- scan a name token and the new position and the token, or | 
					
						
							|  |  |  |     # return -1 if we've reached the end of the buffer. | 
					
						
							|  |  |  |     def _scan_name(self, i, declstartpos): | 
					
						
							|  |  |  |         rawdata = self.rawdata | 
					
						
							|  |  |  |         n = len(rawdata) | 
					
						
							|  |  |  |         if i == n: | 
					
						
							|  |  |  |             return None, -1 | 
					
						
							|  |  |  |         m = _declname_match(rawdata, i) | 
					
						
							|  |  |  |         if m: | 
					
						
							|  |  |  |             s = m.group() | 
					
						
							|  |  |  |             name = s.strip() | 
					
						
							|  |  |  |             if (i + len(s)) == n: | 
					
						
							|  |  |  |                 return None, -1  # end of buffer | 
					
						
							| 
									
										
										
										
											2002-05-31 14:13:04 +00:00
										 |  |  |             return name.lower(), m.end() | 
					
						
							| 
									
										
										
										
											2001-09-24 20:01:28 +00:00
										 |  |  |         else: | 
					
						
							|  |  |  |             self.updatepos(declstartpos, i) | 
					
						
							| 
									
										
										
										
											2004-07-10 21:49:45 +00:00
										 |  |  |             self.error("expected name token at %r" | 
					
						
							|  |  |  |                        % rawdata[declstartpos:declstartpos+20]) | 
					
						
							| 
									
										
										
										
											2001-10-26 18:02:28 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |     # To be overridden -- handlers for unknown objects | 
					
						
							|  |  |  |     def unknown_decl(self, data): | 
					
						
							|  |  |  |         pass |