| 
									
										
										
										
											2005-11-18 22:05:48 +00:00
										 |  |  | """
 | 
					
						
							|  |  |  | HyperParser | 
					
						
							|  |  |  | =========== | 
					
						
							|  |  |  | This module defines the HyperParser class, which provides advanced parsing | 
					
						
							|  |  |  | abilities for the ParenMatch and other extensions. | 
					
						
							|  |  |  | The HyperParser uses PyParse. PyParse is intended mostly to give information | 
					
						
							|  |  |  | on the proper indentation of code. HyperParser gives some information on the | 
					
						
							|  |  |  | structure of code, used by extensions to help the user. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import string | 
					
						
							|  |  |  | import keyword | 
					
						
							| 
									
										
										
										
											2010-04-02 07:24:52 +00:00
										 |  |  | from idlelib import PyParse | 
					
						
							| 
									
										
										
										
											2005-11-18 22:05:48 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | class HyperParser: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__(self, editwin, index): | 
					
						
							|  |  |  |         """Initialize the HyperParser to analyze the surroundings of the given
 | 
					
						
							|  |  |  |         index. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.editwin = editwin | 
					
						
							|  |  |  |         self.text = text = editwin.text | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         def index2line(index): | 
					
						
							|  |  |  |             return int(float(index)) | 
					
						
							|  |  |  |         lno = index2line(text.index(index)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         if not editwin.context_use_ps1: | 
					
						
							|  |  |  |             for context in editwin.num_context_lines: | 
					
						
							|  |  |  |                 startat = max(lno - context, 1) | 
					
						
							| 
									
										
										
										
											2010-04-02 08:30:21 +00:00
										 |  |  |                 startatindex = repr(startat) + ".0" | 
					
						
							| 
									
										
										
										
											2005-11-18 22:05:48 +00:00
										 |  |  |                 stopatindex = "%d.end" % lno | 
					
						
							|  |  |  |                 # We add the newline because PyParse requires a newline at end. | 
					
						
							|  |  |  |                 # We add a space so that index won't be at end of line, so that | 
					
						
							|  |  |  |                 # its status will be the same as the char before it, if should. | 
					
						
							|  |  |  |                 parser.set_str(text.get(startatindex, stopatindex)+' \n') | 
					
						
							|  |  |  |                 bod = parser.find_good_parse_start( | 
					
						
							|  |  |  |                           editwin._build_char_in_string_func(startatindex)) | 
					
						
							|  |  |  |                 if bod is not None or startat == 1: | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  |             parser.set_lo(bod or 0) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             r = text.tag_prevrange("console", index) | 
					
						
							|  |  |  |             if r: | 
					
						
							|  |  |  |                 startatindex = r[1] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 startatindex = "1.0" | 
					
						
							|  |  |  |             stopatindex = "%d.end" % lno | 
					
						
							|  |  |  |             # We add the newline because PyParse requires a newline at end. | 
					
						
							|  |  |  |             # We add a space so that index won't be at end of line, so that | 
					
						
							|  |  |  |             # its status will be the same as the char before it, if should. | 
					
						
							|  |  |  |             parser.set_str(text.get(startatindex, stopatindex)+' \n') | 
					
						
							|  |  |  |             parser.set_lo(0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # We want what the parser has, except for the last newline and space. | 
					
						
							|  |  |  |         self.rawtext = parser.str[:-2] | 
					
						
							|  |  |  |         # As far as I can see, parser.str preserves the statement we are in, | 
					
						
							|  |  |  |         # so that stopatindex can be used to synchronize the string with the | 
					
						
							|  |  |  |         # text box indices. | 
					
						
							|  |  |  |         self.stopatindex = stopatindex | 
					
						
							|  |  |  |         self.bracketing = parser.get_last_stmt_bracketing() | 
					
						
							|  |  |  |         # find which pairs of bracketing are openers. These always correspond | 
					
						
							|  |  |  |         # to a character of rawtext. | 
					
						
							|  |  |  |         self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1] | 
					
						
							|  |  |  |                          for i in range(len(self.bracketing))] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         self.set_index(index) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def set_index(self, index): | 
					
						
							|  |  |  |         """Set the index to which the functions relate. Note that it must be
 | 
					
						
							|  |  |  |         in the same statement. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         indexinrawtext = \ | 
					
						
							|  |  |  |             len(self.rawtext) - len(self.text.get(index, self.stopatindex)) | 
					
						
							|  |  |  |         if indexinrawtext < 0: | 
					
						
							|  |  |  |             raise ValueError("The index given is before the analyzed statement") | 
					
						
							|  |  |  |         self.indexinrawtext = indexinrawtext | 
					
						
							|  |  |  |         # find the rightmost bracket to which index belongs | 
					
						
							|  |  |  |         self.indexbracket = 0 | 
					
						
							|  |  |  |         while self.indexbracket < len(self.bracketing)-1 and \ | 
					
						
							|  |  |  |               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext: | 
					
						
							|  |  |  |             self.indexbracket += 1 | 
					
						
							|  |  |  |         if self.indexbracket < len(self.bracketing)-1 and \ | 
					
						
							|  |  |  |            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \ | 
					
						
							|  |  |  |            not self.isopener[self.indexbracket+1]: | 
					
						
							|  |  |  |             self.indexbracket += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def is_in_string(self): | 
					
						
							|  |  |  |         """Is the index given to the HyperParser is in a string?""" | 
					
						
							|  |  |  |         # The bracket to which we belong should be an opener. | 
					
						
							|  |  |  |         # If it's an opener, it has to have a character. | 
					
						
							|  |  |  |         return self.isopener[self.indexbracket] and \ | 
					
						
							|  |  |  |                self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def is_in_code(self): | 
					
						
							|  |  |  |         """Is the index given to the HyperParser is in a normal code?""" | 
					
						
							|  |  |  |         return not self.isopener[self.indexbracket] or \ | 
					
						
							|  |  |  |                self.rawtext[self.bracketing[self.indexbracket][0]] not in \ | 
					
						
							|  |  |  |                                                                 ('#', '"', "'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_surrounding_brackets(self, openers='([{', mustclose=False): | 
					
						
							|  |  |  |         """If the index given to the HyperParser is surrounded by a bracket
 | 
					
						
							|  |  |  |         defined in openers (or at least has one before it), return the | 
					
						
							|  |  |  |         indices of the opening bracket and the closing bracket (or the | 
					
						
							|  |  |  |         end of line, whichever comes first). | 
					
						
							|  |  |  |         If it is not surrounded by brackets, or the end of line comes before | 
					
						
							|  |  |  |         the closing bracket and mustclose is True, returns None. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         bracketinglevel = self.bracketing[self.indexbracket][1] | 
					
						
							|  |  |  |         before = self.indexbracket | 
					
						
							|  |  |  |         while not self.isopener[before] or \ | 
					
						
							|  |  |  |               self.rawtext[self.bracketing[before][0]] not in openers or \ | 
					
						
							|  |  |  |               self.bracketing[before][1] > bracketinglevel: | 
					
						
							|  |  |  |             before -= 1 | 
					
						
							|  |  |  |             if before < 0: | 
					
						
							|  |  |  |                 return None | 
					
						
							|  |  |  |             bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) | 
					
						
							|  |  |  |         after = self.indexbracket + 1 | 
					
						
							|  |  |  |         while after < len(self.bracketing) and \ | 
					
						
							|  |  |  |               self.bracketing[after][1] >= bracketinglevel: | 
					
						
							|  |  |  |             after += 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         beforeindex = self.text.index("%s-%dc" % | 
					
						
							|  |  |  |             (self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) | 
					
						
							|  |  |  |         if after >= len(self.bracketing) or \ | 
					
						
							|  |  |  |            self.bracketing[after][0] > len(self.rawtext): | 
					
						
							|  |  |  |             if mustclose: | 
					
						
							|  |  |  |                 return None | 
					
						
							|  |  |  |             afterindex = self.stopatindex | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             # We are after a real char, so it is a ')' and we give the index | 
					
						
							|  |  |  |             # before it. | 
					
						
							|  |  |  |             afterindex = self.text.index("%s-%dc" % | 
					
						
							|  |  |  |                 (self.stopatindex, | 
					
						
							|  |  |  |                  len(self.rawtext)-(self.bracketing[after][0]-1))) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return beforeindex, afterindex | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # This string includes all chars that may be in a white space | 
					
						
							|  |  |  |     _whitespace_chars = " \t\n\\" | 
					
						
							|  |  |  |     # This string includes all chars that may be in an identifier | 
					
						
							|  |  |  |     _id_chars = string.ascii_letters + string.digits + "_" | 
					
						
							|  |  |  |     # This string includes all chars that may be the first char of an identifier | 
					
						
							|  |  |  |     _id_first_chars = string.ascii_letters + "_" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Given a string and pos, return the number of chars in the identifier | 
					
						
							|  |  |  |     # which ends at pos, or 0 if there is no such one. Saved words are not | 
					
						
							|  |  |  |     # identifiers. | 
					
						
							|  |  |  |     def _eat_identifier(self, str, limit, pos): | 
					
						
							|  |  |  |         i = pos | 
					
						
							|  |  |  |         while i > limit and str[i-1] in self._id_chars: | 
					
						
							|  |  |  |             i -= 1 | 
					
						
							|  |  |  |         if i < pos and (str[i] not in self._id_first_chars or \ | 
					
						
							|  |  |  |                         keyword.iskeyword(str[i:pos])): | 
					
						
							|  |  |  |             i = pos | 
					
						
							|  |  |  |         return pos - i | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_expression(self): | 
					
						
							|  |  |  |         """Return a string with the Python expression which ends at the given
 | 
					
						
							|  |  |  |         index, which is empty if there is no real one. | 
					
						
							|  |  |  |         """
 | 
					
						
							|  |  |  |         if not self.is_in_code(): | 
					
						
							|  |  |  |             raise ValueError("get_expression should only be called if index "\ | 
					
						
							|  |  |  |                              "is inside a code.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         rawtext = self.rawtext | 
					
						
							|  |  |  |         bracketing = self.bracketing | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         brck_index = self.indexbracket | 
					
						
							|  |  |  |         brck_limit = bracketing[brck_index][0] | 
					
						
							|  |  |  |         pos = self.indexinrawtext | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         last_identifier_pos = pos | 
					
						
							|  |  |  |         postdot_phase = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         while 1: | 
					
						
							|  |  |  |             # Eat whitespaces, comments, and if postdot_phase is False - one dot | 
					
						
							|  |  |  |             while 1: | 
					
						
							|  |  |  |                 if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: | 
					
						
							|  |  |  |                     # Eat a whitespace | 
					
						
							|  |  |  |                     pos -= 1 | 
					
						
							|  |  |  |                 elif not postdot_phase and \ | 
					
						
							|  |  |  |                      pos > brck_limit and rawtext[pos-1] == '.': | 
					
						
							|  |  |  |                     # Eat a dot | 
					
						
							|  |  |  |                     pos -= 1 | 
					
						
							|  |  |  |                     postdot_phase = True | 
					
						
							|  |  |  |                 # The next line will fail if we are *inside* a comment, but we | 
					
						
							|  |  |  |                 # shouldn't be. | 
					
						
							|  |  |  |                 elif pos == brck_limit and brck_index > 0 and \ | 
					
						
							|  |  |  |                      rawtext[bracketing[brck_index-1][0]] == '#': | 
					
						
							|  |  |  |                     # Eat a comment | 
					
						
							|  |  |  |                     brck_index -= 2 | 
					
						
							|  |  |  |                     brck_limit = bracketing[brck_index][0] | 
					
						
							|  |  |  |                     pos = bracketing[brck_index+1][0] | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     # If we didn't eat anything, quit. | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             if not postdot_phase: | 
					
						
							|  |  |  |                 # We didn't find a dot, so the expression end at the last | 
					
						
							|  |  |  |                 # identifier pos. | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             ret = self._eat_identifier(rawtext, brck_limit, pos) | 
					
						
							|  |  |  |             if ret: | 
					
						
							|  |  |  |                 # There is an identifier to eat | 
					
						
							|  |  |  |                 pos = pos - ret | 
					
						
							|  |  |  |                 last_identifier_pos = pos | 
					
						
							|  |  |  |                 # Now, in order to continue the search, we must find a dot. | 
					
						
							|  |  |  |                 postdot_phase = False | 
					
						
							|  |  |  |                 # (the loop continues now) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             elif pos == brck_limit: | 
					
						
							|  |  |  |                 # We are at a bracketing limit. If it is a closing bracket, | 
					
						
							|  |  |  |                 # eat the bracket, otherwise, stop the search. | 
					
						
							|  |  |  |                 level = bracketing[brck_index][1] | 
					
						
							|  |  |  |                 while brck_index > 0 and bracketing[brck_index-1][1] > level: | 
					
						
							|  |  |  |                     brck_index -= 1 | 
					
						
							|  |  |  |                 if bracketing[brck_index][0] == brck_limit: | 
					
						
							|  |  |  |                     # We were not at the end of a closing bracket | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  |                 pos = bracketing[brck_index][0] | 
					
						
							|  |  |  |                 brck_index -= 1 | 
					
						
							|  |  |  |                 brck_limit = bracketing[brck_index][0] | 
					
						
							|  |  |  |                 last_identifier_pos = pos | 
					
						
							|  |  |  |                 if rawtext[pos] in "([": | 
					
						
							|  |  |  |                     # [] and () may be used after an identifier, so we | 
					
						
							|  |  |  |                     # continue. postdot_phase is True, so we don't allow a dot. | 
					
						
							|  |  |  |                     pass | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     # We can't continue after other types of brackets | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 # We've found an operator or something. | 
					
						
							|  |  |  |                 break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return rawtext[last_identifier_pos:self.indexinrawtext] |