mirror of
				https://github.com/python/cpython.git
				synced 2025-10-30 21:21:22 +00:00 
			
		
		
		
	[3.13] gh-129873: IDLE: Improve help.py's method of parsing HTML (GH-129859) (#129884)
gh-129873: IDLE: Improve help.py's method of parsing HTML (GH-129859)
In `help.copy_strip`, copy only the text `<section>`.  In `help.HelpParser.handle_starttag` and elsewhere, remove the code that skipped HTML which is no longer present.  Add a reminder at the top of idle.rst to run copy_strip after changes.
---------
(cherry picked from commit 6fbf15f98e)
Co-authored-by: Stan Ulbrych <89152624+StanFromIreland@users.noreply.github.com>
Co-authored-by: Terry Jan Reedy <tjreedy@udel.edu>
			
			
This commit is contained in:
		
							parent
							
								
									c28eed3c27
								
							
						
					
					
						commit
						f7d885a8ff
					
				
					 5 changed files with 30 additions and 416 deletions
				
			
		|  | @ -20,7 +20,7 @@ | |||
| 
 | ||||
| HelpWindow - Display HelpFrame in a standalone window. | ||||
| 
 | ||||
| copy_strip - Copy idle.html to help.html, rstripping each line. | ||||
| copy_strip - Copy the text part of idle.html to help.html while rstripping each line. | ||||
| 
 | ||||
| show_idlehelp - Create HelpWindow.  Called in EditorWindow.help_dialog. | ||||
| """ | ||||
|  | @ -54,7 +54,6 @@ def __init__(self, text): | |||
|         self.text = text         # Text widget we're rendering into. | ||||
|         self.tags = ''           # Current block level text tags to apply. | ||||
|         self.chartags = ''       # Current character level text tags. | ||||
|         self.show = False        # Exclude html page navigation. | ||||
|         self.hdrlink = False     # Exclude html header links. | ||||
|         self.level = 0           # Track indentation level. | ||||
|         self.pre = False         # Displaying preformatted text? | ||||
|  | @ -77,11 +76,7 @@ def handle_starttag(self, tag, attrs): | |||
|             if a == 'class': | ||||
|                 class_ = v | ||||
|         s = '' | ||||
|         if tag == 'section' and attrs == [('id', 'idle')]: | ||||
|             self.show = True    # Start main content. | ||||
|         elif tag == 'div' and class_ == 'clearer': | ||||
|             self.show = False   # End main content. | ||||
|         elif tag == 'p' and self.prevtag and not self.prevtag[0]: | ||||
|         if tag == 'p' and self.prevtag and not self.prevtag[0]: | ||||
|             # Begin a new block for <p> tags after a closed tag. | ||||
|             # Avoid extra lines, e.g. after <pre> tags. | ||||
|             lastline = self.text.get('end-1c linestart', 'end-1c') | ||||
|  | @ -112,31 +107,27 @@ def handle_starttag(self, tag, attrs): | |||
|             s = '\n' | ||||
|         elif tag == 'pre': | ||||
|             self.pre = True | ||||
|             if self.show: | ||||
|                 self.text.insert('end', '\n\n') | ||||
|             self.text.insert('end', '\n\n') | ||||
|             self.tags = 'preblock' | ||||
|         elif tag == 'a' and class_ == 'headerlink': | ||||
|             self.hdrlink = True | ||||
|         elif tag == 'h1': | ||||
|             self.tags = tag | ||||
|         elif tag in ['h2', 'h3']: | ||||
|             if self.show: | ||||
|                 self.header = '' | ||||
|                 self.text.insert('end', '\n\n') | ||||
|             self.header = '' | ||||
|             self.text.insert('end', '\n\n') | ||||
|             self.tags = tag | ||||
|         if self.show: | ||||
|             self.text.insert('end', s, (self.tags, self.chartags)) | ||||
|         self.text.insert('end', s, (self.tags, self.chartags)) | ||||
|         self.prevtag = (True, tag) | ||||
| 
 | ||||
|     def handle_endtag(self, tag): | ||||
|         "Handle endtags in help.html." | ||||
|         if tag in ['h1', 'h2', 'h3']: | ||||
|             assert self.level == 0 | ||||
|             if self.show: | ||||
|                 indent = ('        ' if tag == 'h3' else | ||||
|                           '    ' if tag == 'h2' else | ||||
|                           '') | ||||
|                 self.toc.append((indent+self.header, self.text.index('insert'))) | ||||
|             indent = ('        ' if tag == 'h3' else | ||||
|                       '    ' if tag == 'h2' else | ||||
|                       '') | ||||
|             self.toc.append((indent+self.header, self.text.index('insert'))) | ||||
|             self.tags = '' | ||||
|         elif tag in ['span', 'em']: | ||||
|             self.chartags = '' | ||||
|  | @ -151,7 +142,7 @@ def handle_endtag(self, tag): | |||
| 
 | ||||
|     def handle_data(self, data): | ||||
|         "Handle data segments in help.html." | ||||
|         if self.show and not self.hdrlink: | ||||
|         if not self.hdrlink: | ||||
|             d = data if self.pre else data.replace('\n', ' ') | ||||
|             if self.tags == 'h1': | ||||
|                 try: | ||||
|  | @ -253,7 +244,7 @@ def __init__(self, parent, filename, title): | |||
| 
 | ||||
| 
 | ||||
| def copy_strip():  # pragma: no cover | ||||
|     """Copy idle.html to idlelib/help.html, stripping trailing whitespace. | ||||
|     """Copy the text part of idle.html to idlelib/help.html while stripping trailing whitespace. | ||||
| 
 | ||||
|     Files with trailing whitespace cannot be pushed to the git cpython | ||||
|     repository.  For 3.x (on Windows), help.html is generated, after | ||||
|  | @ -265,7 +256,7 @@ def copy_strip():  # pragma: no cover | |||
| 
 | ||||
|     It can be worthwhile to occasionally generate help.html without | ||||
|     touching idle.rst.  Changes to the master version and to the doc | ||||
|     build system may result in changes that should not changed | ||||
|     build system may result in changes that should not change | ||||
|     the displayed text, but might break HelpParser. | ||||
| 
 | ||||
|     As long as master and maintenance versions of idle.rst remain the | ||||
|  | @ -278,10 +269,14 @@ def copy_strip():  # pragma: no cover | |||
|     src = join(abspath(dirname(dirname(dirname(__file__)))), | ||||
|             'Doc', 'build', 'html', 'library', 'idle.html') | ||||
|     dst = join(abspath(dirname(__file__)), 'help.html') | ||||
|     with open(src, 'rb') as inn,\ | ||||
|          open(dst, 'wb') as out: | ||||
| 
 | ||||
|     with open(src, 'r', encoding="utf-8") as inn, open(dst, 'w', encoding="utf-8") as out: | ||||
|         copy = False | ||||
|         for line in inn: | ||||
|             out.write(line.rstrip() + b'\n') | ||||
|             if '<section id="idle">' in line: copy = True | ||||
|             if '<div class="clearer">' in line: break | ||||
|             if copy: out.write(line.strip() + '\n') | ||||
| 
 | ||||
|     print(f'{src} copied to {dst}') | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Miss Islington (bot)
						Miss Islington (bot)