mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 05:31:20 +00:00 
			
		
		
		
	 eb953d6e44
			
		
	
	
		eb953d6e44
		
			
		
	
	
	
	
		
			
			* Only show GitHub check annotations on changed doc paragraphs * Improve check-warnings script arg parsing following Hugo's suggestions * Factor filtering warnings by modified diffs into helper function * Build docs on unmerged branch so warning lines match & avoid deep clone --------- Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
		
			
				
	
	
		
			311 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			311 lines
		
	
	
	
		
			9.5 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env python3
 | |
| """
 | |
| Check the output of running Sphinx in nit-picky mode (missing references).
 | |
| """
 | |
| from __future__ import annotations
 | |
| 
 | |
| import argparse
 | |
| import itertools
 | |
| import os
 | |
| import re
 | |
| import subprocess
 | |
| import sys
 | |
| from pathlib import Path
 | |
| from typing import TextIO
 | |
| 
 | |
| # Exclude these whether they're dirty or clean,
 | |
| # because they trigger a rebuild of dirty files.
 | |
| EXCLUDE_FILES = {
 | |
|     "Doc/whatsnew/changelog.rst",
 | |
| }
 | |
| 
 | |
| # Subdirectories of Doc/ to exclude.
 | |
| EXCLUDE_SUBDIRS = {
 | |
|     ".env",
 | |
|     ".venv",
 | |
|     "env",
 | |
|     "includes",
 | |
|     "venv",
 | |
| }
 | |
| 
 | |
| # Regex pattern to match the parts of a Sphinx warning
 | |
| WARNING_PATTERN = re.compile(
 | |
|     r"(?P<file>([A-Za-z]:[\\/])?[^:]+):(?P<line>\d+): WARNING: (?P<msg>.+)"
 | |
| )
 | |
| 
 | |
| # Regex pattern to match the line numbers in a Git unified diff
 | |
| DIFF_PATTERN = re.compile(
 | |
|     r"^@@ -(?P<linea>\d+)(?:,(?P<removed>\d+))? \+(?P<lineb>\d+)(?:,(?P<added>\d+))? @@",
 | |
|     flags=re.MULTILINE,
 | |
| )
 | |
| 
 | |
| 
 | |
| def get_diff_files(ref_a: str, ref_b: str, filter_mode: str = "") -> set[Path]:
 | |
|     """List the files changed between two Git refs, filtered by change type."""
 | |
|     added_files_result = subprocess.run(
 | |
|         [
 | |
|             "git",
 | |
|             "diff",
 | |
|             f"--diff-filter={filter_mode}",
 | |
|             "--name-only",
 | |
|             f"{ref_a}...{ref_b}",
 | |
|             "--",
 | |
|         ],
 | |
|         stdout=subprocess.PIPE,
 | |
|         check=True,
 | |
|         text=True,
 | |
|         encoding="UTF-8",
 | |
|     )
 | |
| 
 | |
|     added_files = added_files_result.stdout.strip().split("\n")
 | |
|     return {Path(file.strip()) for file in added_files if file.strip()}
 | |
| 
 | |
| 
 | |
| def get_diff_lines(ref_a: str, ref_b: str, file: Path) -> list[int]:
 | |
|     """List the lines changed between two Git refs for a specific file."""
 | |
|     diff_output = subprocess.run(
 | |
|         [
 | |
|             "git",
 | |
|             "diff",
 | |
|             "--unified=0",
 | |
|             f"{ref_a}...{ref_b}",
 | |
|             "--",
 | |
|             str(file),
 | |
|         ],
 | |
|         stdout=subprocess.PIPE,
 | |
|         check=True,
 | |
|         text=True,
 | |
|         encoding="UTF-8",
 | |
|     )
 | |
| 
 | |
|     # Scrape line offsets + lengths from diff and convert to line numbers
 | |
|     line_matches = DIFF_PATTERN.finditer(diff_output.stdout)
 | |
|     # Removed and added line counts are 1 if not printed
 | |
|     line_match_values = [
 | |
|         line_match.groupdict(default=1) for line_match in line_matches
 | |
|     ]
 | |
|     line_ints = [
 | |
|         (int(match_value["lineb"]), int(match_value["added"]))
 | |
|         for match_value in line_match_values
 | |
|     ]
 | |
|     line_ranges = [
 | |
|         range(line_b, line_b + added) for line_b, added in line_ints
 | |
|     ]
 | |
|     line_numbers = list(itertools.chain(*line_ranges))
 | |
| 
 | |
|     return line_numbers
 | |
| 
 | |
| 
 | |
| def get_para_line_numbers(file_obj: TextIO) -> list[list[int]]:
 | |
|     """Get the line numbers of text in a file object, grouped by paragraph."""
 | |
|     paragraphs = []
 | |
|     prev_line = None
 | |
|     for lineno, line in enumerate(file_obj):
 | |
|         lineno = lineno + 1
 | |
|         if prev_line is None or (line.strip() and not prev_line.strip()):
 | |
|             paragraph = [lineno - 1]
 | |
|             paragraphs.append(paragraph)
 | |
|         paragraph.append(lineno)
 | |
|         prev_line = line
 | |
|     return paragraphs
 | |
| 
 | |
| 
 | |
| def filter_and_parse_warnings(
 | |
|     warnings: list[str], files: set[Path]
 | |
| ) -> list[re.Match[str]]:
 | |
|     """Get the warnings matching passed files and parse them with regex."""
 | |
|     filtered_warnings = [
 | |
|         warning
 | |
|         for warning in warnings
 | |
|         if any(str(file) in warning for file in files)
 | |
|     ]
 | |
|     warning_matches = [
 | |
|         WARNING_PATTERN.fullmatch(warning.strip())
 | |
|         for warning in filtered_warnings
 | |
|     ]
 | |
|     non_null_matches = [warning for warning in warning_matches if warning]
 | |
|     return non_null_matches
 | |
| 
 | |
| 
 | |
| def filter_warnings_by_diff(
 | |
|     warnings: list[re.Match[str]], ref_a: str, ref_b: str, file: Path
 | |
| ) -> list[re.Match[str]]:
 | |
|     """Filter the passed per-file warnings to just those on changed lines."""
 | |
|     diff_lines = get_diff_lines(ref_a, ref_b, file)
 | |
|     with file.open(encoding="UTF-8") as file_obj:
 | |
|         paragraphs = get_para_line_numbers(file_obj)
 | |
|     touched_paras = [
 | |
|         para_lines
 | |
|         for para_lines in paragraphs
 | |
|         if set(diff_lines) & set(para_lines)
 | |
|     ]
 | |
|     touched_para_lines = set(itertools.chain(*touched_paras))
 | |
|     warnings_infile = [
 | |
|         warning for warning in warnings if str(file) in warning["file"]
 | |
|     ]
 | |
|     warnings_touched = [
 | |
|         warning
 | |
|         for warning in warnings_infile
 | |
|         if int(warning["line"]) in touched_para_lines
 | |
|     ]
 | |
|     return warnings_touched
 | |
| 
 | |
| 
 | |
| def process_touched_warnings(
 | |
|     warnings: list[str], ref_a: str, ref_b: str
 | |
| ) -> list[re.Match[str]]:
 | |
|     """Filter a list of Sphinx warnings to those affecting touched lines."""
 | |
|     added_files, modified_files = tuple(
 | |
|         get_diff_files(ref_a, ref_b, filter_mode=mode) for mode in ("A", "M")
 | |
|     )
 | |
| 
 | |
|     warnings_added = filter_and_parse_warnings(warnings, added_files)
 | |
|     warnings_modified = filter_and_parse_warnings(warnings, modified_files)
 | |
| 
 | |
|     modified_files_warned = {
 | |
|         file
 | |
|         for file in modified_files
 | |
|         if any(str(file) in warning["file"] for warning in warnings_modified)
 | |
|     }
 | |
| 
 | |
|     warnings_modified_touched = [
 | |
|         filter_warnings_by_diff(warnings_modified, ref_a, ref_b, file)
 | |
|         for file in modified_files_warned
 | |
|     ]
 | |
|     warnings_touched = warnings_added + list(
 | |
|         itertools.chain(*warnings_modified_touched)
 | |
|     )
 | |
| 
 | |
|     return warnings_touched
 | |
| 
 | |
| 
 | |
| def annotate_diff(
 | |
|     warnings: list[str], ref_a: str = "main", ref_b: str = "HEAD"
 | |
| ) -> None:
 | |
|     """
 | |
|     Convert Sphinx warning messages to GitHub Actions for changed paragraphs.
 | |
| 
 | |
|     Converts lines like:
 | |
|         .../Doc/library/cgi.rst:98: WARNING: reference target not found
 | |
|     to:
 | |
|         ::warning file=.../Doc/library/cgi.rst,line=98::reference target not found
 | |
| 
 | |
|     See:
 | |
|     https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#setting-a-warning-message
 | |
|     """
 | |
|     warnings_touched = process_touched_warnings(warnings, ref_a, ref_b)
 | |
|     print("Emitting doc warnings matching modified lines:")
 | |
|     for warning in warnings_touched:
 | |
|         print("::warning file={file},line={line}::{msg}".format_map(warning))
 | |
|         print(warning[0])
 | |
|     if not warnings_touched:
 | |
|         print("None")
 | |
| 
 | |
| 
 | |
| def fail_if_regression(
 | |
|     warnings: list[str], files_with_expected_nits: set[str], files_with_nits: set[str]
 | |
| ) -> int:
 | |
|     """
 | |
|     Ensure some files always pass Sphinx nit-picky mode (no missing references).
 | |
|     These are files which are *not* in .nitignore.
 | |
|     """
 | |
|     all_rst = {
 | |
|         str(rst)
 | |
|         for rst in Path("Doc/").rglob("*.rst")
 | |
|         if rst.parts[1] not in EXCLUDE_SUBDIRS
 | |
|     }
 | |
|     should_be_clean = all_rst - files_with_expected_nits - EXCLUDE_FILES
 | |
|     problem_files = sorted(should_be_clean & files_with_nits)
 | |
|     if problem_files:
 | |
|         print("\nError: must not contain warnings:\n")
 | |
|         for filename in problem_files:
 | |
|             print(filename)
 | |
|             for warning in warnings:
 | |
|                 if filename in warning:
 | |
|                     if match := WARNING_PATTERN.fullmatch(warning):
 | |
|                         print("  {line}: {msg}".format_map(match))
 | |
|         return -1
 | |
|     return 0
 | |
| 
 | |
| 
 | |
| def fail_if_improved(
 | |
|     files_with_expected_nits: set[str], files_with_nits: set[str]
 | |
| ) -> int:
 | |
|     """
 | |
|     We may have fixed warnings in some files so that the files are now completely clean.
 | |
|     Good news! Let's add them to .nitignore to prevent regression.
 | |
|     """
 | |
|     files_with_no_nits = files_with_expected_nits - files_with_nits
 | |
|     if files_with_no_nits:
 | |
|         print("\nCongratulations! You improved:\n")
 | |
|         for filename in sorted(files_with_no_nits):
 | |
|             print(filename)
 | |
|         print("\nPlease remove from Doc/tools/.nitignore\n")
 | |
|         return -1
 | |
|     return 0
 | |
| 
 | |
| 
 | |
| def main(argv: list[str] | None = None) -> int:
 | |
|     parser = argparse.ArgumentParser()
 | |
|     parser.add_argument(
 | |
|         "--annotate-diff",
 | |
|         nargs="*",
 | |
|         metavar=("BASE_REF", "HEAD_REF"),
 | |
|         help="Add GitHub Actions annotations on the diff for warnings on "
 | |
|         "lines changed between the given refs (main and HEAD, by default)",
 | |
|     )
 | |
|     parser.add_argument(
 | |
|         "--fail-if-regression",
 | |
|         action="store_true",
 | |
|         help="Fail if known-good files have warnings",
 | |
|     )
 | |
|     parser.add_argument(
 | |
|         "--fail-if-improved",
 | |
|         action="store_true",
 | |
|         help="Fail if new files with no nits are found",
 | |
|     )
 | |
| 
 | |
|     args = parser.parse_args(argv)
 | |
|     if args.annotate_diff is not None and len(args.annotate_diff) > 2:
 | |
|         parser.error(
 | |
|             "--annotate-diff takes between 0 and 2 ref args, not "
 | |
|             f"{len(args.annotate_diff)} {tuple(args.annotate_diff)}"
 | |
|         )
 | |
|     exit_code = 0
 | |
| 
 | |
|     wrong_directory_msg = "Must run this script from the repo root"
 | |
|     assert Path("Doc").exists() and Path("Doc").is_dir(), wrong_directory_msg
 | |
| 
 | |
|     with Path("Doc/sphinx-warnings.txt").open(encoding="UTF-8") as f:
 | |
|         warnings = f.read().splitlines()
 | |
| 
 | |
|     cwd = str(Path.cwd()) + os.path.sep
 | |
|     files_with_nits = {
 | |
|         warning.removeprefix(cwd).split(":")[0]
 | |
|         for warning in warnings
 | |
|         if "Doc/" in warning
 | |
|     }
 | |
| 
 | |
|     with Path("Doc/tools/.nitignore").open(encoding="UTF-8") as clean_files:
 | |
|         files_with_expected_nits = {
 | |
|             filename.strip()
 | |
|             for filename in clean_files
 | |
|             if filename.strip() and not filename.startswith("#")
 | |
|         }
 | |
| 
 | |
|     if args.annotate_diff is not None:
 | |
|         annotate_diff(warnings, *args.annotate_diff)
 | |
| 
 | |
|     if args.fail_if_regression:
 | |
|         exit_code += fail_if_regression(
 | |
|             warnings, files_with_expected_nits, files_with_nits
 | |
|         )
 | |
| 
 | |
|     if args.fail_if_improved:
 | |
|         exit_code += fail_if_improved(files_with_expected_nits, files_with_nits)
 | |
| 
 | |
|     return exit_code
 | |
| 
 | |
| 
 | |
| if __name__ == "__main__":
 | |
|     sys.exit(main())
 |