| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | """Tool for generating Software Bill of Materials (SBOM) for Python's dependencies""" | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  | import os | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | import re | 
					
						
							|  |  |  | import hashlib | 
					
						
							|  |  |  | import json | 
					
						
							|  |  |  | import glob | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  | from pathlib import Path, PurePosixPath, PureWindowsPath | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | import subprocess | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | import urllib.request | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | import typing | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  | CPYTHON_ROOT_DIR = Path(__file__).parent.parent.parent | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  | # Before adding a new entry to this list, double check that | 
					
						
							|  |  |  | # the license expression is a valid SPDX license expression: | 
					
						
							|  |  |  | # See: https://spdx.org/licenses | 
					
						
							|  |  |  | ALLOWED_LICENSE_EXPRESSIONS = { | 
					
						
							|  |  |  |     "Apache-2.0", | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |     "Apache-2.0 OR BSD-2-Clause", | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     "BSD-2-Clause", | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |     "BSD-3-Clause", | 
					
						
							|  |  |  |     "CC0-1.0", | 
					
						
							|  |  |  |     "ISC", | 
					
						
							|  |  |  |     "LGPL-2.1-only", | 
					
						
							|  |  |  |     "MIT", | 
					
						
							|  |  |  |     "MPL-2.0", | 
					
						
							|  |  |  |     "Python-2.0.1", | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # Properties which are required for our purposes. | 
					
						
							|  |  |  | REQUIRED_PROPERTIES_PACKAGE = frozenset([ | 
					
						
							|  |  |  |     "SPDXID", | 
					
						
							|  |  |  |     "name", | 
					
						
							|  |  |  |     "versionInfo", | 
					
						
							|  |  |  |     "downloadLocation", | 
					
						
							|  |  |  |     "checksums", | 
					
						
							|  |  |  |     "licenseConcluded", | 
					
						
							|  |  |  |     "externalRefs", | 
					
						
							|  |  |  |     "primaryPackagePurpose", | 
					
						
							|  |  |  | ]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class PackageFiles(typing.NamedTuple): | 
					
						
							|  |  |  |     """Structure for describing the files of a package""" | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |     include: list[str] | None | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     exclude: list[str] | None = None | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # SBOMS don't have a method to specify the sources of files | 
					
						
							|  |  |  | # so we need to do that external to the SBOM itself. Add new | 
					
						
							|  |  |  | # values to 'exclude' if we create new files within tracked | 
					
						
							|  |  |  | # directories that aren't sourced from third-party packages. | 
					
						
							|  |  |  | PACKAGE_TO_FILES = { | 
					
						
							|  |  |  |     "mpdecimal": PackageFiles( | 
					
						
							|  |  |  |         include=["Modules/_decimal/libmpdec/**"] | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  |     "expat": PackageFiles( | 
					
						
							| 
									
										
										
										
											2024-02-14 10:29:06 -06:00
										 |  |  |         include=["Modules/expat/**"], | 
					
						
							|  |  |  |         exclude=[ | 
					
						
							|  |  |  |             "Modules/expat/expat_config.h", | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     ), | 
					
						
							|  |  |  |     "macholib": PackageFiles( | 
					
						
							|  |  |  |         include=["Lib/ctypes/macholib/**"], | 
					
						
							|  |  |  |         exclude=[ | 
					
						
							|  |  |  |             "Lib/ctypes/macholib/README.ctypes", | 
					
						
							|  |  |  |             "Lib/ctypes/macholib/fetch_macholib", | 
					
						
							|  |  |  |             "Lib/ctypes/macholib/fetch_macholib.bat", | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  |     "hacl-star": PackageFiles( | 
					
						
							|  |  |  |         include=["Modules/_hacl/**"], | 
					
						
							|  |  |  |         exclude=[ | 
					
						
							|  |  |  |             "Modules/_hacl/refresh.sh", | 
					
						
							|  |  |  |             "Modules/_hacl/README.md", | 
					
						
							|  |  |  |             "Modules/_hacl/python_hacl_namespace.h", | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |     ), | 
					
						
							|  |  |  | } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def spdx_id(value: str) -> str: | 
					
						
							|  |  |  |     """Encode a value into characters that are valid in an SPDX ID""" | 
					
						
							|  |  |  |     return re.sub(r"[^a-zA-Z0-9.\-]+", "-", value) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  | def error_if(value: bool, error_message: str) -> None: | 
					
						
							|  |  |  |     """Prints an error if a comparison fails along with a link to the devguide""" | 
					
						
							|  |  |  |     if value: | 
					
						
							|  |  |  |         print(error_message) | 
					
						
							|  |  |  |         print("See 'https://devguide.python.org/developer-workflow/sbom' for more information.") | 
					
						
							|  |  |  |         sys.exit(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-09-02 14:35:30 -05:00
										 |  |  | def is_root_directory_git_index() -> bool: | 
					
						
							|  |  |  |     """Checks if the root directory is a git index""" | 
					
						
							|  |  |  |     try: | 
					
						
							|  |  |  |         subprocess.check_call( | 
					
						
							|  |  |  |             ["git", "-C", str(CPYTHON_ROOT_DIR), "rev-parse"], | 
					
						
							|  |  |  |             stdout=subprocess.DEVNULL, | 
					
						
							|  |  |  |             stderr=subprocess.DEVNULL, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |     except subprocess.CalledProcessError: | 
					
						
							|  |  |  |         return False | 
					
						
							|  |  |  |     return True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | def filter_gitignored_paths(paths: list[str]) -> list[str]: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Filter out paths excluded by the gitignore file. | 
					
						
							|  |  |  |     The output of 'git check-ignore --non-matching --verbose' looks | 
					
						
							|  |  |  |     like this for non-matching (included) files: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         '::<whitespace><path>' | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     And looks like this for matching (excluded) files: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         '.gitignore:9:*.a    Tools/lib.a' | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2024-07-27 06:10:05 -05:00
										 |  |  |     # No paths means no filtering to be done. | 
					
						
							|  |  |  |     if not paths: | 
					
						
							|  |  |  |         return [] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     # Filter out files in gitignore. | 
					
						
							|  |  |  |     # Non-matching files show up as '::<whitespace><path>' | 
					
						
							|  |  |  |     git_check_ignore_proc = subprocess.run( | 
					
						
							|  |  |  |         ["git", "check-ignore", "--verbose", "--non-matching", *paths], | 
					
						
							| 
									
										
										
										
											2024-01-17 17:25:39 +01:00
										 |  |  |         cwd=CPYTHON_ROOT_DIR, | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |         check=False, | 
					
						
							|  |  |  |         stdout=subprocess.PIPE, | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     # 1 means matches, 0 means no matches. | 
					
						
							|  |  |  |     assert git_check_ignore_proc.returncode in (0, 1) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  |     # Paths may or may not be quoted, Windows quotes paths. | 
					
						
							|  |  |  |     git_check_ignore_re = re.compile(r"^::\s+(\"([^\"]+)\"|(.+))\Z") | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     # Return the list of paths sorted | 
					
						
							|  |  |  |     git_check_ignore_lines = git_check_ignore_proc.stdout.decode().splitlines() | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  |     git_check_not_ignored = [] | 
					
						
							|  |  |  |     for line in git_check_ignore_lines: | 
					
						
							|  |  |  |         if match := git_check_ignore_re.fullmatch(line): | 
					
						
							|  |  |  |             git_check_not_ignored.append(match.group(2) or match.group(3)) | 
					
						
							|  |  |  |     return sorted(git_check_not_ignored) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | def get_externals() -> list[str]: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Parses 'PCbuild/get_externals.bat' for external libraries. | 
					
						
							|  |  |  |     Returns a list of (git tag, name, version) tuples. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     get_externals_bat_path = CPYTHON_ROOT_DIR / "PCbuild/get_externals.bat" | 
					
						
							|  |  |  |     externals = re.findall( | 
					
						
							|  |  |  |         r"set\s+libraries\s*=\s*%libraries%\s+([a-zA-Z0-9.-]+)\s", | 
					
						
							|  |  |  |         get_externals_bat_path.read_text() | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  |     return externals | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | def check_sbom_packages(sbom_data: dict[str, typing.Any]) -> None: | 
					
						
							|  |  |  |     """Make a bunch of assertions about the SBOM package data to ensure it's consistent.""" | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     for package in sbom_data["packages"]: | 
					
						
							|  |  |  |         # Properties and ID must be properly formed. | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         error_if( | 
					
						
							|  |  |  |             "name" not in package, | 
					
						
							|  |  |  |             "Package is missing the 'name' field" | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Verify that the checksum matches the expected value | 
					
						
							|  |  |  |         # and that the download URL is valid. | 
					
						
							|  |  |  |         if "checksums" not in package or "CI" in os.environ: | 
					
						
							|  |  |  |             download_location = package["downloadLocation"] | 
					
						
							|  |  |  |             resp = urllib.request.urlopen(download_location) | 
					
						
							|  |  |  |             error_if(resp.status != 200, f"Couldn't access URL: {download_location}'") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             package["checksums"] = [{ | 
					
						
							|  |  |  |                 "algorithm": "SHA256", | 
					
						
							|  |  |  |                 "checksumValue": hashlib.sha256(resp.read()).hexdigest() | 
					
						
							|  |  |  |             }] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |         missing_required_keys = REQUIRED_PROPERTIES_PACKAGE - set(package.keys()) | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         error_if( | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |             bool(missing_required_keys), | 
					
						
							|  |  |  |             f"Package '{package['name']}' is missing required fields: {missing_required_keys}", | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         ) | 
					
						
							|  |  |  |         error_if( | 
					
						
							|  |  |  |             package["SPDXID"] != spdx_id(f"SPDXRef-PACKAGE-{package['name']}"), | 
					
						
							|  |  |  |             f"Package '{package['name']}' has a malformed SPDXID", | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  |         # Version must be in the download and external references. | 
					
						
							|  |  |  |         version = package["versionInfo"] | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         error_if( | 
					
						
							|  |  |  |             version not in package["downloadLocation"], | 
					
						
							|  |  |  |             f"Version '{version}' for package '{package['name']} not in 'downloadLocation' field", | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         error_if( | 
					
						
							|  |  |  |             any(version not in ref["referenceLocator"] for ref in package["externalRefs"]), | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 f"Version '{version}' for package '{package['name']} not in " | 
					
						
							|  |  |  |                 f"all 'externalRefs[].referenceLocator' fields" | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-27 16:56:14 -05:00
										 |  |  |         # HACL* specifies its expected rev in a refresh script. | 
					
						
							|  |  |  |         if package["name"] == "hacl-star": | 
					
						
							|  |  |  |             hacl_refresh_sh = (CPYTHON_ROOT_DIR / "Modules/_hacl/refresh.sh").read_text() | 
					
						
							|  |  |  |             hacl_expected_rev_match = re.search( | 
					
						
							|  |  |  |                 r"expected_hacl_star_rev=([0-9a-f]{40})", | 
					
						
							|  |  |  |                 hacl_refresh_sh | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             hacl_expected_rev = hacl_expected_rev_match and hacl_expected_rev_match.group(1) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             error_if( | 
					
						
							|  |  |  |                 hacl_expected_rev != version, | 
					
						
							|  |  |  |                 "HACL* SBOM version doesn't match value in 'Modules/_hacl/refresh.sh'" | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |         # License must be on the approved list for SPDX. | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         license_concluded = package["licenseConcluded"] | 
					
						
							|  |  |  |         error_if( | 
					
						
							| 
									
										
										
										
											2024-02-06 04:25:58 -06:00
										 |  |  |             license_concluded != "NOASSERTION", | 
					
						
							|  |  |  |             f"License identifier must be 'NOASSERTION'" | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  | def create_source_sbom() -> None: | 
					
						
							|  |  |  |     sbom_path = CPYTHON_ROOT_DIR / "Misc/sbom.spdx.json" | 
					
						
							|  |  |  |     sbom_data = json.loads(sbom_path.read_bytes()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # We regenerate all of this information. Package information | 
					
						
							|  |  |  |     # should be preserved though since that is edited by humans. | 
					
						
							|  |  |  |     sbom_data["files"] = [] | 
					
						
							|  |  |  |     sbom_data["relationships"] = [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Ensure all packages in this tool are represented also in the SBOM file. | 
					
						
							|  |  |  |     actual_names = {package["name"] for package in sbom_data["packages"]} | 
					
						
							|  |  |  |     expected_names = set(PACKAGE_TO_FILES) | 
					
						
							|  |  |  |     error_if( | 
					
						
							|  |  |  |         actual_names != expected_names, | 
					
						
							|  |  |  |         f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     check_sbom_packages(sbom_data) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |     # We call 'sorted()' here a lot to avoid filesystem scan order issues. | 
					
						
							|  |  |  |     for name, files in sorted(PACKAGE_TO_FILES.items()): | 
					
						
							|  |  |  |         package_spdx_id = spdx_id(f"SPDXRef-PACKAGE-{name}") | 
					
						
							|  |  |  |         exclude = files.exclude or () | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |         for include in sorted(files.include or ()): | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |             # Find all the paths and then filter them through .gitignore. | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  |             paths = glob.glob(include, root_dir=CPYTHON_ROOT_DIR, recursive=True) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |             paths = filter_gitignored_paths(paths) | 
					
						
							| 
									
										
										
										
											2024-01-10 13:21:04 -06:00
										 |  |  |             error_if( | 
					
						
							|  |  |  |                 len(paths) == 0, | 
					
						
							|  |  |  |                 f"No valid paths found at path '{include}' for package '{name}", | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | 
 | 
					
						
							|  |  |  |             for path in paths: | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |                 # Normalize the filename from any combination of slashes. | 
					
						
							|  |  |  |                 path = str(PurePosixPath(PureWindowsPath(path))) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |                 # Skip directories and excluded files | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  |                 if not (CPYTHON_ROOT_DIR / path).is_file() or path in exclude: | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |                     continue | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # SPDX requires SHA1 to be used for files, but we provide SHA256 too. | 
					
						
							| 
									
										
										
										
											2023-12-20 11:28:20 -06:00
										 |  |  |                 data = (CPYTHON_ROOT_DIR / path).read_bytes() | 
					
						
							| 
									
										
										
										
											2024-04-30 10:05:05 -05:00
										 |  |  |                 # We normalize line-endings for consistent checksums. | 
					
						
							|  |  |  |                 # This is a rudimentary check for binary files. | 
					
						
							|  |  |  |                 if b"\x00" not in data: | 
					
						
							|  |  |  |                     data = data.replace(b"\r\n", b"\n") | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |                 checksum_sha1 = hashlib.sha1(data).hexdigest() | 
					
						
							|  |  |  |                 checksum_sha256 = hashlib.sha256(data).hexdigest() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 file_spdx_id = spdx_id(f"SPDXRef-FILE-{path}") | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |                 sbom_data["files"].append({ | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |                     "SPDXID": file_spdx_id, | 
					
						
							|  |  |  |                     "fileName": path, | 
					
						
							|  |  |  |                     "checksums": [ | 
					
						
							|  |  |  |                         {"algorithm": "SHA1", "checksumValue": checksum_sha1}, | 
					
						
							|  |  |  |                         {"algorithm": "SHA256", "checksumValue": checksum_sha256}, | 
					
						
							|  |  |  |                     ], | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |                 # Tie each file back to its respective package. | 
					
						
							| 
									
										
										
										
											2024-01-26 03:48:13 -06:00
										 |  |  |                 sbom_data["relationships"].append({ | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  |                     "spdxElementId": package_spdx_id, | 
					
						
							|  |  |  |                     "relatedSpdxElement": file_spdx_id, | 
					
						
							|  |  |  |                     "relationshipType": "CONTAINS", | 
					
						
							|  |  |  |                 }) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Update the SBOM on disk | 
					
						
							|  |  |  |     sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  | def create_externals_sbom() -> None: | 
					
						
							|  |  |  |     sbom_path = CPYTHON_ROOT_DIR / "Misc/externals.spdx.json" | 
					
						
							|  |  |  |     sbom_data = json.loads(sbom_path.read_bytes()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     externals = get_externals() | 
					
						
							|  |  |  |     externals_name_to_version = {} | 
					
						
							|  |  |  |     externals_name_to_git_tag = {} | 
					
						
							|  |  |  |     for git_tag in externals: | 
					
						
							|  |  |  |         name, _, version = git_tag.rpartition("-") | 
					
						
							|  |  |  |         externals_name_to_version[name] = version | 
					
						
							|  |  |  |         externals_name_to_git_tag[name] = git_tag | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Ensure all packages in this tool are represented also in the SBOM file. | 
					
						
							|  |  |  |     actual_names = {package["name"] for package in sbom_data["packages"]} | 
					
						
							|  |  |  |     expected_names = set(externals_name_to_version) | 
					
						
							|  |  |  |     error_if( | 
					
						
							|  |  |  |         actual_names != expected_names, | 
					
						
							|  |  |  |         f"Packages defined in SBOM tool don't match those defined in SBOM file: {actual_names}, {expected_names}", | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Set the versionInfo and downloadLocation fields for all packages. | 
					
						
							|  |  |  |     for package in sbom_data["packages"]: | 
					
						
							| 
									
										
										
										
											2024-05-20 13:27:09 -04:00
										 |  |  |         package_version = externals_name_to_version[package["name"]] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # Update the version information in all the locations. | 
					
						
							|  |  |  |         package["versionInfo"] = package_version | 
					
						
							|  |  |  |         for external_ref in package["externalRefs"]: | 
					
						
							|  |  |  |             if external_ref["referenceType"] != "cpe23Type": | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             # Version is the fifth field of a CPE. | 
					
						
							|  |  |  |             cpe23ref = external_ref["referenceLocator"] | 
					
						
							|  |  |  |             external_ref["referenceLocator"] = re.sub( | 
					
						
							|  |  |  |                 r"\A(cpe(?::[^:]+){4}):[^:]+:", | 
					
						
							|  |  |  |                 fr"\1:{package_version}:", | 
					
						
							|  |  |  |                 cpe23ref | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  |         download_location = ( | 
					
						
							|  |  |  |             f"https://github.com/python/cpython-source-deps/archive/refs/tags/{externals_name_to_git_tag[package['name']]}.tar.gz" | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         download_location_changed = download_location != package["downloadLocation"] | 
					
						
							|  |  |  |         package["downloadLocation"] = download_location | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # If the download URL has changed we want one to get recalulated. | 
					
						
							|  |  |  |         if download_location_changed: | 
					
						
							|  |  |  |             package.pop("checksums", None) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     check_sbom_packages(sbom_data) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     # Update the SBOM on disk | 
					
						
							|  |  |  |     sbom_path.write_text(json.dumps(sbom_data, indent=2, sort_keys=True)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def main() -> None: | 
					
						
							| 
									
										
										
										
											2024-09-02 14:35:30 -05:00
										 |  |  |     # Don't regenerate the SBOM if we're not a git repository. | 
					
						
							|  |  |  |     if not is_root_directory_git_index(): | 
					
						
							|  |  |  |         print("Skipping SBOM generation due to not being a git repository") | 
					
						
							|  |  |  |         return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-29 09:38:04 -06:00
										 |  |  |     create_source_sbom() | 
					
						
							|  |  |  |     create_externals_sbom() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-12-07 10:01:58 -06:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |