mirror of
				https://github.com/python/cpython.git
				synced 2025-10-31 13:41:24 +00:00 
			
		
		
		
	 da56601783
			
		
	
	
		da56601783
		
			
		
	
	
	
	
		
			
			Like #28744 but for the Tools directory.
Automerge-Triggered-By: GH:pablogsal
(cherry picked from commit 682aecfdeb)
		
	
			
		
			
				
	
	
		
			86 lines
		
	
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
	
		
			2.6 KiB
		
	
	
	
		
			Python
		
	
	
		
			Executable file
		
	
	
	
	
| #!/usr/bin/env python3.8
 | |
| 
 | |
| import argparse
 | |
| import os
 | |
| import json
 | |
| 
 | |
| from typing import Dict, Any
 | |
| from urllib.request import urlretrieve
 | |
| 
 | |
# Command-line interface: how many of the listed packages to download.
argparser = argparse.ArgumentParser(
    prog="download_pypi_packages",
    description="Helper program to download PyPI packages",
)
# -n/--number: download only the first N rows of the package list.
argparser.add_argument(
    "-n",
    "--number",
    type=int,
    default=100,
    help="Number of packages to download",
)
# -a/--all: ignore --number and download every listed package.
argparser.add_argument(
    "-a",
    "--all",
    action="store_true",
    help="Download all packages listed in the json file",
)
 | |
| 
 | |
| 
 | |
def load_json(filename: str) -> Dict[Any, Any]:
    """Load and parse ``data/<filename>.json``.

    Args:
        filename: Base name of the JSON file, without the ``.json`` suffix.
            The file is looked up in the ``data`` directory relative to the
            current working directory.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    path = os.path.join("data", f"{filename}.json")
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (which is not UTF-8 everywhere).
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
 | |
| 
 | |
| 
 | |
def remove_json(filename: str) -> None:
    """Delete ``data/<filename>.json`` (relative to the working directory).

    Args:
        filename: Base name of the JSON file, without the ``.json`` suffix.

    Raises:
        OSError: If the file cannot be removed (e.g. it does not exist).
    """
    os.remove(os.path.join("data", "{}.json".format(filename)))
 | |
| 
 | |
| 
 | |
def download_package_json(package_name: str) -> None:
    """Fetch a package's JSON metadata from PyPI into the ``data`` directory.

    The document at ``https://pypi.org/pypi/<package_name>/json`` is saved as
    ``data/<package_name>.json`` relative to the working directory.

    Args:
        package_name: Project name as it appears in the PyPI URL.
    """
    destination = os.path.join("data", "{}.json".format(package_name))
    urlretrieve("https://pypi.org/pypi/{}/json".format(package_name), destination)
 | |
| 
 | |
| 
 | |
def download_package_code(name: str, package_json: Dict[Any, Any]) -> None:
    """Download the source distribution listed in *package_json*.

    The first entry of ``package_json["urls"]`` whose ``"python_version"`` is
    ``"source"`` is retrieved and stored as ``data/pypi/<filename>``, where
    ``<filename>`` is that entry's ``"filename"`` value.

    Args:
        name: Package name; used only in the error message.
        package_json: Decoded package metadata containing a ``"urls"`` list.

    Raises:
        IndexError: If no entry in ``"urls"`` is a source distribution.
        KeyError: If ``"urls"`` or one of the per-entry keys is missing.
    """
    source = next(
        (info for info in package_json["urls"] if info["python_version"] == "source"),
        None,
    )
    if source is None:
        # Fail explicitly rather than falling back to an arbitrary entry:
        # a "not found" index of -1 would silently select the *last* file,
        # which is not a source distribution.
        raise IndexError(f"no source distribution listed for {name}")

    filename = source["filename"]
    url = source["url"]
    urlretrieve(url, os.path.join("data", "pypi", filename))
 | |
| 
 | |
| 
 | |
def main() -> None:
    """Download source distributions for the packages in the top-packages list.

    Reads ``data/top-pypi-packages-365-days.json``, takes either every row
    (``--all``) or the first ``--number`` rows, and for each row downloads the
    package's PyPI JSON metadata, then its source distribution into
    ``data/pypi``, and finally deletes the temporary metadata file.

    Raises:
        AssertionError: If ``--number`` is outside ``0..4000`` and ``--all``
            was not given.
    """
    args = argparser.parse_args()
    number_packages = args.number
    all_packages = args.all

    top_pypi_packages = load_json("top-pypi-packages-365-days")
    if all_packages:
        top_pypi_packages = top_pypi_packages["rows"]
    elif 0 <= number_packages <= 4000:
        top_pypi_packages = top_pypi_packages["rows"][:number_packages]
    else:
        raise AssertionError("Unknown value for NUMBER_OF_PACKAGES")

    # Create the download directory; an already existing one is fine.
    os.makedirs(os.path.join("data", "pypi"), exist_ok=True)

    for package in top_pypi_packages:
        package_name = package["project"]

        # flush=True: the message has no trailing newline, so without an
        # explicit flush it would stay buffered until "Done" is printed.
        print(f"Downloading JSON Data for {package_name}... ", end="", flush=True)
        download_package_json(package_name)
        print("Done")

        package_json = load_json(package_name)
        try:
            print(
                f"Downloading and compressing package {package_name} ... ",
                end="",
                flush=True,
            )
            download_package_code(package_name, package_json)
            print("Done")
        except (IndexError, KeyError):
            # No usable source entry in the metadata: report it and move on.
            print(f"Could not locate source for {package_name}")
        finally:
            # The per-package metadata file is only needed for this iteration.
            remove_json(package_name)
 | |
| 
 | |
| 
 | |
if __name__ == "__main__":
    # Run the downloader only when executed as a script, not when imported.
    main()
 |