# Source: cpython/Lib/profiling/sampling/module_utils.py
# (103 lines, 3.1 KiB, Python)

"""Utilities for extracting module names from file paths."""
import os
import site
import sys
from pathlib import Path
def get_python_path_info():
    """Collect the directories Python searches for modules.

    Each lookup is best-effort: if a location cannot be determined, the
    AttributeError/OSError is swallowed and that piece is left at its
    empty default.

    Returns:
        dict: Mapping with three keys, in this order:
            'stdlib': parent directory of ``os.__file__`` as a Path, or
                None when it could not be determined.
            'site_packages': list of Path objects built from
                ``site.getsitepackages()``, followed by the user
                site-packages directory when it exists on disk.
            'sys_path': list of Path objects, one per non-empty entry of
                ``sys.path``.
    """
    # The location of the os module is used as the marker for the
    # standard library directory.
    stdlib_dir = None
    try:
        os_file = getattr(os, '__file__', None)
        if os_file:
            stdlib_dir = Path(os_file).parent
    except (AttributeError, OSError):
        pass  # Leave stdlib as None when it can't be resolved

    # Global site-packages directories.
    site_dirs = []
    try:
        site_dirs.extend(map(Path, site.getsitepackages()))
    except (AttributeError, OSError):
        pass  # Carry on with whatever was collected so far

    # Per-user site-packages, only recorded when the directory exists.
    try:
        user_dir = site.getusersitepackages()
        if user_dir:
            user_path = Path(user_dir)
            if user_path.exists():
                site_dirs.append(user_path)
    except (AttributeError, OSError):
        pass  # Carry on without the user site directory

    return {
        'stdlib': stdlib_dir,
        'site_packages': site_dirs,
        # Empty strings in sys.path are dropped.
        'sys_path': [Path(entry) for entry in sys.path if entry],
    }
def extract_module_name(filename, path_info):
    """Extract Python module name and type from file path.

    The file is matched against the known search locations from most
    specific to least specific; the first location that contains it
    determines both the module type and the directory the dotted module
    name is computed relative to.

    Args:
        filename: Path to the Python file
        path_info: Dictionary from get_python_path_info()

    Returns:
        tuple: (module_name, module_type) where module_type is one of:
            'stdlib', 'site-packages', 'project', or 'other'.
            A falsy filename yields ('unknown', 'other').
    """
    if not filename:
        return ('unknown', 'other')

    try:
        file_path = Path(filename)
    except (ValueError, OSError):
        return (str(filename), 'other')

    # Check site-packages BEFORE the stdlib: on common layouts the
    # site-packages directory is nested inside the stdlib directory
    # (e.g. <prefix>/lib/pythonX.Y/site-packages or <prefix>\Lib\site-packages),
    # so testing the stdlib first would label third-party code as 'stdlib'
    # and produce names such as 'site-packages.pkg.mod'.
    for site_pkg in path_info['site_packages']:
        if file_path.is_relative_to(site_pkg):
            return (_path_to_module(file_path.relative_to(site_pkg)), 'site-packages')

    # Check if it's in stdlib
    stdlib = path_info['stdlib']
    if stdlib and file_path.is_relative_to(stdlib):
        return (_path_to_module(file_path.relative_to(stdlib)), 'stdlib')

    # Check other sys.path entries (project files)
    if not str(file_path).startswith(('<', '[')):  # Skip special files
        for path_entry in path_info['sys_path']:
            if file_path.is_relative_to(path_entry):
                return (_path_to_module(file_path.relative_to(path_entry)), 'project')

    # Fallback: derive the name from the path as given
    return (_path_to_module(file_path), 'other')
def _path_to_module(path):
    """Convert a file path into a dotted module name.

    Args:
        path: A str or a pathlib path object. A str is converted to Path.

    Returns:
        str: The path components joined with dots, with a trailing '.py'
            suffix removed, the anchor (root and/or drive) dropped, and a
            trailing '__init__' component dropped so that a package's
            __init__ file maps to the package itself. If no components
            remain, the stem of the (suffix-stripped) path is returned.
    """
    if isinstance(path, str):
        path = Path(path)

    # Remove .py extension
    if path.suffix == '.py':
        path = path.with_suffix('')

    # Strip the anchor rather than root/drive individually: pathlib merges
    # drive and root into a single leading part (e.g. "C:\\"), which equals
    # neither path.root ("\\") nor path.drive ("C:") on its own.
    anchor = path.anchor
    parts = [p for p in path.parts if p != anchor]

    # Handle __init__ files - they represent the package itself
    if parts and parts[-1] == '__init__':
        parts = parts[:-1]

    return '.'.join(parts) if parts else path.stem