| 
									
										
										
										
											2021-08-04 11:39:52 +01:00
										 |  |  | """Print a summary of specialization stats for all files in the
 | 
					
						
							|  |  |  | default stats folders. | 
					
						
							|  |  |  | """
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | from __future__ import annotations | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
# NOTE: Bytecode introspection modules (opcode, dis, etc.) should only
# be imported when loading a single dataset. When comparing datasets, the
# information they provide may not match the data being compared, leading
# to subtle errors.
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | import argparse | 
					
						
							| 
									
										
										
										
											2021-08-04 11:39:52 +01:00
										 |  |  | import collections | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | from collections.abc import KeysView | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  | from dataclasses import dataclass | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | from datetime import date | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | import enum | 
					
						
							|  |  |  | import functools | 
					
						
							| 
									
										
										
										
											2022-02-14 15:53:38 +00:00
										 |  |  | import itertools | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | import json | 
					
						
							|  |  |  | from operator import itemgetter | 
					
						
							|  |  |  | import os | 
					
						
							|  |  |  | from pathlib import Path | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | import re | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | import sys | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  | import textwrap | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | from typing import Any, Callable, TextIO, TypeAlias | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | RawData: TypeAlias = dict[str, Any] | 
					
						
							|  |  |  | Rows: TypeAlias = list[tuple] | 
					
						
							|  |  |  | Columns: TypeAlias = tuple[str, ...] | 
					
						
							|  |  |  | RowCalculator: TypeAlias = Callable[["Stats"], Rows] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | # TODO: Check for parity | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-04 11:39:52 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  | if os.name == "nt": | 
					
						
							|  |  |  |     DEFAULT_DIR = "c:\\temp\\py_stats\\" | 
					
						
							|  |  |  | else: | 
					
						
							|  |  |  |     DEFAULT_DIR = "/tmp/py_stats/" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | SOURCE_DIR = Path(__file__).parents[2] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-12-14 15:50:02 +00:00
										 |  |  | TOTAL = "specialization.hit", "specialization.miss", "execution_count" | 
					
						
							| 
									
										
										
										
											2021-12-17 14:48:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def pretty(name: str) -> str: | 
					
						
							|  |  |  |     return name.replace("_", " ").lower() | 
					
						
							| 
									
										
										
										
											2022-12-12 09:50:43 -05:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def _load_metadata_from_source(): | 
					
						
							|  |  |  |     def get_defines(filepath: Path, prefix: str = "SPEC_FAIL"): | 
					
						
							|  |  |  |         with open(SOURCE_DIR / filepath) as spec_src: | 
					
						
							|  |  |  |             defines = collections.defaultdict(list) | 
					
						
							|  |  |  |             start = "#define " + prefix + "_" | 
					
						
							|  |  |  |             for line in spec_src: | 
					
						
							|  |  |  |                 line = line.strip() | 
					
						
							|  |  |  |                 if not line.startswith(start): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |                 line = line[len(start) :] | 
					
						
							|  |  |  |                 name, val = line.split() | 
					
						
							|  |  |  |                 defines[int(val.strip())].append(name.strip()) | 
					
						
							|  |  |  |         return defines | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     import opcode | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return { | 
					
						
							|  |  |  |         "_specialized_instructions": [ | 
					
						
							|  |  |  |             op for op in opcode._specialized_opmap.keys() if "__" not in op  # type: ignore | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         "_stats_defines": get_defines( | 
					
						
							|  |  |  |             Path("Include") / "cpython" / "pystats.h", "EVAL_CALL" | 
					
						
							|  |  |  |         ), | 
					
						
							|  |  |  |         "_defines": get_defines(Path("Python") / "specialize.c"), | 
					
						
							|  |  |  |     } | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def load_raw_data(input: Path) -> RawData: | 
					
						
							|  |  |  |     if input.is_file(): | 
					
						
							|  |  |  |         with open(input, "r") as fd: | 
					
						
							|  |  |  |             data = json.load(fd) | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         data["_stats_defines"] = {int(k): v for k, v in data["_stats_defines"].items()} | 
					
						
							|  |  |  |         data["_defines"] = {int(k): v for k, v in data["_defines"].items()} | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return data | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     elif input.is_dir(): | 
					
						
							|  |  |  |         stats = collections.Counter[str]() | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         for filename in input.iterdir(): | 
					
						
							|  |  |  |             with open(filename) as fd: | 
					
						
							|  |  |  |                 for line in fd: | 
					
						
							|  |  |  |                     try: | 
					
						
							|  |  |  |                         key, value = line.split(":") | 
					
						
							|  |  |  |                     except ValueError: | 
					
						
							|  |  |  |                         print( | 
					
						
							|  |  |  |                             f"Unparsable line: '{line.strip()}' in {filename}", | 
					
						
							|  |  |  |                             file=sys.stderr, | 
					
						
							|  |  |  |                         ) | 
					
						
							|  |  |  |                         continue | 
					
						
							| 
									
										
										
										
											2024-02-20 12:24:35 -08:00
										 |  |  |                     # Hack to handle older data files where some uops | 
					
						
							|  |  |  |                     # are missing an underscore prefix in their name | 
					
						
							|  |  |  |                     if key.startswith("uops[") and key[5:6] != "_": | 
					
						
							|  |  |  |                         key = "uops[_" + key[5:] | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                     stats[key.strip()] += int(value) | 
					
						
							|  |  |  |             stats["__nfiles__"] += 1 | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         data = dict(stats) | 
					
						
							|  |  |  |         data.update(_load_metadata_from_source()) | 
					
						
							|  |  |  |         return data | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |     else: | 
					
						
							| 
									
										
										
										
											2024-03-29 11:31:09 -07:00
										 |  |  |         raise ValueError(f"{input} is not a file or directory path") | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def save_raw_data(data: RawData, json_output: TextIO): | 
					
						
							|  |  |  |     json.dump(data, json_output) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  | @dataclass(frozen=True) | 
					
						
							|  |  |  | class Doc: | 
					
						
							|  |  |  |     text: str | 
					
						
							|  |  |  |     doc: str | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def markdown(self) -> str: | 
					
						
							|  |  |  |         return textwrap.dedent( | 
					
						
							|  |  |  |             f"""
 | 
					
						
							|  |  |  |             {self.text} | 
					
						
							|  |  |  |             <details> | 
					
						
							|  |  |  |             <summary>ⓘ</summary> | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             {self.doc} | 
					
						
							|  |  |  |             </details> | 
					
						
							|  |  |  |             """
 | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Count(int): | 
					
						
							|  |  |  |     def markdown(self) -> str: | 
					
						
							|  |  |  |         return format(self, ",d") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | @dataclass(frozen=True) | 
					
						
							|  |  |  | class Ratio: | 
					
						
							|  |  |  |     num: int | 
					
						
							|  |  |  |     den: int | None = None | 
					
						
							|  |  |  |     percentage: bool = True | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __float__(self): | 
					
						
							|  |  |  |         if self.den == 0: | 
					
						
							|  |  |  |             return 0.0 | 
					
						
							|  |  |  |         elif self.den is None: | 
					
						
							|  |  |  |             return self.num | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return self.num / self.den | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def markdown(self) -> str: | 
					
						
							|  |  |  |         if self.den is None: | 
					
						
							|  |  |  |             return "" | 
					
						
							|  |  |  |         elif self.den == 0: | 
					
						
							|  |  |  |             if self.num != 0: | 
					
						
							|  |  |  |                 return f"{self.num:,} / 0 !!" | 
					
						
							|  |  |  |             return "" | 
					
						
							|  |  |  |         elif self.percentage: | 
					
						
							|  |  |  |             return f"{self.num / self.den:,.01%}" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             return f"{self.num / self.den:,.02f}" | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class DiffRatio(Ratio): | 
					
						
							|  |  |  |     def __init__(self, base: int | str, head: int | str): | 
					
						
							|  |  |  |         if isinstance(base, str) or isinstance(head, str): | 
					
						
							|  |  |  |             super().__init__(0, 0) | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             super().__init__(head - base, base) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | class OpcodeStats: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     Manages the data related to specific set of opcodes, e.g. tier1 (with prefix | 
					
						
							|  |  |  |     "opcode") or tier2 (with prefix "uops"). | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def __init__(self, data: dict[str, Any], defines, specialized_instructions): | 
					
						
							|  |  |  |         self._data = data | 
					
						
							|  |  |  |         self._defines = defines | 
					
						
							|  |  |  |         self._specialized_instructions = specialized_instructions | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_opcode_names(self) -> KeysView[str]: | 
					
						
							|  |  |  |         return self._data.keys() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_pair_counts(self) -> dict[tuple[str, str], int]: | 
					
						
							|  |  |  |         pair_counts = {} | 
					
						
							|  |  |  |         for name_i, opcode_stat in self._data.items(): | 
					
						
							|  |  |  |             for key, value in opcode_stat.items(): | 
					
						
							|  |  |  |                 if value and key.startswith("pair_count"): | 
					
						
							|  |  |  |                     name_j, _, _ = key[len("pair_count") + 1 :].partition("]") | 
					
						
							|  |  |  |                     pair_counts[(name_i, name_j)] = value | 
					
						
							|  |  |  |         return pair_counts | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_total_execution_count(self) -> int: | 
					
						
							|  |  |  |         return sum(x.get("execution_count", 0) for x in self._data.values()) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_execution_counts(self) -> dict[str, tuple[int, int]]: | 
					
						
							|  |  |  |         counts = {} | 
					
						
							|  |  |  |         for name, opcode_stat in self._data.items(): | 
					
						
							|  |  |  |             if "execution_count" in opcode_stat: | 
					
						
							|  |  |  |                 count = opcode_stat["execution_count"] | 
					
						
							|  |  |  |                 miss = 0 | 
					
						
							|  |  |  |                 if "specializable" not in opcode_stat: | 
					
						
							|  |  |  |                     miss = opcode_stat.get("specialization.miss", 0) | 
					
						
							|  |  |  |                 counts[name] = (count, miss) | 
					
						
							|  |  |  |         return counts | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     @functools.cache | 
					
						
							|  |  |  |     def _get_pred_succ( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |     ) -> tuple[dict[str, collections.Counter], dict[str, collections.Counter]]: | 
					
						
							|  |  |  |         pair_counts = self.get_pair_counts() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         predecessors: dict[str, collections.Counter] = collections.defaultdict( | 
					
						
							|  |  |  |             collections.Counter | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         successors: dict[str, collections.Counter] = collections.defaultdict( | 
					
						
							|  |  |  |             collections.Counter | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         for (first, second), count in pair_counts.items(): | 
					
						
							|  |  |  |             if count: | 
					
						
							|  |  |  |                 predecessors[second][first] = count | 
					
						
							|  |  |  |                 successors[first][second] = count | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return predecessors, successors | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_predecessors(self, opcode: str) -> collections.Counter[str]: | 
					
						
							|  |  |  |         return self._get_pred_succ()[0][opcode] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_successors(self, opcode: str) -> collections.Counter[str]: | 
					
						
							|  |  |  |         return self._get_pred_succ()[1][opcode] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def _get_stats_for_opcode(self, opcode: str) -> dict[str, int]: | 
					
						
							|  |  |  |         return self._data[opcode] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_specialization_total(self, opcode: str) -> int: | 
					
						
							|  |  |  |         family_stats = self._get_stats_for_opcode(opcode) | 
					
						
							|  |  |  |         return sum(family_stats.get(kind, 0) for kind in TOTAL) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_specialization_counts(self, opcode: str) -> dict[str, int]: | 
					
						
							|  |  |  |         family_stats = self._get_stats_for_opcode(opcode) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         result = {} | 
					
						
							|  |  |  |         for key, value in sorted(family_stats.items()): | 
					
						
							|  |  |  |             if key.startswith("specialization."): | 
					
						
							|  |  |  |                 label = key[len("specialization.") :] | 
					
						
							|  |  |  |                 if label in ("success", "failure") or label.startswith("failure_kinds"): | 
					
						
							|  |  |  |                     continue | 
					
						
							|  |  |  |             elif key in ( | 
					
						
							|  |  |  |                 "execution_count", | 
					
						
							|  |  |  |                 "specializable", | 
					
						
							|  |  |  |             ) or key.startswith("pair"): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 label = key | 
					
						
							|  |  |  |             result[label] = value | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return result | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_specialization_success_failure(self, opcode: str) -> dict[str, int]: | 
					
						
							|  |  |  |         family_stats = self._get_stats_for_opcode(opcode) | 
					
						
							|  |  |  |         result = {} | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         for key in ("specialization.success", "specialization.failure"): | 
					
						
							|  |  |  |             label = key[len("specialization.") :] | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  |             val = family_stats.get(key, 0) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             result[label] = val | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_specialization_failure_total(self, opcode: str) -> int: | 
					
						
							|  |  |  |         return self._get_stats_for_opcode(opcode).get("specialization.failure", 0) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_specialization_failure_kinds(self, opcode: str) -> dict[str, int]: | 
					
						
							|  |  |  |         def kind_to_text(kind: int, opcode: str): | 
					
						
							|  |  |  |             if kind <= 8: | 
					
						
							|  |  |  |                 return pretty(self._defines[kind][0]) | 
					
						
							| 
									
										
										
										
											2025-01-23 04:26:25 -05:00
										 |  |  |             if opcode == "LOAD_SUPER_ATTR": | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 opcode = "SUPER" | 
					
						
							|  |  |  |             elif opcode.endswith("ATTR"): | 
					
						
							|  |  |  |                 opcode = "ATTR" | 
					
						
							| 
									
										
										
										
											2025-04-29 09:00:14 +01:00
										 |  |  |             elif opcode in ("FOR_ITER", "GET_ITER", "SEND"): | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 opcode = "ITER" | 
					
						
							|  |  |  |             elif opcode.endswith("SUBSCR"): | 
					
						
							|  |  |  |                 opcode = "SUBSCR" | 
					
						
							|  |  |  |             for name in self._defines[kind]: | 
					
						
							|  |  |  |                 if name.startswith(opcode): | 
					
						
							|  |  |  |                     return pretty(name[len(opcode) + 1 :]) | 
					
						
							|  |  |  |             return "kind " + str(kind) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         family_stats = self._get_stats_for_opcode(opcode) | 
					
						
							| 
									
										
										
										
											2025-04-04 15:33:31 +01:00
										 |  |  | 
 | 
					
						
							|  |  |  |         def key_to_index(key): | 
					
						
							|  |  |  |             return int(key[:-1].split("[")[1]) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         max_index = 0 | 
					
						
							|  |  |  |         for key in family_stats: | 
					
						
							|  |  |  |             if key.startswith("specialization.failure_kind"): | 
					
						
							|  |  |  |                 max_index = max(max_index, key_to_index(key)) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         failure_kinds = [0] * (max_index + 1) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         for key in family_stats: | 
					
						
							|  |  |  |             if not key.startswith("specialization.failure_kind"): | 
					
						
							|  |  |  |                 continue | 
					
						
							| 
									
										
										
										
											2025-04-04 15:33:31 +01:00
										 |  |  |             failure_kinds[key_to_index(key)] = family_stats[key] | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return { | 
					
						
							|  |  |  |             kind_to_text(index, opcode): value | 
					
						
							|  |  |  |             for (index, value) in enumerate(failure_kinds) | 
					
						
							|  |  |  |             if value | 
					
						
							|  |  |  |         } | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def is_specializable(self, opcode: str) -> bool: | 
					
						
							|  |  |  |         return "specializable" in self._get_stats_for_opcode(opcode) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_specialized_total_counts(self) -> tuple[int, int, int]: | 
					
						
							|  |  |  |         basic = 0 | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |         specialized_hits = 0 | 
					
						
							|  |  |  |         specialized_misses = 0 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         not_specialized = 0 | 
					
						
							|  |  |  |         for opcode, opcode_stat in self._data.items(): | 
					
						
							|  |  |  |             if "execution_count" not in opcode_stat: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             count = opcode_stat["execution_count"] | 
					
						
							|  |  |  |             if "specializable" in opcode_stat: | 
					
						
							|  |  |  |                 not_specialized += count | 
					
						
							|  |  |  |             elif opcode in self._specialized_instructions: | 
					
						
							|  |  |  |                 miss = opcode_stat.get("specialization.miss", 0) | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |                 specialized_hits += count - miss | 
					
						
							|  |  |  |                 specialized_misses += miss | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             else: | 
					
						
							|  |  |  |                 basic += count | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |         return basic, specialized_hits, specialized_misses, not_specialized | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_deferred_counts(self) -> dict[str, int]: | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             opcode: opcode_stat.get("specialization.deferred", 0) | 
					
						
							|  |  |  |             for opcode, opcode_stat in self._data.items() | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |             if opcode != "RESUME" | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_misses_counts(self) -> dict[str, int]: | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             opcode: opcode_stat.get("specialization.miss", 0) | 
					
						
							|  |  |  |             for opcode, opcode_stat in self._data.items() | 
					
						
							|  |  |  |             if not self.is_specializable(opcode) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         } | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_opcode_counts(self) -> dict[str, int]: | 
					
						
							|  |  |  |         counts = {} | 
					
						
							|  |  |  |         for opcode, entry in self._data.items(): | 
					
						
							|  |  |  |             count = entry.get("count", 0) | 
					
						
							|  |  |  |             if count: | 
					
						
							|  |  |  |                 counts[opcode] = count | 
					
						
							|  |  |  |         return counts | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | class Stats: | 
					
						
							|  |  |  |     def __init__(self, data: RawData): | 
					
						
							|  |  |  |         self._data = data | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get(self, key: str) -> int: | 
					
						
							|  |  |  |         return self._data.get(key, 0) | 
					
						
							| 
									
										
										
										
											2021-12-17 14:48:01 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     @functools.cache | 
					
						
							|  |  |  |     def get_opcode_stats(self, prefix: str) -> OpcodeStats: | 
					
						
							|  |  |  |         opcode_stats = collections.defaultdict[str, dict](dict) | 
					
						
							|  |  |  |         for key, value in self._data.items(): | 
					
						
							|  |  |  |             if not key.startswith(prefix): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             name, _, rest = key[len(prefix) + 1 :].partition("]") | 
					
						
							|  |  |  |             opcode_stats[name][rest.strip(".")] = value | 
					
						
							|  |  |  |         return OpcodeStats( | 
					
						
							|  |  |  |             opcode_stats, | 
					
						
							|  |  |  |             self._data["_defines"], | 
					
						
							|  |  |  |             self._data["_specialized_instructions"], | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_call_stats(self) -> dict[str, int]: | 
					
						
							|  |  |  |         defines = self._data["_stats_defines"] | 
					
						
							|  |  |  |         result = {} | 
					
						
							|  |  |  |         for key, value in sorted(self._data.items()): | 
					
						
							|  |  |  |             if "Calls to" in key: | 
					
						
							|  |  |  |                 result[key] = value | 
					
						
							|  |  |  |             elif key.startswith("Calls "): | 
					
						
							|  |  |  |                 name, index = key[:-1].split("[") | 
					
						
							|  |  |  |                 label = f"{name} ({pretty(defines[int(index)][0])})" | 
					
						
							|  |  |  |                 result[label] = value | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         for key, value in sorted(self._data.items()): | 
					
						
							|  |  |  |             if key.startswith("Frame"): | 
					
						
							|  |  |  |                 result[key] = value | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_object_stats(self) -> dict[str, tuple[int, int]]: | 
					
						
							| 
									
										
										
										
											2024-04-02 11:59:21 +01:00
										 |  |  |         total_materializations = self._data.get("Object inline values", 0) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         total_allocations = self._data.get("Object allocations", 0) + self._data.get( | 
					
						
							|  |  |  |             "Object allocations from freelist", 0 | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-09-23 19:10:55 +01:00
										 |  |  |         total_increfs = ( | 
					
						
							|  |  |  |             self._data.get("Object interpreter mortal increfs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object mortal increfs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object interpreter immortal increfs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object immortal increfs", 0) | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         total_decrefs = ( | 
					
						
							|  |  |  |             self._data.get("Object interpreter mortal decrefs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object mortal decrefs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object interpreter immortal decrefs", 0) + | 
					
						
							|  |  |  |             self._data.get("Object immortal decrefs", 0) | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         result = {} | 
					
						
							|  |  |  |         for key, value in self._data.items(): | 
					
						
							|  |  |  |             if key.startswith("Object"): | 
					
						
							|  |  |  |                 if "materialize" in key: | 
					
						
							|  |  |  |                     den = total_materializations | 
					
						
							|  |  |  |                 elif "allocations" in key: | 
					
						
							|  |  |  |                     den = total_allocations | 
					
						
							|  |  |  |                 elif "increfs" in key: | 
					
						
							|  |  |  |                     den = total_increfs | 
					
						
							|  |  |  |                 elif "decrefs" in key: | 
					
						
							|  |  |  |                     den = total_decrefs | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     den = None | 
					
						
							|  |  |  |                 label = key[6:].strip() | 
					
						
							|  |  |  |                 label = label[0].upper() + label[1:] | 
					
						
							|  |  |  |                 result[label] = (value, den) | 
					
						
							|  |  |  |         return result | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_gc_stats(self) -> list[dict[str, int]]: | 
					
						
							|  |  |  |         gc_stats: list[dict[str, int]] = [] | 
					
						
							|  |  |  |         for key, value in self._data.items(): | 
					
						
							|  |  |  |             if not key.startswith("GC"): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             n, _, rest = key[3:].partition("]") | 
					
						
							|  |  |  |             name = rest.strip() | 
					
						
							|  |  |  |             gen_n = int(n) | 
					
						
							|  |  |  |             while len(gc_stats) <= gen_n: | 
					
						
							|  |  |  |                 gc_stats.append({}) | 
					
						
							|  |  |  |             gc_stats[gen_n][name] = value | 
					
						
							|  |  |  |         return gc_stats | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_optimization_stats(self) -> dict[str, tuple[int, int | None]]: | 
					
						
							|  |  |  |         if "Optimization attempts" not in self._data: | 
					
						
							|  |  |  |             return {} | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         attempts = self._data["Optimization attempts"] | 
					
						
							|  |  |  |         created = self._data["Optimization traces created"] | 
					
						
							|  |  |  |         executed = self._data["Optimization traces executed"] | 
					
						
							|  |  |  |         uops = self._data["Optimization uops executed"] | 
					
						
							|  |  |  |         trace_stack_overflow = self._data["Optimization trace stack overflow"] | 
					
						
							|  |  |  |         trace_stack_underflow = self._data["Optimization trace stack underflow"] | 
					
						
							|  |  |  |         trace_too_long = self._data["Optimization trace too long"] | 
					
						
							|  |  |  |         trace_too_short = self._data["Optimization trace too short"] | 
					
						
							|  |  |  |         inner_loop = self._data["Optimization inner loop"] | 
					
						
							|  |  |  |         recursive_call = self._data["Optimization recursive call"] | 
					
						
							| 
									
										
										
										
											2023-12-12 13:43:08 -08:00
										 |  |  |         low_confidence = self._data["Optimization low confidence"] | 
					
						
							| 
									
										
										
										
											2025-02-07 11:41:17 -08:00
										 |  |  |         unknown_callee = self._data["Optimization unknown callee"] | 
					
						
							| 
									
										
										
										
											2024-02-26 12:51:47 -05:00
										 |  |  |         executors_invalidated = self._data["Executors invalidated"] | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Optimization attempts", | 
					
						
							|  |  |  |                 "The number of times a potential trace is identified.  Specifically, this " | 
					
						
							|  |  |  |                 "occurs in the JUMP BACKWARD instruction when the counter reaches a " | 
					
						
							|  |  |  |                 "threshold.", | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  |             ): (attempts, None), | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Traces created", "The number of traces that were successfully created." | 
					
						
							|  |  |  |             ): (created, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Trace stack overflow", | 
					
						
							|  |  |  |                 "A trace is truncated because it would require more than 5 stack frames.", | 
					
						
							|  |  |  |             ): (trace_stack_overflow, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Trace stack underflow", | 
					
						
							|  |  |  |                 "A potential trace is abandoned because it pops more frames than it pushes.", | 
					
						
							|  |  |  |             ): (trace_stack_underflow, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Trace too long", | 
					
						
							|  |  |  |                 "A trace is truncated because it is longer than the instruction buffer.", | 
					
						
							|  |  |  |             ): (trace_too_long, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Trace too short", | 
					
						
							| 
									
										
										
										
											2025-06-30 06:04:02 +08:00
										 |  |  |                 "A potential trace is abandoned because it is too short.", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             ): (trace_too_short, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Inner loop found", "A trace is truncated because it has an inner loop" | 
					
						
							|  |  |  |             ): (inner_loop, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Recursive call", | 
					
						
							|  |  |  |                 "A trace is truncated because it has a recursive call.", | 
					
						
							|  |  |  |             ): (recursive_call, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Low confidence", | 
					
						
							|  |  |  |                 "A trace is abandoned because the likelihood of the jump to top being taken " | 
					
						
							|  |  |  |                 "is too low.", | 
					
						
							|  |  |  |             ): (low_confidence, attempts), | 
					
						
							| 
									
										
										
										
											2025-02-07 11:41:17 -08:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Unknown callee", | 
					
						
							|  |  |  |                 "A trace is abandoned because the target of a call is unknown.", | 
					
						
							|  |  |  |             ): (unknown_callee, attempts), | 
					
						
							| 
									
										
										
										
											2024-02-26 12:51:47 -05:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Executors invalidated", | 
					
						
							|  |  |  |                 "The number of executors that were invalidated due to watched " | 
					
						
							|  |  |  |                 "dictionary changes.", | 
					
						
							|  |  |  |             ): (executors_invalidated, created), | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             Doc("Traces executed", "The number of traces that were executed"): ( | 
					
						
							|  |  |  |                 executed, | 
					
						
							|  |  |  |                 None, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2024-02-26 12:51:47 -05:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Uops executed", | 
					
						
							|  |  |  |                 "The total number of uops (micro-operations) that were executed", | 
					
						
							|  |  |  |             ): ( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 uops, | 
					
						
							|  |  |  |                 executed, | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  |     def get_optimizer_stats(self) -> dict[str, tuple[int, int | None]]: | 
					
						
							|  |  |  |         attempts = self._data["Optimization optimizer attempts"] | 
					
						
							|  |  |  |         successes = self._data["Optimization optimizer successes"] | 
					
						
							|  |  |  |         no_memory = self._data["Optimization optimizer failure no memory"] | 
					
						
							| 
									
										
										
										
											2024-03-21 13:27:46 -04:00
										 |  |  |         builtins_changed = self._data["Optimizer remove globals builtins changed"] | 
					
						
							|  |  |  |         incorrect_keys = self._data["Optimizer remove globals incorrect keys"] | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Optimizer attempts", | 
					
						
							|  |  |  |                 "The number of times the trace optimizer (_Py_uop_analyze_and_optimize) was run.", | 
					
						
							|  |  |  |             ): (attempts, None), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Optimizer successes", | 
					
						
							|  |  |  |                 "The number of traces that were successfully optimized.", | 
					
						
							|  |  |  |             ): (successes, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Optimizer no memory", | 
					
						
							|  |  |  |                 "The number of optimizations that failed due to no memory.", | 
					
						
							|  |  |  |             ): (no_memory, attempts), | 
					
						
							| 
									
										
										
										
											2024-03-21 13:27:46 -04:00
										 |  |  |             Doc( | 
					
						
							|  |  |  |                 "Remove globals builtins changed", | 
					
						
							|  |  |  |                 "The builtins changed during optimization", | 
					
						
							|  |  |  |             ): (builtins_changed, attempts), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Remove globals incorrect keys", | 
					
						
							|  |  |  |                 "The keys in the globals dictionary aren't what was expected", | 
					
						
							|  |  |  |             ): (incorrect_keys, attempts), | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-02 23:17:53 +00:00
										 |  |  |     def get_jit_memory_stats(self) -> dict[Doc, tuple[int, int | None]]: | 
					
						
							|  |  |  |         jit_total_memory_size = self._data["JIT total memory size"] | 
					
						
							|  |  |  |         jit_code_size = self._data["JIT code size"] | 
					
						
							|  |  |  |         jit_trampoline_size = self._data["JIT trampoline size"] | 
					
						
							|  |  |  |         jit_data_size = self._data["JIT data size"] | 
					
						
							|  |  |  |         jit_padding_size = self._data["JIT padding size"] | 
					
						
							|  |  |  |         jit_freed_memory_size = self._data["JIT freed memory size"] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return { | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Total memory size", | 
					
						
							|  |  |  |                 "The total size of the memory allocated for the JIT traces", | 
					
						
							|  |  |  |             ): (jit_total_memory_size, None), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Code size", | 
					
						
							|  |  |  |                 "The size of the memory allocated for the code of the JIT traces", | 
					
						
							|  |  |  |             ): (jit_code_size, jit_total_memory_size), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Trampoline size", | 
					
						
							|  |  |  |                 "The size of the memory allocated for the trampolines of the JIT traces", | 
					
						
							|  |  |  |             ): (jit_trampoline_size, jit_total_memory_size), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Data size", | 
					
						
							|  |  |  |                 "The size of the memory allocated for the data of the JIT traces", | 
					
						
							|  |  |  |             ): (jit_data_size, jit_total_memory_size), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Padding size", | 
					
						
							|  |  |  |                 "The size of the memory allocated for the padding of the JIT traces", | 
					
						
							|  |  |  |             ): (jit_padding_size, jit_total_memory_size), | 
					
						
							|  |  |  |             Doc( | 
					
						
							|  |  |  |                 "Freed memory size", | 
					
						
							|  |  |  |                 "The size of the memory freed from the JIT traces", | 
					
						
							|  |  |  |             ): (jit_freed_memory_size, jit_total_memory_size), | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def get_histogram(self, prefix: str) -> list[tuple[int, int]]: | 
					
						
							|  |  |  |         rows = [] | 
					
						
							|  |  |  |         for k, v in self._data.items(): | 
					
						
							|  |  |  |             match = re.match(f"{prefix}\\[([0-9]+)\\]", k) | 
					
						
							|  |  |  |             if match is not None: | 
					
						
							|  |  |  |                 entry = int(match.groups()[0]) | 
					
						
							|  |  |  |                 rows.append((entry, v)) | 
					
						
							|  |  |  |         rows.sort() | 
					
						
							|  |  |  |         return rows | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 06:10:51 -05:00
										 |  |  |     def get_rare_events(self) -> list[tuple[str, int]]: | 
					
						
							|  |  |  |         prefix = "Rare event " | 
					
						
							|  |  |  |         return [ | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             (key[len(prefix) + 1 : -1].replace("_", " "), val) | 
					
						
							| 
									
										
										
										
											2024-01-25 06:10:51 -05:00
										 |  |  |             for key, val in self._data.items() | 
					
						
							|  |  |  |             if key.startswith(prefix) | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | class JoinMode(enum.Enum): | 
					
						
							|  |  |  |     # Join using the first column as a key | 
					
						
							|  |  |  |     SIMPLE = 0 | 
					
						
							|  |  |  |     # Join using the first column as a key, and indicate the change in the | 
					
						
							|  |  |  |     # second column of each input table as a new column | 
					
						
							|  |  |  |     CHANGE = 1 | 
					
						
							|  |  |  |     # Join using the first column as a key, indicating the change in the second | 
					
						
							| 
									
										
										
										
											2024-01-22 14:45:15 -05:00
										 |  |  |     # column of each input table as a new column, and omit all other columns | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     CHANGE_ONE_COLUMN = 2 | 
					
						
							| 
									
										
										
										
											2024-01-22 14:45:15 -05:00
										 |  |  |     # Join using the first column as a key, and indicate the change as a new | 
					
						
							|  |  |  |     # column, but don't sort by the amount of change. | 
					
						
							|  |  |  |     CHANGE_NO_SORT = 3 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class Table: | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  |     A Table defines how to convert a set of Stats into a specific set of rows | 
					
						
							|  |  |  |     displaying some aspect of the data. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         column_names: Columns, | 
					
						
							|  |  |  |         calc_rows: RowCalculator, | 
					
						
							|  |  |  |         join_mode: JoinMode = JoinMode.SIMPLE, | 
					
						
							|  |  |  |     ): | 
					
						
							|  |  |  |         self.columns = column_names | 
					
						
							|  |  |  |         self.calc_rows = calc_rows | 
					
						
							|  |  |  |         self.join_mode = join_mode | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def join_row(self, key: str, row_a: tuple, row_b: tuple) -> tuple: | 
					
						
							|  |  |  |         match self.join_mode: | 
					
						
							|  |  |  |             case JoinMode.SIMPLE: | 
					
						
							|  |  |  |                 return (key, *row_a, *row_b) | 
					
						
							| 
									
										
										
										
											2024-01-22 14:45:15 -05:00
										 |  |  |             case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 return (key, *row_a, *row_b, DiffRatio(row_a[0], row_b[0])) | 
					
						
							|  |  |  |             case JoinMode.CHANGE_ONE_COLUMN: | 
					
						
							|  |  |  |                 return (key, row_a[0], row_b[0], DiffRatio(row_a[0], row_b[0])) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def join_columns(self, columns: Columns) -> Columns: | 
					
						
							|  |  |  |         match self.join_mode: | 
					
						
							|  |  |  |             case JoinMode.SIMPLE: | 
					
						
							|  |  |  |                 return ( | 
					
						
							|  |  |  |                     columns[0], | 
					
						
							|  |  |  |                     *("Base " + x for x in columns[1:]), | 
					
						
							|  |  |  |                     *("Head " + x for x in columns[1:]), | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2024-01-22 14:45:15 -05:00
										 |  |  |             case JoinMode.CHANGE | JoinMode.CHANGE_NO_SORT: | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 return ( | 
					
						
							|  |  |  |                     columns[0], | 
					
						
							|  |  |  |                     *("Base " + x for x in columns[1:]), | 
					
						
							|  |  |  |                     *("Head " + x for x in columns[1:]), | 
					
						
							|  |  |  |                 ) + ("Change:",) | 
					
						
							|  |  |  |             case JoinMode.CHANGE_ONE_COLUMN: | 
					
						
							|  |  |  |                 return ( | 
					
						
							|  |  |  |                     columns[0], | 
					
						
							|  |  |  |                     "Base " + columns[1], | 
					
						
							|  |  |  |                     "Head " + columns[1], | 
					
						
							|  |  |  |                     "Change:", | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def join_tables(self, rows_a: Rows, rows_b: Rows) -> tuple[Columns, Rows]: | 
					
						
							|  |  |  |         ncols = len(self.columns) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         default = ("",) * (ncols - 1) | 
					
						
							|  |  |  |         data_a = {x[0]: x[1:] for x in rows_a} | 
					
						
							|  |  |  |         data_b = {x[0]: x[1:] for x in rows_b} | 
					
						
							| 
									
										
										
										
											2022-02-08 11:50:02 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         if len(data_a) != len(rows_a) or len(data_b) != len(rows_b): | 
					
						
							|  |  |  |             raise ValueError("Duplicate keys") | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         # To preserve ordering, use A's keys as is and then add any in B that | 
					
						
							|  |  |  |         # aren't in A | 
					
						
							|  |  |  |         keys = list(data_a.keys()) + [k for k in data_b.keys() if k not in data_a] | 
					
						
							|  |  |  |         rows = [ | 
					
						
							|  |  |  |             self.join_row(k, data_a.get(k, default), data_b.get(k, default)) | 
					
						
							|  |  |  |             for k in keys | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  |         if self.join_mode in (JoinMode.CHANGE, JoinMode.CHANGE_ONE_COLUMN): | 
					
						
							|  |  |  |             rows.sort(key=lambda row: abs(float(row[-1])), reverse=True) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         columns = self.join_columns(self.columns) | 
					
						
							|  |  |  |         return columns, rows | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def get_table( | 
					
						
							|  |  |  |         self, base_stats: Stats, head_stats: Stats | None = None | 
					
						
							|  |  |  |     ) -> tuple[Columns, Rows]: | 
					
						
							|  |  |  |         if head_stats is None: | 
					
						
							|  |  |  |             rows = self.calc_rows(base_stats) | 
					
						
							|  |  |  |             return self.columns, rows | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             rows_a = self.calc_rows(base_stats) | 
					
						
							|  |  |  |             rows_b = self.calc_rows(head_stats) | 
					
						
							|  |  |  |             cols, rows = self.join_tables(rows_a, rows_b) | 
					
						
							|  |  |  |             return cols, rows | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | class Section: | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     """
 | 
					
						
							|  |  |  |     A Section defines a section of the output document. | 
					
						
							|  |  |  |     """
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def __init__( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         title: str = "", | 
					
						
							|  |  |  |         summary: str = "", | 
					
						
							|  |  |  |         part_iter=None, | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         *, | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         comparative: bool = True, | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc: str = "", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ): | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  |         self.title = title | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         if not summary: | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  |             self.summary = title.lower() | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.summary = summary | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         self.doc = textwrap.dedent(doc) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         if part_iter is None: | 
					
						
							|  |  |  |             part_iter = [] | 
					
						
							|  |  |  |         if isinstance(part_iter, list): | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             def iter_parts(base_stats: Stats, head_stats: Stats | None): | 
					
						
							|  |  |  |                 yield from part_iter | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             self.part_iter = iter_parts | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             self.part_iter = part_iter | 
					
						
							|  |  |  |         self.comparative = comparative | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def calc_execution_count_table(prefix: str) -> RowCalculator: | 
					
						
							|  |  |  |     def calc(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         opcode_stats = stats.get_opcode_stats(prefix) | 
					
						
							|  |  |  |         counts = opcode_stats.get_execution_counts() | 
					
						
							|  |  |  |         total = opcode_stats.get_total_execution_count() | 
					
						
							|  |  |  |         cumulative = 0 | 
					
						
							|  |  |  |         rows: Rows = [] | 
					
						
							|  |  |  |         for opcode, (count, miss) in sorted( | 
					
						
							|  |  |  |             counts.items(), key=itemgetter(1), reverse=True | 
					
						
							|  |  |  |         ): | 
					
						
							|  |  |  |             cumulative += count | 
					
						
							|  |  |  |             if miss: | 
					
						
							|  |  |  |                 miss_val = Ratio(miss, count) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 miss_val = None | 
					
						
							|  |  |  |             rows.append( | 
					
						
							|  |  |  |                 ( | 
					
						
							|  |  |  |                     opcode, | 
					
						
							|  |  |  |                     Count(count), | 
					
						
							|  |  |  |                     Ratio(count, total), | 
					
						
							|  |  |  |                     Ratio(cumulative, total), | 
					
						
							|  |  |  |                     miss_val, | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return rows | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return calc | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def execution_count_section() -> Section: | 
					
						
							|  |  |  |     return Section( | 
					
						
							|  |  |  |         "Execution counts", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         "Execution counts for Tier 1 instructions.", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							|  |  |  |                 ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), | 
					
						
							|  |  |  |                 calc_execution_count_table("opcode"), | 
					
						
							|  |  |  |                 join_mode=JoinMode.CHANGE_ONE_COLUMN, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         The "miss ratio" column shows the percentage of times the instruction | 
					
						
							|  |  |  |         executed that it deoptimized. When this happens, the base unspecialized | 
					
						
							|  |  |  |         instruction is not counted. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-16 08:27:18 -05:00
										 |  |  | def pair_count_section(prefix: str, title=None) -> Section: | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def calc_pair_count_table(stats: Stats) -> Rows: | 
					
						
							| 
									
										
										
										
											2024-04-16 08:27:18 -05:00
										 |  |  |         opcode_stats = stats.get_opcode_stats(prefix) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         pair_counts = opcode_stats.get_pair_counts() | 
					
						
							|  |  |  |         total = opcode_stats.get_total_execution_count() | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         cumulative = 0 | 
					
						
							|  |  |  |         rows: Rows = [] | 
					
						
							|  |  |  |         for (opcode_i, opcode_j), count in itertools.islice( | 
					
						
							|  |  |  |             sorted(pair_counts.items(), key=itemgetter(1), reverse=True), 100 | 
					
						
							|  |  |  |         ): | 
					
						
							|  |  |  |             cumulative += count | 
					
						
							|  |  |  |             rows.append( | 
					
						
							|  |  |  |                 ( | 
					
						
							|  |  |  |                     f"{opcode_i} {opcode_j}", | 
					
						
							|  |  |  |                     Count(count), | 
					
						
							|  |  |  |                     Ratio(count, total), | 
					
						
							|  |  |  |                     Ratio(cumulative, total), | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         return rows | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return Section( | 
					
						
							|  |  |  |         "Pair counts", | 
					
						
							| 
									
										
										
										
											2024-04-16 08:27:18 -05:00
										 |  |  |         f"Pair counts for top 100 {title if title else prefix} pairs", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							|  |  |  |                 ("Pair", "Count:", "Self:", "Cumulative:"), | 
					
						
							|  |  |  |                 calc_pair_count_table, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							|  |  |  |         comparative=False, | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         Pairs of specialized operations that deoptimize and are then followed by | 
					
						
							|  |  |  |         the corresponding unspecialized instruction are not counted as pairs. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def pre_succ_pairs_section() -> Section: | 
					
						
							|  |  |  |     def iter_pre_succ_pairs_tables(base_stats: Stats, head_stats: Stats | None = None): | 
					
						
							|  |  |  |         assert head_stats is None | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         opcode_stats = base_stats.get_opcode_stats("opcode") | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         for opcode in opcode_stats.get_opcode_names(): | 
					
						
							|  |  |  |             predecessors = opcode_stats.get_predecessors(opcode) | 
					
						
							|  |  |  |             successors = opcode_stats.get_successors(opcode) | 
					
						
							|  |  |  |             predecessors_total = predecessors.total() | 
					
						
							|  |  |  |             successors_total = successors.total() | 
					
						
							|  |  |  |             if predecessors_total == 0 and successors_total == 0: | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             pred_rows = [ | 
					
						
							|  |  |  |                 (pred, Count(count), Ratio(count, predecessors_total)) | 
					
						
							|  |  |  |                 for (pred, count) in predecessors.most_common(5) | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  |             succ_rows = [ | 
					
						
							|  |  |  |                 (succ, Count(count), Ratio(count, successors_total)) | 
					
						
							|  |  |  |                 for (succ, count) in successors.most_common(5) | 
					
						
							|  |  |  |             ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             yield Section( | 
					
						
							|  |  |  |                 opcode, | 
					
						
							|  |  |  |                 f"Successors and predecessors for {opcode}", | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Predecessors", "Count:", "Percentage:"), | 
					
						
							|  |  |  |                         lambda *_: pred_rows,  # type: ignore | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Successors", "Count:", "Percentage:"), | 
					
						
							|  |  |  |                         lambda *_: succ_rows,  # type: ignore | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "Predecessor/Successor Pairs", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         "Top 5 predecessors and successors of each Tier 1 opcode.", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         iter_pre_succ_pairs_tables, | 
					
						
							|  |  |  |         comparative=False, | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         This does not include the unspecialized instructions that occur after a | 
					
						
							|  |  |  |         specialized instruction deoptimizes. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def specialization_section() -> Section: | 
					
						
							|  |  |  |     def calc_specialization_table(opcode: str) -> RowCalculator: | 
					
						
							|  |  |  |         def calc(stats: Stats) -> Rows: | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             DOCS = { | 
					
						
							|  |  |  |                 "deferred": 'Lists the number of "deferred" (i.e. not specialized) instructions executed.', | 
					
						
							|  |  |  |                 "hit": "Specialized instructions that complete.", | 
					
						
							|  |  |  |                 "miss": "Specialized instructions that deopt.", | 
					
						
							|  |  |  |                 "deopt": "Specialized instructions that deopt.", | 
					
						
							|  |  |  |             } | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             opcode_stats = stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |             total = opcode_stats.get_specialization_total(opcode) | 
					
						
							|  |  |  |             specialization_counts = opcode_stats.get_specialization_counts(opcode) | 
					
						
							| 
									
										
										
										
											2022-02-09 12:30:26 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             return [ | 
					
						
							|  |  |  |                 ( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                     Doc(label, DOCS[label]), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                     Count(count), | 
					
						
							|  |  |  |                     Ratio(count, total), | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |                 for label, count in specialization_counts.items() | 
					
						
							|  |  |  |             ] | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return calc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def calc_specialization_success_failure_table(name: str) -> RowCalculator: | 
					
						
							|  |  |  |         def calc(stats: Stats) -> Rows: | 
					
						
							|  |  |  |             values = stats.get_opcode_stats( | 
					
						
							|  |  |  |                 "opcode" | 
					
						
							|  |  |  |             ).get_specialization_success_failure(name) | 
					
						
							|  |  |  |             total = sum(values.values()) | 
					
						
							|  |  |  |             if total: | 
					
						
							|  |  |  |                 return [ | 
					
						
							|  |  |  |                     (label.capitalize(), Count(val), Ratio(val, total)) | 
					
						
							|  |  |  |                     for label, val in values.items() | 
					
						
							|  |  |  |                 ] | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return calc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def calc_specialization_failure_kind_table(name: str) -> RowCalculator: | 
					
						
							|  |  |  |         def calc(stats: Stats) -> Rows: | 
					
						
							|  |  |  |             opcode_stats = stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |             failures = opcode_stats.get_specialization_failure_kinds(name) | 
					
						
							|  |  |  |             total = opcode_stats.get_specialization_failure_total(name) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             return sorted( | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     (label, Count(value), Ratio(value, total)) | 
					
						
							|  |  |  |                     for label, value in failures.items() | 
					
						
							|  |  |  |                     if value | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |                 key=itemgetter(1), | 
					
						
							|  |  |  |                 reverse=True, | 
					
						
							| 
									
										
										
										
											2023-09-12 17:12:57 -04:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return calc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def iter_specialization_tables(base_stats: Stats, head_stats: Stats | None = None): | 
					
						
							|  |  |  |         opcode_base_stats = base_stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |         names = opcode_base_stats.get_opcode_names() | 
					
						
							|  |  |  |         if head_stats is not None: | 
					
						
							|  |  |  |             opcode_head_stats = head_stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |             names &= opcode_head_stats.get_opcode_names()  # type: ignore | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             opcode_head_stats = None | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         for opcode in sorted(names): | 
					
						
							|  |  |  |             if not opcode_base_stats.is_specializable(opcode): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             if opcode_base_stats.get_specialization_total(opcode) == 0 and ( | 
					
						
							|  |  |  |                 opcode_head_stats is None | 
					
						
							|  |  |  |                 or opcode_head_stats.get_specialization_total(opcode) == 0 | 
					
						
							|  |  |  |             ): | 
					
						
							|  |  |  |                 continue | 
					
						
							|  |  |  |             yield Section( | 
					
						
							|  |  |  |                 opcode, | 
					
						
							|  |  |  |                 f"specialization stats for {opcode} family", | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Kind", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                         calc_specialization_table(opcode), | 
					
						
							|  |  |  |                         JoinMode.CHANGE, | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                     Table( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                         ("Success", "Count:", "Ratio:"), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                         calc_specialization_success_failure_table(opcode), | 
					
						
							|  |  |  |                         JoinMode.CHANGE, | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Failure kind", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                         calc_specialization_failure_kind_table(opcode), | 
					
						
							|  |  |  |                         JoinMode.CHANGE, | 
					
						
							|  |  |  |                     ), | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return Section( | 
					
						
							|  |  |  |         "Specialization stats", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         "Specialization stats by family", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         iter_specialization_tables, | 
					
						
							| 
									
										
										
										
											2023-09-15 16:10:46 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def specialization_effectiveness_section() -> Section: | 
					
						
							|  |  |  |     def calc_specialization_effectiveness_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         opcode_stats = stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |         total = opcode_stats.get_total_execution_count() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         ( | 
					
						
							|  |  |  |             basic, | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |             specialized_hits, | 
					
						
							|  |  |  |             specialized_misses, | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             not_specialized, | 
					
						
							|  |  |  |         ) = opcode_stats.get_specialized_total_counts() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [ | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |             ( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 Doc( | 
					
						
							|  |  |  |                     "Basic", | 
					
						
							|  |  |  |                     "Instructions that are not and cannot be specialized, e.g. `LOAD_FAST`.", | 
					
						
							|  |  |  |                 ), | 
					
						
							|  |  |  |                 Count(basic), | 
					
						
							|  |  |  |                 Ratio(basic, total), | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 Doc( | 
					
						
							|  |  |  |                     "Not specialized", | 
					
						
							|  |  |  |                     "Instructions that could be specialized but aren't, e.g. `LOAD_ATTR`, `BINARY_SLICE`.", | 
					
						
							|  |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 Count(not_specialized), | 
					
						
							|  |  |  |                 Ratio(not_specialized, total), | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |             ), | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |             ( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 Doc( | 
					
						
							|  |  |  |                     "Specialized hits", | 
					
						
							|  |  |  |                     "Specialized instructions, e.g. `LOAD_ATTR_MODULE` that complete.", | 
					
						
							|  |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |                 Count(specialized_hits), | 
					
						
							|  |  |  |                 Ratio(specialized_hits, total), | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             ( | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 Doc( | 
					
						
							|  |  |  |                     "Specialized misses", | 
					
						
							|  |  |  |                     "Specialized instructions, e.g. `LOAD_ATTR_MODULE` that deopt.", | 
					
						
							|  |  |  |                 ), | 
					
						
							| 
									
										
										
										
											2023-10-26 11:33:12 +01:00
										 |  |  |                 Count(specialized_misses), | 
					
						
							|  |  |  |                 Ratio(specialized_misses, total), | 
					
						
							|  |  |  |             ), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def calc_deferred_by_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         opcode_stats = stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |         deferred_counts = opcode_stats.get_deferred_counts() | 
					
						
							|  |  |  |         total = sum(deferred_counts.values()) | 
					
						
							|  |  |  |         if total == 0: | 
					
						
							|  |  |  |             return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             (name, Count(value), Ratio(value, total)) | 
					
						
							|  |  |  |             for name, value in sorted( | 
					
						
							|  |  |  |                 deferred_counts.items(), key=itemgetter(1), reverse=True | 
					
						
							|  |  |  |             )[:10] | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def calc_misses_by_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         opcode_stats = stats.get_opcode_stats("opcode") | 
					
						
							|  |  |  |         misses_counts = opcode_stats.get_misses_counts() | 
					
						
							|  |  |  |         total = sum(misses_counts.values()) | 
					
						
							|  |  |  |         if total == 0: | 
					
						
							|  |  |  |             return [] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             (name, Count(value), Ratio(value, total)) | 
					
						
							|  |  |  |             for name, value in sorted( | 
					
						
							|  |  |  |                 misses_counts.items(), key=itemgetter(1), reverse=True | 
					
						
							|  |  |  |             )[:10] | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "Specialization effectiveness", | 
					
						
							|  |  |  |         "", | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							|  |  |  |                 ("Instructions", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                 calc_specialization_effectiveness_table, | 
					
						
							|  |  |  |                 JoinMode.CHANGE, | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             Section( | 
					
						
							|  |  |  |                 "Deferred by instruction", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 "Breakdown of deferred (not specialized) instruction counts by family", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 [ | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Name", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                         calc_deferred_by_table, | 
					
						
							|  |  |  |                         JoinMode.CHANGE, | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             Section( | 
					
						
							|  |  |  |                 "Misses by instruction", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 "Breakdown of misses (specialized deopts) instruction counts by family", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 [ | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Name", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                         calc_misses_by_table, | 
					
						
							|  |  |  |                         JoinMode.CHANGE, | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         All entries are execution counts. Should add up to the total number of | 
					
						
							|  |  |  |         Tier 1 instructions executed. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def call_stats_section() -> Section: | 
					
						
							|  |  |  |     def calc_call_stats_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         call_stats = stats.get_call_stats() | 
					
						
							|  |  |  |         total = sum(v for k, v in call_stats.items() if "Calls to" in k) | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             (key, Count(value), Ratio(value, total)) | 
					
						
							|  |  |  |             for key, value in call_stats.items() | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     return Section( | 
					
						
							|  |  |  |         "Call stats", | 
					
						
							|  |  |  |         "Inlined calls and frame stats", | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							|  |  |  |                 ("", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                 calc_call_stats_table, | 
					
						
							|  |  |  |                 JoinMode.CHANGE, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         This shows what fraction of calls to Python functions are inlined (i.e. | 
					
						
							|  |  |  |         not having a call at the C level) and for those that are not, where the | 
					
						
							|  |  |  |         call comes from.  The various categories overlap. | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         Also includes the count of frame objects created. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def object_stats_section() -> Section: | 
					
						
							|  |  |  |     def calc_object_stats_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         object_stats = stats.get_object_stats() | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             (label, Count(value), Ratio(value, den)) | 
					
						
							|  |  |  |             for label, (value, den) in object_stats.items() | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "Object stats", | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         "Allocations, frees and dict materializatons", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							|  |  |  |                 ("", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                 calc_object_stats_table, | 
					
						
							|  |  |  |                 JoinMode.CHANGE, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         Below, "allocations" means "allocations that are not from a freelist". | 
					
						
							|  |  |  |         Total allocations = "Allocations from freelist" + "Allocations". | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-04-02 11:59:21 +01:00
										 |  |  |         "Inline values" is the number of values arrays inlined into objects. | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |         The cache hit/miss numbers are for the MRO cache, split into dunder and | 
					
						
							|  |  |  |         other names. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-08-04 10:34:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def gc_stats_section() -> Section: | 
					
						
							|  |  |  |     def calc_gc_stats(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         gc_stats = stats.get_gc_stats() | 
					
						
							| 
									
										
										
										
											2023-08-04 10:34:23 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return [ | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 Count(i), | 
					
						
							|  |  |  |                 Count(gen["collections"]), | 
					
						
							|  |  |  |                 Count(gen["objects collected"]), | 
					
						
							|  |  |  |                 Count(gen["object visits"]), | 
					
						
							| 
									
										
										
										
											2024-12-02 10:12:17 +00:00
										 |  |  |                 Count(gen["objects reachable from roots"]), | 
					
						
							|  |  |  |                 Count(gen["objects not reachable from roots"]), | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |             ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             for (i, gen) in enumerate(gc_stats) | 
					
						
							|  |  |  |         ] | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "GC stats", | 
					
						
							|  |  |  |         "GC collections and effectiveness", | 
					
						
							|  |  |  |         [ | 
					
						
							|  |  |  |             Table( | 
					
						
							| 
									
										
										
										
											2024-12-02 10:12:17 +00:00
										 |  |  |                 ("Generation:", "Collections:", "Objects collected:", "Object visits:", | 
					
						
							|  |  |  |                  "Reachable from roots:", "Not reachable from roots:"), | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 calc_gc_stats, | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         ], | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         doc="""
 | 
					
						
							|  |  |  |         Collected/visits gives some measure of efficiency. | 
					
						
							|  |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def optimization_section() -> Section: | 
					
						
							|  |  |  |     def calc_optimization_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         optimization_stats = stats.get_optimization_stats() | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         return [ | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 label, | 
					
						
							|  |  |  |                 Count(value), | 
					
						
							|  |  |  |                 Ratio(value, den, percentage=label != "Uops executed"), | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             for label, (value, den) in optimization_stats.items() | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  |     def calc_optimizer_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         optimizer_stats = stats.get_optimizer_stats() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             (label, Count(value), Ratio(value, den)) | 
					
						
							|  |  |  |             for label, (value, den) in optimizer_stats.items() | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2025-02-02 23:17:53 +00:00
										 |  |  |     def calc_jit_memory_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         jit_memory_stats = stats.get_jit_memory_stats() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         return [ | 
					
						
							|  |  |  |             ( | 
					
						
							|  |  |  |                 label, | 
					
						
							|  |  |  |                 Count(value), | 
					
						
							|  |  |  |                 Ratio(value, den, percentage=label != "Total memory size"), | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |             for label, (value, den) in jit_memory_stats.items() | 
					
						
							|  |  |  |         ] | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def calc_histogram_table(key: str, den: str | None = None) -> RowCalculator: | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         def calc(stats: Stats) -> Rows: | 
					
						
							|  |  |  |             histogram = stats.get_histogram(key) | 
					
						
							| 
									
										
										
										
											2025-02-02 23:17:53 +00:00
										 |  |  | 
 | 
					
						
							|  |  |  |             if den: | 
					
						
							|  |  |  |                 denominator = stats.get(den) | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 denominator = 0 | 
					
						
							|  |  |  |                 for _, v in histogram: | 
					
						
							|  |  |  |                     denominator += v | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |             rows: Rows = [] | 
					
						
							|  |  |  |             for k, v in histogram: | 
					
						
							|  |  |  |                 rows.append( | 
					
						
							|  |  |  |                     ( | 
					
						
							|  |  |  |                         f"<= {k:,d}", | 
					
						
							|  |  |  |                         Count(v), | 
					
						
							|  |  |  |                         Ratio(v, denominator), | 
					
						
							|  |  |  |                     ) | 
					
						
							|  |  |  |                 ) | 
					
						
							| 
									
										
										
										
											2025-02-02 23:17:53 +00:00
										 |  |  |             # Don't include any leading and trailing zero entries | 
					
						
							|  |  |  |             start = 0 | 
					
						
							|  |  |  |             end = len(rows) - 1 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             while start <= end: | 
					
						
							|  |  |  |                 if rows[start][1] == 0: | 
					
						
							|  |  |  |                     start += 1 | 
					
						
							|  |  |  |                 elif rows[end][1] == 0: | 
					
						
							|  |  |  |                     end -= 1 | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     break | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             return rows[start:end+1] | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  |         return calc | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     def calc_unsupported_opcodes_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         unsupported_opcodes = stats.get_opcode_stats("unsupported_opcode") | 
					
						
							|  |  |  |         return sorted( | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 (opcode, Count(count)) | 
					
						
							|  |  |  |                 for opcode, count in unsupported_opcodes.get_opcode_counts().items() | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |             key=itemgetter(1), | 
					
						
							|  |  |  |             reverse=True, | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-03-21 13:27:46 -04:00
										 |  |  |     def calc_error_in_opcodes_table(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         error_in_opcodes = stats.get_opcode_stats("error_in_opcode") | 
					
						
							|  |  |  |         return sorted( | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 (opcode, Count(count)) | 
					
						
							|  |  |  |                 for opcode, count in error_in_opcodes.get_opcode_counts().items() | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |             key=itemgetter(1), | 
					
						
							|  |  |  |             reverse=True, | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     def iter_optimization_tables(base_stats: Stats, head_stats: Stats | None = None): | 
					
						
							|  |  |  |         if not base_stats.get_optimization_stats() or ( | 
					
						
							|  |  |  |             head_stats is not None and not head_stats.get_optimization_stats() | 
					
						
							|  |  |  |         ): | 
					
						
							|  |  |  |             return | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         yield Table(("", "Count:", "Ratio:"), calc_optimization_table, JoinMode.CHANGE) | 
					
						
							| 
									
										
										
										
											2024-03-16 11:10:43 -04:00
										 |  |  |         yield Table(("", "Count:", "Ratio:"), calc_optimizer_table, JoinMode.CHANGE) | 
					
						
							| 
									
										
										
										
											2025-02-02 23:17:53 +00:00
										 |  |  |         yield Section( | 
					
						
							|  |  |  |             "JIT memory stats", | 
					
						
							|  |  |  |             "JIT memory stats", | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 Table( | 
					
						
							|  |  |  |                     ("", "Size (bytes):", "Ratio:"), | 
					
						
							|  |  |  |                     calc_jit_memory_table, | 
					
						
							|  |  |  |                     JoinMode.CHANGE | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         ) | 
					
						
							|  |  |  |         yield Section( | 
					
						
							|  |  |  |             "JIT trace total memory histogram", | 
					
						
							|  |  |  |             "JIT trace total memory histogram", | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 Table( | 
					
						
							|  |  |  |                     ("Size (bytes)", "Count", "Ratio:"), | 
					
						
							|  |  |  |                     calc_histogram_table("Trace total memory size"), | 
					
						
							|  |  |  |                     JoinMode.CHANGE_NO_SORT, | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ], | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         for name, den in [ | 
					
						
							|  |  |  |             ("Trace length", "Optimization traces created"), | 
					
						
							|  |  |  |             ("Optimized trace length", "Optimization traces created"), | 
					
						
							|  |  |  |             ("Trace run length", "Optimization traces executed"), | 
					
						
							|  |  |  |         ]: | 
					
						
							|  |  |  |             yield Section( | 
					
						
							|  |  |  |                 f"{name} histogram", | 
					
						
							|  |  |  |                 "", | 
					
						
							|  |  |  |                 [ | 
					
						
							|  |  |  |                     Table( | 
					
						
							|  |  |  |                         ("Range", "Count:", "Ratio:"), | 
					
						
							|  |  |  |                         calc_histogram_table(name, den), | 
					
						
							| 
									
										
										
										
											2024-01-22 14:45:15 -05:00
										 |  |  |                         JoinMode.CHANGE_NO_SORT, | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                     ) | 
					
						
							|  |  |  |                 ], | 
					
						
							|  |  |  |             ) | 
					
						
							|  |  |  |         yield Section( | 
					
						
							| 
									
										
										
										
											2023-10-30 20:02:45 -04:00
										 |  |  |             "Uop execution stats", | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             "", | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 Table( | 
					
						
							|  |  |  |                     ("Name", "Count:", "Self:", "Cumulative:", "Miss ratio:"), | 
					
						
							|  |  |  |                     calc_execution_count_table("uops"), | 
					
						
							|  |  |  |                     JoinMode.CHANGE_ONE_COLUMN, | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ], | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-04-16 08:27:18 -05:00
										 |  |  |         yield pair_count_section(prefix="uop", title="Non-JIT uop") | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |         yield Section( | 
					
						
							|  |  |  |             "Unsupported opcodes", | 
					
						
							|  |  |  |             "", | 
					
						
							|  |  |  |             [ | 
					
						
							|  |  |  |                 Table( | 
					
						
							|  |  |  |                     ("Opcode", "Count:"), | 
					
						
							|  |  |  |                     calc_unsupported_opcodes_table, | 
					
						
							|  |  |  |                     JoinMode.CHANGE, | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             ], | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         ) | 
					
						
							| 
									
										
										
										
											2024-03-21 13:27:46 -04:00
										 |  |  |         yield Section( | 
					
						
							|  |  |  |             "Optimizer errored out with opcode", | 
					
						
							|  |  |  |             "Optimization stopped after encountering this opcode", | 
					
						
							|  |  |  |             [Table(("Opcode", "Count:"), calc_error_in_opcodes_table, JoinMode.CHANGE)], | 
					
						
							|  |  |  |         ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "Optimization (Tier 2) stats", | 
					
						
							|  |  |  |         "statistics about the Tier 2 optimizer", | 
					
						
							|  |  |  |         iter_optimization_tables, | 
					
						
							|  |  |  |     ) | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-25 06:10:51 -05:00
										 |  |  | def rare_event_section() -> Section: | 
					
						
							|  |  |  |     def calc_rare_event_table(stats: Stats) -> Table: | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |         DOCS = { | 
					
						
							|  |  |  |             "set class": "Setting an object's class, `obj.__class__ = ...`", | 
					
						
							|  |  |  |             "set bases": "Setting the bases of a class, `cls.__bases__ = ...`", | 
					
						
							|  |  |  |             "set eval frame func": ( | 
					
						
							|  |  |  |                 "Setting the PEP 523 frame eval function " | 
					
						
							|  |  |  |                 "`_PyInterpreterState_SetFrameEvalFunc()`" | 
					
						
							|  |  |  |             ), | 
					
						
							|  |  |  |             "builtin dict": "Modifying the builtins, `__builtins__.__dict__[var] = ...`", | 
					
						
							|  |  |  |             "func modification": "Modifying a function, e.g. `func.__defaults__ = ...`, etc.", | 
					
						
							|  |  |  |             "watched dict modification": "A watched dict has been modified", | 
					
						
							|  |  |  |             "watched globals modification": "A watched `globals()` dict has been modified", | 
					
						
							|  |  |  |         } | 
					
						
							|  |  |  |         return [(Doc(x, DOCS[x]), Count(y)) for x, y in stats.get_rare_events()] | 
					
						
							| 
									
										
										
										
											2024-01-25 06:10:51 -05:00
										 |  |  | 
 | 
					
						
							|  |  |  |     return Section( | 
					
						
							|  |  |  |         "Rare events", | 
					
						
							|  |  |  |         "Counts of rare/unlikely events", | 
					
						
							|  |  |  |         [Table(("Event", "Count:"), calc_rare_event_table, JoinMode.CHANGE)], | 
					
						
							|  |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  | def meta_stats_section() -> Section: | 
					
						
							|  |  |  |     def calc_rows(stats: Stats) -> Rows: | 
					
						
							|  |  |  |         return [("Number of data files", Count(stats.get("__nfiles__")))] | 
					
						
							| 
									
										
										
										
											2022-02-14 15:53:38 +00:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     return Section( | 
					
						
							|  |  |  |         "Meta stats", | 
					
						
							|  |  |  |         "Meta statistics", | 
					
						
							|  |  |  |         [Table(("", "Count:"), calc_rows, JoinMode.CHANGE)], | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |     ) | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
# Ordered list of the report's top-level entries.  Each *_section() factory
# is called exactly once here, at module load time.
LAYOUT = [
    execution_count_section(),
    pair_count_section("opcode"),
    pre_succ_pairs_section(),
    specialization_section(),
    specialization_effectiveness_section(),
    call_stats_section(),
    object_stats_section(),
    gc_stats_section(),
    optimization_section(),
    rare_event_section(),
    meta_stats_section(),
]
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | def output_markdown( | 
					
						
							|  |  |  |     out: TextIO, | 
					
						
							|  |  |  |     obj: Section | Table | list, | 
					
						
							|  |  |  |     base_stats: Stats, | 
					
						
							|  |  |  |     head_stats: Stats | None = None, | 
					
						
							|  |  |  |     level: int = 2, | 
					
						
							|  |  |  | ) -> None: | 
					
						
							|  |  |  |     def to_markdown(x): | 
					
						
							|  |  |  |         if hasattr(x, "markdown"): | 
					
						
							|  |  |  |             return x.markdown() | 
					
						
							|  |  |  |         elif isinstance(x, str): | 
					
						
							|  |  |  |             return x | 
					
						
							|  |  |  |         elif x is None: | 
					
						
							|  |  |  |             return "" | 
					
						
							|  |  |  |         else: | 
					
						
							|  |  |  |             raise TypeError(f"Can't convert {x} to markdown") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     match obj: | 
					
						
							|  |  |  |         case Section(): | 
					
						
							|  |  |  |             if obj.title: | 
					
						
							|  |  |  |                 print("#" * level, obj.title, file=out) | 
					
						
							|  |  |  |                 print(file=out) | 
					
						
							|  |  |  |                 print("<details>", file=out) | 
					
						
							|  |  |  |                 print("<summary>", obj.summary, "</summary>", file=out) | 
					
						
							|  |  |  |                 print(file=out) | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             if obj.doc: | 
					
						
							|  |  |  |                 print(obj.doc, file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             if head_stats is not None and obj.comparative is False: | 
					
						
							|  |  |  |                 print("Not included in comparative output.\n") | 
					
						
							|  |  |  |             else: | 
					
						
							|  |  |  |                 for part in obj.part_iter(base_stats, head_stats): | 
					
						
							|  |  |  |                     output_markdown(out, part, base_stats, head_stats, level=level + 1) | 
					
						
							|  |  |  |             print(file=out) | 
					
						
							|  |  |  |             if obj.title: | 
					
						
							|  |  |  |                 print("</details>", file=out) | 
					
						
							|  |  |  |                 print(file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         case Table(): | 
					
						
							|  |  |  |             header, rows = obj.get_table(base_stats, head_stats) | 
					
						
							|  |  |  |             if len(rows) == 0: | 
					
						
							|  |  |  |                 return | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |             alignments = [] | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             for item in header: | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 if item.endswith(":"): | 
					
						
							|  |  |  |                     alignments.append("right") | 
					
						
							|  |  |  |                 else: | 
					
						
							|  |  |  |                     alignments.append("left") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             print("<table>", file=out) | 
					
						
							|  |  |  |             print("<thead>", file=out) | 
					
						
							|  |  |  |             print("<tr>", file=out) | 
					
						
							|  |  |  |             for item, align in zip(header, alignments): | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                 if item.endswith(":"): | 
					
						
							|  |  |  |                     item = item[:-1] | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 print(f'<th align="{align}">{item}</th>', file=out) | 
					
						
							|  |  |  |             print("</tr>", file=out) | 
					
						
							|  |  |  |             print("</thead>", file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             print("<tbody>", file=out) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             for row in rows: | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 if len(row) != len(header): | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |                     raise ValueError( | 
					
						
							|  |  |  |                         "Wrong number of elements in row '" + str(row) + "'" | 
					
						
							|  |  |  |                     ) | 
					
						
							| 
									
										
										
										
											2024-02-16 12:06:07 -05:00
										 |  |  |                 print("<tr>", file=out) | 
					
						
							|  |  |  |                 for col, align in zip(row, alignments): | 
					
						
							|  |  |  |                     print(f'<td align="{align}">{to_markdown(col)}</td>', file=out) | 
					
						
							|  |  |  |                 print("</tr>", file=out) | 
					
						
							|  |  |  |             print("</tbody>", file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             print("</table>", file=out) | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             print(file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         case list(): | 
					
						
							|  |  |  |             for part in obj: | 
					
						
							|  |  |  |                 output_markdown(out, part, base_stats, head_stats, level=level) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |             print("---", file=out) | 
					
						
							|  |  |  |             print("Stats gathered on:", date.today(), file=out) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2024-01-10 15:07:19 +02:00
										 |  |  | def output_stats(inputs: list[Path], json_output=str | None): | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |     match len(inputs): | 
					
						
							|  |  |  |         case 1: | 
					
						
							|  |  |  |             data = load_raw_data(Path(inputs[0])) | 
					
						
							|  |  |  |             if json_output is not None: | 
					
						
							| 
									
										
										
										
											2024-01-25 06:10:51 -05:00
										 |  |  |                 with open(json_output, "w", encoding="utf-8") as f: | 
					
						
							| 
									
										
										
										
											2024-01-10 15:07:19 +02:00
										 |  |  |                     save_raw_data(data, f)  # type: ignore | 
					
						
							| 
									
										
										
										
											2023-10-24 04:57:39 -04:00
										 |  |  |             stats = Stats(data) | 
					
						
							|  |  |  |             output_markdown(sys.stdout, LAYOUT, stats) | 
					
						
							|  |  |  |         case 2: | 
					
						
							|  |  |  |             if json_output is not None: | 
					
						
							|  |  |  |                 raise ValueError( | 
					
						
							|  |  |  |                     "Can not output to JSON when there are multiple inputs" | 
					
						
							|  |  |  |                 ) | 
					
						
							|  |  |  |             base_data = load_raw_data(Path(inputs[0])) | 
					
						
							|  |  |  |             head_data = load_raw_data(Path(inputs[1])) | 
					
						
							|  |  |  |             base_stats = Stats(base_data) | 
					
						
							|  |  |  |             head_stats = Stats(head_data) | 
					
						
							|  |  |  |             output_markdown(sys.stdout, LAYOUT, base_stats, head_stats) | 
					
						
							| 
									
										
										
										
											2021-08-04 11:39:52 +01:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  | def main(): | 
					
						
							|  |  |  |     parser = argparse.ArgumentParser(description="Summarize pystats results") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "inputs", | 
					
						
							|  |  |  |         nargs="*", | 
					
						
							|  |  |  |         type=str, | 
					
						
							|  |  |  |         default=[DEFAULT_DIR], | 
					
						
							|  |  |  |         help=f"""
 | 
					
						
							|  |  |  |         Input source(s). | 
					
						
							|  |  |  |         For each entry, if a .json file, the output provided by --json-output from a previous run; | 
					
						
							|  |  |  |         if a directory, a directory containing raw pystats .txt files. | 
					
						
							|  |  |  |         If one source is provided, its stats are printed. | 
					
						
							|  |  |  |         If two sources are provided, comparative stats are printed. | 
					
						
							|  |  |  |         Default is {DEFAULT_DIR}. | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         """,
 | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     parser.add_argument( | 
					
						
							|  |  |  |         "--json-output", | 
					
						
							|  |  |  |         nargs="?", | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  |         help="Output complete raw results to the given JSON file.", | 
					
						
							| 
									
										
										
										
											2022-11-04 06:15:54 -04:00
										 |  |  |     ) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     args = parser.parse_args() | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     if len(args.inputs) > 2: | 
					
						
							|  |  |  |         raise ValueError("0-2 arguments may be provided.") | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     output_stats(args.inputs, json_output=args.json_output) | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2023-10-04 17:52:28 -04:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-08-04 11:39:52 +01:00
										 |  |  | if __name__ == "__main__": | 
					
						
							|  |  |  |     main() |