gh-123165: make dis functions render positions on demand (#123168)

This commit is contained in:
Bénédikt Tran 2024-08-21 15:46:24 +02:00 committed by GitHub
parent 94036e43a8
commit b1d3bd2e09
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 209 additions and 37 deletions

View file

@ -56,6 +56,10 @@ interpreter.
for jump targets and exception handlers. The ``-O`` command line
option and the ``show_offsets`` argument were added.
.. versionchanged:: 3.14
The :option:`-P <dis --show-positions>` command-line option
and the ``show_positions`` argument were added.
Example: Given the function :func:`!myfunc`::
def myfunc(alist):
@ -85,7 +89,7 @@ The :mod:`dis` module can be invoked as a script from the command line:
.. code-block:: sh
python -m dis [-h] [-C] [-O] [infile]
python -m dis [-h] [-C] [-O] [-P] [infile]
The following options are accepted:
@ -103,6 +107,10 @@ The following options are accepted:
Show offsets of instructions.
.. cmdoption:: -P, --show-positions
Show positions of instructions in the source code.
If :file:`infile` is specified, its disassembled code will be written to stdout.
Otherwise, disassembly is performed on compiled source code received from stdin.
@ -116,7 +124,8 @@ The bytecode analysis API allows pieces of Python code to be wrapped in a
code.
.. class:: Bytecode(x, *, first_line=None, current_offset=None,\
show_caches=False, adaptive=False, show_offsets=False)
show_caches=False, adaptive=False, show_offsets=False,\
show_positions=False)
Analyse the bytecode corresponding to a function, generator, asynchronous
generator, coroutine, method, string of source code, or a code object (as
@ -144,6 +153,9 @@ code.
If *show_offsets* is ``True``, :meth:`.dis` will include instruction
offsets in the output.
If *show_positions* is ``True``, :meth:`.dis` will include instruction
source code positions in the output.
.. classmethod:: from_traceback(tb, *, show_caches=False)
Construct a :class:`Bytecode` instance from the given traceback, setting
@ -173,6 +185,12 @@ code.
.. versionchanged:: 3.11
Added the *show_caches* and *adaptive* parameters.
.. versionchanged:: 3.13
Added the *show_offsets* parameter.
.. versionchanged:: 3.14
Added the *show_positions* parameter.
Example:
.. doctest::
@ -226,7 +244,8 @@ operation is being performed, so the intermediate analysis object isn't useful:
Added *file* parameter.
.. function:: dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False)
.. function:: dis(x=None, *, file=None, depth=None, show_caches=False,\
adaptive=False, show_offsets=False, show_positions=False)
Disassemble the *x* object. *x* can denote either a module, a class, a
method, a function, a generator, an asynchronous generator, a coroutine,
@ -265,9 +284,14 @@ operation is being performed, so the intermediate analysis object isn't useful:
.. versionchanged:: 3.11
Added the *show_caches* and *adaptive* parameters.
.. versionchanged:: 3.13
Added the *show_offsets* parameter.
.. function:: distb(tb=None, *, file=None, show_caches=False, adaptive=False,
show_offsets=False)
.. versionchanged:: 3.14
Added the *show_positions* parameter.
.. function:: distb(tb=None, *, file=None, show_caches=False, adaptive=False,\
show_offsets=False, show_positions=False)
Disassemble the top-of-stack function of a traceback, using the last
traceback if none was passed. The instruction causing the exception is
@ -285,14 +309,19 @@ operation is being performed, so the intermediate analysis object isn't useful:
.. versionchanged:: 3.13
Added the *show_offsets* parameter.
.. versionchanged:: 3.14
Added the *show_positions* parameter.
.. function:: disassemble(code, lasti=-1, *, file=None, show_caches=False, adaptive=False,\
show_offsets=False, show_positions=False)
disco(code, lasti=-1, *, file=None, show_caches=False, adaptive=False,
show_offsets=False)
disco(code, lasti=-1, *, file=None, show_caches=False, adaptive=False,\
show_offsets=False, show_positions=False)
Disassemble a code object, indicating the last instruction if *lasti* was
provided. The output is divided in the following columns:
#. the line number, for the first instruction of each line
#. the source code location of the instruction. Complete location information
is shown if *show_positions* is true. Otherwise (the default) only the
line number is displayed.
#. the current instruction, indicated as ``-->``,
#. a labelled instruction, indicated with ``>>``,
#. the address of the instruction,
@ -315,6 +344,9 @@ operation is being performed, so the intermediate analysis object isn't useful:
.. versionchanged:: 3.13
Added the *show_offsets* parameter.
.. versionchanged:: 3.14
Added the *show_positions* parameter.
.. function:: get_instructions(x, *, first_line=None, show_caches=False, adaptive=False)
Return an iterator over the instructions in the supplied function, method,

View file

@ -110,6 +110,22 @@ ast
(Contributed by Bénédikt Tran in :gh:`121141`.)
dis
---
* Added support for rendering full source location information of
:class:`instructions <dis.Instruction>`, rather than only the line number.
This feature is added to the following interfaces via the ``show_positions``
keyword argument:
- :class:`dis.Bytecode`,
- :func:`dis.dis`, :func:`dis.distb`, and
- :func:`dis.disassemble`.
This feature is also exposed via :option:`dis --show-positions`.
(Contributed by Bénédikt Tran in :gh:`123165`.)
fractions
---------

View file

@ -80,7 +80,7 @@ def _try_compile(source, name):
return compile(source, name, 'exec')
def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
show_offsets=False):
show_offsets=False, show_positions=False):
"""Disassemble classes, methods, functions, and other compiled objects.
With no argument, disassemble the last traceback.
@ -91,7 +91,7 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
"""
if x is None:
distb(file=file, show_caches=show_caches, adaptive=adaptive,
show_offsets=show_offsets)
show_offsets=show_offsets, show_positions=show_positions)
return
# Extract functions from methods.
if hasattr(x, '__func__'):
@ -112,12 +112,12 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
if isinstance(x1, _have_code):
print("Disassembly of %s:" % name, file=file)
try:
dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
dis(x1, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions)
except TypeError as msg:
print("Sorry:", msg, file=file)
print(file=file)
elif hasattr(x, 'co_code'): # Code object
_disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
_disassemble_recursive(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions)
elif isinstance(x, (bytes, bytearray)): # Raw bytecode
labels_map = _make_labels_map(x)
label_width = 4 + len(str(len(labels_map)))
@ -128,12 +128,12 @@ def dis(x=None, *, file=None, depth=None, show_caches=False, adaptive=False,
arg_resolver = ArgResolver(labels_map=labels_map)
_disassemble_bytes(x, arg_resolver=arg_resolver, formatter=formatter)
elif isinstance(x, str): # Source code
_disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
_disassemble_str(x, file=file, depth=depth, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions)
else:
raise TypeError("don't know how to disassemble %s objects" %
type(x).__name__)
def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False):
def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False):
"""Disassemble a traceback (default: last traceback)."""
if tb is None:
try:
@ -144,7 +144,7 @@ def distb(tb=None, *, file=None, show_caches=False, adaptive=False, show_offsets
except AttributeError:
raise RuntimeError("no last traceback to disassemble") from None
while tb.tb_next: tb = tb.tb_next
disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
disassemble(tb.tb_frame.f_code, tb.tb_lasti, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions)
# The inspect module interrogates this dictionary to build its
# list of CO_* constants. It is also used by pretty_flags to
@ -427,21 +427,25 @@ def __str__(self):
class Formatter:
def __init__(self, file=None, lineno_width=0, offset_width=0, label_width=0,
line_offset=0, show_caches=False):
line_offset=0, show_caches=False, *, show_positions=False):
"""Create a Formatter
*file* where to write the output
*lineno_width* sets the width of the line number field (0 omits it)
*lineno_width* sets the width of the source location field (0 omits it).
Should be large enough for a line number or full positions (depending
on the value of *show_positions*).
*offset_width* sets the width of the instruction offset field
*label_width* sets the width of the label field
*show_caches* is a boolean indicating whether to display cache lines
*show_positions* is a boolean indicating whether full positions should
be reported instead of only the line numbers.
"""
self.file = file
self.lineno_width = lineno_width
self.offset_width = offset_width
self.label_width = label_width
self.show_caches = show_caches
self.show_positions = show_positions
def print_instruction(self, instr, mark_as_current=False):
self.print_instruction_line(instr, mark_as_current)
@ -474,15 +478,27 @@ def print_instruction_line(self, instr, mark_as_current):
print(file=self.file)
fields = []
# Column: Source code line number
# Column: Source code location information
if lineno_width:
if instr.starts_line:
lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds"
lineno_fmt = lineno_fmt % lineno_width
lineno = _NO_LINENO if instr.line_number is None else instr.line_number
fields.append(lineno_fmt % lineno)
if self.show_positions:
# reporting positions instead of just line numbers
if instr_positions := instr.positions:
if all(p is None for p in instr_positions):
positions_str = _NO_LINENO
else:
ps = tuple('?' if p is None else p for p in instr_positions)
positions_str = f"{ps[0]}:{ps[2]}-{ps[1]}:{ps[3]}"
fields.append(f'{positions_str:{lineno_width}}')
else:
fields.append(' ' * lineno_width)
else:
fields.append(' ' * lineno_width)
if instr.starts_line:
lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds"
lineno_fmt = lineno_fmt % lineno_width
lineno = _NO_LINENO if instr.line_number is None else instr.line_number
fields.append(lineno_fmt % lineno)
else:
fields.append(' ' * lineno_width)
# Column: Label
if instr.label is not None:
lbl = f"L{instr.label}:"
@ -769,17 +785,22 @@ def _get_instructions_bytes(code, linestarts=None, line_offset=0, co_positions=N
def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
show_offsets=False):
show_offsets=False, show_positions=False):
"""Disassemble a code object."""
linestarts = dict(findlinestarts(co))
exception_entries = _parse_exception_table(co)
if show_positions:
lineno_width = _get_positions_width(co)
else:
lineno_width = _get_lineno_width(linestarts)
labels_map = _make_labels_map(co.co_code, exception_entries=exception_entries)
label_width = 4 + len(str(len(labels_map)))
formatter = Formatter(file=file,
lineno_width=_get_lineno_width(linestarts),
lineno_width=lineno_width,
offset_width=len(str(max(len(co.co_code) - 2, 9999))) if show_offsets else 0,
label_width=label_width,
show_caches=show_caches)
show_caches=show_caches,
show_positions=show_positions)
arg_resolver = ArgResolver(co_consts=co.co_consts,
names=co.co_names,
varname_from_oparg=co._varname_from_oparg,
@ -788,8 +809,8 @@ def disassemble(co, lasti=-1, *, file=None, show_caches=False, adaptive=False,
exception_entries=exception_entries, co_positions=co.co_positions(),
original_code=co.co_code, arg_resolver=arg_resolver, formatter=formatter)
def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False):
disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets)
def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False):
disassemble(co, file=file, show_caches=show_caches, adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions)
if depth is None or depth > 0:
if depth is not None:
depth = depth - 1
@ -799,7 +820,7 @@ def _disassemble_recursive(co, *, file=None, depth=None, show_caches=False, adap
print("Disassembly of %r:" % (x,), file=file)
_disassemble_recursive(
x, file=file, depth=depth, show_caches=show_caches,
adaptive=adaptive, show_offsets=show_offsets
adaptive=adaptive, show_offsets=show_offsets, show_positions=show_positions
)
@ -832,6 +853,22 @@ def _get_lineno_width(linestarts):
lineno_width = len(_NO_LINENO)
return lineno_width
def _get_positions_width(code):
    """Return the width of the source-location column for *code*.

    Full positions are rendered as 'LINE:COL-ENDLINE:ENDCOL' followed by
    one extra column of padding.  A missing component is rendered as '?'
    (one character), and an entry whose components are all None is
    rendered as _NO_LINENO, so the column is never narrower than
    1 + len(_NO_LINENO).

    If no entry of code.co_positions() carries any integer component,
    0 is returned so that the location column is omitted altogether.
    """
    widest = 0
    seen_int = False
    for entry in code.co_positions():
        if not seen_int:
            seen_int = any(isinstance(item, int) for item in entry)
        # Width of the four components alone, without the separators.
        entry_width = 0
        for item in entry:
            entry_width += 1 if item is None else len(str(item))
        if entry_width > widest:
            widest = entry_width
    if not seen_int:
        return 0
    # 3 separators (':', '-', ':') in the normal format, plus 1 column
    # of trailing padding.
    return 1 + max(len(_NO_LINENO), 3 + widest)
def _disassemble_bytes(code, lasti=-1, linestarts=None,
*, line_offset=0, exception_entries=(),
@ -978,7 +1015,7 @@ class Bytecode:
Iterating over this yields the bytecode operations as Instruction instances.
"""
def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False):
def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False, adaptive=False, show_offsets=False, show_positions=False):
self.codeobj = co = _get_code_object(x)
if first_line is None:
self.first_line = co.co_firstlineno
@ -993,6 +1030,7 @@ def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False
self.show_caches = show_caches
self.adaptive = adaptive
self.show_offsets = show_offsets
self.show_positions = show_positions
def __iter__(self):
co = self.codeobj
@ -1036,16 +1074,19 @@ def dis(self):
with io.StringIO() as output:
code = _get_code_array(co, self.adaptive)
offset_width = len(str(max(len(code) - 2, 9999))) if self.show_offsets else 0
if self.show_positions:
lineno_width = _get_positions_width(co)
else:
lineno_width = _get_lineno_width(self._linestarts)
labels_map = _make_labels_map(co.co_code, self.exception_entries)
label_width = 4 + len(str(len(labels_map)))
formatter = Formatter(file=output,
lineno_width=_get_lineno_width(self._linestarts),
lineno_width=lineno_width,
offset_width=offset_width,
label_width=label_width,
line_offset=self._line_offset,
show_caches=self.show_caches)
show_caches=self.show_caches,
show_positions=self.show_positions)
arg_resolver = ArgResolver(co_consts=co.co_consts,
names=co.co_names,
@ -1071,6 +1112,8 @@ def main():
help='show inline caches')
parser.add_argument('-O', '--show-offsets', action='store_true',
help='show instruction offsets')
parser.add_argument('-P', '--show-positions', action='store_true',
help='show instruction positions')
parser.add_argument('infile', nargs='?', default='-')
args = parser.parse_args()
if args.infile == '-':
@ -1081,7 +1124,7 @@ def main():
with open(args.infile, 'rb') as infile:
source = infile.read()
code = compile(source, name, "exec")
dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets)
dis(code, show_caches=args.show_caches, show_offsets=args.show_offsets, show_positions=args.show_positions)
if __name__ == "__main__":
main()

View file

@ -127,6 +127,16 @@ def _f(a):
_f.__code__.co_firstlineno + 1,
_f.__code__.co_firstlineno + 2)
# Expected disassembly of _f when positions are shown.  Each '%-14s' slot is
# filled (via the % operator) with the rendered position of one instruction,
# left-justified in a 14-character field.  This is a plain string rather than
# an f-string: it has no replacement fields, and keeping it plain avoids any
# literal braces being interpreted before the %-formatting is applied.
dis_f_with_positions_format = """\
%-14s RESUME 0
%-14s LOAD_GLOBAL 1 (print + NULL)
%-14s LOAD_FAST 0 (a)
%-14s CALL 1
%-14s POP_TOP
%-14s RETURN_CONST 1 (1)
"""
dis_f_co_code = """\
RESUME 0
@ -950,6 +960,76 @@ def test_dis(self):
def test_dis_with_offsets(self):
self.do_disassembly_test(_f, dis_f_with_offsets, show_offsets=True)
@requires_debug_ranges()
def test_dis_with_all_positions(self):
    # Disassemble _f with show_positions=True and compare against the
    # template filled in from the instructions' own position data.
    def format_instr_positions(instr):
        # Rendered as LINE:COL-ENDLINE:ENDCOL, hence the 0, 2, 1, 3 order;
        # a missing component is shown as '?'.
        parts = ['?' if p is None else p for p in instr.positions]
        return f'{parts[0]}:{parts[2]}-{parts[1]}:{parts[3]}'

    all_instrs = list(dis.get_instructions(_f))
    # Precondition: every instruction of _f has complete positions.
    for instr in all_instrs:
        with self.subTest(instr=instr):
            self.assertTrue(not any(p is None for p in instr.positions))

    rendered = tuple(format_instr_positions(item) for item in all_instrs)
    self.do_disassembly_test(
        _f, dis_f_with_positions_format % rendered, show_positions=True)
@requires_debug_ranges()
def test_dis_with_some_positions(self):
# Disassemble a hand-built code object whose line table mixes three kinds
# of entries (line only, no location, full location) and check how each
# one is rendered when show_positions=True.
def f():
pass
# Entry kinds stored in bits 3-6 of each line table entry's first byte.
PY_CODE_LOCATION_INFO_NO_COLUMNS = 13
PY_CODE_LOCATION_INFO_WITH_COLUMNS = 14
PY_CODE_LOCATION_INFO_NO_LOCATION = 15
f.__code__ = f.__code__.replace(
co_stacksize=1,
co_firstlineno=42,
co_code=bytes([
dis.opmap["RESUME"], 0,
dis.opmap["NOP"], 0,
dis.opmap["RETURN_CONST"], 0,
]),
# Each entry starts with a byte built as
# (1 << 7) | (kind << 3) | (number of code units - 1).
co_linetable=bytes([
(1 << 7)
| (PY_CODE_LOCATION_INFO_NO_COLUMNS << 3)
| (1 - 1), # 1 code unit (RESUME)
(1 << 1), # start line delta is +1 (encoded as an svarint): 42 + 1 = 43
(1 << 7)
| (PY_CODE_LOCATION_INFO_NO_LOCATION << 3)
| (1 - 1), # 1 code unit (NOP)
(1 << 7)
| (PY_CODE_LOCATION_INFO_WITH_COLUMNS << 3)
| (1 - 1), # 1 code unit (RETURN_CONST)
(2 << 1), # start line delta is +2 (encoded as an svarint): 43 + 2 = 45
3, # end line delta is +3 (varint encoded): 45 + 3 = 48
1, # 1-based start column (reported as COL - 1)
5, # 1-based end column (reported as ENDCOL - 1)
]
))
# RESUME has a line but no columns ('?'), NOP has no location at all
# ('--'), and RETURN_CONST has a complete position.
expect = '\n'.join([
'43:?-43:? RESUME 0',
'',
' -- NOP',
'',
'45:0-48:4 RETURN_CONST 0 (None)',
'',
])
self.do_disassembly_test(f, expect, show_positions=True)
def test_dis_with_no_positions(self):
    # With an empty line table the expected output below has no location
    # column at all, even though show_positions=True is requested.
    def f():
        pass

    f.__code__ = f.__code__.replace(co_linetable=b'')
    expected_lines = (
        ' RESUME 0',
        ' RETURN_CONST 0 (None)',
        '',
    )
    self.do_disassembly_test(
        f, '\n'.join(expected_lines), show_positions=True)
def test_bug_708901(self):
self.do_disassembly_test(bug708901, dis_bug708901)

View file

@ -0,0 +1 @@
Add support for rendering :class:`~dis.Positions` in :mod:`dis`.