Skip to content

bpo-44554: refactor pdb targets (and internal tweaks) #26992

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Jul 19, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 104 additions & 64 deletions Lib/pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,12 @@
import signal
import inspect
import tokenize
import functools
import traceback
import linecache

from typing import Union


class Restart(Exception):
"""Causes a debugger to be restarted for the debugged python program."""
Expand Down Expand Up @@ -128,6 +131,77 @@ def __repr__(self):
return self


class ScriptTarget(str):
def __new__(cls, val):
# Mutate self to be the "real path".
res = super().__new__(cls, os.path.realpath(val))

# Store the original path for error reporting.
res.orig = val

return res

def check(self):
if not os.path.exists(self):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, the existence check was done on mainpyfile before it was mutated to a "real path". Now the existence check is the real path. If compatibility is needed, this line could be changed to:

Suggested change
if not os.path.exists(self):
if not os.path.exists(self.orig):

But in my opinion, the existence check on either form is equivalent, and the form presented is more readable and distracts less from the primary intention for self.orig (the error message).

print('Error:', self.orig, 'does not exist')
sys.exit(1)

# Replace pdb's dir with script's dir in front of module search path.
sys.path[0] = os.path.dirname(self)

@property
def filename(self):
return self

@property
def namespace(self):
return dict(
__name__='__main__',
__file__=self,
__builtins__=__builtins__,
)

@property
def code(self):
with io.open(self) as fp:
return f"exec(compile({fp.read()!r}, {self!r}, 'exec'))"


class ModuleTarget(str):
def check(self):
pass

@functools.cached_property
def _details(self):
import runpy
return runpy._get_module_details(self)

@property
def filename(self):
return self.code.co_filename

@property
def code(self):
name, spec, code = self._details
return code

@property
def _spec(self):
name, spec, code = self._details
return spec

@property
def namespace(self):
return dict(
__name__='__main__',
__file__=os.path.normcase(os.path.abspath(self.filename)),
__package__=self._spec.parent,
__loader__=self._spec.loader,
__spec__=self._spec,
__builtins__=__builtins__,
)


# Interaction prompt line will separate file and call info from code
# text using value of line_prefix string. A newline and arrow may
# be to your liking. You can set it once pdb is imported using the
Expand Down Expand Up @@ -1534,49 +1608,26 @@ def lookupmodule(self, filename):
return fullname
return None

def _runmodule(self, module_name):
def _run(self, target: Union[ModuleTarget, ScriptTarget]):
# When bdb sets tracing, a number of call and line events happen
# BEFORE debugger even reaches user's code (and the exact sequence of
# events depends on python version). Take special measures to
# avoid stopping before reaching the main script (see user_line and
# user_call for details).
self._wait_for_mainpyfile = True
self._user_requested_quit = False
import runpy
mod_name, mod_spec, code = runpy._get_module_details(module_name)
self.mainpyfile = self.canonic(code.co_filename)
import __main__
__main__.__dict__.clear()
__main__.__dict__.update({
"__name__": "__main__",
"__file__": self.mainpyfile,
"__package__": mod_spec.parent,
"__loader__": mod_spec.loader,
"__spec__": mod_spec,
"__builtins__": __builtins__,
})
self.run(code)

def _runscript(self, filename):
# The script has to run in __main__ namespace (or imports from
# __main__ will break).
#
# So we clear up the __main__ and set several special variables
# (this gets rid of pdb's globals and cleans old variables on restarts).

self.mainpyfile = self.canonic(target.filename)

# The target has to run in __main__ namespace (or imports from
# __main__ will break). Clear __main__ and replace with
# the target namespace.
import __main__
__main__.__dict__.clear()
__main__.__dict__.update({"__name__" : "__main__",
"__file__" : filename,
"__builtins__": __builtins__,
})
__main__.__dict__.update(target.namespace)

self.run(target.code)

# When bdb sets tracing, a number of call and line events happens
# BEFORE debugger even reaches user's code (and the exact sequence of
# events depends on python version). So we take special measures to
# avoid stopping before we reach the main script (see user_line and
# user_call for details).
self._wait_for_mainpyfile = True
self.mainpyfile = self.canonic(filename)
self._user_requested_quit = False
with io.open_code(filename) as fp:
statement = "exec(compile(%r, %r, 'exec'))" % \
(fp.read(), self.mainpyfile)
self.run(statement)

# Collect all command help into docstring, if not run with -OO

Expand Down Expand Up @@ -1665,6 +1716,7 @@ def help():
To let the script run up to a given line X in the debugged file, use
"-c 'until X'"."""


def main():
import getopt

Expand All @@ -1674,28 +1726,19 @@ def main():
print(_usage)
sys.exit(2)

commands = []
run_as_module = False
for opt, optarg in opts:
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously, the opts would be iterated over just once. In this new code, they are iterated over three times. Any performance degradation is negligible and outweighed by the value of disentangling the concerns.

if opt in ['-h', '--help']:
print(_usage)
sys.exit()
elif opt in ['-c', '--command']:
commands.append(optarg)
elif opt in ['-m']:
run_as_module = True

mainpyfile = args[0] # Get script filename
if not run_as_module and not os.path.exists(mainpyfile):
print('Error:', mainpyfile, 'does not exist')
sys.exit(1)
if any(opt in ['-h', '--help'] for opt, optarg in opts):
print(_usage)
sys.exit()

sys.argv[:] = args # Hide "pdb.py" and pdb options from argument list
commands = [optarg for opt, optarg in opts if opt in ['-c', '--command']]

if not run_as_module:
mainpyfile = os.path.realpath(mainpyfile)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This line changes the meaning of mainpyfile mid-function. It's because of this behavior that ScriptTarget contains an 'orig' property (to keep both representations), even though the "real path" form is the preferred one except during error checking.

# Replace pdb's dir with script's dir in front of module search path.
sys.path[0] = os.path.dirname(mainpyfile)
module_indicated = any(opt in ['-m'] for opt, optarg in opts)
cls = ModuleTarget if module_indicated else ScriptTarget
target = cls(args[0])

target.check()

sys.argv[:] = args # Hide "pdb.py" and pdb options from argument list
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the previous implementation, assignment of sys.argv[:] would happen before the sys.path change. In the new code, the sys.path change happens during the ScriptTarget.check. I contend these two operations are independent and can happen in any order.


# Note on saving/restoring sys.argv: it's a good idea when sys.argv was
# modified by the script being debugged. It's a bad idea when it was
Expand All @@ -1705,15 +1748,12 @@ def main():
pdb.rcLines.extend(commands)
while True:
try:
if run_as_module:
pdb._runmodule(mainpyfile)
else:
pdb._runscript(mainpyfile)
pdb._run(target)
if pdb._user_requested_quit:
break
print("The program finished and will be restarted")
except Restart:
print("Restarting", mainpyfile, "with arguments:")
print("Restarting", target, "with arguments:")
print("\t" + " ".join(sys.argv[1:]))
except SystemExit:
# In most cases SystemExit does not warrant a post-mortem session.
Expand All @@ -1728,7 +1768,7 @@ def main():
print("Running 'cont' or 'step' will restart the program")
t = sys.exc_info()[2]
pdb.interaction(None, t)
print("Post mortem debugger finished. The " + mainpyfile +
print("Post mortem debugger finished. The " + target +
" will be restarted")


Expand Down
2 changes: 1 addition & 1 deletion Lib/test/test_pdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,7 +362,7 @@ def test_pdb_breakpoints_preserved_across_interactive_sessions():
1 breakpoint keep yes at ...test_pdb.py:...
2 breakpoint keep yes at ...test_pdb.py:...
(Pdb) break pdb.find_function
Breakpoint 3 at ...pdb.py:94
Breakpoint 3 at ...pdb.py:97
(Pdb) break
Num Type Disp Enb Where
1 breakpoint keep yes at ...test_pdb.py:...
Expand Down
6 changes: 5 additions & 1 deletion Lib/test/test_pyclbr.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,11 @@ def test_others(self):
cm('pickle', ignore=('partial', 'PickleBuffer'))
cm('aifc', ignore=('_aifc_params',)) # set with = in module
cm('sre_parse', ignore=('dump', 'groups', 'pos')) # from sre_constants import *; property
cm('pdb')
cm(
'pdb',
# pyclbr does not handle elegantly `typing` or properties
ignore=('Union', 'ModuleTarget', 'ScriptTarget'),
)
cm('pydoc', ignore=('input', 'output',)) # properties

# Tests for modules inside packages
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Refactor argument processing in :func:pdb.main to simplify detection of errors in input loading and clarify behavior around module or script invocation.