Skip to content

Commit

Permalink
Improve and expose cpp_backtrace to python binding (pytorch#84896)
Browse files Browse the repository at this point in the history
We can now get the C++ stack trace by calling `torch.utils.get_cpp_backtrace()`.

Sample output when calling from a torch_dispatch stack:
```
<omitting python frames>
frame #23: torch::handle_torch_function_no_python_arg_parser(c10::ArrayRef<pybind11::handle>, _object*, _object*, char const*, _object*, char const*, torch::TorchFunctionName) (0x7f69330bab90 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/utils/python_arg_parser.cpp:323)
frame #24: <unknown function> (0x7f6932a09e79 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/autograd/python_variable.cpp:2252)
frame #25: <unknown function> (0x7f69261aee33 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/PythonFallbackKernel.cpp:56)
frame #26: <unknown function> (0x7f69261afef9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/BoxedKernel_impl.h:19)
frame #27: c10::BoxedKernel::callBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6932aadced in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/BoxedKernel_impl.h:41)
frame #28: <unknown function> (0x7f6926fae9b9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/boxing.h:227)
frame #29: at::Tensor c10::Dispatcher::redispatch<at::Tensor, at::Tensor const&>(c10::TypedOperatorHandle<at::Tensor (at::Tensor const&)> const&, c10::DispatchKeySet, at::Tensor const&) const (0x7f6926e821f5 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/KernelFunction_impl.h:106)
frame #30: at::_ops::alias::redispatch(c10::DispatchKeySet, at::Tensor const&) (0x7f6927142c31 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:438)
frame #31: <unknown function> (0x7f692ae4f8be in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/autograd/generated/ADInplaceOrViewType_1.cpp:1361)
frame #32: <unknown function> (0x7f692ae4f9b1 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/autograd/generated/ADInplaceOrViewType_1.cpp:1362)
frame #33: <unknown function> (0x7f692aef77e9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/WrapFunctionIntoFunctor.h:13)
frame #34: <unknown function> (0x7f6926fae7d8 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/KernelFunction_impl.h:50)
frame #35: at::Tensor c10::Dispatcher::redispatch<at::Tensor, at::Tensor const&>(c10::TypedOperatorHandle<at::Tensor (at::Tensor const&)> const&, c10::DispatchKeySet, at::Tensor const&) const (0x7f6926e821c9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/KernelFunction_impl.h:97)
frame #36: at::_ops::alias::redispatch(c10::DispatchKeySet, at::Tensor const&) (0x7f6927142c31 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:438)
frame #37: <unknown function> (0x7f6929ec654a in /fsx/users/bahuang/repos/pytorch_fsx/build/aten/src/ATen/RedispatchFunctions.h:10697)
frame #38: <unknown function> (0x7f6929d9edae in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/autograd/generated/VariableType_1.cpp:2837)
frame #39: <unknown function> (0x7f6929d9f043 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/autograd/generated/VariableType_1.cpp:2838)
frame #40: <unknown function> (0x7f6929e7d2f9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/WrapFunctionIntoFunctor.h:13)
frame #41: <unknown function> (0x7f6929eb1344 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h:478)
frame #42: <unknown function> (0x7f6929ea7b99 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h:490)
frame #43: <unknown function> (0x7f6929e7d370 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h:563)
frame #44: <unknown function> (0x7f6929e7d43a in /fsx/users/bahuang/repos/pytorch_fsx/c10/util/C++17.h:239)
frame #45: <unknown function> (0x7f6929e7d48c in /fsx/users/bahuang/repos/pytorch_fsx/c10/util/C++17.h:364)
frame #46: <unknown function> (0x7f6929e7d50a in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/impl/make_boxed_from_unboxed_functor.h:554)
frame #47: c10::BoxedKernel::callBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6932aadced in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/BoxedKernel_impl.h:41)
frame #48: c10::KernelFunction::callBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6932aadd26 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/KernelFunction_impl.h:43)
frame #49: c10::Dispatcher::redispatchBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f692603890a in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:652)
frame #50: <unknown function> (0x7f69260387f9 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:388)
frame #51: <unknown function> (0x7f69261af0ef in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/PythonFallbackKernel.cpp:96)
frame #52: <unknown function> (0x7f69261aff2b in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/BoxedKernel_impl.h:25)
frame #53: c10::BoxedKernel::callBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6932aadced in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/BoxedKernel_impl.h:41)
frame #54: c10::KernelFunction::callBoxed(c10::OperatorHandle const&, c10::DispatchKeySet, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6932aadd26 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/boxing/KernelFunction_impl.h:43)
frame #55: c10::Dispatcher::callBoxed(c10::OperatorHandle const&, std::vector<c10::IValue, std::allocator<c10::IValue> >*) const (0x7f6925fd6ab2 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:628)
frame #56: <unknown function> (0x7f6925fd6690 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:376)
frame #57: <unknown function> (0x7f692bf5b525 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/dispatch/Dispatcher.h:380)
frame #58: <unknown function> (0x7f692bf59fac in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/jit/runtime/register_c10_ops.cpp:15)
frame #59: <unknown function> (0x7f692bf5af41 in /usr/include/c++/7/bits/std_function.h:316)
frame #60: std::function<void (std::vector<c10::IValue, std::allocator<c10::IValue> >&)>::operator()(std::vector<c10::IValue, std::allocator<c10::IValue> >&) const (0x7f6932ab9a0f in /usr/include/c++/7/bits/std_function.h:706)
frame #61: <unknown function> (0x7f6932aad541 in /fsx/users/bahuang/repos/pytorch_fsx/aten/src/ATen/core/stack.h:41)
frame #62: <unknown function> (0x7f6932ab3102 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/jit/python/pybind_utils.h:1206 (discriminator 1))
frame #63: <unknown function> (0x7f6932ab3943 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/jit/python/pybind_utils.h:1272)
frame #64: <unknown function> (0x7f6932a46120 in /fsx/users/bahuang/repos/pytorch_fsx/torch/csrc/jit/python/init.cpp:1767)
frame #65: <unknown function> (0x7f6932a997be in /fsx/users/bahuang/repos/pytorch_fsx/third_party/pybind11/include/pybind11/cast.h:1441)
frame #66: <unknown function> (0x7f6932a8a985 in /fsx/users/bahuang/repos/pytorch_fsx/third_party/pybind11/include/pybind11/cast.h:1410)
frame #67: <unknown function> (0x7f6932a66e1e in /fsx/users/bahuang/repos/pytorch_fsx/third_party/pybind11/include/pybind11/pybind11.h:249)
frame #68: <unknown function> (0x7f6932a66ec2 in /fsx/users/bahuang/repos/pytorch_fsx/third_party/pybind11/include/pybind11/pybind11.h:224)
frame #69: <unknown function> (0x7f6932473111 in /fsx/users/bahuang/repos/pytorch_fsx/third_party/pybind11/include/pybind11/pybind11.h:929)
frame #104: __libc_start_main (0x7f693485dc87 in /build/glibc-uZu3wS/glibc-2.27/csu/../csu/libc-start.c:310)
```

Pull Request resolved: pytorch#84896
Approved by: https://github.com/ezyang
  • Loading branch information
SherlockNoMad authored and pytorchmergebot committed Sep 21, 2022
1 parent 52fd7e4 commit 73fbca1
Show file tree
Hide file tree
Showing 6 changed files with 121 additions and 7 deletions.
1 change: 1 addition & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -924,6 +924,7 @@ add-auto-load-safe-path /path/to/pytorch/.gdbinit
### C++ stacktraces
Set `TORCH_SHOW_CPP_STACKTRACES=1` to get the C++ stacktrace when an error occurs in Python.
Set `TORCH_SHOW_CPP_STACKTRACES_WITH_LINENO=1` to get the C++ stacktrace with source file names and line numbers (resolved via `addr2line`).
## CUDA development tips
Expand Down
100 changes: 93 additions & 7 deletions c10/util/Backtrace.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <c10/util/Backtrace.h>
#include <c10/util/Optional.h>
#include <c10/util/Type.h>
#include <c10/util/env.h>
#include <c10/util/irange.h>

#include <functional>
Expand All @@ -21,7 +22,14 @@
#include <dlfcn.h>
#include <unwind.h>
#else
#include <dlfcn.h>
#include <execinfo.h>

#ifndef __APPLE__
// link.h is not available on iOS and Mac builds
#include <link.h>
#endif

#endif
#endif

Expand Down Expand Up @@ -87,6 +95,46 @@ void dump_stack(
#if SUPPORTS_BACKTRACE
namespace {

#if !defined(C10_ANDROID) && !defined(__APPLE__)

// converts a function's address in memory to its VMA address in the executable
// file. VMA is what addr2line expects
// Converts a function's address in memory to its VMA (virtual memory
// address) in the executable file. VMA is what addr2line expects.
size_t ConvertToVMA(size_t addr) {
  Dl_info info;
  link_map* link_map = nullptr;
  // dladdr1 with RTLD_DL_LINKMAP also yields the link_map entry whose
  // l_addr is the base address the object was loaded at.  It returns 0 on
  // failure and leaves the out-parameters untouched, so the result must be
  // checked before dereferencing link_map.
  if (dladdr1((void*)addr, &info, (void**)&link_map, RTLD_DL_LINKMAP) == 0 ||
      link_map == nullptr) {
    // Lookup failed (address not inside any loaded object); fall back to
    // the raw address so the caller still produces a usable frame entry.
    return addr;
  }
  return addr - link_map->l_addr;
}

// Runs `cmd` through popen() and returns everything it writes to stdout.
// Throws std::runtime_error if the pipe cannot be created.
std::string exec(const char* cmd) {
  std::unique_ptr<FILE, decltype(&pclose)> stream(popen(cmd, "r"), pclose);
  if (stream == nullptr) {
    throw std::runtime_error("popen() failed!");
  }
  std::string output;
  char chunk[128];
  // Accumulate the child's stdout chunk by chunk until EOF.
  while (fgets(chunk, sizeof(chunk), stream.get()) != nullptr) {
    output.append(chunk);
  }
  return output;
}

// Returns a copy of `s` with all trailing whitespace removed; an
// all-whitespace (or empty) input yields the empty string.
std::string rstrip(const std::string& s) {
  const auto last = s.find_last_not_of(" \n\r\t\f\v");
  if (last == std::string::npos) {
    return "";
  }
  return s.substr(0, last + 1);
}

// Whether source file/line resolution via addr2line is enabled.  Driven by
// the TORCH_SHOW_CPP_STACKTRACES_WITH_LINENO environment variable; the
// lookup happens once and the answer is cached for the process lifetime.
bool use_addr2line() {
  static const bool enabled =
      c10::utils::check_env("TORCH_SHOW_CPP_STACKTRACES_WITH_LINENO") == true;
  return enabled;
}

#endif // !defined(C10_ANDROID) && !defined(__APPLE__)

struct FrameInformation {
/// If available, the demangled name of the function at this frame, else
/// whatever (possibly mangled) name we got from `backtrace()`.
Expand All @@ -99,6 +147,10 @@ struct FrameInformation {
/// NOTE: In debugger parlance, the "object file" refers to the ELF file that
/// the symbol originates from, i.e. either an executable or a library.
std::string object_file;
/// Source file name and line number
std::string source_file_lineno;

bool is_python_frame;
};

#ifndef C10_ANDROID
Expand All @@ -108,7 +160,8 @@ bool is_python_frame(const FrameInformation& frame) {
}

c10::optional<FrameInformation> parse_frame_information(
const std::string& frame_string) {
const std::string& frame_string,
void* frame_pointer) {
FrameInformation frame;

// This is the function name in the CXX ABI mangled format, e.g. something
Expand Down Expand Up @@ -141,6 +194,7 @@ c10::optional<FrameInformation> parse_frame_information(
frame.object_file = frame_string.substr(0, function_name_start - 1);
frame.offset_into_function =
frame_string.substr(offset_start, offset_end - offset_start);
frame.is_python_frame = is_python_frame(frame);

// NOTE: We don't need to parse the return address because
// we already have it from the call to `backtrace()`.
Expand Down Expand Up @@ -171,6 +225,30 @@ c10::optional<FrameInformation> parse_frame_information(
}

frame.function_name = demangle(mangled_function_name.c_str());

#if !defined(__APPLE__)

if (use_addr2line() && !frame.is_python_frame) {
Dl_info info;
if (dladdr(frame_pointer, &info)) {
char command[256];
size_t VMA_addr = ConvertToVMA((size_t)frame_pointer);
// Need to decrease the VMA address by 1 to get the correct line number
// https://stackoverflow.com/questions/11579509/wrong-line-numbers-from-addr2line/63841497#63841497
VMA_addr -= 1;
snprintf(
command,
sizeof(command),
"addr2line -e %s -C %zx",
info.dli_fname,
VMA_addr);

frame.source_file_lineno = rstrip(exec(command));
}
}

#endif // !defined(__APPLE__)

return frame;
}
#endif /* !defined(C10_ANDROID) */
Expand Down Expand Up @@ -283,9 +361,10 @@ std::string get_backtrace(
bool has_skipped_python_frames = false;

for (const auto frame_number : c10::irange(callstack.size())) {
const auto frame = parse_frame_information(symbols[frame_number]);
const auto frame =
parse_frame_information(symbols[frame_number], callstack[frame_number]);

if (skip_python_frames && frame && is_python_frame(*frame)) {
if (skip_python_frames && frame && frame->is_python_frame) {
if (!has_skipped_python_frames) {
stream << "<omitting python frames>\n";
has_skipped_python_frames = true;
Expand All @@ -297,10 +376,17 @@ std::string get_backtrace(
stream << "frame #" << frame_number << ": ";

if (frame) {
// <function_name> + <offset> (<return-address> in <object-file>)
stream << frame->function_name << " + " << frame->offset_into_function
<< " (" << callstack[frame_number] << " in " << frame->object_file
<< ")\n";
if (frame->source_file_lineno.empty()) {
// <function_name> + <offset> (<return-address> in <object-file>)
stream << frame->function_name << " + " << frame->offset_into_function
<< " (" << callstack[frame_number] << " in "
<< frame->object_file << ")\n";

} else {
// <function_name> (<return-address> in <filename>:<line-number>)
stream << frame->function_name << " (" << callstack[frame_number]
<< " in " << frame->source_file_lineno << ")\n";
}
} else {
// In the edge-case where we couldn't parse the frame string, we can
// just use it directly (it may have a different format).
Expand Down
1 change: 1 addition & 0 deletions torch/_C/__init__.pyi.in
Original file line number Diff line number Diff line change
Expand Up @@ -843,6 +843,7 @@ def _remove_meta_from_tls_dispatch_include() -> None: ...
# https://code.activestate.com/lists/python-dev/139675/
def _to_dlpack(data: Tensor) -> Any: ... # THPModule_toDLPack
def _from_dlpack(data: Any) -> Tensor: ... # THPModule_fromDLPack
def _get_cpp_backtrace(frames_to_skip: _int, maximum_number_of_frames: _int) -> str: ... # THPModule_getCppBacktrace
def set_flush_denormal(arg: _bool) -> _bool: ... # THPModule_setFlushDenormal
def get_default_dtype() -> _dtype: ... # THPModule_getDefaultDtype
def _get_default_device() -> str: ... # THPModule_getDefaultDevice
Expand Down
14 changes: 14 additions & 0 deletions torch/csrc/Module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,19 @@ PyObject* THPModule_fromDLPack(PyObject* _unused, PyObject* data) {
END_HANDLE_TH_ERRORS
}

// Python binding for torch._C._get_cpp_backtrace(frames_to_skip,
// maximum_number_of_frames) -> str.  Returns the C++ backtrace of the
// calling thread, skipping python frames.
PyObject* THModule_getCppBacktrace(PyObject* _unused, PyObject* args) {
  HANDLE_TH_ERRORS
  // The "L" format unit writes a `long long`; passing size_t* (as the
  // original did) is undefined behavior per the CPython argument-parsing
  // contract, so parse into long long and cast afterwards.
  long long frames_to_skip = 0;
  long long maximum_number_of_frames = 0;
  if (!PyArg_ParseTuple(
          args, "LL", &frames_to_skip, &maximum_number_of_frames)) {
    return nullptr;
  }
  return THPUtils_packString(c10::get_backtrace(
      static_cast<size_t>(frames_to_skip),
      static_cast<size_t>(maximum_number_of_frames),
      /*skip_python_frames=*/true));
  END_HANDLE_TH_ERRORS
}

PyObject* THPModule_setAllowTF32CuDNN(PyObject* _unused, PyObject* arg) {
THPUtils_assert(
PyBool_Check(arg),
Expand Down Expand Up @@ -866,6 +879,7 @@ static PyMethodDef TorchMethods[] = {
nullptr},
{"_to_dlpack", THPModule_toDLPack, METH_O, nullptr},
{"_from_dlpack", THPModule_fromDLPack, METH_O, nullptr},
{"_get_cpp_backtrace", THModule_getCppBacktrace, METH_VARARGS, nullptr},
{"set_flush_denormal", THPModule_setFlushDenormal, METH_O, nullptr},
{"get_default_dtype", THPModule_getDefaultDtype, METH_NOARGS, nullptr},
{"_get_default_device", THPModule_getDefaultDevice, METH_NOARGS, nullptr},
Expand Down
1 change: 1 addition & 0 deletions torch/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from .throughput_benchmark import ThroughputBenchmark
from ._crash_handler import enable_minidumps, disable_minidumps, enable_minidumps_on_exceptions
from .cpp_backtrace import get_cpp_backtrace

# Set the module for a given object for nicer printing
def set_module(obj, mod):
Expand Down
11 changes: 11 additions & 0 deletions torch/utils/cpp_backtrace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from torch._C import _get_cpp_backtrace


def get_cpp_backtrace(frames_to_skip: int = 0, maximum_number_of_frames: int = 64) -> str:
    r"""Returns a string containing the C++ stack trace of the current thread.

    Args:
        frames_to_skip (int): the number of frames to skip from the top of the stack
        maximum_number_of_frames (int): the maximum number of frames to return

    Returns:
        str: the formatted C++ backtrace, one ``frame #N: ...`` entry per line.
    """
    return _get_cpp_backtrace(frames_to_skip, maximum_number_of_frames)

0 comments on commit 73fbca1

Please sign in to comment.