diff --git a/.github/workflows/dev_release.yml b/.github/workflows/dev_release.yml new file mode 100644 index 00000000..c385861e --- /dev/null +++ b/.github/workflows/dev_release.yml @@ -0,0 +1,145 @@ +name: Development release +on: + push: + tags: + - 'dev' +jobs: + release-linux: + runs-on: ubuntu-latest + strategy: + fail-fast: false + name: Release (Linux) + steps: + - uses: actions/checkout@v2 + name: Checkout Austin + + - name: Generate artifacts + run: | + sudo apt-get update + sudo apt-get -y install autoconf build-essential libunwind-dev musl-tools + + # Build austin + autoreconf --install + ./configure + make + + # Compute dev version + export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p") + export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD) + sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h + + # Build austinp + gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a + + pushd src + tar -Jcf austin-$VERSION-gnu-linux-amd64.tar.xz austin + tar -Jcf austinp-$VERSION-gnu-linux-amd64.tar.xz austinp + popd + + # Build with musl + musl-gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austin -D__MUSL__ + pushd src + tar -Jcf austin-$VERSION-musl-linux-amd64.tar.xz austin + popd + + - name: Upload artifacts to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: src/austin*xz + tag: ${{ github.ref }} + overwrite: true + prerelease: true + release_name: Development build + file_glob: true + + release-win: + runs-on: windows-latest + strategy: + fail-fast: false + name: Release (Windows) + steps: + - uses: actions/checkout@v2 + name: Checkout Austin + with: + fetch-depth: 0 + + - name: Generate artifacts + shell: bash + run: | + echo "C:\Program Files (x86)\WiX Toolset v3.11\bin" >> $GITHUB_PATH + export PATH="/c/Program Files (x86)/`ls /c/Program\ Files\ \(x86\) | grep \"[wW]i[xX] [tT]oolset\"`/bin:$PATH" + + # Compute dev version + export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p") + export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g') + export VERSION_DEV=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD) + sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h + + gcc -s -Wall -O3 -Os -o src/austin src/*.c -lpsapi -lntdll + + git checkout HEAD -- src/austin.h + + git checkout "packaging/msi" + git checkout master + git checkout "packaging/msi" -- wix + + export WIN_MSI="austin-$VERSION_DEV-win64.msi" + + sed -i "s/%VERSION%/$VERSION/g" wix/Austin.wxs + pushd wix + candle Austin.wxs -out Austin.wixobj + light -ext WixUIExtension Austin.wixobj -out $WIN_MSI + popd + + mv wix/$WIN_MSI src/$WIN_MSI; + test -f src/$WIN_MSI && echo ">> Windows MSI installer at src/$WIN_MSI" || echo ">> ERROR No Windows MSI installer generated." 
+ + pushd src + 7z a -tzip austin-${VERSION_DEV}-win64.zip austin.exe + popd + + - name: Upload artifacts to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: src/austin-* + tag: ${{ github.ref }} + overwrite: true + prerelease: true + release_name: Development build + file_glob: true + + release-osx: + runs-on: macos-latest + strategy: + fail-fast: false + name: Release (macOS) + steps: + - uses: actions/checkout@v2 + name: Checkout Austin + + - name: Generate artifacts + run: | + # Compute dev version + export PREV_VERSION=$(cat src/austin.h | sed -n -E "s/.*VERSION[ ]+\"(.+)\"/\1/p") + export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD) + sed -i "" "s/$PREV_VERSION/$VERSION/g" src/austin.h + echo "::set-output name=version::$VERSION" + + gcc -Wall -O3 -Os -o src/austin src/*.c + + pushd src + zip -r austin-${VERSION}-mac64.zip austin + popd + + - name: Upload artifacts to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: src/austin-* + tag: ${{ github.ref }} + overwrite: true + prerelease: true + release_name: Development build + file_glob: true \ No newline at end of file diff --git a/.github/workflows/dev_release_arch.yml b/.github/workflows/dev_release_arch.yml new file mode 100644 index 00000000..d6e7a49f --- /dev/null +++ b/.github/workflows/dev_release_arch.yml @@ -0,0 +1,71 @@ +name: Development release (Linux archs) +on: + push: + tags: + - 'dev' +jobs: + release-linux-archs: + runs-on: ubuntu-latest + strategy: + matrix: + arch: ["armv7", "aarch64", "ppc64le"] + fail-fast: false + name: Build on ${{ matrix.arch }} + steps: + - uses: actions/checkout@v2 + name: Checkout sources + - uses: uraimo/run-on-arch-action@v2.0.5 + name: Generate artifacts on ${{ matrix.arch }} + id: run-tests-on-arch + with: + arch: ${{ matrix.arch }} + distro: ubuntu20.04 + githubToken: ${{ github.token }} + dockerRunArgs: --volume "${GITHUB_WORKSPACE}/artifacts:/artifacts" + setup: | + mkdir -p ./artifacts + + # Compute dev version + export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p") + export VERSION=$(echo $PREV_VERSION | awk -F. 
'{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD) + sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h + run: | + apt-get update + apt-get -y install autoconf build-essential libunwind-dev musl-tools + + # Build austin + autoreconf --install + ./configure + make + + export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p") + + # Build austinp + gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a + + pushd src + tar -Jcf austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austin + tar -Jcf austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austinp + + musl-gcc -O3 -Os -s -Wall -pthread *.c -o austin -D__MUSL__ + tar -Jcf austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz austin + + mv austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts + mv austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts + mv austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz /artifacts + popd + + - name: Show artifacts + run: | + ls -al ./artifacts + + - name: Upload binaries to release + uses: svenstaro/upload-release-action@v2 + with: + repo_token: ${{ secrets.GITHUB_TOKEN }} + file: artifacts/austin* + tag: ${{ github.ref }} + overwrite: true + prerelease: true + release_name: Development build + file_glob: true \ No newline at end of file diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9aba86da..7a48562e 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,16 +16,27 @@ jobs: - name: Generate artifacts run: | sudo apt-get update - sudo apt-get -y install autoconf build-essential + sudo apt-get -y install autoconf build-essential libunwind-dev musl-tools + # Build austin autoreconf --install ./configure make export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p"); + # Build austinp + gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a + + pushd src + tar -Jcf austin-$VERSION-gnu-linux-amd64.tar.xz austin + tar -Jcf austinp-$VERSION-gnu-linux-amd64.tar.xz austinp + popd + + # Build with musl + musl-gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austin -D__MUSL__ pushd src - tar -Jcf austin-$VERSION-linux-amd64.tar.xz austin + tar -Jcf austin-$VERSION-musl-linux-amd64.tar.xz austin popd - name: Upload artifacts to release diff --git a/.github/workflows/release_arch.yml b/.github/workflows/release_arch.yml index eca8d120..832116cd 100644 --- a/.github/workflows/release_arch.yml +++ b/.github/workflows/release_arch.yml @@ -26,17 +26,28 @@ jobs: mkdir -p ./artifacts run: | apt-get update - apt-get -y install autoconf build-essential + apt-get -y install autoconf build-essential libunwind-dev musl-tools + # Build austin autoreconf --install ./configure make - - export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p"); + + export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p") + + # Build austinp + gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a pushd src - tar -Jcf austin-$VERSION-linux-${{ matrix.arch }}.tar.xz austin - mv austin-$VERSION-linux-${{ matrix.arch }}.tar.xz /artifacts + tar -Jcf austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austin + tar -Jcf austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austinp + + musl-gcc -O3 -Os -s -Wall -pthread *.c 
-o austin -D__MUSL__ + tar -Jcf austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz austin + + mv austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts + mv austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts + mv austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz /artifacts popd - name: Show artifacts diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cad45831..ee31e2b4 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -41,11 +41,13 @@ jobs: - name: Install test dependencies run: | + brew update brew install python || brew upgrade python brew install python@3.8 || true brew install python@3.9 || true + brew install python@3.10 || true brew install bats-core || true - brew cask install anaconda || true + brew install --cask anaconda || true - name: Run tests run: sudo bats test/macos/test.bats diff --git a/.gitignore b/.gitignore index 7df6e943..a65859ed 100644 --- a/.gitignore +++ b/.gitignore @@ -36,6 +36,7 @@ stamp-h1 *.gcda src/austin +src/austinp src/austin.exe diff --git a/ChangeLog b/ChangeLog index 159b17ef..1e8cd11d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,14 @@ +2021-xx-xx v3.2.0 + + Improved detection of invalid samples + + Added support for Python launchers on Windows + + Improved Python version detection on Linux + + Fixed support of older versions of glibc on Linux + + 2021-08-18 v3.1.0 Added garbage collection state sampling for Python 3.7 onward. diff --git a/README.md b/README.md index 4b143ee2..ec24b7c4 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ Synopsis • Installation • Usage • + Cheat sheet • Compatibility • Why Austin • Examples • @@ -115,10 +116,12 @@ The key features of Austin are: - Time and memory profiling; - Built-in support for multi-process applications (e.g. `mod_wsgi`). -The simplest way to turn Austin into a full-fledged profiler is to combine it -with [FlameGraph] or [Speedscope]. However, Austin's simple output format can be -piped into any other external or custom tool for further processing. Look, for -instance, at the following Python TUI +The simplest way to turn Austin into a full-fledged profiler is to use together +with the [VS +Code](https://marketplace.visualstudio.com/items?itemName=p403n1x87.austin-vscode) +extension or combine it with [FlameGraph] or [Speedscope]. However, Austin's +simple output format can be piped into any other external or custom tool for +further processing. Look, for instance, at the following Python TUI
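As a concrete illustration of that piping workflow — a minimal sketch in which `myscript.py` is a placeholder and `flamegraph.pl` from the FlameGraph project is assumed to sit in the current directory — the cheat sheet's pipe-mode invocation looks like this:

~~~ console
austin -P python myscript.py | ./flamegraph.pl > profile.svg
~~~

The `-P/--pipe` option is the one recommended when Austin's output is piped into another tool.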

@@ -163,7 +166,7 @@ library. Before proceding with the steps below, make sure that the `autotools` are installed on your system. Refer to your distro's documentation for details on how to do so. -~~~ bash +~~~ console git clone --depth=1 https://github.com/P403n1x87/austin.git && cd austin autoreconf --install ./configure @@ -183,7 +186,7 @@ Austin can be installed on [many major Linux distributions](https://snapcraft.io/docs/installing-snapd) from the Snap Store with the following command -~~~ bash +~~~ console sudo snap install austin --classic ~~~ @@ -198,7 +201,7 @@ can therefore be installed with the `apt` utility. Austin can be installed on macOS using [Homebrew](https://docs.brew.sh): -~~~bash +~~~ console brew install austin ~~~ @@ -208,13 +211,13 @@ brew install austin To install [Austin from Chocolatey](https://chocolatey.org/packages/austin), run the following command from the command line or from PowerShell -~~~ shell +~~~ console choco install austin ~~~ To upgrade run the following command from the command line or from PowerShell: -~~~ shell +~~~ console choco upgrade austin ~~~ @@ -224,13 +227,13 @@ choco upgrade austin To install Austin using Scoop, run the following command from the command line or from PowerShell -~~~ shell +~~~ console scoop install austin ~~~ To upgrade run the following command from the command line or from PowerShell: -~~~ shell +~~~ console scoop update ~~~ @@ -240,7 +243,7 @@ scoop update Anaconda users on Linux and macOS can install Austin from [Conda Forge] with the command -~~~ bash +~~~ console conda install -c conda-forge austin ~~~ @@ -249,25 +252,25 @@ conda install -c conda-forge austin To install Austin from sources using the GNU C compiler, without `autotools`, clone the repository with -~~~ bash +~~~ console git clone --depth=1 https://github.com/P403n1x87/austin.git ~~~ On Linux one can then use the command -~~~ bash +~~~ console gcc -O3 -Os -Wall -pthread src/*.c -o src/austin ~~~ whereas on macOS it is enough to run -~~~ bash +~~~ console gcc -O3 -Os -Wall src/*.c -o src/austin ~~~ On Windows, the `-lpsapi -lntdll` switches are needed -~~~ bash +~~~ console gcc -O3 -Os -Wall -lpsapi -lntdll src/*.c -o src/austin ~~~ @@ -287,6 +290,8 @@ Austin -- A frame stack sampler for Python. stacks. -f, --full Produce the full set of metrics (time +mem -mem). -g, --gc Sample the garbage collector state. + -h, --heap=n_mb Maximum heap size to allocate to increase sampling + accuracy, in MB (default is 256). -i, --interval=n_us Sampling interval in microseconds (default is 100). Accepted units: s, ms, us. -m, --memory Profile memory usage. @@ -379,6 +384,63 @@ garbage collector is in the collecting state. This gives you a measure of how *Since Austin 3.1.0*. +## Sampling Accuracy + +Austin tries to keep perturbations to the tracee at a minimum. In order to do +so, the tracee is never halted. To improve sampling accuracy, Austin allocates a +heap that is used to get large snapshots of the private VM of the tracee that is +likely to contain frame information in a single attempt. The larger the heap is +allowed the grow, the more accurate the results. The maximum size of the heap +that Austin is allowed to allocate can be controlled with the `-h/--heap` +option, followed by the maximum size in bytes. By default Austin allocates a +maximum of 256 MB. On systems with low resource limits, it is advisable to +reduce this value. + +*Since Austin 3.2.0*. 
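For instance, to let Austin grow its snapshot heap up to 512 MB while profiling a script — a usage sketch mirroring the cheat sheet entry, with `myscript.py` as a placeholder:

~~~ console
austin -h 512 python myscript.py
~~~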
+ + +## Native Frame Stack + +If you want observability into the native frame stacks, you can use the +`austinp` variant of `austin` which can be obtained by compiling the source +with `-DAUSTINP` on Linux, or from the released binaries. + +`austinp` makes use of `ptrace` to halt the application and grab a +snapshot of the call stack with `libunwind`. If you are compiling `austinp` from +sources make sure that you have the development version of the `libunwind` +library available on your system, for example on Ubuntu, + +~~~ console +sudo apt install libunwind-dev +~~~ + +and compile with + +~~~ console +gcc -O3 -Os -Wall -pthread src/*.c -DAUSTINP -lunwind-ptrace -lunwind-generic -o src/austinp +~~~ + +then use as per normal. The extra `-k/--kernel` option is available with +`austinp` which allows sampling kernel call stacks as well. + +> **WARNING** Since `austinp` uses `ptrace`, the impact on the tracee is no +> longer minimal and it becomes higher at smaller sampling intervals. Therefore +> the use of `austinp` is not recommended in production environments. For this +> reason, the default sampling interval for `austinp` is 10 milliseconds. + +The `utils` folder has the script `resolve.py` that can be used to resolve the +VM addresses to source and line numbers, provided that the referenced binaries +have DWARF debug symbols. To resolve the references, assuming you have collected +the samples in `mysamples.austin`, do + +~~~ +python3 utils/resolve.py mysamples.austin > mysamples_resolved.austin +~~~ + +Internally, the script uses `addr2line(1)` to determine source and line number +given an address, when possible. + + ## Logging Austin uses `syslog` on Linux and macOS, and `%TEMP%\austin.log` on Windows @@ -388,6 +450,17 @@ entries for bad frames will not be visible in a flame graph as all tests show error rates below 1% on average. +## Cheat sheet + +All the above Austin options and arguments are summarised in a cheat sheet that +you can find in the [art](https://github.com/P403n1x87/austin/blob/master/art/) +folder in either the SVG or PNG format + +
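Putting the `austinp` pieces described above together, a collect-and-resolve session might look as follows. This is only an illustrative sketch: the file names are placeholders, and the `-k/--kernel` option is only available in the `austinp` variant.

~~~ console
austinp -k python myscript.py > mysamples.austin
python3 utils/resolve.py mysamples.austin > mysamples_resolved.austin
~~~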
+ # Compatibility Austin supports Python 2.3-2.7 and 3.3-3.10 and has been tested on the @@ -405,7 +478,7 @@ capability. This means that you will have to either use ``sudo`` when attaching to a running Python process or grant the CAP_SYS_PTRACE capability to the Austin binary with, e.g. -~~~ bash +~~~ console sudo setcap cap_sys_ptrace+ep `which austin` ~~~ @@ -465,7 +538,7 @@ is written in C, implementing the new changes is rather straight-forward. The following flame graph has been obtained with the command -~~~ bash +~~~ console austin -i 1ms ./test.py | sed '/^#/d' | ./flamegraph.pl --countname=Ξs > test.svg ~~~ @@ -488,7 +561,7 @@ for i in range(1000): To profile Apache2 WSGI application, one can attach Austin to the web server with -~~~ bash +~~~ console austin -Cp `pgrep apache2 | head -n 1` ~~~ @@ -519,13 +592,13 @@ or convert it to the [pprof] format. If you want to give it a go you can install it using `pip` with -~~~ bash +~~~ console pip install austin-tui --upgrade ~~~ and run it with -~~~ bash +~~~ console austin-tui [OPTION...] command [ARG...] ~~~ @@ -557,13 +630,13 @@ be used for _remote_ profiling by setting the `--host` and `--port` options. If you want to give it a go you can install it using `pip` with -~~~ bash +~~~ console pip install austin-web --upgrade ~~~ and run it with -~~~ bash +~~~ console austin-web [OPTION...] command [ARG...] ~~~ @@ -588,13 +661,13 @@ Austin to the Speedscope JSON format. If you want to give it a go you can install it using `pip` with -~~~ bash +~~~ console pip install austin-python --upgrade ~~~ and run it with -~~~ bash +~~~ console austin2speedscope [-h] [--indent INDENT] [-V] input output ~~~ @@ -613,13 +686,13 @@ Austin's format can also be converted to the Google pprof format using the `austin2pprof` utility that comes with [`austin-python`]. 
If you want to give it a go you can install it using `pip` with -~~~ bash +~~~ console pip install austin-python --upgrade ~~~ and run it with -~~~ bash +~~~ console austin2pprof [-h] [-V] input output ~~~ diff --git a/art/austin-tui.gif b/art/austin-tui.gif index 955bd574..3e8ca8d2 100644 Binary files a/art/austin-tui.gif and b/art/austin-tui.gif differ diff --git a/art/austin-tui.png b/art/austin-tui.png index baac2672..452b0d27 100644 Binary files a/art/austin-tui.png and b/art/austin-tui.png differ diff --git a/art/cheatsheet.png b/art/cheatsheet.png new file mode 100644 index 00000000..11591fb7 Binary files /dev/null and b/art/cheatsheet.png differ diff --git a/art/cheatsheet.svg b/art/cheatsheet.svg new file mode 100644 index 00000000..1d9adc49 --- /dev/null +++ b/art/cheatsheet.svg @@ -0,0 +1,7392 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + austin + + + + + + LAUNCH + + OUTPUT + + FORMAT + + Mode + + TOOLS + + Start a script + + austin python myscript.py + Start an executable script + + austin ./myscript.py + Start a module + + austin python -m mymodule + Attach to a running process + + austin -p 123 + Attach to child processes + + austin -C python my_multiproc_script.py + + austin -Cp 123 + Set sampling interval + + austin -p 123 -i 10ms + Set start-up timeout (on slow machines) + + austin -p 123 -t 1s + Wall clock time + + austin 
python myscript.py + CPU time + + austin -s python myscript.py + Memory + + austin -m python myscript.py + Wall clock time and garbage collector + + austin -g python myscript.py + All metrics + + austin -f python -m mymodule + All metrics and garbage collector + + austin -fg -p 123 + Emit to STDOUT (Python STDOUT suppressed) + + austin python -m mymodule + Pipe to other tools (Python STDOUT suppressed) + + austin -P ./myscript.py | ./flamegraph.pl > fg.svg + Default + + austin ./myscript.py + Alternative + + austin -ap 123 + + + foomodule:foo:42 + foomodule:foo:43 + + barmodule:bar:13 + + + + + + foomodule:foo + + barmodule:bar + + + + + L42 + L43 + + L13 + Exposure + + austin -x 3s ./myscript.py + Supported platforms + + + + + + + + + + + + + Supported interpreters + 2.7 and 3.3 thru 3.10 + INSTALL + + + + sudo snap install austin --classic + + + + brew install austin + + + + scoop install austin + + + + conda install -c conda-forge austin + + + + choco install austin + + + + + + + + + + + + + + + + + CPU time and garbage collector + + austin -sg python myscript.py + Redirect to file (Python STDOUT suppressed) + + austin -p 123 > /path/to/samples.austin + Emit to file (Python STDOUT preserved) + + austin -o /path/to/samples.austin -p 123 + Austin TUI + + + + + + + + + + + + + + + pipx install austin-tui + Austin VS Code + + + + + + + + + + + + + + + code --install-extension p403n1x87.austin-vscode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + p403n1x87/austin + + + + @AustinSampler + Report issues at + https://github.com/P403n1x87/austin/issues + Set heap size (for accurate results) + + austin -h 512 python -m mymodule + + + + AUSTIN + CHEATSHEET + for version 3.2 + + AUSTIN + + Frame stack sampler for CPython + + + + + + + + + diff --git a/art/vscode-demo.gif b/art/vscode-demo.gif index 490d8077..9f98726e 100644 Binary files a/art/vscode-demo.gif and b/art/vscode-demo.gif differ diff --git a/configure.ac b/configure.ac index 26e44bee..137ebbaf 100644 --- a/configure.ac +++ b/configure.ac @@ -2,7 +2,7 @@ # Process this file with autoconf to produce a configure script. AC_PREREQ([2.69]) -AC_INIT([austin], [3.1.0], [https://github.com/p403n1x87/austin/issues]) +AC_INIT([austin], [3.2.0], [https://github.com/p403n1x87/austin/issues]) AC_CONFIG_SRCDIR([config.h.in]) AC_CONFIG_HEADERS([config.h]) AM_INIT_AUTOMAKE diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml index c896ec23..9052143a 100644 --- a/snap/snapcraft.yaml +++ b/snap/snapcraft.yaml @@ -1,5 +1,5 @@ name: austin -version: '3.1.0+git' +version: '3.2.0+git' summary: A Python frame stack sampler for CPython description: | Austin is a Python frame stack sampler for CPython written in pure C. 
It diff --git a/src/Makefile.am b/src/Makefile.am index 4a5bb33b..6d8de8b7 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -32,6 +32,7 @@ austin_SOURCES = \ logging.c \ version.c \ stats.c \ + platform.c \ py_proc_list.c \ py_proc.c \ py_thread.c diff --git a/src/argparse.c b/src/argparse.c index 24fcb5eb..c5eb13ad 100644 --- a/src/argparse.c +++ b/src/argparse.c @@ -29,9 +29,17 @@ #include "hints.h" #include "platform.h" +#if defined PL_LINUX && !defined __MUSL__ +#define GNU_ARGP +#endif +#ifdef NATIVE +#define DEFAULT_SAMPLING_INTERVAL 10000 // reduces impact on tracee +#else #define DEFAULT_SAMPLING_INTERVAL 100 +#endif #define DEFAULT_INIT_RETRY_CNT 100 +#define DEFAULT_HEAP_SIZE 256 const char SAMPLE_FORMAT_NORMAL[] = ";%s:%s:%d"; const char SAMPLE_FORMAT_ALTERNATIVE[] = ";%s:%s;L%d"; @@ -53,6 +61,10 @@ parsed_args_t pargs = { /* exposure */ 0, /* pipe */ 0, /* gc */ 0, + /* heap */ DEFAULT_HEAP_SIZE, + #ifdef NATIVE + /* kernel */ 0, + #endif }; static int exec_arg = 0; @@ -144,7 +156,7 @@ parse_timeout(char * str, long * num) { // ---- GNU C ----------------------------------------------------------------- -#ifdef PL_LINUX /* LINUX */ +#ifdef GNU_ARGP /* LINUX */ #include @@ -219,10 +231,22 @@ static struct argp_option options[] = { "Pipe mode. Use when piping Austin output." }, { - "gc", 'g', NULL, 0, + "gc", 'g', NULL, 0, "Sample the garbage collector state." }, - #ifndef PL_LINUX + { + "heap", 'h', "n_mb", 0, + "Maximum heap size to allocate to increase sampling accuracy, in MB " + "(default is 256)." + }, + + #ifdef NATIVE + { + "kernel", 'k', NULL, 0, + "Sample the kernel call stack." + }, + #endif + #ifndef GNU_ARGP { "help", '?', NULL }, @@ -237,7 +261,7 @@ static struct argp_option options[] = { }; -#ifdef PL_LINUX +#ifdef GNU_ARGP // ---------------------------------------------------------------------------- static int @@ -331,6 +355,20 @@ parse_opt (int key, char *arg, struct argp_state *state) pargs.gc = 1; break; + case 'h': + if ( + fail(str_to_num(arg, (long *) &(pargs.heap))) || + pargs.heap > LONG_MAX + ) + argp_error(state, "the heap size must be a positive integer"); + break; + + #ifdef NATIVE + case 'k': + pargs.kernel = 1; + break; + #endif + case ARGP_KEY_ARG: case ARGP_KEY_END: if (pargs.attach_pid != 0 && exec_arg != 0) @@ -474,6 +512,8 @@ static const char * help_msg = \ " stacks.\n" " -f, --full Produce the full set of metrics (time +mem -mem).\n" " -g, --gc Sample the garbage collector state.\n" +" -h, --heap=n_mb Maximum heap size to allocate to increase sampling\n" +" accuracy, in MB (default is 256).\n" " -i, --interval=n_us Sampling interval in microseconds (default is\n" " 100). 
Accepted units: s, ms, us.\n" " -m, --memory Profile memory usage.\n" @@ -495,11 +535,11 @@ static const char * help_msg = \ "Report bugs to .\n"; static const char * usage_msg = \ -"Usage: austin [-aCefmPs?V] [-i n_us] [-o FILE] [-p PID] [-t n_ms] [-x n_sec]\n" -" [--alt-format] [--children] [--exclude-empty] [--full]\n" -" [--interval=n_us] [--memory] [--output=FILE] [--pid=PID] [--pipe]\n" -" [--sleepless] [--timeout=n_ms] [--exposure=n_sec] [--help]\n" -" [--usage] [--version] command [ARG...]\n"; +"Usage: austin [-aCefgmPs?V] [-h n_mb] [-i n_us] [-o FILE] [-p PID] [-t n_ms]\n" +" [-x n_sec] [--alt-format] [--children] [--exclude-empty] [--full]\n" +" [--gc] [--heap=n_mb] [--interval=n_us] [--memory] [--output=FILE]\n" +" [--pid=PID] [--pipe] [--sleepless] [--timeout=n_ms]\n" +" [--exposure=n_sec] [--help] [--usage] [--version] command [ARG...]\n"; static void @@ -631,6 +671,14 @@ cb(const char opt, const char * arg) { pargs.gc = 1; break; + case 'h': + if ( + fail(str_to_num((char*) arg, (long *) &(pargs.heap))) || + pargs.heap > LONG_MAX + ) + arg_error("the heap size must be a positive integer"); + break; + case '?': puts(help_msg); exit(0); @@ -662,7 +710,9 @@ cb(const char opt, const char * arg) { // ---------------------------------------------------------------------------- int parse_args(int argc, char ** argv) { - #ifdef PL_LINUX + pargs.output_file = stdout; + + #ifdef GNU_ARGP struct argp args = {options, parse_opt, "command [ARG...]", doc}; argp_parse(&args, argc, argv, 0, 0, 0); diff --git a/src/argparse.h b/src/argparse.h index d19f0c11..aea98c8d 100644 --- a/src/argparse.h +++ b/src/argparse.h @@ -27,6 +27,7 @@ #include #include +#include "platform.h" #include "stats.h" typedef struct { @@ -44,6 +45,10 @@ typedef struct { ctime_t exposure; int pipe; int gc; + size_t heap; + #ifdef NATIVE + int kernel; + #endif } parsed_args_t; diff --git a/src/austin.1 b/src/austin.1 index 4103b0d6..8d9821e6 100644 --- a/src/austin.1 +++ b/src/austin.1 @@ -1,7 +1,7 @@ -.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6. -.TH AUSTIN "1" "August 2021" "austin 3.1.0" "User Commands" +.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13. +.TH AUSTIN "1" "December 2021" "austin 3.2.0" "User Commands" .SH NAME -austin \- manual page for austin 3.1.0 +austin \- manual page for austin 3.2.0 .SH SYNOPSIS .B austin [\fI\,OPTION\/\fR...] \fI\,command \/\fR[\fI\,ARG\/\fR...] @@ -24,6 +24,10 @@ Produce the full set of metrics (time +mem \fB\-mem\fR). \fB\-g\fR, \fB\-\-gc\fR Sample the garbage collector state. .TP +\fB\-h\fR, \fB\-\-heap\fR=\fI\,n_mb\/\fR +Maximum heap size to allocate to increase sampling +accuracy, in MB (default is 256). +.TP \fB\-i\fR, \fB\-\-interval\fR=\fI\,n_us\/\fR Sampling interval in microseconds (default is 100). Accepted units: s, ms, us. 
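As an illustrative sketch of how the options documented in the help text and man page above combine — every flag shown here is documented, but this exact invocation is hypothetical:

~~~ console
austin -fg -h 512 -i 1ms -o samples.austin python myscript.py
~~~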
diff --git a/src/austin.c b/src/austin.c index 65ce7e05..ee431ac3 100644 --- a/src/austin.c +++ b/src/austin.c @@ -75,8 +75,12 @@ do_single_process(py_proc_t * py_proc) { if (fail(py_proc__sample(py_proc))) break; - + + #ifdef NATIVE + stopwatch_pause(0); + #else stopwatch_pause(stopwatch_duration()); + #endif } } else { @@ -88,7 +92,11 @@ do_single_process(py_proc_t * py_proc) { if (fail(py_proc__sample(py_proc))) break; + #ifdef NATIVE + stopwatch_pause(0); + #else stopwatch_pause(stopwatch_duration()); + #endif if (end_time < gettime()) interrupt++; @@ -154,20 +162,32 @@ do_child_processes(py_proc_t * py_proc) { if (pargs.exposure == 0) { while (!py_proc_list__is_empty(list) && interrupt == FALSE) { + #ifndef NATIVE ctime_t start_time = gettime(); + #endif py_proc_list__update(list); py_proc_list__sample(list); + #ifdef NATIVE + stopwatch_pause(0); + #else stopwatch_pause(gettime() - start_time); + #endif } } else { log_m("🕑 Sampling for %d second%s", pargs.exposure, pargs.exposure != 1 ? "s" : ""); ctime_t end_time = gettime() + pargs.exposure * 1000000; while (!py_proc_list__is_empty(list) && interrupt == FALSE) { + #ifndef NATIVE ctime_t start_time = gettime(); + #endif py_proc_list__update(list); py_proc_list__sample(list); + #ifdef NATIVE + stopwatch_pause(0); + #else stopwatch_pause(gettime() - start_time); + #endif if (end_time < gettime()) interrupt++; } @@ -212,7 +232,7 @@ int main(int argc, char ** argv) { goto finally; } - if (fail(py_thread_allocate_stack())) { + if (fail(py_thread_allocate())) { log_ie("Cannot allocate memory for thread stack"); goto finally; } @@ -244,9 +264,7 @@ int main(int argc, char ** argv) { goto finally; // Redirect output to STDOUT if not output file was given. - if (pargs.output_file == NULL) - pargs.output_file = stdout; - else + if (pargs.output_file != stdout) log_i("Output file: %s", pargs.output_filename); log_i("Sampling interval: %lu Ξs", pargs.t_sampling_interval); @@ -298,7 +316,7 @@ int main(int argc, char ** argv) { stats_log_metrics();NL; finally: - py_thread_free_stack(); + py_thread_free(); py_proc__destroy(py_proc); log_d("Last error: %d :: %s", austin_errno, get_last_error()); @@ -345,7 +363,6 @@ int main(int argc, char ** argv) { retval = SIGTERM; log_footer(); - logger_close(); release: if (pargs.output_file != NULL && pargs.output_file != stdout) { @@ -353,6 +370,8 @@ int main(int argc, char ** argv) { log_d("Output file closed."); } + logger_close(); + return retval; } /* main */ diff --git a/src/austin.h b/src/austin.h index ac5f7ce8..da5561d4 100644 --- a/src/austin.h +++ b/src/austin.h @@ -24,6 +24,6 @@ #define AUSTIN_H #define PROGRAM_NAME "austin" -#define VERSION "3.1.0" +#define VERSION "3.2.0" #endif diff --git a/src/heap.h b/src/heap.h new file mode 100644 index 00000000..7a7162be --- /dev/null +++ b/src/heap.h @@ -0,0 +1,41 @@ +// This file is part of "austin" which is released under GPL. +// +// See file LICENCE or go to http://www.gnu.org/licenses/ for full license +// details. +// +// Austin is a Python frame stack sampler for CPython. +// +// Copyright (c) 2018-2021 Gabriele N. Tornetta . +// All rights reserved. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#ifndef HEAP_H +#define HEAP_H + +#include + +typedef struct { + void * lo; + void * hi; + void * newlo; + void * newhi; +} _mem_block_t; + + +typedef struct { + void * content; + size_t size; +} _heap_t; + +#endif diff --git a/src/hints.h b/src/hints.h index 43a858e8..84cf64bd 100644 --- a/src/hints.h +++ b/src/hints.h @@ -46,6 +46,7 @@ #endif #define with_resources int retval = 0; +#define OK goto release; #define NOK retval = 1; goto release; #define released return retval; diff --git a/src/linux/py_proc.h b/src/linux/py_proc.h index 78558358..a98a5db4 100644 --- a/src/linux/py_proc.h +++ b/src/linux/py_proc.h @@ -35,6 +35,9 @@ #include #include +#ifdef NATIVE +#include "../argparse.h" +#endif #include "../dict.h" #include "../hints.h" #include "../py_proc.h" @@ -377,13 +380,11 @@ _py_proc__parse_maps_file(py_proc_t * self) { while (getline(&line, &len, fp) != -1) { ssize_t lower, upper; char pathname[1024]; - char m[sizeof(void *)]; // We don't care about these values. - int field_count = sscanf(line, "%lx-%lx %4c %lx %x:%x %x %s\n", - &lower, &upper, // Map bounds - (char *) m, (ssize_t *) m, (int *) m, (int *) m, (int *) m, // Ignored - pathname // Binary path - ) - 7; // We expect between 7 and 8 matches. + int field_count = sscanf(line, "%lx-%lx %*s %*x %*x:%*x %*x %s\n", + &lower, &upper, // Map bounds + pathname // Binary path + ) - 3; // We expect between 3 and 4 matches. if (field_count >= 0) { if (field_count == 0 || strstr(pathname, "[v") == NULL) { // Skip meaningless addresses like [vsyscall] which would give @@ -488,6 +489,51 @@ _py_proc__get_resident_memory(py_proc_t * self) { } /* _py_proc__get_resident_memory */ +#ifdef NATIVE +// ---------------------------------------------------------------------------- +static int +_py_proc__dump_maps(py_proc_t * self) { + char file_name[32]; + FILE * fp = NULL; + char * line = NULL; + size_t len = 0; + + sprintf(file_name, "/proc/%d/maps", self->pid); + fp = fopen(file_name, "r"); + if (fp == NULL) { + switch (errno) { + case EACCES: // Needs elevated privileges + set_error(EPROCPERM); + break; + case ENOENT: // Invalid pid + set_error(EPROCNPID); + break; + default: + set_error(EPROCVM); + } + FAIL; + } + + while (getline(&line, &len, fp) != -1) { + ssize_t lower, upper; + char pathname[1024]; + + if (sscanf(line, "%lx-%lx %*s %*x %*x:%*x %*x %s\n", + &lower, &upper, // Map bounds + pathname // Binary path + ) == 3 && pathname[0] != '[') { + fprintf(pargs.output_file, "# map: %lx-%lx %s\n", lower, upper, pathname); + } + } + + sfree(line); + fclose(fp); + + SUCCESS; +} /* _py_proc__dump_maps */ +#endif + + // ---------------------------------------------------------------------------- static int _py_proc__init(py_proc_t * self) { @@ -503,6 +549,10 @@ _py_proc__init(py_proc_t * self) { self->last_resident_memory = _py_proc__get_resident_memory(self); + #ifdef NATIVE + _py_proc__dump_maps(self); + #endif + SUCCESS; } /* _py_proc__init */ diff --git a/src/linux/py_thread.h b/src/linux/py_thread.h index ae96f5d1..0d673028 100644 --- a/src/linux/py_thread.h +++ b/src/linux/py_thread.h @@ -44,23 +44,33 @@ void * _pthread_buffer[PTHREAD_BUFFER_SIZE]; // 
---------------------------------------------------------------------------- static void _infer_tid_field_offset(py_thread_t * py_thread) { - if (success(copy_memory( + if (fail(copy_memory( py_thread->raddr.pid, (void *) py_thread->tid, // At this point this is still the pthread_t * PTHREAD_BUFFER_SIZE * sizeof(void *), _pthread_buffer ))) { - for (register int i = 0; i < PTHREAD_BUFFER_SIZE; i++) { - log_d("pthread_t at %p", py_thread->tid); - if (py_thread->raddr.pid == (uintptr_t) _pthread_buffer[i]) { - log_d("TID field offset: %d", i); - _pthread_tid_offset = i; - return; - } + log_d("Cannot copy pthread_t structure"); + return; + } + + log_d("pthread_t at %p", py_thread->tid); + + for (register int i = 0; i < PTHREAD_BUFFER_SIZE; i++) { + if (py_thread->raddr.pid == (uintptr_t) _pthread_buffer[i]) { + log_d("TID field offset: %d", i); + _pthread_tid_offset = i; + return; } } - else { - log_d("Cannot copy pthread_t structure"); + + // Fall-back to smaller steps if we failed + for (register int i = 0; i < PTHREAD_BUFFER_SIZE * sizeof(uintptr_t) / sizeof(pid_t); i++) { + if (py_thread->raddr.pid == (pid_t) ((pid_t*) _pthread_buffer)[i]) { + log_d("TID field offset (from fall-back): %d", i); + _pthread_tid_offset = i; + return; + } } } diff --git a/src/msg.h b/src/msg.h index 71915500..c02fdfaa 100644 --- a/src/msg.h +++ b/src/msg.h @@ -61,15 +61,7 @@ URL("https://github.com/P403n1x87/austin#compatibility") const char * MFORK = -#if defined PL_UNIX -"❌ Cannot launch the given command. Either it is not valid or the process\n" -"terminated too quickly"; -#else -"❌ Cannot launch the given command. Please make sure it is correct. If you\n" -"think it is, then try passing an output file via the -o/--output option.\n" -"Sometimes, the Python wrapper launch fails to duplicate the standard out\n" -"handle and fails to launch your Python application."; -#endif +"❌ Cannot launch the given command or it terminated too quickly"; const char * MATTACH = \ "🛑 Cannot attach to the given process. Make sure that the PID you have provided\n" diff --git a/src/platform.c b/src/platform.c new file mode 100644 index 00000000..ea281091 --- /dev/null +++ b/src/platform.c @@ -0,0 +1,30 @@ +#include + +#include "hints.h" +#include "platform.h" + + +// ---------------------------------------------------------------------------- +size_t +pid_max() { + #if defined PL_LINUX /* LINUX */ + FILE * pid_max_file = fopen("/proc/sys/kernel/pid_max", "rb"); + if (!isvalid(pid_max_file)) + return 0; + + size_t max_pid; + int has_pid_max = (fscanf(pid_max_file, "%ld", &max_pid) == 1); + fclose(pid_max_file); + if (!has_pid_max) + return 0; + + return max_pid; + + #elif defined PL_MACOS /* MACOS */ + return PID_MAX; + + #elif defined PL_WIN /* WIN */ + return (1 << 22); // 4M. WARNING: This could potentially be violated! 
+ + #endif +} \ No newline at end of file diff --git a/src/platform.h b/src/platform.h index 40e88eaa..92f3909e 100644 --- a/src/platform.h +++ b/src/platform.h @@ -23,6 +23,8 @@ #ifndef PLATFORM_H #define PLATFORM_H +#include + #if defined(__linux__) #define PL_LINUX @@ -40,10 +42,29 @@ // ---------------------------------------------------------------------------- +#if defined(AUSTINP) && defined(PL_LINUX) +#define NATIVE +#endif + +// ---------------------------------------------------------------------------- + #if defined(PL_LINUX) || defined(PL_MACOS) #define PL_UNIX #define NULL_DEVICE "/dev/null" #endif +// ---------------------------------------------------------------------------- + +#if defined PL_MACOS +#define PID_MAX 99999 // From sys/proc_internal.h #endif + + +/** + * Get the maximum PID for the platform. + */ +size_t +pid_max(); + +#endif \ No newline at end of file diff --git a/src/py_proc.c b/src/py_proc.c index e488d693..5f6d1f25 100644 --- a/src/py_proc.c +++ b/src/py_proc.c @@ -144,7 +144,11 @@ _get_version_from_executable(char * binary, int * major, int * minor, int * patc char version[64]; char cmd[256]; + #if defined PL_WIN + sprintf(cmd, "\"\"%s\"\" -V 2>&1", binary); + #else sprintf(cmd, "%s -V 2>&1", binary); + #endif fp = _popen(cmd, "r"); if (!isvalid(fp)) { @@ -244,10 +248,20 @@ _py_proc__get_version(py_proc_t * self) { if (isvalid(self->lib_path)) { #if defined PL_LINUX /* LINUX */ - if (sscanf( - strstr(self->lib_path, "python"), "python%d.%d", &major, &minor - ) == 2) { - return PYVERSION(major, minor, patch) | 0xFF; + char * base = self->lib_path; + char * end = base + strlen(self->lib_path); + const char * needle = "python"; + const size_t needle_len = strlen(needle); + + while (base < end) { + base = strstr(base, needle); + if (!isvalid(base)) { + break; + } + base += needle_len; + if (sscanf(base,"%d.%d", &major, &minor) == 2) { + return PYVERSION(major, minor, patch) | 0xFF; + } } #elif defined PL_WIN /* WIN */ @@ -326,12 +340,12 @@ _py_proc__check_interp_state(py_proc_t * self, void * raddr) { if (py_proc__get_type(self, V_FIELD(void *, is, py_is, o_tstate_head), tstate_head)) { log_t( "Cannot copy PyThreadState head at %p from PyInterpreterState instance", - is.tstate_head + V_FIELD(void *, is, py_is, o_tstate_head) ); FAIL; } - log_t("PyThreadState head loaded @ %p", is.tstate_head); + log_t("PyThreadState head loaded @ %p", V_FIELD(void *, is, py_is, o_tstate_head)); if (V_FIELD(void*, tstate_head, py_thread, o_interp) != raddr) FAIL; @@ -343,21 +357,21 @@ _py_proc__check_interp_state(py_proc_t * self, void * raddr) { log_t( "PyInterpreterState loaded @ %p. Thread State head @ %p", - raddr, is.tstate_head + raddr, V_FIELD(void *, is, py_is, o_tstate_head) ); // As an extra sanity check, verify that the thread state is valid - raddr_t thread_raddr = { .pid = PROC_REF, .addr = V_FIELD(void *, is, py_is, o_tstate_head) }; - py_thread_t thread; - if (fail(py_thread__fill_from_raddr(&thread, &thread_raddr, self))) { - log_d("Failed to fill thread structure"); - FAIL; - } + // raddr_t thread_raddr = { .pid = PROC_REF, .addr = V_FIELD(void *, is, py_is, o_tstate_head) }; + // py_thread_t thread; + // if (fail(py_thread__fill_from_raddr(&thread, &thread_raddr, self))) { + // log_d("Failed to fill thread structure"); + // FAIL; + // } - if (thread.invalid) { - log_d("... but Head Thread State is invalid!"); - FAIL; - } + // if (thread.invalid) { + // log_d("... 
but Head Thread State is invalid!"); + // FAIL; + // } log_d("Stack trace constructed from possible interpreter state"); @@ -742,6 +756,9 @@ _py_proc__run(py_proc_t * self, int try_once) { self->timestamp = gettime(); + #ifdef NATIVE + self->unwind.as = unw_create_addr_space(&_UPT_accessors, 0); + #endif SUCCESS; } /* _py_proc__run */ @@ -765,6 +782,8 @@ py_proc_new() { } } + py_proc->frames_heap.newlo = py_proc->frames.newlo = (void *) -1; + py_proc->extra = (proc_extra_info *) calloc(1, sizeof(proc_extra_info)); if (!isvalid(py_proc->extra)) goto error; @@ -795,13 +814,19 @@ py_proc__attach(py_proc_t * self, pid_t pid, int child_process) { self->pid = pid; if (fail(_py_proc__run(self, child_process))) { - if (austin_errno == EPROCNPID) { - set_error(EPROCATTACH); - } - else { - log_ie("Cannot attach to running process."); + #if defined PL_WIN + if (fail(_py_proc__try_child_proc(self))) { + #endif + if (austin_errno == EPROCNPID) { + set_error(EPROCATTACH); + } + else { + log_ie("Cannot attach to running process."); + } + FAIL; + #if defined PL_WIN } - FAIL; + #endif } SUCCESS; @@ -819,6 +844,8 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) { SECURITY_ATTRIBUTES saAttr; HANDLE hChildStdInRd = NULL; HANDLE hChildStdInWr = NULL; + HANDLE hChildStdOutRd = NULL; + HANDLE hChildStdOutWr = NULL; ZeroMemory(&piProcInfo, sizeof(PROCESS_INFORMATION)); ZeroMemory(&siStartInfo, sizeof(STARTUPINFO)); @@ -828,7 +855,10 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) { saAttr.lpSecurityDescriptor = NULL; CreatePipe(&hChildStdInRd, &hChildStdInWr, &saAttr, 0); + CreatePipe(&hChildStdOutRd, &hChildStdOutWr, &saAttr, 0); + SetHandleInformation(hChildStdInWr, HANDLE_FLAG_INHERIT, 0); + SetHandleInformation(hChildStdOutRd, HANDLE_FLAG_INHERIT, 0); siStartInfo.cb = sizeof(STARTUPINFO); siStartInfo.hStdInput = hChildStdInRd; @@ -836,17 +866,23 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) { siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE); siStartInfo.dwFlags |= STARTF_USESTDHANDLES; - if (pargs.output_file == NULL) { - HANDLE nullStdOut = CreateFile( - TEXT(NULL_DEVICE), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL + if (pargs.output_file == stdout) { + log_d("Redirecting child's STDOUT to a pipe"); + siStartInfo.hStdOutput = hChildStdOutWr; + + // On Windows, Python is normally started by a launcher that duplicates the + // standard streams, so redirecting to the NULL device causes issues. To + // support these cases, we spawn a reader thread that reads from the pipe + // and ensures that the buffer never gets full, stalling STDOUT operations + // in the child process. + DWORD dwThreadId; + self->extra->h_reader_thread = CreateThread( + NULL, 0, reader_thread, hChildStdOutRd, 0, &dwThreadId ); - - if (nullStdOut == INVALID_HANDLE_VALUE) { - log_e(error_get_msg(ENULLDEV)); + if (self->extra->h_reader_thread == NULL) { + log_e("Failed to start STDOUT reader thread."); + set_error(ENULLDEV); } - - log_d("Redirecting child's STDOUT to " NULL_DEVICE); - siStartInfo.hStdOutput = nullStdOut; } // Concatenate the command line arguments @@ -888,13 +924,14 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) { self->pid = (pid_t) piProcInfo.dwProcessId; CloseHandle(hChildStdInRd); + CloseHandle(hChildStdOutWr); #else /* UNIX */ self->pid = fork(); if (self->pid == 0) { // If we are not writing to file we need to ensure the child process is // not writing to stdout. 
- if (pargs.output_file == NULL) { + if (pargs.output_file == stdout) { log_d("Redirecting child's STDOUT to " NULL_DEVICE); if (freopen(NULL_DEVICE, "w", stdout) == NULL) log_e(error_get_msg(ENULLDEV)); @@ -917,66 +954,15 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) { if (fail(_py_proc__run(self, FALSE))) { #if defined PL_WIN - // On Windows, if we fail with the parent process we look if it has a single - // child and try to attach to that instead. We keep going until we either - // find a single Python process or more or less than a single child. - log_d("Process is not Python so we look for a single child Python process"); - HANDLE orig_hproc = self->extra->h_proc; - pid_t orig_pid = self->pid; - while (TRUE) { - pid_t parent_pid = self->pid; - - HANDLE h = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); - if (h == INVALID_HANDLE_VALUE) - break; - - PROCESSENTRY32 pe = { 0 }; - pe.dwSize = sizeof(PROCESSENTRY32); - - if (Process32First(h, &pe)) { - pid_t child_pid = 0; - do { - if (pe.th32ParentProcessID == parent_pid) { - if (child_pid) { - log_d("Process has more than one child"); - goto exit; - } - child_pid = pe.th32ProcessID; - } - } while (Process32Next(h, &pe)); - - if (!child_pid) { - log_d("Process has no children"); - goto exit; - } - - self->pid = child_pid; - self->extra->h_proc = OpenProcess( - PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, FALSE, child_pid - ); - if (self->extra->h_proc == INVALID_HANDLE_VALUE) { - goto exit; - } - if (success(_py_proc__run(self, FALSE))) { - log_d("Process has a single Python child with PID %d. We will attach to that", child_pid); - SUCCESS; - } - else { - log_d("Process had a single non-Python child with PID %d. Taking it as new parent", child_pid); - CloseHandle(self->extra->h_proc); - } - } - - CloseHandle(h); + if (fail(_py_proc__try_child_proc(self))) { + #endif + if (austin_errno == EPROCNPID) + set_error(EPROCFORK); + log_ie("Cannot start new process"); + FAIL; + #if defined PL_WIN } - exit: - self->pid = orig_pid; - self->extra->h_proc = orig_hproc; #endif - if (austin_errno == EPROCNPID) - set_error(EPROCFORK); - log_ie("Cannot start new process"); - FAIL; } SUCCESS; @@ -995,10 +981,19 @@ py_proc__wait(py_proc_t * self) { #endif #ifdef PL_WIN /* WIN */ + if (isvalid(self->extra->h_reader_thread)) { + WaitForSingleObject(self->extra->h_reader_thread, INFINITE); + CloseHandle(self->extra->h_reader_thread); + } WaitForSingleObject(self->extra->h_proc, INFINITE); + CloseHandle(self->extra->h_proc); #else /* UNIX */ + #ifdef NATIVE + wait(NULL); + #else waitpid(self->pid, 0, 0); #endif + #endif } @@ -1096,6 +1091,53 @@ py_proc__is_gc_collecting(py_proc_t * self) { } +#ifdef NATIVE +// ---------------------------------------------------------------------------- +static int +_py_proc__interrupt_threads(py_proc_t * self, raddr_t * tstate_head_raddr) { + py_thread_t py_thread; + + if (fail(py_thread__fill_from_raddr(&py_thread, tstate_head_raddr, self))) { + FAIL; + } + + do { + if (fail(py_thread__set_idle(&py_thread))) + FAIL; + if (pargs.kernel && fail(py_thread__save_kernel_stack(&py_thread))) + FAIL; + if (ptrace(PTRACE_INTERRUPT, py_thread.tid, 0, 0)) { + log_e("ptrace: failed to interrupt thread %d", py_thread.tid); + FAIL; + } + log_t("ptrace: thread %d interrupted", py_thread.tid); + } while (success(py_thread__next(&py_thread))); + + SUCCESS; +} + + +// ---------------------------------------------------------------------------- +static int +_py_proc__resume_threads(py_proc_t * self, raddr_t * 
tstate_head_raddr) { + py_thread_t py_thread; + + if (fail(py_thread__fill_from_raddr(&py_thread, tstate_head_raddr, self))) { + FAIL; + } + + do { + while (ptrace(PTRACE_CONT, py_thread.tid, 0, 0)) { + log_t("ptrace: failed to resume thread %d", py_thread.tid); + } + log_t("ptrace: thread %d resumed", py_thread.tid); + } while (success(py_thread__next(&py_thread))); + + SUCCESS; +} +#endif + + // ---------------------------------------------------------------------------- int py_proc__sample(py_proc_t * self) { @@ -1111,8 +1153,18 @@ py_proc__sample(py_proc_t * self) { if (isvalid(tstate_head)) { raddr_t raddr = { .pid = PROC_REF, .addr = tstate_head }; py_thread_t py_thread; - if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self))) - FAIL; + + #ifdef NATIVE + _py_proc__interrupt_threads(self, &raddr); + time_delta = gettime() - self->timestamp; + #endif + + if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self))) { + if (is_fatal(austin_errno)) { + FAIL; + } + SUCCESS; + } if (pargs.memory) { // Use the current thread to determine which thread is manipulating memory @@ -1140,9 +1192,16 @@ py_proc__sample(py_proc_t * self) { mem_delta ); } while (success(py_thread__next(&py_thread))); + #ifdef NATIVE + self->timestamp = gettime(); + _py_proc__resume_threads(self, &raddr); + #endif + } + #ifndef NATIVE self->timestamp += time_delta; + #endif SUCCESS; } /* py_proc__sample */ @@ -1199,17 +1258,10 @@ py_proc__destroy(py_proc_t * self) { if (!isvalid(self)) return; - if (self->bin_path != NULL) - free(self->bin_path); - - if (self->lib_path != NULL) - free(self->lib_path); - - if (self->bss != NULL) - free(self->bss); - - if (self->extra != NULL) - free(self->extra); + sfree(self->bin_path); + sfree(self->lib_path); + sfree(self->bss); + sfree(self->extra); free(self); } diff --git a/src/py_proc.h b/src/py_proc.h index 41761300..8091a3ad 100644 --- a/src/py_proc.h +++ b/src/py_proc.h @@ -26,6 +26,12 @@ #include +#ifdef NATIVE +#include +#include +#endif + +#include "heap.h" #include "stats.h" @@ -77,6 +83,16 @@ typedef struct { // Offset of the tstate_current field within the _PyRuntimeState structure unsigned int tstate_current_offset; + // Frame objects VM ranges + _mem_block_t frames; + _mem_block_t frames_heap; + + #ifdef NATIVE + struct _puw { + unw_addr_space_t as; + } unwind; + #endif + // Platform-dependent fields proc_extra_info * extra; } py_proc_t; diff --git a/src/py_proc_list.c b/src/py_proc_list.c index edd1484d..ffef7a45 100644 --- a/src/py_proc_list.c +++ b/src/py_proc_list.c @@ -26,7 +26,6 @@ #include #elif defined PL_MACOS #include -#define PID_MAX 99999 // From sys/proc_internal.h #elif defined PL_WIN #include #include @@ -114,23 +113,7 @@ py_proc_list_new(py_proc_t * parent_py_proc) { if (list == NULL) return NULL; - #if defined PL_LINUX /* LINUX */ - FILE * pid_max_file = fopen("/proc/sys/kernel/pid_max", "rb"); - if (pid_max_file == NULL) - return NULL; - - int has_pid_max = (fscanf(pid_max_file, "%d", &(list->pids)) == 1); - fclose(pid_max_file); - if (!has_pid_max) - return NULL; - - #elif defined PL_MACOS /* MACOS */ - list->pids = PID_MAX; - - #elif defined PL_WIN /* WIN */ - list->pids = (1 << 22); // 4M. WARNING: This could potentially be violated! 
- - #endif + list->pids = pid_max(); log_t("Maximum number of PIDs: %d", list->pids); diff --git a/src/py_thread.c b/src/py_thread.c index 38cd4e4d..338ca285 100644 --- a/src/py_thread.c +++ b/src/py_thread.c @@ -23,6 +23,10 @@ #define PY_THREAD_C #include +#include +#include +#include +#include #include "argparse.h" #include "error.h" @@ -33,6 +37,7 @@ #include "timing.h" #include "version.h" +#include "heap.h" #include "py_thread.h" // ---------------------------------------------------------------------------- @@ -77,7 +82,16 @@ typedef struct frame { } py_frame_t; -py_frame_t * _stack = NULL; +static py_frame_t * _stack = NULL; +static size_t _stackp = 0; +static _heap_t _frames = {NULL, 0}; +static _heap_t _frames_heap = {NULL, 0}; + +#ifdef NATIVE +static void ** _tids = NULL; +static unsigned char * _tids_idle = NULL; +static char ** _kstacks = NULL; +#endif // ---- PyCode ---------------------------------------------------------------- @@ -288,35 +302,72 @@ _py_code__fill_from_raddr(py_code_t * self, raddr_t * raddr, int lasti) { // ---- PyFrame --------------------------------------------------------------- // ---------------------------------------------------------------------------- -static inline int -_py_frame__fill_from_raddr(py_frame_t * self, raddr_t * raddr) { - PyFrameObject frame; +#define _use_heaps (pargs.heap > 0) +#define _no_heaps {pargs.heap = 0;} - self->invalid = 1; +static inline int +_py_thread__read_frames(py_thread_t * self) { + size_t newsize; + size_t maxsize = pargs.heap >> 1; + + if (isvalid(self->proc->frames.newhi)) { + newsize = self->proc->frames.newhi - self->proc->frames.newlo; + if (newsize > maxsize) { + newsize = maxsize + sizeof(PyFrameObject); + } + if (newsize > _frames.size) { + _frames.content = realloc(_frames.content, newsize); + _frames.size = newsize; + self->proc->frames.hi = self->proc->frames.newhi; + self->proc->frames.lo = self->proc->frames.newlo; + } + if (fail(copy_memory(self->raddr.pid, self->proc->frames.lo, newsize, _frames.content))) + FAIL; + } - if (fail(copy_from_raddr_v(raddr, frame, py_v->py_frame.size))) { - log_ie("Cannot read remote PyFrameObject"); - FAIL; + if (isvalid(self->proc->frames_heap.newhi)) { + newsize = self->proc->frames_heap.newhi - self->proc->frames_heap.newlo; + if (newsize > maxsize) { + newsize = maxsize + sizeof(PyFrameObject); + } + if (newsize > _frames_heap.size) { + _frames_heap.content = realloc(_frames_heap.content, newsize); + _frames_heap.size = newsize; + self->proc->frames_heap.hi = self->proc->frames_heap.newhi; + self->proc->frames_heap.lo = self->proc->frames_heap.newlo; + } + return copy_memory(self->raddr.pid, self->proc->frames_heap.lo, newsize, _frames_heap.content); } + SUCCESS; +} + + +// ---------------------------------------------------------------------------- +static inline int +_py_frame_fill_from_addr(PyFrameObject * frame, raddr_t * raddr) { + py_frame_t * self = _stack + _stackp; + self->invalid = TRUE; raddr_t py_code_raddr = { .pid = raddr->pid, - .addr = V_FIELD(void *, frame, py_frame, o_code) + .addr = V_FIELD_PTR(void *, frame, py_frame, o_code) }; if (_py_code__fill_from_raddr( - &(self->code), &py_code_raddr, V_FIELD(int, frame, py_frame, o_lasti) + &(self->code), &py_code_raddr, V_FIELD_PTR(int, frame, py_frame, o_lasti) )) { log_ie("Cannot get PyCodeObject for frame"); - SUCCESS; + FAIL; } self->raddr.pid = raddr->pid; self->raddr.addr = raddr->addr; self->prev_raddr.pid = raddr->pid; - self->prev_raddr.addr = V_FIELD(void *, frame, py_frame, o_back); + 
self->prev_raddr.addr = V_FIELD_PTR(void *, frame, py_frame, o_back); - self->invalid = 0; + self->invalid = FALSE; + + _stackp++; SUCCESS; } @@ -324,35 +375,262 @@ _py_frame__fill_from_raddr(py_frame_t * self, raddr_t * raddr) { // ---------------------------------------------------------------------------- static inline int -_py_frame__prev(py_frame_t * self) { - if (!isvalid(self) || !isvalid(self->prev_raddr.addr)) +_py_frame_fill_from_raddr(raddr_t * raddr) { + PyFrameObject frame; + + if (fail(copy_from_raddr_v(raddr, frame, py_v->py_frame.size))) { + log_ie("Cannot read remote PyFrameObject"); + log_d(" raddr: (%p, %ld)", raddr->addr, raddr->pid); FAIL; + } + + return _py_frame_fill_from_addr(&frame, raddr); +} + + +// ---------------------------------------------------------------------------- +#define REL(raddr, block, base) (raddr->addr - block.lo + base) + +static inline int +_py_frame_fill(raddr_t * raddr, py_thread_t * thread) { + if (_use_heaps) { + py_proc_t * proc = thread->proc; + + if (isvalid(_frames.content) + && raddr->addr >= proc->frames.lo + && raddr->addr < proc->frames.lo + _frames.size + ) { + return _py_frame_fill_from_addr( + REL(raddr, proc->frames, _frames.content), + raddr + ); + } + else if (isvalid(_frames_heap.content) + && raddr->addr >= proc->frames_heap.lo + && raddr->addr < proc->frames_heap.lo + _frames_heap.size + ) { + return _py_frame_fill_from_addr( + REL(raddr, proc->frames_heap, _frames_heap.content), + raddr + ); + } + + // Miss: update ranges + // We quite likely set the bss map data so this should be a pretty reliable + // platform-independent way of dualising the frame heap. + if (raddr->addr >= proc->map.bss.base && raddr->addr <= proc->map.bss.base + (1 << 27)) { + if (raddr->addr + sizeof(PyFrameObject) > proc->frames_heap.newhi) { + proc->frames_heap.newhi = raddr->addr + sizeof(PyFrameObject); + } + if (raddr->addr < proc->frames_heap.newlo) { + proc->frames_heap.newlo = raddr->addr; + } + } + else { + if (raddr->addr + sizeof(PyFrameObject) > proc->frames.newhi) { + proc->frames.newhi = raddr->addr + sizeof(PyFrameObject); + } + if (raddr->addr < proc->frames.newlo) { + proc->frames.newlo = raddr->addr; + } + } + } + + return _py_frame_fill_from_raddr(raddr); +} + + +// ---------------------------------------------------------------------------- +static inline int +_py_frame__prev(py_thread_t * thread) { + if (_stackp <= 0) + FAIL; + + py_frame_t * self = _stack + _stackp - 1; + if (!isvalid(self) || !isvalid(self->prev_raddr.addr)) { + // Double-check it's the end of the stack if we're using the heap. + _stackp--; + if (fail(_py_frame_fill_from_raddr(&self->raddr)) || !isvalid(self->prev_raddr.addr)) { + FAIL; + } + } raddr_t prev_raddr = { .pid = self->prev_raddr.pid, .addr = self->prev_raddr.addr }; - return _py_frame__fill_from_raddr(self + 1, &prev_raddr); + int result = _py_frame_fill(&prev_raddr, thread); + + if (!_use_heaps) { + return result; + } + + // This sucks! 
:( + py_frame_t * last = self + 1; + for (py_frame_t * f = self; f >= _stack; f--) { + if (last->prev_raddr.addr == f->raddr.addr) { + log_d("Circular frame reference detected"); + last->invalid = TRUE; + FAIL; + } + } + + return result; } // ---------------------------------------------------------------------------- -static inline void +static inline int _py_thread__unwind_frame_stack(py_thread_t * self) { - register size_t i = 0; - while (success(_py_frame__prev(_stack + i)) && i < MAX_STACK_SIZE) { - if (_stack[++i].invalid) { - log_d("Frame number %d is invalid", i); - return; + size_t basep = _stackp; + + if (_use_heaps && fail(_py_thread__read_frames(self))) { + log_ie("Failed to read frames heaps"); + _no_heaps; + FAIL; + } + raddr_t frame_raddr = { .pid = self->raddr.pid, .addr = self->top_frame }; + if (fail(_py_frame_fill(&frame_raddr, self))) { + log_ie("Failed to fill top frame"); + FAIL; + } + + while (success(_py_frame__prev(self))) { + if (_stackp >= MAX_STACK_SIZE) { + log_w("Discarding frame stack: too tall"); + FAIL; } } - if (i >= MAX_STACK_SIZE) - log_w("Frames limit reached. Discarding the rest"); - self->stack_height += i; + + if (_stack[_stackp-1].invalid) { + log_d("Frame number %d is invalid", _stackp - basep); + FAIL; + } + + self->stack_height += _stackp - basep; + + SUCCESS; } +#ifdef NATIVE +// ---------------------------------------------------------------------------- +int +py_thread__set_idle(py_thread_t * self) { + size_t index = self->tid >> 3; + int offset = self->tid & 7; + + if (unlikely(_pthread_tid_offset == 0)) { + FAIL; + } + + unsigned char idle_bit = 1 << offset; + if (_py_thread__is_idle(self)) { + _tids_idle[index] |= idle_bit; + } else { + _tids_idle[index] &= ~idle_bit; + } + + SUCCESS; +} + +// ---------------------------------------------------------------------------- +#define MAX_STACK_FILE_SIZE 2048 +int +py_thread__save_kernel_stack(py_thread_t * self) { + char stack_path[48]; + int fd; + + if (unlikely(_pthread_tid_offset == 0) || !isvalid(_kstacks)) { + FAIL; + } + + sfree(_kstacks[self->tid]); + + sprintf(stack_path, "/proc/%d/task/%ld/stack", self->proc->pid, self->tid); + fd = open(stack_path, O_RDONLY); + if (fd == -1) + FAIL; + + _kstacks[self->tid] = (char *) calloc(1, MAX_STACK_FILE_SIZE); + if (read(fd, _kstacks[self->tid], MAX_STACK_FILE_SIZE) == -1) { + log_e("stack: failed to read %s", stack_path); + close(fd); + FAIL; + } + close(fd); + + SUCCESS; +} + +// ---------------------------------------------------------------------------- +static inline int +_py_thread__unwind_kernel_frame_stack(py_thread_t * self) { + char * line = _kstacks[self->tid]; + if (!isvalid(line)) + SUCCESS; + + log_t("linux: unwinding kernel stack"); + + for (;;) { + char * eol = strchr(line, '\n'); + if (!isvalid(eol)) + break; + *eol = '\0'; + + char * b = strchr(line, ']'); + if (isvalid(b)) { + char * e = strchr(++b, '+'); + if (isvalid(e)) + *e = 0; + strcpy(_stack[_stackp].code.scope, ++b); + strcpy(_stack[_stackp].code.filename, "kernel"); + _stackp++; // TODO: Decide whether to decrement this by 2 before returning. 
+ } + line = eol + 1; + } + + SUCCESS; +} + + +// ---------------------------------------------------------------------------- +static inline int +_py_thread__unwind_native_frame_stack(py_thread_t * self) { + void *context = _tids[self->tid]; + unw_cursor_t cursor; + unw_word_t offset, pc; + + if (unw_init_remote(&cursor, self->proc->unwind.as, context)) + FAIL; + + do { + if (unw_get_reg(&cursor, UNW_REG_IP, &pc)) { + log_e("libunwind: cannot read program counter\n"); + FAIL; + } + + if (unw_get_proc_name(&cursor, _stack[_stackp].code.scope, MAXLEN, &offset) == 0) { + // To retrieve source name and line number we would need to + // - resolve the PC to a map to get the binary path + // - use the offset with the binary to get the line number from DWARF (see + // https://kernel.googlesource.com/pub/scm/linux/kernel/git/hjl/binutils/+/hjl/secondary/binutils/addr2line.c) + _stack[_stackp].code.lineno = offset; + } + else { + strcpy(_stack[_stackp].code.scope, ""); + _stack[_stackp].code.lineno = 0; + } + sprintf(_stack[_stackp].code.filename, "native@%lx", pc); + + _stackp++; + } while (_stackp < MAX_STACK_SIZE && unw_step(&cursor) > 0); + + SUCCESS; +} +#endif + // ---- PUBLIC ---------------------------------------------------------------- // ---------------------------------------------------------------------------- @@ -368,19 +646,15 @@ py_thread__fill_from_raddr(py_thread_t * self, raddr_t * raddr, py_proc_t * proc FAIL; } - if (V_FIELD(void*, ts, py_thread, o_frame) != NULL) { - raddr_t frame_raddr = { .pid = raddr->pid, .addr = V_FIELD(void*, ts, py_thread, o_frame) }; - if (fail(_py_frame__fill_from_raddr(_stack, &frame_raddr))) { - log_d("Failed to fill last frame"); - SUCCESS; - } - self->stack_height = 1; - } + self->proc = proc; self->raddr.pid = raddr->pid; self->raddr.addr = raddr->addr; - self->proc = proc; + + if (isvalid(self->top_frame = V_FIELD(void*, ts, py_thread, o_frame))) { + self->stack_height = 1; + } self->next_raddr.pid = raddr->pid; self->next_raddr.addr = V_FIELD(void*, ts, py_thread, o_next) == raddr->addr \ @@ -412,19 +686,36 @@ py_thread__fill_from_raddr(py_thread_t * self, raddr_t * raddr, py_proc_t * proc _pthread_buffer ))) { self->tid = (uintptr_t) _pthread_buffer[_pthread_tid_offset]; + #ifdef NATIVE + // TODO: If a TID is reused we will never seize it! 
+ if (!isvalid(_tids[self->tid])) { + if (fail(ptrace(PTRACE_SEIZE, self->tid, 0, 0))) { + log_e("ptrace: cannot seize thread %d: %d\n", self->tid, errno); + FAIL; + } + else { + log_d("ptrace: thread %d seized", self->tid); + } + _tids[self->tid] = _UPT_create(self->tid); + if (!isvalid(_tids[self->tid])) { + log_e("libunwind: failed to create context for thread %d", self->tid); + FAIL; + } + } + #endif } } #endif self->invalid = 0; SUCCESS; -} +} /* py_thread__fill_from_raddr */ // ---------------------------------------------------------------------------- int py_thread__next(py_thread_t * self) { - if (!isvalid(self->next_raddr.addr)) + if (self->invalid || !isvalid(self->next_raddr.addr)) FAIL; raddr_t next_raddr = { .pid = self->next_raddr.pid, .addr = self->next_raddr.addr }; @@ -469,25 +760,87 @@ py_thread__print_collapsed_stack(py_thread_t * self, ctime_t time_delta, ssize_t int is_idle = FALSE; if (pargs.full || pargs.sleepless) { + #ifdef NATIVE + size_t index = self->tid >> 3; + int offset = self->tid & 7; + + is_idle = _tids_idle[index] & (1 << offset); + #else is_idle = _py_thread__is_idle(self); - if (!pargs.full && is_idle && pargs.sleepless) + #endif + if (!pargs.full && is_idle && pargs.sleepless) { + #ifdef NATIVE + // If we don't sample the threads stall :( + _stackp = 0; + _py_thread__unwind_native_frame_stack(self); + #endif return; + } + } + + // Reset the frame stack before unwinding + _stackp = 0; + + #ifdef NATIVE + + // We sample the kernel frame stack BEFORE interrupting because otherwise + // we would see the ptrace syscall call stack, which is not very interesting. + // The downside is that the kernel stack might not be in sync with the other + // ones. + if (pargs.kernel) { + _py_thread__unwind_kernel_frame_stack(self); } + if (fail(_py_thread__unwind_native_frame_stack(self))) + return; + + size_t basep = _stackp; + // Update the thread state to improve guarantees that it will be in sync with + // the native stack just collected + py_thread__fill_from_raddr(self, &self->raddr, self->proc); + #endif // Group entries by thread. fprintf(pargs.output_file, SAMPLE_HEAD, self->proc->pid, self->tid); if (self->stack_height) { - _py_thread__unwind_frame_stack(self); + if (fail(_py_thread__unwind_frame_stack(self))) { + fprintf(pargs.output_file, ";:INVALID:"); + stats_count_error(); + } + #ifndef NATIVE // Append frames - register int i = self->stack_height; - while (i > 0) { - py_code_t code = _stack[--i].code; + while (_stackp > 0) { + py_code_t code = _stack[--_stackp].code; fprintf(pargs.output_file, pargs.format, code.filename, code.scope, code.lineno); } + #endif } + #ifdef NATIVE + + register int i = _stackp; + register int j = basep; + + py_code_t * code; + while (j-- > 0) { + if (strstr(_stack[j].code.scope, "PyEval_EvalFrame")) { + code = ((i <= basep) ? 
&(_stack[j].code) : &(_stack[--i].code)); + } + else { + code = &(_stack[j].code); + } + fprintf(pargs.output_file, pargs.format, code->filename, code->scope, code->lineno); + } + if (i != basep) { + log_e("Stack mismatch: left with %d Python frames after interleaving", i - basep); + austin_errno = ETHREADINV; + #ifdef DEBUG + fprintf(pargs.output_file, ";:%ld FRAMES LEFT:", i - basep); + #endif + } + #endif + if (pargs.gc && py_proc__is_gc_collecting(self->proc) == TRUE) { fprintf(pargs.output_file, ";:GC:"); stats_gc_time(time_delta); @@ -508,15 +861,13 @@ py_thread__print_collapsed_stack(py_thread_t * self, ctime_t time_delta, ssize_t // Update sampling stats stats_count_sample(); - if (austin_errno != EOK) - stats_count_error(); stats_check_duration(stopwatch_duration()); } /* py_thread__print_collapsed_stack */ // ---------------------------------------------------------------------------- int -py_thread_allocate_stack(void) { +py_thread_allocate(void) { if (isvalid(_stack)) SUCCESS; @@ -534,16 +885,52 @@ py_thread_allocate_stack(void) { FAIL; #endif + #ifdef NATIVE + size_t max = pid_max(); + _tids = (void **) calloc(max, sizeof(void *)); + if (!isvalid(_tids)) + FAIL; + + _tids_idle = (unsigned char *) calloc(max >> 3, sizeof(unsigned char)); + if (!isvalid(_tids_idle)) + FAIL; + + if (pargs.kernel) { + _kstacks = (char **) calloc(max, sizeof(char *)); + if (!isvalid(_kstacks)) + FAIL; + } + #endif + SUCCESS; } // ---------------------------------------------------------------------------- void -py_thread_free_stack(void) { +py_thread_free(void) { #if defined PL_WIN sfree(_pi_buffer); #endif sfree(_stack); + sfree(_frames.content); + sfree(_frames_heap.content); + + #ifdef NATIVE + pid_t max_pid = pid_max(); + for (pid_t tid = 0; tid < max_pid; tid++) { + if (isvalid(_tids[tid])) { + _UPT_destroy(_tids[tid]); + ptrace(PTRACE_DETACH, tid, 0, 0); + log_d("ptrace: thread %ld detached", tid); + } + if (isvalid(_kstacks) && isvalid(_kstacks[tid])) { + sfree(_kstacks[tid]); + } + } + sfree(_tids); + sfree(_tids_idle); + sfree(_kstacks); + #endif } diff --git a/src/py_thread.h b/src/py_thread.h index c9a3210b..f02013ec 100644 --- a/src/py_thread.h +++ b/src/py_thread.h @@ -41,6 +41,7 @@ typedef struct thread { struct thread * next; size_t stack_height; + void * top_frame; int invalid; } py_thread_t; @@ -80,19 +81,27 @@ py_thread__print_collapsed_stack(py_thread_t *, ctime_t, ssize_t); /** - * Allocate memory for dumping the frame stack. + * Allocate memory for dumping the thread data. * * @return either SUCCESS or FAIL. */ int -py_thread_allocate_stack(void); +py_thread_allocate(void); /** - * Deallocate memory for dumping the frame stack. + * Deallocate memory for dumping the thread data. */ void -py_thread_free_stack(void); +py_thread_free(void); + +#ifdef NATIVE +int +py_thread__set_idle(py_thread_t *); + +int +py_thread__save_kernel_stack(py_thread_t *); +#endif #endif // PY_THREAD_H diff --git a/src/version.h b/src/version.h index 3a39801a..7ee7318b 100644 --- a/src/version.h +++ b/src/version.h @@ -57,7 +57,8 @@ * @return the value of of the field of py_obj at the offset specified * by the field argument. 
*/ -#define V_FIELD(ctype, py_obj, py_type, field) (*((ctype*) (((char *) &py_obj) + py_v->py_type.field))) +#define V_FIELD(ctype, py_obj, py_type, field) (*((ctype*) (((void *) &py_obj) + py_v->py_type.field))) +#define V_FIELD_PTR(ctype, py_obj_ptr, py_type, field) (*((ctype*) (((void *) py_obj_ptr) + py_v->py_type.field))) typedef unsigned long offset_t; diff --git a/src/win/py_proc.h b/src/win/py_proc.h index 0475678e..8a8fc6ac 100644 --- a/src/win/py_proc.h +++ b/src/win/py_proc.h @@ -40,6 +40,7 @@ struct _proc_extra_info { HANDLE h_proc; + HANDLE h_reader_thread; }; @@ -186,4 +187,93 @@ _py_proc__init(py_proc_t * self) { return _py_proc__get_modules(self); } + +// ---------------------------------------------------------------------------- +// The default stream buffer size should be 4KB, so this chunk size should be +// enough to avoid blocking while keeping the number of reads to a minimum. +#define STDOUT_CHUNK_SIZE (1 << 10) + +DWORD WINAPI +reader_thread(LPVOID lpParam) { + char buffer[STDOUT_CHUNK_SIZE]; + while (ReadFile(lpParam, &buffer, STDOUT_CHUNK_SIZE, NULL, NULL)); + return 0; +} + + +// ---------------------------------------------------------------------------- +// Forward declaration. +static int +_py_proc__run(py_proc_t *, int); + + +// On Windows, if we fail with the parent process we look if it has a single +// child and try to attach to that instead. We keep going until we either find +// a single Python process or more or less than a single child. +static int +_py_proc__try_child_proc(py_proc_t * self) { + log_d("Process is not Python so we look for a single child Python process"); + + HANDLE h = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0); + if (h == INVALID_HANDLE_VALUE) { + log_e("Cannot inspect processes details"); + FAIL; + } + +with_resources; + + HANDLE orig_hproc = self->extra->h_proc; + pid_t orig_pid = self->pid; + while (TRUE) { + pid_t parent_pid = self->pid; + + PROCESSENTRY32 pe = { 0 }; + pe.dwSize = sizeof(PROCESSENTRY32); + + if (Process32First(h, &pe)) { + pid_t child_pid = 0; + do { + if (pe.th32ParentProcessID == parent_pid) { + if (child_pid) { + log_d("Process has more than one child"); + NOK; + } + child_pid = pe.th32ProcessID; + } + } while (Process32Next(h, &pe)); + + if (!child_pid) { + log_d("Process has no children"); + NOK; + } + + self->pid = child_pid; + self->extra->h_proc = OpenProcess( + PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, FALSE, child_pid + ); + if (self->extra->h_proc == INVALID_HANDLE_VALUE) { + log_e("Cannot open child process handle"); + NOK; + } + if (success(_py_proc__run(self, FALSE))) { + log_d("Process has a single Python child with PID %d. We will attach to that", child_pid); + OK; + } + else { + log_d("Process had a single non-Python child with PID %d. 
Taking it as new parent", child_pid); + CloseHandle(self->extra->h_proc); + } + } + } + +release: + CloseHandle(h); + if (retval) { + self->pid = orig_pid; + self->extra->h_proc = orig_hproc; + } + + released; +} + #endif diff --git a/test/macos/test_attach.bats b/test/macos/test_attach.bats index fdd1292c..c2801497 100644 --- a/test/macos/test_attach.bats +++ b/test/macos/test_attach.bats @@ -52,11 +52,18 @@ function attach_austin { attach_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 attach_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 attach_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 attach_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 attach_austin "/usr/local/anaconda3/bin/python" diff --git a/test/macos/test_fork.bats b/test/macos/test_fork.bats index 457a2ac1..86dcae35 100644 --- a/test/macos/test_fork.bats +++ b/test/macos/test_fork.bats @@ -75,11 +75,18 @@ teardown() { repeat 3 invoke_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 invoke_austin "/usr/local/anaconda3/bin/python" diff --git a/test/macos/test_fork_mp.bats b/test/macos/test_fork_mp.bats index caea118c..072160d7 100644 --- a/test/macos/test_fork_mp.bats +++ b/test/macos/test_fork_mp.bats @@ -55,11 +55,18 @@ function invoke_austin { repeat 3 invoke_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 invoke_austin "/usr/local/anaconda3/bin/python" diff --git a/test/macos/test_pipe.bats b/test/macos/test_pipe.bats index b31d51dc..9b04b0e2 100644 --- a/test/macos/test_pipe.bats +++ b/test/macos/test_pipe.bats @@ -74,11 +74,18 @@ function invoke_austin { repeat 3 invoke_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 invoke_austin "/usr/local/anaconda3/bin/python" diff --git 
a/test/macos/test_sleepless.bats b/test/macos/test_sleepless.bats index e431750c..42eb6664 100644 --- a/test/macos/test_sleepless.bats +++ b/test/macos/test_sleepless.bats @@ -49,11 +49,18 @@ function invoke_austin { repeat 3 invoke_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 invoke_austin "/usr/local/anaconda3/bin/python" diff --git a/test/macos/test_valgrind.bats b/test/macos/test_valgrind.bats index 75fe80f5..88927309 100644 --- a/test/macos/test_valgrind.bats +++ b/test/macos/test_valgrind.bats @@ -62,11 +62,18 @@ function invoke_austin { repeat 3 invoke_austin "/usr/local/bin/python3" } -@test "Test Austin with Python 3.8 from Homebrew (if available)" { - ignore +@test "Test Austin with Python 3.8 from Homebrew" { repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3" } +@test "Test Austin with Python 3.9 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3" +} + +@test "Test Austin with Python 3.10 from Homebrew" { + repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3" +} + @test "Test Austin with Python 3 from Anaconda (if available)" { ignore repeat 3 invoke_austin "/usr/local/anaconda3/bin/python" diff --git a/utils/resolve.py b/utils/resolve.py new file mode 100644 index 00000000..cf90d55f --- /dev/null +++ b/utils/resolve.py @@ -0,0 +1,131 @@ +import os +import sys +import typing as t +from subprocess import check_output + + +def demangle_cython(function: str) -> str: + if function.startswith("__pyx_pymod_"): + _, _, function = function[12:].partition("_") + return function + + if function.startswith("__pyx_fuse_"): + function = function[function[12:].index("__pyx_") + 12 :] + for i, d in enumerate(function): + if d.isdigit(): + break + else: + raise ValueError(f"Invalid Cython mangled name: {function}") + + if function.startswith("__pyx_pf_"): + function = function[: function.rindex(".isra.")] + + n = 0 + while i < len(function): + c = function[i] + i += 1 + if c.isdigit(): + n = n * 10 + int(c) + else: + i += n + n = 0 + if not function[i].isdigit(): + return function[i:] + + return function + + +class Maps: + def __init__(self): + # TODO: Use an interval tree instead! 
+ self.maps: t.List[t.Tuple[int, int, str]] = [] + self.bases = {} + self.cache = {} + + def addr2line(self, address: str) -> t.Optional[t.Tuple[str, t.Optional[str]]]: + if address in self.cache: + return self.cache[address] + + addr = int(address, 16) + for lo, hi, binary in self.maps: + if lo <= addr <= hi: + break + else: + self.cache[address] = None + return None + + resolved, _, _ = ( + check_output(["addr2line", "-Ce", binary, f"{addr-self.bases[binary]:x}"]) + .decode() + .strip() + .partition(" ") + ) + if resolved.startswith("??"): + # self.cache[address] = (f"{binary}@{addr-self.bases[binary]:x}", None) + self.cache[address] = (f"{binary}", addr - self.bases[binary]) + return self.cache[address] + + self.cache[address] = tuple(resolved.split(":", maxsplit=1)) + return self.cache[address] + + def add(self, line: str) -> None: + bounds, _, binary = line[7:].strip().partition(" ") + low, _, high = bounds.partition("-") + lo = int(low, 16) + hi = int(high, 16) + self.maps.append((lo, hi, binary)) + if binary in self.bases: + self.bases[binary] = min(self.bases[binary], lo) + else: + self.bases[binary] = lo + + def resolve(self, line: str) -> str: + parts = [] + frames, _, metrics = line.strip().rpartition(" ") + for part in frames.split(";"): + if part.startswith("native@"): + head, function, lineno = part.split(":") + if function.startswith("__pyx_pw_"): + # skip Cython wrappers (cpdef) + continue + if function.startswith("__pyx_"): + function = demangle_cython(function) + elif function.startswith("_Z"): + function = demangle_cpp(function) + _, _, address = head.partition("@") + resolved = self.addr2line(address) + if resolved is None: + parts.append(":".join((head, function, lineno))) + else: + source, native_lineno = resolved + parts.append(f"{source}:{function}:{native_lineno or lineno}") + else: + parts.append(part) + + return " ".join((";".join(parts), metrics)) + + +def main(): + try: + stats = sys.argv[1] + assert os.path.isfile(stats) + except IndexError: + print("Usage: python resolve.py ", file=sys.stderr) + sys.exit(1) + except AssertionError: + print("Austin file does not exist", file=sys.stderr) + sys.exit(1) + + maps = Maps() + with open(stats) as s: + for line in s: + if line.startswith("# map: "): + maps.add(line) + elif line.startswith("# ") or line == "\n": + print(line, end="") + else: + print(maps.resolve(line)) + + +if __name__ == "__main__": + main()
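
utils/resolve.py above calls a demangle_cpp() helper for symbols starting with "_Z", but no such helper appears in this diff. A minimal sketch of one way it could be written, assuming binutils' c++filt is installed on PATH (the c++filt approach is an assumption; only the helper's name comes from the call site):

    from subprocess import check_output

    def demangle_cpp(function: str) -> str:
        # c++filt prints the demangled C++ name and echoes its input unchanged
        # when it is not a mangled symbol, so it is safe on any "_Z..." string.
        return check_output(["c++filt", function]).decode().strip()

As main() above shows, the script is then run with the path to an Austin output file as its only argument and writes the resolved samples to standard output.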
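In src/py_thread.c, the NATIVE branch of py_thread__print_collapsed_stack walks the native stack and substitutes the next Python frame whenever a native frame's scope contains "PyEval_EvalFrame", so evaluator frames are replaced by the Python code they were executing. A self-contained sketch of that merging idea, with simplified bookkeeping and invented frame strings (not real Austin output):

    # Illustration of the frame-interleaving idea used in the NATIVE build.
    def interleave(native_root_first, python_root_first):
        py = iter(python_root_first)
        merged = []
        for frame in native_root_first:
            if "PyEval_EvalFrame" in frame:
                # Swap in the next Python frame; keep the native frame if the
                # Python frames have run out.
                merged.append(next(py, frame))
            else:
                merged.append(frame)
        return merged

    native = [
        "native@1000:__libc_start_main:0",
        "native@2000:_PyEval_EvalFrameDefault:0",
        "native@3000:_PyEval_EvalFrameDefault:0",
        "native@4000:time_sleep:0",
    ]
    python = ["foo.py:main:7", "foo.py:wait:3"]
    print(";".join(interleave(native, python)))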
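py_thread__save_kernel_stack and _py_thread__unwind_kernel_frame_stack above read /proc/&lt;pid&gt;/task/&lt;tid&gt;/stack and keep only the symbol between the closing bracket and the "+offset/size" suffix, reporting "kernel" as the filename. The same parsing restated in Python against a made-up sample in the usual format of that file:

    # Mirrors the string handling in _py_thread__unwind_kernel_frame_stack.
    SAMPLE = """\
    [<0>] futex_wait_queue_me+0xc4/0x120
    [<0>] futex_wait+0x10a/0x250
    [<0>] do_futex+0x33c/0x510
    """

    def kernel_frames(stack_text):
        frames = []
        for line in stack_text.splitlines():
            _, bracket, rest = line.partition("]")
            if not bracket:
                continue
            scope = rest.strip().partition("+")[0]
            if scope:
                frames.append(("kernel", scope))
        return frames

    print(kernel_frames(SAMPLE))
    # [('kernel', 'futex_wait_queue_me'), ('kernel', 'futex_wait'), ('kernel', 'do_futex')]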
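py_thread__set_idle above packs one idle flag per TID into the _tids_idle byte array, using byte index tid >> 3 and bit position tid & 7, and py_thread__print_collapsed_stack tests the same bit to honour the full and sleepless options. The bit bookkeeping restated in Python for clarity (the PID limit is an illustrative value; the C code obtains it from pid_max()):

    PID_MAX = 32768                      # illustrative; one bit per possible TID
    idle = bytearray((PID_MAX >> 3) + 1)  # eight TIDs per byte, rounded up

    def set_idle(tid, is_idle):
        if is_idle:
            idle[tid >> 3] |= 1 << (tid & 7)
        else:
            idle[tid >> 3] &= ~(1 << (tid & 7))

    def is_idle(tid):
        return bool(idle[tid >> 3] & (1 << (tid & 7)))

    set_idle(1234, True)
    assert is_idle(1234) and not is_idle(1235)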
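In src/py_proc_list.c the per-platform PID-limit lookup is replaced by a single pid_max() call; the helper itself is not shown in this part of the diff. On Linux the removed branch read the limit straight from procfs, and the same check is handy for confirming the value the profiler will see on a given machine (Linux only):

    # Same source the removed Linux branch used for the maximum PID value.
    with open("/proc/sys/kernel/pid_max") as f:
        print(int(f.read().strip()))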