diff --git a/.github/workflows/dev_release.yml b/.github/workflows/dev_release.yml
new file mode 100644
index 00000000..c385861e
--- /dev/null
+++ b/.github/workflows/dev_release.yml
@@ -0,0 +1,145 @@
+name: Development release
+on:
+ push:
+ tags:
+ - 'dev'
+jobs:
+ release-linux:
+ runs-on: ubuntu-latest
+ strategy:
+ fail-fast: false
+ name: Release (Linux)
+ steps:
+ - uses: actions/checkout@v2
+ name: Checkout Austin
+
+ - name: Generate artifacts
+ run: |
+ sudo apt-get update
+ sudo apt-get -y install autoconf build-essential libunwind-dev musl-tools
+
+ # Build austin
+ autoreconf --install
+ ./configure
+ make
+
+ # Compute dev version
+ export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+ export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD)
+ sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h
+
+ # Build austinp
+ gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a
+
+ pushd src
+ tar -Jcf austin-$VERSION-gnu-linux-amd64.tar.xz austin
+ tar -Jcf austinp-$VERSION-gnu-linux-amd64.tar.xz austinp
+ popd
+
+ # Build with musl
+ musl-gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austin -D__MUSL__
+ pushd src
+ tar -Jcf austin-$VERSION-musl-linux-amd64.tar.xz austin
+ popd
+
+ - name: Upload artifacts to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: src/austin*xz
+ tag: ${{ github.ref }}
+ overwrite: true
+ prerelease: true
+ release_name: Development build
+ file_glob: true
+
+ release-win:
+ runs-on: windows-latest
+ strategy:
+ fail-fast: false
+ name: Release (Windows)
+ steps:
+ - uses: actions/checkout@v2
+ name: Checkout Austin
+ with:
+ fetch-depth: 0
+
+ - name: Generate artifacts
+ shell: bash
+ run: |
+ echo "C:\Program Files (x86)\WiX Toolset v3.11\bin" >> $GITHUB_PATH
+ export PATH="/c/Program Files (x86)/`ls /c/Program\ Files\ \(x86\) | grep \"[wW]i[xX] [tT]oolset\"`/bin:$PATH"
+
+ # Compute dev version
+ export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+ export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')
+ export VERSION_DEV=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD)
+ sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h
+
+ gcc -s -Wall -O3 -Os -o src/austin src/*.c -lpsapi -lntdll
+
+ git checkout HEAD -- src/austin.h
+
+ git checkout "packaging/msi"
+ git checkout master
+ git checkout "packaging/msi" -- wix
+
+ export WIN_MSI="austin-$VERSION_DEV-win64.msi"
+
+ sed -i "s/%VERSION%/$VERSION/g" wix/Austin.wxs
+ pushd wix
+ candle Austin.wxs -out Austin.wixobj
+ light -ext WixUIExtension Austin.wixobj -out $WIN_MSI
+ popd
+
+ mv wix/$WIN_MSI src/$WIN_MSI;
+ test -f src/$WIN_MSI && echo ">> Windows MSI installer at src/$WIN_MSI" || echo ">> ERROR No Windows MSI installer generated."
+
+ pushd src
+ 7z a -tzip austin-${VERSION_DEV}-win64.zip austin.exe
+ popd
+
+ - name: Upload artifacts to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: src/austin-*
+ tag: ${{ github.ref }}
+ overwrite: true
+ prerelease: true
+ release_name: Development build
+ file_glob: true
+
+ release-osx:
+ runs-on: macos-latest
+ strategy:
+ fail-fast: false
+ name: Release (macOS)
+ steps:
+ - uses: actions/checkout@v2
+ name: Checkout Austin
+
+ - name: Generate artifacts
+ run: |
+ # Compute dev version
+ export PREV_VERSION=$(cat src/austin.h | sed -n -E "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+ export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD)
+ sed -i "" "s/$PREV_VERSION/$VERSION/g" src/austin.h
+ echo "::set-output name=version::$VERSION"
+
+ gcc -Wall -O3 -Os -o src/austin src/*.c
+
+ pushd src
+ zip -r austin-${VERSION}-mac64.zip austin
+ popd
+
+ - name: Upload artifacts to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: src/austin-*
+ tag: ${{ github.ref }}
+ overwrite: true
+ prerelease: true
+ release_name: Development build
+ file_glob: true
\ No newline at end of file
diff --git a/.github/workflows/dev_release_arch.yml b/.github/workflows/dev_release_arch.yml
new file mode 100644
index 00000000..d6e7a49f
--- /dev/null
+++ b/.github/workflows/dev_release_arch.yml
@@ -0,0 +1,71 @@
+name: Development release (Linux archs)
+on:
+ push:
+ tags:
+ - 'dev'
+jobs:
+ release-linux-archs:
+ runs-on: ubuntu-latest
+ strategy:
+ matrix:
+ arch: ["armv7", "aarch64", "ppc64le"]
+ fail-fast: false
+ name: Build on ${{ matrix.arch }}
+ steps:
+ - uses: actions/checkout@v2
+ name: Checkout sources
+ - uses: uraimo/run-on-arch-action@v2.0.5
+ name: Generate artifacts on ${{ matrix.arch }}
+ id: run-tests-on-arch
+ with:
+ arch: ${{ matrix.arch }}
+ distro: ubuntu20.04
+ githubToken: ${{ github.token }}
+ dockerRunArgs: --volume "${GITHUB_WORKSPACE}/artifacts:/artifacts"
+ setup: |
+ mkdir -p ./artifacts
+
+ # Compute dev version
+ export PREV_VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+ export VERSION=$(echo $PREV_VERSION | awk -F. '{A=NF-1; $A = $A + 1; $NF=0} 1' | sed 's/ /./g')-dev+$(git rev-parse --short HEAD)
+ sed -i "s/$PREV_VERSION/$VERSION/g" src/austin.h
+ run: |
+ apt-get update
+ apt-get -y install autoconf build-essential libunwind-dev musl-tools
+
+ # Build austin
+ autoreconf --install
+ ./configure
+ make
+
+ export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+
+ # Build austinp
+ gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a
+
+ pushd src
+ tar -Jcf austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austin
+ tar -Jcf austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austinp
+
+ musl-gcc -O3 -Os -s -Wall -pthread *.c -o austin -D__MUSL__
+ tar -Jcf austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz austin
+
+ mv austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts
+ mv austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts
+ mv austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz /artifacts
+ popd
+
+ - name: Show artifacts
+ run: |
+ ls -al ./artifacts
+
+ - name: Upload binaries to release
+ uses: svenstaro/upload-release-action@v2
+ with:
+ repo_token: ${{ secrets.GITHUB_TOKEN }}
+ file: artifacts/austin*
+ tag: ${{ github.ref }}
+ overwrite: true
+ prerelease: true
+ release_name: Development build
+ file_glob: true
\ No newline at end of file
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 9aba86da..7a48562e 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -16,16 +16,27 @@ jobs:
- name: Generate artifacts
run: |
sudo apt-get update
- sudo apt-get -y install autoconf build-essential
+ sudo apt-get -y install autoconf build-essential libunwind-dev musl-tools
+ # Build austin
autoreconf --install
./configure
make
export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p");
+ # Build austinp
+ gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a
+
+ pushd src
+ tar -Jcf austin-$VERSION-gnu-linux-amd64.tar.xz austin
+ tar -Jcf austinp-$VERSION-gnu-linux-amd64.tar.xz austinp
+ popd
+
+ # Build with musl
+ musl-gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austin -D__MUSL__
pushd src
- tar -Jcf austin-$VERSION-linux-amd64.tar.xz austin
+ tar -Jcf austin-$VERSION-musl-linux-amd64.tar.xz austin
popd
- name: Upload artifacts to release
diff --git a/.github/workflows/release_arch.yml b/.github/workflows/release_arch.yml
index eca8d120..832116cd 100644
--- a/.github/workflows/release_arch.yml
+++ b/.github/workflows/release_arch.yml
@@ -26,17 +26,28 @@ jobs:
mkdir -p ./artifacts
run: |
apt-get update
- apt-get -y install autoconf build-essential
+ apt-get -y install autoconf build-essential libunwind-dev musl-tools
+ # Build austin
autoreconf --install
./configure
make
-
- export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p");
+
+ export VERSION=$(cat src/austin.h | sed -r -n "s/.*VERSION[ ]+\"(.+)\"/\1/p")
+
+ # Build austinp
+ gcc -O3 -Os -s -Wall -pthread src/*.c -o src/austinp -DAUSTINP -l:libunwind-ptrace.a -l:liblzma.a -l:libunwind-generic.a -l:libunwind.a
pushd src
- tar -Jcf austin-$VERSION-linux-${{ matrix.arch }}.tar.xz austin
- mv austin-$VERSION-linux-${{ matrix.arch }}.tar.xz /artifacts
+ tar -Jcf austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austin
+ tar -Jcf austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz austinp
+
+ musl-gcc -O3 -Os -s -Wall -pthread *.c -o austin -D__MUSL__
+ tar -Jcf austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz austin
+
+ mv austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts
+ mv austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts
+ mv austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz /artifacts
popd
- name: Show artifacts
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index cad45831..ee31e2b4 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -41,11 +41,13 @@ jobs:
- name: Install test dependencies
run: |
+ brew update
brew install python || brew upgrade python
brew install python@3.8 || true
brew install python@3.9 || true
+ brew install python@3.10 || true
brew install bats-core || true
- brew cask install anaconda || true
+ brew install --cask anaconda || true
- name: Run tests
run: sudo bats test/macos/test.bats
diff --git a/.gitignore b/.gitignore
index 7df6e943..a65859ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,6 +36,7 @@ stamp-h1
*.gcda
src/austin
+src/austinp
src/austin.exe
diff --git a/ChangeLog b/ChangeLog
index 159b17ef..1e8cd11d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2021-xx-xx v3.2.0
+
+ Improved detection of invalid samples
+
+ Added support for Python launchers on Windows
+
+ Improved Python version detection on Linux
+
+  Fixed support for older versions of glibc on Linux
+
+
2021-08-18 v3.1.0
Added garbage collection state sampling for Python 3.7 onward.
diff --git a/README.md b/README.md
index 4b143ee2..ec24b7c4 100644
--- a/README.md
+++ b/README.md
@@ -63,6 +63,7 @@
Synopsis •
Installation •
Usage •
+ Cheat sheet •
Compatibility •
Why Austin •
Examples •
@@ -115,10 +116,12 @@ The key features of Austin are:
- Time and memory profiling;
- Built-in support for multi-process applications (e.g. `mod_wsgi`).
-The simplest way to turn Austin into a full-fledged profiler is to combine it
-with [FlameGraph] or [Speedscope]. However, Austin's simple output format can be
-piped into any other external or custom tool for further processing. Look, for
-instance, at the following Python TUI
+The simplest way to turn Austin into a full-fledged profiler is to use together
+with the [VS
+Code](https://marketplace.visualstudio.com/items?itemName=p403n1x87.austin-vscode)
+extension or combine it with [FlameGraph] or [Speedscope]. However, Austin's
+simple output format can be piped into any other external or custom tool for
+further processing. Look, for instance, at the following Python TUI
@@ -163,7 +166,7 @@ library. Before proceding with the steps below, make sure that the `autotools`
are installed on your system. Refer to your distro's documentation for details
on how to do so.
-~~~ bash
+~~~ console
git clone --depth=1 https://github.com/P403n1x87/austin.git && cd austin
autoreconf --install
./configure
@@ -183,7 +186,7 @@ Austin can be installed on [many major Linux
distributions](https://snapcraft.io/docs/installing-snapd) from the Snap Store
with the following command
-~~~ bash
+~~~ console
sudo snap install austin --classic
~~~
@@ -198,7 +201,7 @@ can therefore be installed with the `apt` utility.
Austin can be installed on macOS using [Homebrew](https://docs.brew.sh):
-~~~bash
+~~~ console
brew install austin
~~~
@@ -208,13 +211,13 @@ brew install austin
To install [Austin from Chocolatey](https://chocolatey.org/packages/austin), run
the following command from the command line or from PowerShell
-~~~ shell
+~~~ console
choco install austin
~~~
To upgrade run the following command from the command line or from PowerShell:
-~~~ shell
+~~~ console
choco upgrade austin
~~~
@@ -224,13 +227,13 @@ choco upgrade austin
To install Austin using Scoop, run the following command from the command line
or from PowerShell
-~~~ shell
+~~~ console
scoop install austin
~~~
To upgrade run the following command from the command line or from PowerShell:
-~~~ shell
+~~~ console
scoop update
~~~
@@ -240,7 +243,7 @@ scoop update
Anaconda users on Linux and macOS can install Austin from [Conda Forge] with the
command
-~~~ bash
+~~~ console
conda install -c conda-forge austin
~~~
@@ -249,25 +252,25 @@ conda install -c conda-forge austin
To install Austin from sources using the GNU C compiler, without `autotools`,
clone the repository with
-~~~ bash
+~~~ console
git clone --depth=1 https://github.com/P403n1x87/austin.git
~~~
On Linux one can then use the command
-~~~ bash
+~~~ console
gcc -O3 -Os -Wall -pthread src/*.c -o src/austin
~~~
whereas on macOS it is enough to run
-~~~ bash
+~~~ console
gcc -O3 -Os -Wall src/*.c -o src/austin
~~~
On Windows, the `-lpsapi -lntdll` switches are needed
-~~~ bash
+~~~ console
gcc -O3 -Os -Wall -lpsapi -lntdll src/*.c -o src/austin
~~~
@@ -287,6 +290,8 @@ Austin -- A frame stack sampler for Python.
stacks.
-f, --full Produce the full set of metrics (time +mem -mem).
-g, --gc Sample the garbage collector state.
+ -h, --heap=n_mb Maximum heap size to allocate to increase sampling
+ accuracy, in MB (default is 256).
-i, --interval=n_us Sampling interval in microseconds (default is
100). Accepted units: s, ms, us.
-m, --memory Profile memory usage.
@@ -379,6 +384,63 @@ garbage collector is in the collecting state. This gives you a measure of how
*Since Austin 3.1.0*.
+## Sampling Accuracy
+
+Austin tries to keep perturbations to the tracee at a minimum. In order to do
+so, the tracee is never halted. To improve sampling accuracy, Austin allocates a
+heap that is used to get large snapshots of the private VM of the tracee that is
+likely to contain frame information in a single attempt. The larger the heap is
+allowed the grow, the more accurate the results. The maximum size of the heap
+that Austin is allowed to allocate can be controlled with the `-h/--heap`
+option, followed by the maximum size in bytes. By default Austin allocates a
+maximum of 256 MB. On systems with low resource limits, it is advisable to
+reduce this value.
+
+*Since Austin 3.2.0*.
+
+
+## Native Frame Stack
+
+If you want observability into the native frame stacks, you can use the
+`austinp` variant of `austin` which can be obtained by compiling the source
+with `-DAUSTINP` on Linux, or from the released binaries.
+
+`austinp` makes use of `ptrace` to halt the application and grab a
+snapshot of the call stack with `libunwind`. If you are compiling `austinp` from
+sources make sure that you have the development version of the `libunwind`
+library available on your system, for example on Ubuntu,
+
+~~~ console
+sudo apt install libunwind-dev
+~~~
+
+and compile with
+
+~~~ console
+gcc -O3 -Os -Wall -pthread src/*.c -DAUSTINP -lunwind-ptrace -lunwind-generic -o src/austinp
+~~~
+
+then use as per normal. The extra `-k/--kernel` option is available with
+`austinp` which allows sampling kernel call stacks as well.
+
+> **WARNING** Since `austinp` uses `ptrace`, the impact on the tracee is no
+> longer minimal and it becomes higher at smaller sampling intervals. Therefore
+> the use of `austinp` is not recommended in production environments. For this
+> reason, the default sampling interval for `austinp` is 10 milliseconds.
+
+The `utils` folder has the script `resolve.py` that can be used to resolve the
+VM addresses to source and line numbers, provided that the referenced binaries
+have DWARF debug symbols. To resolve the references, assuming you have collected
+the samples in `mysamples.austin`, do
+
+~~~
+python3 utils/resolve.py mysamples.austin > mysamples_resolved.austin
+~~~
+
+Internally, the script uses `addr2line(1)` to determine source and line number
+given an address, when possible.
+
+
## Logging
Austin uses `syslog` on Linux and macOS, and `%TEMP%\austin.log` on Windows
@@ -388,6 +450,17 @@ entries for bad frames will not be visible in a flame graph as all tests show
error rates below 1% on average.
+## Cheat sheet
+
+All the above Austin options and arguments are summarised in a cheat sheet that
+you can find in the [art](https://github.com/P403n1x87/austin/blob/master/art/)
+folder in either the SVG or PNG format.
+
+
+
+
+
# Compatibility
Austin supports Python 2.3-2.7 and 3.3-3.10 and has been tested on the
@@ -405,7 +478,7 @@ capability. This means that you will have to either use ``sudo`` when attaching
to a running Python process or grant the CAP_SYS_PTRACE capability to the Austin
binary with, e.g.
-~~~ bash
+~~~ console
sudo setcap cap_sys_ptrace+ep `which austin`
~~~
@@ -465,7 +538,7 @@ is written in C, implementing the new changes is rather straight-forward.
The following flame graph has been obtained with the command
-~~~ bash
+~~~ console
austin -i 1ms ./test.py | sed '/^#/d' | ./flamegraph.pl --countname=Ξs > test.svg
~~~
@@ -488,7 +561,7 @@ for i in range(1000):
To profile Apache2 WSGI application, one can attach Austin to the web server
with
-~~~ bash
+~~~ console
austin -Cp `pgrep apache2 | head -n 1`
~~~
@@ -519,13 +592,13 @@ or convert it to the [pprof] format.
If you want to give it a go you can install it using `pip` with
-~~~ bash
+~~~ console
pip install austin-tui --upgrade
~~~
and run it with
-~~~ bash
+~~~ console
austin-tui [OPTION...] command [ARG...]
~~~
@@ -557,13 +630,13 @@ be used for _remote_ profiling by setting the `--host` and `--port` options.
If you want to give it a go you can install it using `pip` with
-~~~ bash
+~~~ console
pip install austin-web --upgrade
~~~
and run it with
-~~~ bash
+~~~ console
austin-web [OPTION...] command [ARG...]
~~~
@@ -588,13 +661,13 @@ Austin to the Speedscope JSON format.
If you want to give it a go you can install it using `pip` with
-~~~ bash
+~~~ console
pip install austin-python --upgrade
~~~
and run it with
-~~~ bash
+~~~ console
austin2speedscope [-h] [--indent INDENT] [-V] input output
~~~
@@ -613,13 +686,13 @@ Austin's format can also be converted to the Google pprof format using the
`austin2pprof` utility that comes with [`austin-python`]. If you want to give it
a go you can install it using `pip` with
-~~~ bash
+~~~ console
pip install austin-python --upgrade
~~~
and run it with
-~~~ bash
+~~~ console
austin2pprof [-h] [-V] input output
~~~
diff --git a/art/austin-tui.gif b/art/austin-tui.gif
index 955bd574..3e8ca8d2 100644
Binary files a/art/austin-tui.gif and b/art/austin-tui.gif differ
diff --git a/art/austin-tui.png b/art/austin-tui.png
index baac2672..452b0d27 100644
Binary files a/art/austin-tui.png and b/art/austin-tui.png differ
diff --git a/art/cheatsheet.png b/art/cheatsheet.png
new file mode 100644
index 00000000..11591fb7
Binary files /dev/null and b/art/cheatsheet.png differ
diff --git a/art/cheatsheet.svg b/art/cheatsheet.svg
new file mode 100644
index 00000000..1d9adc49
--- /dev/null
+++ b/art/cheatsheet.svg
@@ -0,0 +1,7392 @@
+
+
+
+
diff --git a/art/vscode-demo.gif b/art/vscode-demo.gif
index 490d8077..9f98726e 100644
Binary files a/art/vscode-demo.gif and b/art/vscode-demo.gif differ
diff --git a/configure.ac b/configure.ac
index 26e44bee..137ebbaf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.
AC_PREREQ([2.69])
-AC_INIT([austin], [3.1.0], [https://github.com/p403n1x87/austin/issues])
+AC_INIT([austin], [3.2.0], [https://github.com/p403n1x87/austin/issues])
AC_CONFIG_SRCDIR([config.h.in])
AC_CONFIG_HEADERS([config.h])
AM_INIT_AUTOMAKE
diff --git a/snap/snapcraft.yaml b/snap/snapcraft.yaml
index c896ec23..9052143a 100644
--- a/snap/snapcraft.yaml
+++ b/snap/snapcraft.yaml
@@ -1,5 +1,5 @@
name: austin
-version: '3.1.0+git'
+version: '3.2.0+git'
summary: A Python frame stack sampler for CPython
description: |
Austin is a Python frame stack sampler for CPython written in pure C. It
diff --git a/src/Makefile.am b/src/Makefile.am
index 4a5bb33b..6d8de8b7 100644
--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -32,6 +32,7 @@ austin_SOURCES = \
logging.c \
version.c \
stats.c \
+ platform.c \
py_proc_list.c \
py_proc.c \
py_thread.c
diff --git a/src/argparse.c b/src/argparse.c
index 24fcb5eb..c5eb13ad 100644
--- a/src/argparse.c
+++ b/src/argparse.c
@@ -29,9 +29,17 @@
#include "hints.h"
#include "platform.h"
+#if defined PL_LINUX && !defined __MUSL__
+#define GNU_ARGP
+#endif
+#ifdef NATIVE
+#define DEFAULT_SAMPLING_INTERVAL 10000 // reduces impact on tracee
+#else
#define DEFAULT_SAMPLING_INTERVAL 100
+#endif
#define DEFAULT_INIT_RETRY_CNT 100
+#define DEFAULT_HEAP_SIZE 256
const char SAMPLE_FORMAT_NORMAL[] = ";%s:%s:%d";
const char SAMPLE_FORMAT_ALTERNATIVE[] = ";%s:%s;L%d";
@@ -53,6 +61,10 @@ parsed_args_t pargs = {
/* exposure */ 0,
/* pipe */ 0,
/* gc */ 0,
+ /* heap */ DEFAULT_HEAP_SIZE,
+ #ifdef NATIVE
+ /* kernel */ 0,
+ #endif
};
static int exec_arg = 0;
@@ -144,7 +156,7 @@ parse_timeout(char * str, long * num) {
// ---- GNU C -----------------------------------------------------------------
-#ifdef PL_LINUX /* LINUX */
+#ifdef GNU_ARGP /* LINUX */
#include
@@ -219,10 +231,22 @@ static struct argp_option options[] = {
"Pipe mode. Use when piping Austin output."
},
{
- "gc", 'g', NULL, 0,
+ "gc", 'g', NULL, 0,
"Sample the garbage collector state."
},
- #ifndef PL_LINUX
+ {
+ "heap", 'h', "n_mb", 0,
+ "Maximum heap size to allocate to increase sampling accuracy, in MB "
+ "(default is 256)."
+ },
+
+ #ifdef NATIVE
+ {
+ "kernel", 'k', NULL, 0,
+ "Sample the kernel call stack."
+ },
+ #endif
+ #ifndef GNU_ARGP
{
"help", '?', NULL
},
@@ -237,7 +261,7 @@ static struct argp_option options[] = {
};
-#ifdef PL_LINUX
+#ifdef GNU_ARGP
// ----------------------------------------------------------------------------
static int
@@ -331,6 +355,20 @@ parse_opt (int key, char *arg, struct argp_state *state)
pargs.gc = 1;
break;
+ case 'h':
+ if (
+ fail(str_to_num(arg, (long *) &(pargs.heap))) ||
+ pargs.heap > LONG_MAX
+ )
+ argp_error(state, "the heap size must be a positive integer");
+ break;
+
+ #ifdef NATIVE
+ case 'k':
+ pargs.kernel = 1;
+ break;
+ #endif
+
case ARGP_KEY_ARG:
case ARGP_KEY_END:
if (pargs.attach_pid != 0 && exec_arg != 0)
@@ -474,6 +512,8 @@ static const char * help_msg = \
" stacks.\n"
" -f, --full Produce the full set of metrics (time +mem -mem).\n"
" -g, --gc Sample the garbage collector state.\n"
+" -h, --heap=n_mb Maximum heap size to allocate to increase sampling\n"
+" accuracy, in MB (default is 256).\n"
" -i, --interval=n_us Sampling interval in microseconds (default is\n"
" 100). Accepted units: s, ms, us.\n"
" -m, --memory Profile memory usage.\n"
@@ -495,11 +535,11 @@ static const char * help_msg = \
"Report bugs to .\n";
static const char * usage_msg = \
-"Usage: austin [-aCefmPs?V] [-i n_us] [-o FILE] [-p PID] [-t n_ms] [-x n_sec]\n"
-" [--alt-format] [--children] [--exclude-empty] [--full]\n"
-" [--interval=n_us] [--memory] [--output=FILE] [--pid=PID] [--pipe]\n"
-" [--sleepless] [--timeout=n_ms] [--exposure=n_sec] [--help]\n"
-" [--usage] [--version] command [ARG...]\n";
+"Usage: austin [-aCefgmPs?V] [-h n_mb] [-i n_us] [-o FILE] [-p PID] [-t n_ms]\n"
+" [-x n_sec] [--alt-format] [--children] [--exclude-empty] [--full]\n"
+" [--gc] [--heap=n_mb] [--interval=n_us] [--memory] [--output=FILE]\n"
+" [--pid=PID] [--pipe] [--sleepless] [--timeout=n_ms]\n"
+" [--exposure=n_sec] [--help] [--usage] [--version] command [ARG...]\n";
static void
@@ -631,6 +671,14 @@ cb(const char opt, const char * arg) {
pargs.gc = 1;
break;
+ case 'h':
+ if (
+ fail(str_to_num((char*) arg, (long *) &(pargs.heap))) ||
+ pargs.heap > LONG_MAX
+ )
+ arg_error("the heap size must be a positive integer");
+ break;
+
case '?':
puts(help_msg);
exit(0);
@@ -662,7 +710,9 @@ cb(const char opt, const char * arg) {
// ----------------------------------------------------------------------------
int
parse_args(int argc, char ** argv) {
- #ifdef PL_LINUX
+ pargs.output_file = stdout;
+
+ #ifdef GNU_ARGP
struct argp args = {options, parse_opt, "command [ARG...]", doc};
argp_parse(&args, argc, argv, 0, 0, 0);
diff --git a/src/argparse.h b/src/argparse.h
index d19f0c11..aea98c8d 100644
--- a/src/argparse.h
+++ b/src/argparse.h
@@ -27,6 +27,7 @@
#include
#include
+#include "platform.h"
#include "stats.h"
typedef struct {
@@ -44,6 +45,10 @@ typedef struct {
ctime_t exposure;
int pipe;
int gc;
+ size_t heap;
+ #ifdef NATIVE
+ int kernel;
+ #endif
} parsed_args_t;
diff --git a/src/austin.1 b/src/austin.1
index 4103b0d6..8d9821e6 100644
--- a/src/austin.1
+++ b/src/austin.1
@@ -1,7 +1,7 @@
-.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.6.
-.TH AUSTIN "1" "August 2021" "austin 3.1.0" "User Commands"
+.\" DO NOT MODIFY THIS FILE! It was generated by help2man 1.47.13.
+.TH AUSTIN "1" "December 2021" "austin 3.2.0" "User Commands"
.SH NAME
-austin \- manual page for austin 3.1.0
+austin \- manual page for austin 3.2.0
.SH SYNOPSIS
.B austin
[\fI\,OPTION\/\fR...] \fI\,command \/\fR[\fI\,ARG\/\fR...]
@@ -24,6 +24,10 @@ Produce the full set of metrics (time +mem \fB\-mem\fR).
\fB\-g\fR, \fB\-\-gc\fR
Sample the garbage collector state.
.TP
+\fB\-h\fR, \fB\-\-heap\fR=\fI\,n_mb\/\fR
+Maximum heap size to allocate to increase sampling
+accuracy, in MB (default is 256).
+.TP
\fB\-i\fR, \fB\-\-interval\fR=\fI\,n_us\/\fR
Sampling interval in microseconds (default is
100). Accepted units: s, ms, us.
diff --git a/src/austin.c b/src/austin.c
index 65ce7e05..ee431ac3 100644
--- a/src/austin.c
+++ b/src/austin.c
@@ -75,8 +75,12 @@ do_single_process(py_proc_t * py_proc) {
if (fail(py_proc__sample(py_proc)))
break;
-
+
+ #ifdef NATIVE
+ stopwatch_pause(0);
+ #else
stopwatch_pause(stopwatch_duration());
+ #endif
}
}
else {
@@ -88,7 +92,11 @@ do_single_process(py_proc_t * py_proc) {
if (fail(py_proc__sample(py_proc)))
break;
+ #ifdef NATIVE
+ stopwatch_pause(0);
+ #else
stopwatch_pause(stopwatch_duration());
+ #endif
if (end_time < gettime())
interrupt++;
@@ -154,20 +162,32 @@ do_child_processes(py_proc_t * py_proc) {
if (pargs.exposure == 0) {
while (!py_proc_list__is_empty(list) && interrupt == FALSE) {
+ #ifndef NATIVE
ctime_t start_time = gettime();
+ #endif
py_proc_list__update(list);
py_proc_list__sample(list);
+ #ifdef NATIVE
+ stopwatch_pause(0);
+ #else
stopwatch_pause(gettime() - start_time);
+ #endif
}
}
else {
log_m("ð Sampling for %d second%s", pargs.exposure, pargs.exposure != 1 ? "s" : "");
ctime_t end_time = gettime() + pargs.exposure * 1000000;
while (!py_proc_list__is_empty(list) && interrupt == FALSE) {
+ #ifndef NATIVE
ctime_t start_time = gettime();
+ #endif
py_proc_list__update(list);
py_proc_list__sample(list);
+ #ifdef NATIVE
+ stopwatch_pause(0);
+ #else
stopwatch_pause(gettime() - start_time);
+ #endif
if (end_time < gettime()) interrupt++;
}
@@ -212,7 +232,7 @@ int main(int argc, char ** argv) {
goto finally;
}
- if (fail(py_thread_allocate_stack())) {
+ if (fail(py_thread_allocate())) {
log_ie("Cannot allocate memory for thread stack");
goto finally;
}
@@ -244,9 +264,7 @@ int main(int argc, char ** argv) {
goto finally;
// Redirect output to STDOUT if not output file was given.
- if (pargs.output_file == NULL)
- pargs.output_file = stdout;
- else
+ if (pargs.output_file != stdout)
log_i("Output file: %s", pargs.output_filename);
log_i("Sampling interval: %lu Ξs", pargs.t_sampling_interval);
@@ -298,7 +316,7 @@ int main(int argc, char ** argv) {
stats_log_metrics();NL;
finally:
- py_thread_free_stack();
+ py_thread_free();
py_proc__destroy(py_proc);
log_d("Last error: %d :: %s", austin_errno, get_last_error());
@@ -345,7 +363,6 @@ int main(int argc, char ** argv) {
retval = SIGTERM;
log_footer();
- logger_close();
release:
if (pargs.output_file != NULL && pargs.output_file != stdout) {
@@ -353,6 +370,8 @@ int main(int argc, char ** argv) {
log_d("Output file closed.");
}
+ logger_close();
+
return retval;
} /* main */
diff --git a/src/austin.h b/src/austin.h
index ac5f7ce8..da5561d4 100644
--- a/src/austin.h
+++ b/src/austin.h
@@ -24,6 +24,6 @@
#define AUSTIN_H
#define PROGRAM_NAME "austin"
-#define VERSION "3.1.0"
+#define VERSION "3.2.0"
#endif
diff --git a/src/heap.h b/src/heap.h
new file mode 100644
index 00000000..7a7162be
--- /dev/null
+++ b/src/heap.h
@@ -0,0 +1,41 @@
+// This file is part of "austin" which is released under GPL.
+//
+// See file LICENCE or go to http://www.gnu.org/licenses/ for full license
+// details.
+//
+// Austin is a Python frame stack sampler for CPython.
+//
+// Copyright (c) 2018-2021 Gabriele N. Tornetta .
+// All rights reserved.
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see .
+
+#ifndef HEAP_H
+#define HEAP_H
+
+#include
+
+typedef struct {
+ void * lo;
+ void * hi;
+ void * newlo;
+ void * newhi;
+} _mem_block_t;
+
+
+typedef struct {
+ void * content;
+ size_t size;
+} _heap_t;
+
+#endif
diff --git a/src/hints.h b/src/hints.h
index 43a858e8..84cf64bd 100644
--- a/src/hints.h
+++ b/src/hints.h
@@ -46,6 +46,7 @@
#endif
#define with_resources int retval = 0;
+#define OK goto release;
#define NOK retval = 1; goto release;
#define released return retval;
diff --git a/src/linux/py_proc.h b/src/linux/py_proc.h
index 78558358..a98a5db4 100644
--- a/src/linux/py_proc.h
+++ b/src/linux/py_proc.h
@@ -35,6 +35,9 @@
#include
#include
+#ifdef NATIVE
+#include "../argparse.h"
+#endif
#include "../dict.h"
#include "../hints.h"
#include "../py_proc.h"
@@ -377,13 +380,11 @@ _py_proc__parse_maps_file(py_proc_t * self) {
while (getline(&line, &len, fp) != -1) {
ssize_t lower, upper;
char pathname[1024];
- char m[sizeof(void *)]; // We don't care about these values.
- int field_count = sscanf(line, "%lx-%lx %4c %lx %x:%x %x %s\n",
- &lower, &upper, // Map bounds
- (char *) m, (ssize_t *) m, (int *) m, (int *) m, (int *) m, // Ignored
- pathname // Binary path
- ) - 7; // We expect between 7 and 8 matches.
+ int field_count = sscanf(line, "%lx-%lx %*s %*x %*x:%*x %*x %s\n",
+ &lower, &upper, // Map bounds
+ pathname // Binary path
+ ) - 3; // We expect between 3 and 4 matches.
if (field_count >= 0) {
if (field_count == 0 || strstr(pathname, "[v") == NULL) {
// Skip meaningless addresses like [vsyscall] which would give
@@ -488,6 +489,51 @@ _py_proc__get_resident_memory(py_proc_t * self) {
} /* _py_proc__get_resident_memory */
+#ifdef NATIVE
+// ----------------------------------------------------------------------------
+static int
+_py_proc__dump_maps(py_proc_t * self) {
+ char file_name[32];
+ FILE * fp = NULL;
+ char * line = NULL;
+ size_t len = 0;
+
+ sprintf(file_name, "/proc/%d/maps", self->pid);
+ fp = fopen(file_name, "r");
+ if (fp == NULL) {
+ switch (errno) {
+ case EACCES: // Needs elevated privileges
+ set_error(EPROCPERM);
+ break;
+ case ENOENT: // Invalid pid
+ set_error(EPROCNPID);
+ break;
+ default:
+ set_error(EPROCVM);
+ }
+ FAIL;
+ }
+
+ while (getline(&line, &len, fp) != -1) {
+ ssize_t lower, upper;
+ char pathname[1024];
+
+ if (sscanf(line, "%lx-%lx %*s %*x %*x:%*x %*x %s\n",
+ &lower, &upper, // Map bounds
+ pathname // Binary path
+ ) == 3 && pathname[0] != '[') {
+ fprintf(pargs.output_file, "# map: %lx-%lx %s\n", lower, upper, pathname);
+ }
+ }
+
+ sfree(line);
+ fclose(fp);
+
+ SUCCESS;
+} /* _py_proc__dump_maps */
+#endif
+
+
// ----------------------------------------------------------------------------
static int
_py_proc__init(py_proc_t * self) {
@@ -503,6 +549,10 @@ _py_proc__init(py_proc_t * self) {
self->last_resident_memory = _py_proc__get_resident_memory(self);
+ #ifdef NATIVE
+ _py_proc__dump_maps(self);
+ #endif
+
SUCCESS;
} /* _py_proc__init */
diff --git a/src/linux/py_thread.h b/src/linux/py_thread.h
index ae96f5d1..0d673028 100644
--- a/src/linux/py_thread.h
+++ b/src/linux/py_thread.h
@@ -44,23 +44,33 @@ void * _pthread_buffer[PTHREAD_BUFFER_SIZE];
// ----------------------------------------------------------------------------
static void
_infer_tid_field_offset(py_thread_t * py_thread) {
- if (success(copy_memory(
+ if (fail(copy_memory(
py_thread->raddr.pid,
(void *) py_thread->tid, // At this point this is still the pthread_t *
PTHREAD_BUFFER_SIZE * sizeof(void *),
_pthread_buffer
))) {
- for (register int i = 0; i < PTHREAD_BUFFER_SIZE; i++) {
- log_d("pthread_t at %p", py_thread->tid);
- if (py_thread->raddr.pid == (uintptr_t) _pthread_buffer[i]) {
- log_d("TID field offset: %d", i);
- _pthread_tid_offset = i;
- return;
- }
+ log_d("Cannot copy pthread_t structure");
+ return;
+ }
+
+ log_d("pthread_t at %p", py_thread->tid);
+
+ for (register int i = 0; i < PTHREAD_BUFFER_SIZE; i++) {
+ if (py_thread->raddr.pid == (uintptr_t) _pthread_buffer[i]) {
+ log_d("TID field offset: %d", i);
+ _pthread_tid_offset = i;
+ return;
}
}
- else {
- log_d("Cannot copy pthread_t structure");
+
+ // Fall-back to smaller steps if we failed
+ for (register int i = 0; i < PTHREAD_BUFFER_SIZE * sizeof(uintptr_t) / sizeof(pid_t); i++) {
+ if (py_thread->raddr.pid == (pid_t) ((pid_t*) _pthread_buffer)[i]) {
+ log_d("TID field offset (from fall-back): %d", i);
+ _pthread_tid_offset = i;
+ return;
+ }
}
}
diff --git a/src/msg.h b/src/msg.h
index 71915500..c02fdfaa 100644
--- a/src/msg.h
+++ b/src/msg.h
@@ -61,15 +61,7 @@ URL("https://github.com/P403n1x87/austin#compatibility")
const char * MFORK =
-#if defined PL_UNIX
-"â Cannot launch the given command. Either it is not valid or the process\n"
-"terminated too quickly";
-#else
-"â Cannot launch the given command. Please make sure it is correct. If you\n"
-"think it is, then try passing an output file via the -o/--output option.\n"
-"Sometimes, the Python wrapper launch fails to duplicate the standard out\n"
-"handle and fails to launch your Python application.";
-#endif
+"⚠ Cannot launch the given command or it terminated too quickly";
const char * MATTACH = \
"ð Cannot attach to the given process. Make sure that the PID you have provided\n"
diff --git a/src/platform.c b/src/platform.c
new file mode 100644
index 00000000..ea281091
--- /dev/null
+++ b/src/platform.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+
+#include "hints.h"
+#include "platform.h"
+
+
+// ----------------------------------------------------------------------------
+size_t
+pid_max() {
+ #if defined PL_LINUX /* LINUX */
+ FILE * pid_max_file = fopen("/proc/sys/kernel/pid_max", "rb");
+ if (!isvalid(pid_max_file))
+ return 0;
+
+ size_t max_pid;
+ int has_pid_max = (fscanf(pid_max_file, "%zu", &max_pid) == 1);
+ fclose(pid_max_file);
+ if (!has_pid_max)
+ return 0;
+
+ return max_pid;
+
+ #elif defined PL_MACOS /* MACOS */
+ return PID_MAX;
+
+ #elif defined PL_WIN /* WIN */
+ return (1 << 22); // 4M. WARNING: This could potentially be violated!
+
+ #endif
+}
\ No newline at end of file
diff --git a/src/platform.h b/src/platform.h
index 40e88eaa..92f3909e 100644
--- a/src/platform.h
+++ b/src/platform.h
@@ -23,6 +23,8 @@
#ifndef PLATFORM_H
#define PLATFORM_H
+#include <stddef.h>
+
#if defined(__linux__)
#define PL_LINUX
@@ -40,10 +42,29 @@
// ----------------------------------------------------------------------------
+#if defined(AUSTINP) && defined(PL_LINUX)
+#define NATIVE
+#endif
+
+// ----------------------------------------------------------------------------
+
#if defined(PL_LINUX) || defined(PL_MACOS)
#define PL_UNIX
#define NULL_DEVICE "/dev/null"
#endif
+// ----------------------------------------------------------------------------
+
+#if defined PL_MACOS
+#define PID_MAX 99999 // From sys/proc_internal.h
#endif
+
+
+/**
+ * Get the maximum PID for the platform.
+ */
+size_t
+pid_max();
+
+#endif
\ No newline at end of file
diff --git a/src/py_proc.c b/src/py_proc.c
index e488d693..5f6d1f25 100644
--- a/src/py_proc.c
+++ b/src/py_proc.c
@@ -144,7 +144,11 @@ _get_version_from_executable(char * binary, int * major, int * minor, int * patc
char version[64];
char cmd[256];
+ #if defined PL_WIN
+ sprintf(cmd, "\"\"%s\"\" -V 2>&1", binary);
+ #else
sprintf(cmd, "%s -V 2>&1", binary);
+ #endif
fp = _popen(cmd, "r");
if (!isvalid(fp)) {
@@ -244,10 +248,20 @@ _py_proc__get_version(py_proc_t * self) {
if (isvalid(self->lib_path)) {
#if defined PL_LINUX /* LINUX */
- if (sscanf(
- strstr(self->lib_path, "python"), "python%d.%d", &major, &minor
- ) == 2) {
- return PYVERSION(major, minor, patch) | 0xFF;
+ char * base = self->lib_path;
+ char * end = base + strlen(self->lib_path);
+ const char * needle = "python";
+ const size_t needle_len = strlen(needle);
+
+ while (base < end) {
+ base = strstr(base, needle);
+ if (!isvalid(base)) {
+ break;
+ }
+ base += needle_len;
+ if (sscanf(base,"%d.%d", &major, &minor) == 2) {
+ return PYVERSION(major, minor, patch) | 0xFF;
+ }
}
#elif defined PL_WIN /* WIN */
@@ -326,12 +340,12 @@ _py_proc__check_interp_state(py_proc_t * self, void * raddr) {
if (py_proc__get_type(self, V_FIELD(void *, is, py_is, o_tstate_head), tstate_head)) {
log_t(
"Cannot copy PyThreadState head at %p from PyInterpreterState instance",
- is.tstate_head
+ V_FIELD(void *, is, py_is, o_tstate_head)
);
FAIL;
}
- log_t("PyThreadState head loaded @ %p", is.tstate_head);
+ log_t("PyThreadState head loaded @ %p", V_FIELD(void *, is, py_is, o_tstate_head));
if (V_FIELD(void*, tstate_head, py_thread, o_interp) != raddr)
FAIL;
@@ -343,21 +357,21 @@ _py_proc__check_interp_state(py_proc_t * self, void * raddr) {
log_t(
"PyInterpreterState loaded @ %p. Thread State head @ %p",
- raddr, is.tstate_head
+ raddr, V_FIELD(void *, is, py_is, o_tstate_head)
);
// As an extra sanity check, verify that the thread state is valid
- raddr_t thread_raddr = { .pid = PROC_REF, .addr = V_FIELD(void *, is, py_is, o_tstate_head) };
- py_thread_t thread;
- if (fail(py_thread__fill_from_raddr(&thread, &thread_raddr, self))) {
- log_d("Failed to fill thread structure");
- FAIL;
- }
+ // raddr_t thread_raddr = { .pid = PROC_REF, .addr = V_FIELD(void *, is, py_is, o_tstate_head) };
+ // py_thread_t thread;
+ // if (fail(py_thread__fill_from_raddr(&thread, &thread_raddr, self))) {
+ // log_d("Failed to fill thread structure");
+ // FAIL;
+ // }
- if (thread.invalid) {
- log_d("... but Head Thread State is invalid!");
- FAIL;
- }
+ // if (thread.invalid) {
+ // log_d("... but Head Thread State is invalid!");
+ // FAIL;
+ // }
log_d("Stack trace constructed from possible interpreter state");
@@ -742,6 +756,9 @@ _py_proc__run(py_proc_t * self, int try_once) {
self->timestamp = gettime();
+ #ifdef NATIVE
+ self->unwind.as = unw_create_addr_space(&_UPT_accessors, 0);
+ #endif
SUCCESS;
} /* _py_proc__run */
@@ -765,6 +782,8 @@ py_proc_new() {
}
}
+ py_proc->frames_heap.newlo = py_proc->frames.newlo = (void *) -1;
+
py_proc->extra = (proc_extra_info *) calloc(1, sizeof(proc_extra_info));
if (!isvalid(py_proc->extra))
goto error;
@@ -795,13 +814,19 @@ py_proc__attach(py_proc_t * self, pid_t pid, int child_process) {
self->pid = pid;
if (fail(_py_proc__run(self, child_process))) {
- if (austin_errno == EPROCNPID) {
- set_error(EPROCATTACH);
- }
- else {
- log_ie("Cannot attach to running process.");
+ #if defined PL_WIN
+ if (fail(_py_proc__try_child_proc(self))) {
+ #endif
+ if (austin_errno == EPROCNPID) {
+ set_error(EPROCATTACH);
+ }
+ else {
+ log_ie("Cannot attach to running process.");
+ }
+ FAIL;
+ #if defined PL_WIN
}
- FAIL;
+ #endif
}
SUCCESS;
@@ -819,6 +844,8 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) {
SECURITY_ATTRIBUTES saAttr;
HANDLE hChildStdInRd = NULL;
HANDLE hChildStdInWr = NULL;
+ HANDLE hChildStdOutRd = NULL;
+ HANDLE hChildStdOutWr = NULL;
ZeroMemory(&piProcInfo, sizeof(PROCESS_INFORMATION));
ZeroMemory(&siStartInfo, sizeof(STARTUPINFO));
@@ -828,7 +855,10 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) {
saAttr.lpSecurityDescriptor = NULL;
CreatePipe(&hChildStdInRd, &hChildStdInWr, &saAttr, 0);
+ CreatePipe(&hChildStdOutRd, &hChildStdOutWr, &saAttr, 0);
+
SetHandleInformation(hChildStdInWr, HANDLE_FLAG_INHERIT, 0);
+ SetHandleInformation(hChildStdOutRd, HANDLE_FLAG_INHERIT, 0);
siStartInfo.cb = sizeof(STARTUPINFO);
siStartInfo.hStdInput = hChildStdInRd;
@@ -836,17 +866,23 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) {
siStartInfo.hStdError = GetStdHandle(STD_ERROR_HANDLE);
siStartInfo.dwFlags |= STARTF_USESTDHANDLES;
- if (pargs.output_file == NULL) {
- HANDLE nullStdOut = CreateFile(
- TEXT(NULL_DEVICE), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL
+ if (pargs.output_file == stdout) {
+ log_d("Redirecting child's STDOUT to a pipe");
+ siStartInfo.hStdOutput = hChildStdOutWr;
+
+ // On Windows, Python is normally started by a launcher that duplicates the
+ // standard streams, so redirecting to the NULL device causes issues. To
+ // support these cases, we spawn a reader thread that reads from the pipe
+ // and ensures that the buffer never gets full, stalling STDOUT operations
+ // in the child process.
+ DWORD dwThreadId;
+ self->extra->h_reader_thread = CreateThread(
+ NULL, 0, reader_thread, hChildStdOutRd, 0, &dwThreadId
);
-
- if (nullStdOut == INVALID_HANDLE_VALUE) {
- log_e(error_get_msg(ENULLDEV));
+ if (self->extra->h_reader_thread == NULL) {
+ log_e("Failed to start STDOUT reader thread.");
+ set_error(ENULLDEV);
}
-
- log_d("Redirecting child's STDOUT to " NULL_DEVICE);
- siStartInfo.hStdOutput = nullStdOut;
}
// Concatenate the command line arguments
@@ -888,13 +924,14 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) {
self->pid = (pid_t) piProcInfo.dwProcessId;
CloseHandle(hChildStdInRd);
+ CloseHandle(hChildStdOutWr);
#else /* UNIX */
self->pid = fork();
if (self->pid == 0) {
// If we are not writing to file we need to ensure the child process is
// not writing to stdout.
- if (pargs.output_file == NULL) {
+ if (pargs.output_file == stdout) {
log_d("Redirecting child's STDOUT to " NULL_DEVICE);
if (freopen(NULL_DEVICE, "w", stdout) == NULL)
log_e(error_get_msg(ENULLDEV));
@@ -917,66 +954,15 @@ py_proc__start(py_proc_t * self, const char * exec, char * argv[]) {
if (fail(_py_proc__run(self, FALSE))) {
#if defined PL_WIN
- // On Windows, if we fail with the parent process we look if it has a single
- // child and try to attach to that instead. We keep going until we either
- // find a single Python process or more or less than a single child.
- log_d("Process is not Python so we look for a single child Python process");
- HANDLE orig_hproc = self->extra->h_proc;
- pid_t orig_pid = self->pid;
- while (TRUE) {
- pid_t parent_pid = self->pid;
-
- HANDLE h = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
- if (h == INVALID_HANDLE_VALUE)
- break;
-
- PROCESSENTRY32 pe = { 0 };
- pe.dwSize = sizeof(PROCESSENTRY32);
-
- if (Process32First(h, &pe)) {
- pid_t child_pid = 0;
- do {
- if (pe.th32ParentProcessID == parent_pid) {
- if (child_pid) {
- log_d("Process has more than one child");
- goto exit;
- }
- child_pid = pe.th32ProcessID;
- }
- } while (Process32Next(h, &pe));
-
- if (!child_pid) {
- log_d("Process has no children");
- goto exit;
- }
-
- self->pid = child_pid;
- self->extra->h_proc = OpenProcess(
- PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, FALSE, child_pid
- );
- if (self->extra->h_proc == INVALID_HANDLE_VALUE) {
- goto exit;
- }
- if (success(_py_proc__run(self, FALSE))) {
- log_d("Process has a single Python child with PID %d. We will attach to that", child_pid);
- SUCCESS;
- }
- else {
- log_d("Process had a single non-Python child with PID %d. Taking it as new parent", child_pid);
- CloseHandle(self->extra->h_proc);
- }
- }
-
- CloseHandle(h);
+ if (fail(_py_proc__try_child_proc(self))) {
+ #endif
+ if (austin_errno == EPROCNPID)
+ set_error(EPROCFORK);
+ log_ie("Cannot start new process");
+ FAIL;
+ #if defined PL_WIN
}
- exit:
- self->pid = orig_pid;
- self->extra->h_proc = orig_hproc;
#endif
- if (austin_errno == EPROCNPID)
- set_error(EPROCFORK);
- log_ie("Cannot start new process");
- FAIL;
}
SUCCESS;
@@ -995,10 +981,19 @@ py_proc__wait(py_proc_t * self) {
#endif
#ifdef PL_WIN /* WIN */
+ if (isvalid(self->extra->h_reader_thread)) {
+ WaitForSingleObject(self->extra->h_reader_thread, INFINITE);
+ CloseHandle(self->extra->h_reader_thread);
+ }
WaitForSingleObject(self->extra->h_proc, INFINITE);
+ CloseHandle(self->extra->h_proc);
#else /* UNIX */
+ #ifdef NATIVE
+ wait(NULL);
+ #else
waitpid(self->pid, 0, 0);
#endif
+ #endif
}
@@ -1096,6 +1091,53 @@ py_proc__is_gc_collecting(py_proc_t * self) {
}
+#ifdef NATIVE
+// ----------------------------------------------------------------------------
+static int
+_py_proc__interrupt_threads(py_proc_t * self, raddr_t * tstate_head_raddr) {
+ py_thread_t py_thread;
+
+ if (fail(py_thread__fill_from_raddr(&py_thread, tstate_head_raddr, self))) {
+ FAIL;
+ }
+
+ do {
+ if (fail(py_thread__set_idle(&py_thread)))
+ FAIL;
+ if (pargs.kernel && fail(py_thread__save_kernel_stack(&py_thread)))
+ FAIL;
+ if (ptrace(PTRACE_INTERRUPT, py_thread.tid, 0, 0)) {
+ log_e("ptrace: failed to interrupt thread %d", py_thread.tid);
+ FAIL;
+ }
+ log_t("ptrace: thread %d interrupted", py_thread.tid);
+ } while (success(py_thread__next(&py_thread)));
+
+ SUCCESS;
+}
+
+
+// ----------------------------------------------------------------------------
+static int
+_py_proc__resume_threads(py_proc_t * self, raddr_t * tstate_head_raddr) {
+ py_thread_t py_thread;
+
+ if (fail(py_thread__fill_from_raddr(&py_thread, tstate_head_raddr, self))) {
+ FAIL;
+ }
+
+ do {
+ while (ptrace(PTRACE_CONT, py_thread.tid, 0, 0)) {
+ log_t("ptrace: failed to resume thread %d", py_thread.tid);
+ }
+ log_t("ptrace: thread %d resumed", py_thread.tid);
+ } while (success(py_thread__next(&py_thread)));
+
+ SUCCESS;
+}
+#endif
+
+
// ----------------------------------------------------------------------------
int
py_proc__sample(py_proc_t * self) {
@@ -1111,8 +1153,18 @@ py_proc__sample(py_proc_t * self) {
if (isvalid(tstate_head)) {
raddr_t raddr = { .pid = PROC_REF, .addr = tstate_head };
py_thread_t py_thread;
- if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self)))
- FAIL;
+
+ #ifdef NATIVE
+ _py_proc__interrupt_threads(self, &raddr);
+ time_delta = gettime() - self->timestamp;
+ #endif
+
+ if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self))) {
+ if (is_fatal(austin_errno)) {
+ FAIL;
+ }
+ SUCCESS;
+ }
if (pargs.memory) {
// Use the current thread to determine which thread is manipulating memory
@@ -1140,9 +1192,16 @@ py_proc__sample(py_proc_t * self) {
mem_delta
);
} while (success(py_thread__next(&py_thread)));
+ #ifdef NATIVE
+ self->timestamp = gettime();
+ _py_proc__resume_threads(self, &raddr);
+ #endif
+
}
+ #ifndef NATIVE
self->timestamp += time_delta;
+ #endif
SUCCESS;
} /* py_proc__sample */
@@ -1199,17 +1258,10 @@ py_proc__destroy(py_proc_t * self) {
if (!isvalid(self))
return;
- if (self->bin_path != NULL)
- free(self->bin_path);
-
- if (self->lib_path != NULL)
- free(self->lib_path);
-
- if (self->bss != NULL)
- free(self->bss);
-
- if (self->extra != NULL)
- free(self->extra);
+ sfree(self->bin_path);
+ sfree(self->lib_path);
+ sfree(self->bss);
+ sfree(self->extra);
free(self);
}
diff --git a/src/py_proc.h b/src/py_proc.h
index 41761300..8091a3ad 100644
--- a/src/py_proc.h
+++ b/src/py_proc.h
@@ -26,6 +26,12 @@
#include
+#ifdef NATIVE
+#include <libunwind.h>
+#include <libunwind-ptrace.h>
+#endif
+
+#include "heap.h"
#include "stats.h"
@@ -77,6 +83,16 @@ typedef struct {
// Offset of the tstate_current field within the _PyRuntimeState structure
unsigned int tstate_current_offset;
+ // Frame objects VM ranges
+ _mem_block_t frames;
+ _mem_block_t frames_heap;
+
+ #ifdef NATIVE
+ struct _puw {
+ unw_addr_space_t as;
+ } unwind;
+ #endif
+
// Platform-dependent fields
proc_extra_info * extra;
} py_proc_t;
diff --git a/src/py_proc_list.c b/src/py_proc_list.c
index edd1484d..ffef7a45 100644
--- a/src/py_proc_list.c
+++ b/src/py_proc_list.c
@@ -26,7 +26,6 @@
#include
#elif defined PL_MACOS
#include
-#define PID_MAX 99999 // From sys/proc_internal.h
#elif defined PL_WIN
#include
#include
@@ -114,23 +113,7 @@ py_proc_list_new(py_proc_t * parent_py_proc) {
if (list == NULL)
return NULL;
- #if defined PL_LINUX /* LINUX */
- FILE * pid_max_file = fopen("/proc/sys/kernel/pid_max", "rb");
- if (pid_max_file == NULL)
- return NULL;
-
- int has_pid_max = (fscanf(pid_max_file, "%d", &(list->pids)) == 1);
- fclose(pid_max_file);
- if (!has_pid_max)
- return NULL;
-
- #elif defined PL_MACOS /* MACOS */
- list->pids = PID_MAX;
-
- #elif defined PL_WIN /* WIN */
- list->pids = (1 << 22); // 4M. WARNING: This could potentially be violated!
-
- #endif
+ list->pids = pid_max();
log_t("Maximum number of PIDs: %d", list->pids);
diff --git a/src/py_thread.c b/src/py_thread.c
index 38cd4e4d..338ca285 100644
--- a/src/py_thread.c
+++ b/src/py_thread.c
@@ -23,6 +23,10 @@
#define PY_THREAD_C
#include
+#include <fcntl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <unistd.h>
#include "argparse.h"
#include "error.h"
@@ -33,6 +37,7 @@
#include "timing.h"
#include "version.h"
+#include "heap.h"
#include "py_thread.h"
// ----------------------------------------------------------------------------
@@ -77,7 +82,16 @@ typedef struct frame {
} py_frame_t;
-py_frame_t * _stack = NULL;
+static py_frame_t * _stack = NULL;
+static size_t _stackp = 0;
+static _heap_t _frames = {NULL, 0};
+static _heap_t _frames_heap = {NULL, 0};
+
+#ifdef NATIVE
+static void ** _tids = NULL;
+static unsigned char * _tids_idle = NULL;
+static char ** _kstacks = NULL;
+#endif
// ---- PyCode ----------------------------------------------------------------
@@ -288,35 +302,72 @@ _py_code__fill_from_raddr(py_code_t * self, raddr_t * raddr, int lasti) {
// ---- PyFrame ---------------------------------------------------------------
// ----------------------------------------------------------------------------
-static inline int
-_py_frame__fill_from_raddr(py_frame_t * self, raddr_t * raddr) {
- PyFrameObject frame;
+#define _use_heaps (pargs.heap > 0)
+#define _no_heaps {pargs.heap = 0;}
- self->invalid = 1;
+static inline int
+_py_thread__read_frames(py_thread_t * self) {
+ size_t newsize;
+ size_t maxsize = pargs.heap >> 1;
+
+ if (isvalid(self->proc->frames.newhi)) {
+ newsize = self->proc->frames.newhi - self->proc->frames.newlo;
+ if (newsize > maxsize) {
+ newsize = maxsize + sizeof(PyFrameObject);
+ }
+ if (newsize > _frames.size) {
+ _frames.content = realloc(_frames.content, newsize);
+ _frames.size = newsize;
+ self->proc->frames.hi = self->proc->frames.newhi;
+ self->proc->frames.lo = self->proc->frames.newlo;
+ }
+ if (fail(copy_memory(self->raddr.pid, self->proc->frames.lo, newsize, _frames.content)))
+ FAIL;
+ }
- if (fail(copy_from_raddr_v(raddr, frame, py_v->py_frame.size))) {
- log_ie("Cannot read remote PyFrameObject");
- FAIL;
+ if (isvalid(self->proc->frames_heap.newhi)) {
+ newsize = self->proc->frames_heap.newhi - self->proc->frames_heap.newlo;
+ if (newsize > maxsize) {
+ newsize = maxsize + sizeof(PyFrameObject);
+ }
+ if (newsize > _frames_heap.size) {
+ _frames_heap.content = realloc(_frames_heap.content, newsize);
+ _frames_heap.size = newsize;
+ self->proc->frames_heap.hi = self->proc->frames_heap.newhi;
+ self->proc->frames_heap.lo = self->proc->frames_heap.newlo;
+ }
+ return copy_memory(self->raddr.pid, self->proc->frames_heap.lo, newsize, _frames_heap.content);
}
+ SUCCESS;
+}
+
+
+// ----------------------------------------------------------------------------
+static inline int
+_py_frame_fill_from_addr(PyFrameObject * frame, raddr_t * raddr) {
+ py_frame_t * self = _stack + _stackp;
+ self->invalid = TRUE;
raddr_t py_code_raddr = {
.pid = raddr->pid,
- .addr = V_FIELD(void *, frame, py_frame, o_code)
+ .addr = V_FIELD_PTR(void *, frame, py_frame, o_code)
};
if (_py_code__fill_from_raddr(
- &(self->code), &py_code_raddr, V_FIELD(int, frame, py_frame, o_lasti)
+ &(self->code), &py_code_raddr, V_FIELD_PTR(int, frame, py_frame, o_lasti)
)) {
log_ie("Cannot get PyCodeObject for frame");
- SUCCESS;
+ FAIL;
}
self->raddr.pid = raddr->pid;
self->raddr.addr = raddr->addr;
self->prev_raddr.pid = raddr->pid;
- self->prev_raddr.addr = V_FIELD(void *, frame, py_frame, o_back);
+ self->prev_raddr.addr = V_FIELD_PTR(void *, frame, py_frame, o_back);
- self->invalid = 0;
+ self->invalid = FALSE;
+
+ _stackp++;
SUCCESS;
}
@@ -324,35 +375,262 @@ _py_frame__fill_from_raddr(py_frame_t * self, raddr_t * raddr) {
// ----------------------------------------------------------------------------
static inline int
-_py_frame__prev(py_frame_t * self) {
- if (!isvalid(self) || !isvalid(self->prev_raddr.addr))
+_py_frame_fill_from_raddr(raddr_t * raddr) {
+ PyFrameObject frame;
+
+ if (fail(copy_from_raddr_v(raddr, frame, py_v->py_frame.size))) {
+ log_ie("Cannot read remote PyFrameObject");
+ log_d(" raddr: (%p, %ld)", raddr->addr, raddr->pid);
FAIL;
+ }
+
+ return _py_frame_fill_from_addr(&frame, raddr);
+}
+
+
+// ----------------------------------------------------------------------------
+#define REL(raddr, block, base) (raddr->addr - block.lo + base)
+
+static inline int
+_py_frame_fill(raddr_t * raddr, py_thread_t * thread) {
+ if (_use_heaps) {
+ py_proc_t * proc = thread->proc;
+
+ if (isvalid(_frames.content)
+ && raddr->addr >= proc->frames.lo
+ && raddr->addr < proc->frames.lo + _frames.size
+ ) {
+ return _py_frame_fill_from_addr(
+ REL(raddr, proc->frames, _frames.content),
+ raddr
+ );
+ }
+ else if (isvalid(_frames_heap.content)
+ && raddr->addr >= proc->frames_heap.lo
+ && raddr->addr < proc->frames_heap.lo + _frames_heap.size
+ ) {
+ return _py_frame_fill_from_addr(
+ REL(raddr, proc->frames_heap, _frames_heap.content),
+ raddr
+ );
+ }
+
+ // Miss: update ranges
+ // We quite likely set the bss map data so this should be a pretty reliable
+ // platform-independent way of dualising the frame heap.
+ if (raddr->addr >= proc->map.bss.base && raddr->addr <= proc->map.bss.base + (1 << 27)) {
+ if (raddr->addr + sizeof(PyFrameObject) > proc->frames_heap.newhi) {
+ proc->frames_heap.newhi = raddr->addr + sizeof(PyFrameObject);
+ }
+ if (raddr->addr < proc->frames_heap.newlo) {
+ proc->frames_heap.newlo = raddr->addr;
+ }
+ }
+ else {
+ if (raddr->addr + sizeof(PyFrameObject) > proc->frames.newhi) {
+ proc->frames.newhi = raddr->addr + sizeof(PyFrameObject);
+ }
+ if (raddr->addr < proc->frames.newlo) {
+ proc->frames.newlo = raddr->addr;
+ }
+ }
+ }
+
+ return _py_frame_fill_from_raddr(raddr);
+}
+
+
+// ----------------------------------------------------------------------------
+static inline int
+_py_frame__prev(py_thread_t * thread) {
+ if (_stackp <= 0)
+ FAIL;
+
+ py_frame_t * self = _stack + _stackp - 1;
+ if (!isvalid(self) || !isvalid(self->prev_raddr.addr)) {
+ // Double-check it's the end of the stack if we're using the heap.
+ _stackp--;
+ if (fail(_py_frame_fill_from_raddr(&self->raddr)) || !isvalid(self->prev_raddr.addr)) {
+ FAIL;
+ }
+ }
raddr_t prev_raddr = {
.pid = self->prev_raddr.pid,
.addr = self->prev_raddr.addr
};
- return _py_frame__fill_from_raddr(self + 1, &prev_raddr);
+ int result = _py_frame_fill(&prev_raddr, thread);
+
+ if (!_use_heaps) {
+ return result;
+ }
+
+ // This sucks! :(
+ py_frame_t * last = self + 1;
+ for (py_frame_t * f = self; f >= _stack; f--) {
+ if (last->prev_raddr.addr == f->raddr.addr) {
+ log_d("Circular frame reference detected");
+ last->invalid = TRUE;
+ FAIL;
+ }
+ }
+
+ return result;
}
// ----------------------------------------------------------------------------
-static inline void
+static inline int
_py_thread__unwind_frame_stack(py_thread_t * self) {
- register size_t i = 0;
- while (success(_py_frame__prev(_stack + i)) && i < MAX_STACK_SIZE) {
- if (_stack[++i].invalid) {
- log_d("Frame number %d is invalid", i);
- return;
+ size_t basep = _stackp;
+
+ if (_use_heaps && fail(_py_thread__read_frames(self))) {
+ log_ie("Failed to read frames heaps");
+ _no_heaps;
+ FAIL;
+ }
+ raddr_t frame_raddr = { .pid = self->raddr.pid, .addr = self->top_frame };
+ if (fail(_py_frame_fill(&frame_raddr, self))) {
+ log_ie("Failed to fill top frame");
+ FAIL;
+ }
+
+ while (success(_py_frame__prev(self))) {
+ if (_stackp >= MAX_STACK_SIZE) {
+ log_w("Discarding frame stack: too tall");
+ FAIL;
}
}
- if (i >= MAX_STACK_SIZE)
- log_w("Frames limit reached. Discarding the rest");
- self->stack_height += i;
+
+ if (_stack[_stackp-1].invalid) {
+ log_d("Frame number %zu is invalid", _stackp - basep);
+ FAIL;
+ }
+
+ self->stack_height += _stackp - basep;
+
+ SUCCESS;
}
+#ifdef NATIVE
+// ----------------------------------------------------------------------------
+int
+py_thread__set_idle(py_thread_t * self) {
+ size_t index = self->tid >> 3;
+ int offset = self->tid & 7;
+
+ if (unlikely(_pthread_tid_offset == 0)) {
+ FAIL;
+ }
+
+ unsigned char idle_bit = _py_thread__is_idle(self) << offset;
+ if (idle_bit) {
+ _tids_idle[index] |= idle_bit;
+ } else {
+ _tids_idle[index] &= ~(1 << offset);
+ }
+
+ SUCCESS;
+}
+
+// ----------------------------------------------------------------------------
+#define MAX_STACK_FILE_SIZE 2048
+int
+py_thread__save_kernel_stack(py_thread_t * self) {
+ char stack_path[48];
+ int fd;
+
+ if (unlikely(_pthread_tid_offset == 0) || !isvalid(_kstacks) ) {
+ FAIL;
+ }
+
+ sfree(_kstacks[self->tid]);
+
+ sprintf(stack_path, "/proc/%d/task/%ld/stack", self->proc->pid, self->tid);
+ fd = open(stack_path, O_RDONLY);
+ if (fd == -1)
+ FAIL;
+
+ _kstacks[self->tid] = (char *) calloc(1, MAX_STACK_FILE_SIZE);
+ if (read(fd, _kstacks[self->tid], MAX_STACK_FILE_SIZE) == -1) {
+ log_e("stack: failed to read %s", stack_path);
+ close(fd);
+ FAIL;
+ };
+ close(fd);
+
+ SUCCESS;
+}
+
+// ----------------------------------------------------------------------------
+static inline int
+_py_thread__unwind_kernel_frame_stack(py_thread_t * self) {
+ char * line = _kstacks[self->tid];
+ if (!isvalid(line))
+ SUCCESS;
+
+ log_t("linux: unwinding kernel stack");
+
+ for (;;) {
+ char * eol = strchr(line, '\n');
+ if (!isvalid(eol))
+ break;
+ *eol = '\0';
+
+ char * b = strchr(line, ']');
+ if (isvalid(b)) {
+ char * e = strchr(++b, '+');
+ if (isvalid(e))
+ *e = 0;
+ strcpy(_stack[_stackp].code.scope, ++b);
+ strcpy(_stack[_stackp].code.filename, "kernel");
+ _stackp++; // TODO: Decide whether to decrement this by 2 before returning.
+ }
+ line = eol + 1;
+ }
+
+ SUCCESS;
+}
+
+
+// ----------------------------------------------------------------------------
+static inline int
+_py_thread__unwind_native_frame_stack(py_thread_t * self) {
+ void *context = _tids[self->tid];
+ unw_cursor_t cursor;
+ unw_word_t offset, pc;
+
+ if (unw_init_remote(&cursor, self->proc->unwind.as, context))
+ FAIL;
+
+ do {
+ if (unw_get_reg(&cursor, UNW_REG_IP, &pc)) {
+ log_e("libunwind: cannot read program counter\n");
+ FAIL;
+ }
+
+ if (unw_get_proc_name(&cursor, _stack[_stackp].code.scope, MAXLEN, &offset) == 0) {
+ // To retrieve source name and line number we would need to
+ // - resolve the PC to a map to get the binary path
+ // - use the offset with the binary to get the line number from DWARF (see
+ // https://kernel.googlesource.com/pub/scm/linux/kernel/git/hjl/binutils/+/hjl/secondary/binutils/addr2line.c)
+ _stack[_stackp].code.lineno = offset;
+ }
+ else {
+ strcpy(_stack[_stackp].code.scope, "");
+ _stack[_stackp].code.lineno = 0;
+ }
+ sprintf(_stack[_stackp].code.filename, "native@%lx", pc);
+
+ _stackp++;
+ } while (_stackp < MAX_STACK_SIZE && unw_step(&cursor) > 0);
+
+ SUCCESS;
+}
+#endif
+
// ---- PUBLIC ----------------------------------------------------------------
// ----------------------------------------------------------------------------
@@ -368,19 +646,15 @@ py_thread__fill_from_raddr(py_thread_t * self, raddr_t * raddr, py_proc_t * proc
FAIL;
}
- if (V_FIELD(void*, ts, py_thread, o_frame) != NULL) {
- raddr_t frame_raddr = { .pid = raddr->pid, .addr = V_FIELD(void*, ts, py_thread, o_frame) };
- if (fail(_py_frame__fill_from_raddr(_stack, &frame_raddr))) {
- log_d("Failed to fill last frame");
- SUCCESS;
- }
- self->stack_height = 1;
- }
+ self->proc = proc;
self->raddr.pid = raddr->pid;
self->raddr.addr = raddr->addr;
- self->proc = proc;
+
+ if (isvalid(self->top_frame = V_FIELD(void*, ts, py_thread, o_frame))) {
+ self->stack_height = 1;
+ }
self->next_raddr.pid = raddr->pid;
self->next_raddr.addr = V_FIELD(void*, ts, py_thread, o_next) == raddr->addr \
@@ -412,19 +686,36 @@ py_thread__fill_from_raddr(py_thread_t * self, raddr_t * raddr, py_proc_t * proc
_pthread_buffer
))) {
self->tid = (uintptr_t) _pthread_buffer[_pthread_tid_offset];
+ #ifdef NATIVE
+ // TODO: If a TID is reused we will never seize it!
+ if (!isvalid(_tids[self->tid])) {
+ if (fail(ptrace(PTRACE_SEIZE, self->tid, 0, 0))) {
+ log_e("ptrace: cannot seize thread %d: %d\n", self->tid, errno);
+ FAIL;
+ }
+ else {
+ log_d("ptrace: thread %d seized", self->tid);
+ }
+ _tids[self->tid] = _UPT_create(self->tid);
+ if (!isvalid(_tids[self->tid])) {
+ log_e("libunwind: failed to create context for thread %d", self->tid);
+ FAIL;
+ }
+ }
+ #endif
}
}
#endif
self->invalid = 0;
SUCCESS;
-}
+} /* py_thread__fill_from_raddr */
// ----------------------------------------------------------------------------
int
py_thread__next(py_thread_t * self) {
- if (!isvalid(self->next_raddr.addr))
+ if (self->invalid || !isvalid(self->next_raddr.addr))
FAIL;
raddr_t next_raddr = { .pid = self->next_raddr.pid, .addr = self->next_raddr.addr };
@@ -469,25 +760,87 @@ py_thread__print_collapsed_stack(py_thread_t * self, ctime_t time_delta, ssize_t
int is_idle = FALSE;
if (pargs.full || pargs.sleepless) {
+ #ifdef NATIVE
+ size_t index = self->tid >> 3;
+ int offset = self->tid & 7;
+
+ is_idle = _tids_idle[index] & (1 << offset);
+ #else
is_idle = _py_thread__is_idle(self);
- if (!pargs.full && is_idle && pargs.sleepless)
+ #endif
+ if (!pargs.full && is_idle && pargs.sleepless) {
+ #ifdef NATIVE
+ // If we don't sample, the threads stall :(
+ _stackp = 0;
+ _py_thread__unwind_native_frame_stack(self);
+ #endif
return;
+ }
+ }
+
+ // Reset the frame stack before unwinding
+ _stackp = 0;
+
+ #ifdef NATIVE
+
+ // We sample the kernel frame stack BEFORE interrupting because otherwise
+ // we would see the ptrace syscall call stack, which is not very interesting.
+ // The downside is that the kernel stack might not be in sync with the other
+ // ones.
+ if (pargs.kernel) {
+ _py_thread__unwind_kernel_frame_stack(self);
}
+ if (fail(_py_thread__unwind_native_frame_stack(self)))
+ return;
+
+ size_t basep = _stackp;
+ // Update the thread state to improve guarantees that it will be in sync with
+ // the native stack just collected
+ py_thread__fill_from_raddr(self, &self->raddr, self->proc);
+ #endif
// Group entries by thread.
fprintf(pargs.output_file, SAMPLE_HEAD, self->proc->pid, self->tid);
if (self->stack_height) {
- _py_thread__unwind_frame_stack(self);
+ if (fail(_py_thread__unwind_frame_stack(self))) {
+ fprintf(pargs.output_file, ";:INVALID:");
+ stats_count_error();
+ }
+ #ifndef NATIVE
// Append frames
- register int i = self->stack_height;
- while (i > 0) {
- py_code_t code = _stack[--i].code;
+ while (_stackp > 0) {
+ py_code_t code = _stack[--_stackp].code;
fprintf(pargs.output_file, pargs.format, code.filename, code.scope, code.lineno);
}
+ #endif
}
+ #ifdef NATIVE
+
+ register int i = _stackp;
+ register int j = basep;
+
+ py_code_t * code;
+ while (j-- > 0) {
+ if (strstr(_stack[j].code.scope, "PyEval_EvalFrame")) {
+ code = ((i <= basep) ? &(_stack[j].code) : &(_stack[--i].code));
+ }
+ else {
+ code = &(_stack[j].code);
+ }
+ fprintf(pargs.output_file, pargs.format, code->filename, code->scope, code->lineno);
+ }
+ if (i != basep) {
+ log_e("Stack mismatch: left with %d Python frames after interleaving", i - basep);
+ austin_errno = ETHREADINV;
+ #ifdef DEBUG
+ fprintf(pargs.output_file, ";:%ld FRAMES LEFT:", i - basep);
+ #endif
+ }
+ #endif
+
if (pargs.gc && py_proc__is_gc_collecting(self->proc) == TRUE) {
fprintf(pargs.output_file, ";:GC:");
stats_gc_time(time_delta);
@@ -508,15 +861,13 @@ py_thread__print_collapsed_stack(py_thread_t * self, ctime_t time_delta, ssize_t
// Update sampling stats
stats_count_sample();
- if (austin_errno != EOK)
- stats_count_error();
stats_check_duration(stopwatch_duration());
} /* py_thread__print_collapsed_stack */
// ----------------------------------------------------------------------------
int
-py_thread_allocate_stack(void) {
+py_thread_allocate(void) {
if (isvalid(_stack))
SUCCESS;
@@ -534,16 +885,52 @@ py_thread_allocate_stack(void) {
FAIL;
#endif
+ #ifdef NATIVE
+ size_t max = pid_max();
+ _tids = (void **) calloc(max, sizeof(void *));
+ if (!isvalid(_tids))
+ FAIL;
+
+ _tids_idle = (unsigned char *) calloc(max >> 8, sizeof(unsigned char));
+ if (!isvalid(_tids_idle))
+ FAIL;
+
+ if (pargs.kernel) {
+ _kstacks = (char **) calloc(max, sizeof(char *));
+ if (!isvalid(_kstacks))
+ FAIL;
+ }
+ #endif
+
SUCCESS;
}
// ----------------------------------------------------------------------------
void
-py_thread_free_stack(void) {
+py_thread_free(void) {
#if defined PL_WIN
sfree(_pi_buffer);
#endif
sfree(_stack);
+ sfree(_frames.content);
+ sfree(_frames_heap.content);
+
+ #ifdef NATIVE
+ pid_t max_pid = pid_max();
+ for (pid_t tid = 0; tid < max_pid; tid++) {
+ if (isvalid(_tids[tid])) {
+ _UPT_destroy(_tids[tid]);
+ ptrace(PTRACE_DETACH, tid, 0, 0);
+ log_d("ptrace: thread %ld detached", tid);
+ }
+ if (isvalid(_kstacks) && isvalid(_kstacks[tid])) {
+ sfree(_kstacks[tid]);
+ }
+ }
+ sfree(_tids);
+ sfree(_tids_idle);
+ sfree(_kstacks);
+ #endif
}
diff --git a/src/py_thread.h b/src/py_thread.h
index c9a3210b..f02013ec 100644
--- a/src/py_thread.h
+++ b/src/py_thread.h
@@ -41,6 +41,7 @@ typedef struct thread {
struct thread * next;
size_t stack_height;
+ void * top_frame;
int invalid;
} py_thread_t;
@@ -80,19 +81,27 @@ py_thread__print_collapsed_stack(py_thread_t *, ctime_t, ssize_t);
/**
- * Allocate memory for dumping the frame stack.
+ * Allocate memory for dumping the thread data.
*
* @return either SUCCESS or FAIL.
*/
int
-py_thread_allocate_stack(void);
+py_thread_allocate(void);
/**
- * Deallocate memory for dumping the frame stack.
+ * Deallocate memory for dumping the thread data.
*/
void
-py_thread_free_stack(void);
+py_thread_free(void);
+
+#ifdef NATIVE
+int
+py_thread__set_idle(py_thread_t *);
+
+int
+py_thread__save_kernel_stack(py_thread_t *);
+#endif
#endif // PY_THREAD_H
diff --git a/src/version.h b/src/version.h
index 3a39801a..7ee7318b 100644
--- a/src/version.h
+++ b/src/version.h
@@ -57,7 +57,8 @@
* @return the value of of the field of py_obj at the offset specified
* by the field argument.
*/
-#define V_FIELD(ctype, py_obj, py_type, field) (*((ctype*) (((char *) &py_obj) + py_v->py_type.field)))
+#define V_FIELD(ctype, py_obj, py_type, field) (*((ctype*) (((void *) &py_obj) + py_v->py_type.field)))
+#define V_FIELD_PTR(ctype, py_obj_ptr, py_type, field) (*((ctype*) (((void *) py_obj_ptr) + py_v->py_type.field)))
typedef unsigned long offset_t;
diff --git a/src/win/py_proc.h b/src/win/py_proc.h
index 0475678e..8a8fc6ac 100644
--- a/src/win/py_proc.h
+++ b/src/win/py_proc.h
@@ -40,6 +40,7 @@
struct _proc_extra_info {
HANDLE h_proc;
+ HANDLE h_reader_thread;
};
@@ -186,4 +187,93 @@ _py_proc__init(py_proc_t * self) {
return _py_proc__get_modules(self);
}
+
// ----------------------------------------------------------------------------
// The default stream buffer size should be 4KB, so this chunk size should be
// enough to avoid blocking while keeping the number of reads to a minimum.
#define STDOUT_CHUNK_SIZE (1 << 10)

// Drain the pipe handle passed as lpParam until it is closed or a read fails,
// to keep the child process from blocking on a full stdout buffer.
DWORD WINAPI
reader_thread(LPVOID lpParam) {
  char  buffer[STDOUT_CHUNK_SIZE];
  DWORD count;

  // For non-overlapped handles ReadFile requires a non-NULL
  // lpNumberOfBytesRead; passing NULL for both it and lpOverlapped fails
  // with ERROR_INVALID_PARAMETER, so the original loop exited immediately
  // without draining anything.
  while (ReadFile(lpParam, buffer, STDOUT_CHUNK_SIZE, &count, NULL));

  return 0;
}
+
+
// ----------------------------------------------------------------------------
// Forward declaration.
static int
_py_proc__run(py_proc_t *, int);


// On Windows, if we fail with the parent process we look if it has a single
// child and try to attach to that instead. We keep going until we either find
// a single Python process or more or less than a single child.
static int
_py_proc__try_child_proc(py_proc_t * self) {
  log_d("Process is not Python so we look for a single child Python process");

  HANDLE h = CreateToolhelp32Snapshot(TH32CS_SNAPPROCESS, 0);
  if (h == INVALID_HANDLE_VALUE) {
    log_e("Cannot inspect processes details");
    FAIL;
  }

with_resources;

  // Remember the original process so it can be restored on failure.
  HANDLE orig_hproc = self->extra->h_proc;
  pid_t  orig_pid   = self->pid;

  while (TRUE) {
    pid_t parent_pid = self->pid;

    PROCESSENTRY32 pe = { 0 };
    pe.dwSize = sizeof(PROCESSENTRY32);

    if (!Process32First(h, &pe)) {
      // Cannot enumerate processes: bail out instead of spinning forever.
      log_e("Cannot enumerate processes");
      NOK;
    }

    pid_t child_pid = 0;
    do {
      if (pe.th32ParentProcessID == parent_pid) {
        if (child_pid) {
          log_d("Process has more than one child");
          NOK;
        }
        child_pid = pe.th32ProcessID;
      }
    } while (Process32Next(h, &pe));

    if (!child_pid) {
      log_d("Process has no children");
      NOK;
    }

    self->pid = child_pid;
    self->extra->h_proc = OpenProcess(
      PROCESS_VM_READ | PROCESS_QUERY_INFORMATION, FALSE, child_pid
    );
    // OpenProcess returns NULL on failure, not INVALID_HANDLE_VALUE; the
    // original comparison never detected the error.
    if (self->extra->h_proc == NULL) {
      log_e("Cannot open child process handle");
      NOK;
    }
    if (success(_py_proc__run(self, FALSE))) {
      log_d("Process has a single Python child with PID %d. We will attach to that", child_pid);
      OK;
    }
    else {
      log_d("Process had a single non-Python child with PID %d. Taking it as new parent", child_pid);
      CloseHandle(self->extra->h_proc);
    }
  }

release:
  CloseHandle(h);
  if (retval) {
    self->pid           = orig_pid;
    self->extra->h_proc = orig_hproc;
  }

  released;
}
+
#endif
diff --git a/test/macos/test_attach.bats b/test/macos/test_attach.bats
index fdd1292c..c2801497 100644
--- a/test/macos/test_attach.bats
+++ b/test/macos/test_attach.bats
@@ -52,11 +52,18 @@ function attach_austin {
attach_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 attach_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 attach_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 attach_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 attach_austin "/usr/local/anaconda3/bin/python"
diff --git a/test/macos/test_fork.bats b/test/macos/test_fork.bats
index 457a2ac1..86dcae35 100644
--- a/test/macos/test_fork.bats
+++ b/test/macos/test_fork.bats
@@ -75,11 +75,18 @@ teardown() {
repeat 3 invoke_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 invoke_austin "/usr/local/anaconda3/bin/python"
diff --git a/test/macos/test_fork_mp.bats b/test/macos/test_fork_mp.bats
index caea118c..072160d7 100644
--- a/test/macos/test_fork_mp.bats
+++ b/test/macos/test_fork_mp.bats
@@ -55,11 +55,18 @@ function invoke_austin {
repeat 3 invoke_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 invoke_austin "/usr/local/anaconda3/bin/python"
diff --git a/test/macos/test_pipe.bats b/test/macos/test_pipe.bats
index b31d51dc..9b04b0e2 100644
--- a/test/macos/test_pipe.bats
+++ b/test/macos/test_pipe.bats
@@ -74,11 +74,18 @@ function invoke_austin {
repeat 3 invoke_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 invoke_austin "/usr/local/anaconda3/bin/python"
diff --git a/test/macos/test_sleepless.bats b/test/macos/test_sleepless.bats
index e431750c..42eb6664 100644
--- a/test/macos/test_sleepless.bats
+++ b/test/macos/test_sleepless.bats
@@ -49,11 +49,18 @@ function invoke_austin {
repeat 3 invoke_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 invoke_austin "/usr/local/anaconda3/bin/python"
diff --git a/test/macos/test_valgrind.bats b/test/macos/test_valgrind.bats
index 75fe80f5..88927309 100644
--- a/test/macos/test_valgrind.bats
+++ b/test/macos/test_valgrind.bats
@@ -62,11 +62,18 @@ function invoke_austin {
repeat 3 invoke_austin "/usr/local/bin/python3"
}
-@test "Test Austin with Python 3.8 from Homebrew (if available)" {
- ignore
+@test "Test Austin with Python 3.8 from Homebrew" {
repeat 3 invoke_austin "/usr/local/opt/python@3.8/bin/python3"
}
+@test "Test Austin with Python 3.9 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.9/bin/python3"
+}
+
+@test "Test Austin with Python 3.10 from Homebrew" {
+ repeat 3 invoke_austin "/usr/local/opt/python@3.10/bin/python3"
+}
+
@test "Test Austin with Python 3 from Anaconda (if available)" {
ignore
repeat 3 invoke_austin "/usr/local/anaconda3/bin/python"
diff --git a/utils/resolve.py b/utils/resolve.py
new file mode 100644
index 00000000..cf90d55f
--- /dev/null
+++ b/utils/resolve.py
@@ -0,0 +1,131 @@
+import os
+import sys
+import typing as t
+from subprocess import check_output
+
+
def demangle_cython(function: str) -> str:
    """Best-effort demangling of a Cython-mangled function name.

    Returns the demangled name, or the input unchanged when the name cannot
    be decoded. Raises ValueError only for a malformed ``__pyx_fuse_`` name,
    as the original code did.
    """
    if function.startswith("__pyx_pymod_"):
        # Module-level hooks: __pyx_pymod_<hook>_<module> -> <module>.
        _, _, function = function[12:].partition("_")
        return function

    if function.startswith("__pyx_fuse_"):
        # Strip the fused-type marker and restart from the inner mangled name.
        function = function[function[12:].index("__pyx_") + 12 :]
        for i, d in enumerate(function):
            if d.isdigit():
                break
        else:
            raise ValueError(f"Invalid Cython mangled name: {function}")
    else:
        # The original left ``i`` unbound on this path (NameError in the
        # decoding loop below); locate the first length prefix here as well.
        for i, d in enumerate(function):
            if d.isdigit():
                break
        else:
            # No length prefix at all: nothing to decode.
            return function

    if function.startswith("__pyx_pf_") and ".isra." in function:
        # Drop the GCC ISRA clone suffix. The membership guard prevents the
        # ValueError that an unconditional rindex() raised when absent.
        function = function[: function.rindex(".isra.")]

    # Decode the length-prefixed scope segments; the remainder after the last
    # decoded segment is taken as the plain function name.
    n = 0
    while i < len(function):
        c = function[i]
        i += 1
        if c.isdigit():
            n = n * 10 + int(c)
        else:
            i += n
            n = 0
            if i >= len(function):
                # Truncated/unexpected layout: fall back to the mangled form
                # instead of raising IndexError.
                return function
            if not function[i].isdigit():
                return function[i:]

    return function
+
+
class Maps:
    """Map memory address ranges to binaries and resolve raw addresses to
    source locations by shelling out to ``addr2line``."""

    def __init__(self):
        # TODO: Use an interval tree instead!
        # Each entry is (low address, high address, binary path).
        # NOTE: the original annotation was t.List[t.Tuple(int, int, str)];
        # annotations on attribute targets are evaluated at runtime and
        # calling typing.Tuple raises TypeError, so Maps() crashed here.
        self.maps: t.List[t.Tuple[int, int, str]] = []
        self.bases: t.Dict[str, int] = {}  # lowest mapped address per binary
        self.cache: t.Dict[str, t.Optional[tuple]] = {}

    def addr2line(self, address: str) -> t.Optional[t.Tuple[str, t.Optional[str]]]:
        """Resolve a hex address to (source, lineno), to (binary, offset)
        when the symbol cannot be resolved, or to None when the address does
        not fall within any known map."""
        if address in self.cache:
            return self.cache[address]

        addr = int(address, 16)
        for lo, hi, binary in self.maps:
            if lo <= addr <= hi:
                break
        else:
            self.cache[address] = None
            return None

        resolved, _, _ = (
            check_output(["addr2line", "-Ce", binary, f"{addr-self.bases[binary]:x}"])
            .decode()
            .strip()
            .partition(" ")
        )
        if resolved.startswith("??"):
            # Unresolved symbol: report the binary and the relative offset.
            self.cache[address] = (f"{binary}", addr - self.bases[binary])
            return self.cache[address]

        self.cache[address] = tuple(resolved.split(":", maxsplit=1))
        return self.cache[address]

    def add(self, line: str) -> None:
        """Register a '# map: <low>-<high> <binary>' header line."""
        bounds, _, binary = line[7:].strip().partition(" ")
        low, _, high = bounds.partition("-")
        lo = int(low, 16)
        hi = int(high, 16)
        self.maps.append((lo, hi, binary))
        # The base is the lowest address the binary is mapped at.
        if binary in self.bases:
            self.bases[binary] = min(self.bases[binary], lo)
        else:
            self.bases[binary] = lo

    def resolve(self, line: str) -> str:
        """Rewrite 'native@<addr>:<func>:<line>' frames of a sample line into
        resolved source locations; all other frames pass through unchanged."""
        parts = []
        frames, _, metrics = line.strip().rpartition(" ")
        for part in frames.split(";"):
            if part.startswith("native@"):
                head, function, lineno = part.split(":")
                if function.startswith("__pyx_pw_"):
                    # skip Cython wrappers (cpdef)
                    continue
                if function.startswith("__pyx_"):
                    function = demangle_cython(function)
                elif function.startswith("_Z"):
                    # The original called demangle_cpp(), which is not defined
                    # anywhere (NameError). Shell out to c++filt instead,
                    # keeping the mangled name if the tool is unavailable.
                    try:
                        function = check_output(["c++filt", function]).decode().strip()
                    except OSError:
                        pass
                _, _, address = head.partition("@")
                resolved = self.addr2line(address)
                if resolved is None:
                    parts.append(":".join((head, function, lineno)))
                else:
                    source, native_lineno = resolved
                    parts.append(f"{source}:{function}:{native_lineno or lineno}")
            else:
                parts.append(part)

        return " ".join((";".join(parts), metrics))
+
+
def main():
    """CLI entry point: read the Austin output file named by the first
    command-line argument and print it with native frames resolved.

    Exits with status 1 when the argument is missing or the file does not
    exist.
    """
    try:
        stats = sys.argv[1]
    except IndexError:
        # The original usage string was truncated after 'resolve.py '.
        print("Usage: python resolve.py <austin-file>", file=sys.stderr)
        sys.exit(1)

    # Explicit check instead of a bare assert, which is stripped under -O.
    if not os.path.isfile(stats):
        print("Austin file does not exist", file=sys.stderr)
        sys.exit(1)

    maps = Maps()
    with open(stats) as s:
        for line in s:
            if line.startswith("# map: "):
                maps.add(line)
            elif line.startswith("# ") or line == "\n":
                # Other header/blank lines pass through untouched.
                print(line, end="")
            else:
                print(maps.resolve(line))
+
+
+if __name__ == "__main__":
+ main()