Add a 2nd plot to show RAM usage of each allocator #5

Open · wants to merge 16 commits into base: master

Commits (16):
c491326
Update to Python3; update README to improve instructions, incl. for U…
ElectricRCAircraftGuy Jun 27, 2021
8c3f76c
Update README to show how to find versions
ElectricRCAircraftGuy Jun 27, 2021
3842b23
Update readme w/reference
ElectricRCAircraftGuy Jun 27, 2021
4c910dd
Update notes in Makefile, and benchmark-src/README.md
ElectricRCAircraftGuy Jun 27, 2021
412db70
Update README
ElectricRCAircraftGuy Jun 27, 2021
93d67bd
Update all benchmark-src files to latest from glibc
ElectricRCAircraftGuy Jun 27, 2021
e32c2a0
wip: continue adding more files to benchmark-src dir
ElectricRCAircraftGuy Jun 27, 2021
a459929
Update to use the latest glibc's bench-malloc-thread program
ElectricRCAircraftGuy Jun 27, 2021
51c531b
Update glibc-benchmark-info.md
ElectricRCAircraftGuy Jun 27, 2021
4d127e8
Improve the results dirnames to be unique down to the second, not jus…
ElectricRCAircraftGuy Jun 27, 2021
f0630c8
bench_collect_results.py: improve the cmd output
ElectricRCAircraftGuy Jun 28, 2021
e0b3246
Update scripts to collect hardware info first
ElectricRCAircraftGuy Jun 28, 2021
41a0498
bench_plot_results.py: wip: add a title and subtitle to the figure
ElectricRCAircraftGuy Jun 28, 2021
a920833
bench_plot_results.py: add plot subtitle; improve plot data structures
ElectricRCAircraftGuy Jun 28, 2021
21c17be
bench_plot_results.py: wip: start adding subplots
ElectricRCAircraftGuy Jun 28, 2021
ce35a51
bench_plot_results.py: finish adding RAM usage plot!
ElectricRCAircraftGuy Jun 28, 2021
47 changes: 28 additions & 19 deletions Makefile
@@ -1,17 +1,22 @@
#
# This makefile will build a small benchmarking utility for 'malloc' implementations and will
# run it with different implementations saving results into JSON files.
# run it with different implementations, first saving results into JSON files, and then plotting
# them graphically.
#
# Specifically this makefile downloads, configure and compiles 3 different software packages:
# - GNU libc
# - Google perftools (tcmalloc)
# - jemalloc
# Specifically, this makefile downloads, configures and compiles these different software packages:
# 1. GNU libc
# 2. Google perftools (tcmalloc)
# 3. jemalloc
#
# Tested with versions:
# - GNU libc 2.26
# - Google perftools (tcmalloc) 2.6.3
# - jemalloc 5.0.1
# First tested with these versions:
# 1. GNU libc 2.26
# 2. Google perftools (tcmalloc) 2.6.3
# 3. jemalloc 5.0.1
#
# Most-recently tested on Ubuntu 20.04 with these versions:
# 1. GNU libc 2.31
# 2. Google perftools (tcmalloc) 2.9.1
# 3. jemalloc 5.2.1-742
#

#
@@ -35,8 +40,8 @@ endif
ifdef NUMPROC
parallel_flags := -j$(NUMPROC)
else
# default value
parallel_flags := -j4
# default value: pull from the max number of hardware processes: `nproc` cmd output; ex: 8
parallel_flags := -j$(shell nproc)
endif

ifdef POSTFIX
@@ -50,7 +55,7 @@ ifdef RESULT_DIRNAME
results_dir := $(RESULT_DIRNAME)
else
# default value
results_dir := results/$(shell date +%F)-$(benchmark_postfix)
results_dir := results/$(shell date '+%Y.%m.%d-%H%Mhrs-%Ssec')--$(benchmark_postfix)
endif

ifdef IMPLEMENTATIONS
@@ -69,13 +74,14 @@ endif

topdir=$(shell readlink -f .)

benchmark_result_json := results.json
# the suffix for the json file names:
benchmark_result_json := results.json
benchmark_result_png := results.png

glibc_url := git://sourceware.org/git/glibc.git
tcmalloc_url := https://github.com/gperftools/gperftools.git
jemalloc_url := https://github.com/jemalloc/jemalloc.git

# Alternate download version and source if not using the git repo above
glibc_version := 2.26
glibc_alt_wget_url := https://ftpmirror.gnu.org/libc/glibc-$(glibc_version).tar.xz

@@ -125,6 +131,7 @@ $(glibc_install_dir)/lib/libc.so.6:
cd $(glibc_build_dir) && \
../glibc/configure --prefix=$(glibc_install_dir) && \
make $(parallel_flags) && \
make bench-build $(parallel_flags) && \
make install
[ -x $(glibc_build_dir)/benchtests/bench-malloc-thread ] && echo "GNU libc benchmarking utility is ready!" || echo "Cannot find GNU libc benchmarking utility! Cannot collect benchmark results"

@@ -143,7 +150,6 @@ $(jemalloc_install_dir)/lib/libjemalloc.so:
( make install || true )

build:
$(MAKE) -C benchmark-src
ifeq ($(findstring glibc,$(implem_list)),glibc)
$(MAKE) $(glibc_install_dir)/lib/libc.so.6
endif
@@ -157,21 +163,24 @@ endif

collect_results:
@mkdir -p $(results_dir)
@echo "Starting to collect performance benchmarks."
./bench_collect_results.py "$(implem_list)" $(results_dir)/$(benchmark_result_json) $(benchmark_nthreads)
@echo "Collecting hardware information in $(results_dir)/hardware-inventory.txt"

@echo "Collecting hardware information (sudo required) in $(results_dir)/hardware-inventory.txt"
@sudo lshw -short -class memory -class processor > $(results_dir)/hardware-inventory.txt
@echo -n "Number of CPU cores: " >>$(results_dir)/hardware-inventory.txt
@grep "processor" /proc/cpuinfo | wc -l >>$(results_dir)/hardware-inventory.txt
@(which numactl >/dev/null 2>&1) && echo "NUMA informations:" >>$(results_dir)/hardware-inventory.txt
# NB: you may need to install `numactl` first with `sudo apt install numactl`.
@(which numactl >/dev/null 2>&1) && echo "NUMA information (from 'numactl -H'):" >>$(results_dir)/hardware-inventory.txt
@(which numactl >/dev/null 2>&1) && numactl -H >>$(results_dir)/hardware-inventory.txt

@echo "Starting to collect performance benchmarks."
./bench_collect_results.py "$(implem_list)" $(results_dir)/$(benchmark_result_json) $(benchmark_nthreads)

plot_results:
./bench_plot_results.py $(results_dir)/$(benchmark_result_png) $(results_dir)/*.json

# the following target is mostly useful only to the maintainer of the github project:
upload_results:
git add -f $(results_dir)/*$(benchmark_result_json) $(results_dir)/$(benchmark_result_png) $(results_dir)/hardware-inventory.txt
git commit -m "Adding results from folder $(results_dir) to the GIT repository"
@echo "Run 'git push' to push online your results (required GIT repo write access)"
@echo "Run 'git push' to push online your results (requires GIT repo write access)"

88 changes: 66 additions & 22 deletions README.md
@@ -1,49 +1,93 @@
See also more glibc source code and build info. here: [glibc-benchmark-info.md](glibc-benchmark-info.md).


# malloc-benchmarks

Simple benchmarking scripts to run on any machine to compare different C/C++ malloc implementations.
The scripts are not meant to face any possible problem, quite the opposite.
Simple benchmarking and plotting scripts to run on any machine to compare different C/C++ malloc implementations.
These scripts are not meant to handle every possible problem; quite the opposite.
They will:
- download and build [GNU libc](https://www.gnu.org/software/libc/), [Google perftools](https://github.com/gperftools/gperftools), [Jemalloc](http://jemalloc.net/)
- use GNU libc malloc multi-thread benchmarking utility to generate JSON results for different combinations
of malloc implementation and number of threads
- use [Python matplotlib](https://matplotlib.org/) to produce a summary figure
1. Download and build [GNU libc](https://www.gnu.org/software/libc/), [Google perftools](https://github.com/gperftools/gperftools), [Jemalloc](http://jemalloc.net/)
1. Use the GNU libc malloc multi-threaded benchmarking utility to generate JSON results for different combinations
of malloc implementations and numbers of threads
1. Use [Python matplotlib](https://matplotlib.org/) to produce a plot of the results

**Other high-quality malloc benchmark tools:**
1. https://github.com/daanx/mimalloc-bench


## Dependencies

If `make` below fails, you may need to install one or more of the following packages (via `sudo apt install`). If you like, just begin by running the installation commands below. Last tested on Ubuntu 20.04.

```bash
sudo apt update && sudo apt install \
numactl g++ clang llvm-dev unzip dos2unix linuxinfo bc libgmp-dev wget \
cmake python python3 ruby ninja-build libtool autoconf
# For Python
pip3 install matplotlib
```
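
As a quick, optional sanity check that the plotting dependency installed correctly, you can try importing it (a sketch; it should print your matplotlib version):

```bash
python3 -c "import matplotlib; print(matplotlib.__version__)"
```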


## How to collect benchmark results and view them

```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make
# OR, also time the process to help you set expectations for how long it will take
time make
```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make

Once you have run `make`, the plot will display. To re-plot the results without rerunning the tests, run the following, specifying the correct dirname:
```bash
RESULT_DIRNAME='results/my_dir_name' make plot_results
```

Note that each time you run `make`, all of the benchmark results will be stored in a folder for your computer within the `results` dir.
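
For example, given the dirname scheme in the Makefile (`date '+%Y.%m.%d-%H%Mhrs-%Ssec'` plus a postfix), a run might produce a folder like the following (hypothetical timestamp and postfix):

```bash
ls results/
# 2021.06.28-0935hrs-12sec--my-hostname/
```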

You can customize the runs by setting environment variables when you call `make`. See the top of the `Makefile` for details, including the default values for `benchmark_nthreads` and `implem_list`.

Examples:
```bash
# Run only 1 and 2 threads, testing only malloc implementations jemalloc and tcmalloc:
NTHREADS="1 2" IMPLEMENTATIONS="jemalloc tcmalloc" time make
```
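
Other variables read at the top of the `Makefile` can be combined the same way; for instance (a sketch using the `NUMPROC` and `RESULT_DIRNAME` variables shown in the Makefile diff above):

```bash
# Limit build parallelism and pin the output directory name:
NUMPROC=4 RESULT_DIRNAME='results/my_test_run' make
```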


## How to collect benchmark results on a machine and plot them from another one

On the machine where you want to collect benchmark results:

```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make download build collect_results
scp -r results IP_OF_OTHER_MACHINE:
```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make download build collect_results
scp -r results IP_OF_OTHER_MACHINE:
```

On the other machine where you want to plot results:

```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
mv ../results .
make plot_results
```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
mv ../results .
# manually specify the correct `my_dir_name` below
RESULT_DIRNAME='results/my_dir_name' make plot_results
```


## Example benchmarks

The following are some pictures obtained on different HW systems using however the same benchmarking utility written by
GNU libc developers. They give an idea on how much performances can be different on different CPU/memory HW and varying the number of threads.
Of course the closer the curves are to zero, the better they are (the lower the better!).
The following are some plots of results obtained on different hardware systems using the same benchmarking utility written by the
GNU libc developers. They give an idea of how much performance can differ on different CPU/memory hardware and with a varying number of threads.
Of course, the closer the curves are to zero, the better they are (the lower the better!).

**To verify the version numbers for your benchmarks, look in the following places after running `make`; example commands follow this list:**
1. **system_default:** run `apt show libc6` to see your system glibc version ([source: "Determining the Installed glibc Version"](https://www.linode.com/docs/guides/patching-glibc-for-the-ghost-vulnerability/)). Ex: `Version: 2.31-0ubuntu9.2`
1. **glibc:** See this file: `malloc-benchmarks/glibc/version.h`
1. **tcmalloc:** See the `TC_VERSION_STRING` value inside `malloc-benchmarks/tcmalloc-install/include/gperftools/tcmalloc.h`
1. **jemalloc:** See the `JEMALLOC_VERSION` value inside `malloc-benchmarks/jemalloc-install/include/jemalloc/jemalloc.h`
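
A minimal sketch of pulling those version strings from the command line, assuming the default install-dir layout created by the Makefile (the macro names are quoted from the list above; header contents may vary slightly between versions):

```bash
apt show libc6 2>/dev/null | grep '^Version'                             # system_default
grep 'VERSION' glibc/version.h                                           # glibc
grep 'TC_VERSION_STRING' tcmalloc-install/include/gperftools/tcmalloc.h  # tcmalloc
grep 'JEMALLOC_VERSION ' jemalloc-install/include/jemalloc/jemalloc.h    # jemalloc
```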


<table cellpadding="5" width="100%">
<tbody>
44 changes: 26 additions & 18 deletions bench_collect_results.py
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
"""Generate benchmarking results in JSON form, using GNU libc benchmarking utility;
different allocators are injected into that utility by using LD_PRELOAD trick.
"""
@@ -11,7 +11,7 @@
# Constants
#

internal_benchmark_util = 'benchmark-src/bench-malloc-thread'
internal_benchmark_util = 'glibc-build/benchtests/bench-malloc-thread'

glibc_install_dir = 'glibc-install'
tcmalloc_install_dir = 'tcmalloc-install'
@@ -26,24 +26,27 @@
'jemalloc': jemalloc_install_dir + '/lib/libjemalloc.so'
}

# to successfully preload the tcmalloc,jemalloc libs we will also need to preload the C++ standard lib and gcc_s lib:
# to successfully preload the tcmalloc,jemalloc libs we will also need to preload the C++ standard
# lib and gcc_s lib:
preload_required_libs= [ 'libstdc++.so.6', 'libgcc_s.so.1' ]
preload_required_libs_fullpaths = []

benchmark_util = {
'system_default': internal_benchmark_util,

# to test the latest GNU libc implementation downloaded and compiled locally we use another trick:
# we ask the dynamic linker of the just-built GNU libc to run the benchmarking utility using new GNU libc dyn libs:
'glibc': glibc_install_dir + '/lib/ld-linux-x86-64.so.2 --library-path ' + glibc_install_dir + '/lib ' + internal_benchmark_util,
# to test the latest GNU libc implementation downloaded and compiled locally we use another
# trick: we ask the dynamic linker of the just-built GNU libc to run the benchmarking utility
# using the new GNU libc dynamic libs:
'glibc': (glibc_install_dir + '/lib/ld-linux-x86-64.so.2 --library-path ' + glibc_install_dir +
'/lib ' + internal_benchmark_util),

'tcmalloc': internal_benchmark_util,
'jemalloc': internal_benchmark_util
'jemalloc': internal_benchmark_util,
}

def find(name, paths):
for path in paths:
#print "Searching into: ", path
#print("Searching into: ", path)
for root, dirs, files in os.walk(path, followlinks=False):
if name in files:
return os.path.join(root, name)
@@ -102,25 +105,30 @@ def run_benchmark(outfile,thread_values,impl_name):
last_nthreads = thread_values[len(thread_values)-1]
bm = {}
for nthreads in thread_values:
# run the external benchmark utility with the LD_PRELOAD trick

try:
# 1. Set the `LD_PRELOAD` environment variable
os.environ["LD_PRELOAD"] = impl_preload_libs[impl_name]
if len(os.environ["LD_PRELOAD"])>0:
# the tcmalloc/jemalloc shared libs require in turn C++ libs:
#print "preload_required_libs_fullpaths is:", preload_required_libs_fullpaths
#print("preload_required_libs_fullpaths is:", preload_required_libs_fullpaths)
for lib in preload_required_libs_fullpaths:
os.environ["LD_PRELOAD"] = os.environ["LD_PRELOAD"] + ':' + lib

utility_fname = benchmark_util[impl_name]


# run the external benchmark utility with the LD_PRELOAD trick
print("Running for nthreads={}:\n LD_PRELOAD='{}' {} {}".format(nthreads,os.environ["LD_PRELOAD"],utility_fname,nthreads))

cmd = "{} {} >/tmp/benchmark-output".format(utility_fname, nthreads)
full_cmd = "LD_PRELOAD='{}' {}".format(os.environ["LD_PRELOAD"], cmd)

print("Running this benchmark cmd for nthreads={}:".format(nthreads))
print(" {}".format(full_cmd))

# 2. Call the benchmark cmd
# the subprocess.check_output() method does not seem to work fine when launching
# the ld-linux-x86-64.so.2 with --library-path
#stdout = subprocess.check_output([utility_fname, nthreads])
os.system("{} {} >/tmp/benchmark-output".format(utility_fname,nthreads))
os.system(cmd)
stdout = open('/tmp/benchmark-output', 'r').read()

# produce valid JSON output:
@@ -156,13 +164,13 @@ def main(args):
sys.exit(3)

outfile = os.path.join(outfile_path_prefix, implementations[idx] + '-' + outfile_postfix)
print "----------------------------------------------------------------------------------------------"
print "Testing implementation '{}'. Saving results into '{}'".format(implementations[idx],outfile)
print("----------------------------------------------------------------------------------------------")
print("Testing implementation '{}'. Saving results into '{}'".format(implementations[idx],outfile))

print "Will run tests for {} different number of threads".format(len(thread_values))
print("Will run tests for {} different numbers of threads.".format(len(thread_values)))
success = success + run_benchmark(outfile,thread_values,implementations[idx])

print "----------------------------------------------------------------------------------------------"
print("----------------------------------------------------------------------------------------------")
return success

if __name__ == '__main__':
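
For reference, the `LD_PRELOAD` trick automated by this script can be reproduced by hand. A minimal sketch (the benchmark path and the helper C++ libs are taken from the constants above; the tcmalloc library path is an assumption based on the usual gperftools install layout):

```bash
# Benchmark tcmalloc with 4 threads by preloading it, plus the C++ runtime
# libs it depends on, into the glibc benchmark utility:
LD_PRELOAD='tcmalloc-install/lib/libtcmalloc.so:libstdc++.so.6:libgcc_s.so.1' \
    glibc-build/benchtests/bench-malloc-thread 4 > /tmp/benchmark-output
```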