Add a 2nd plot to show RAM usage of each allocator #5

Open · wants to merge 16 commits into base: master

Commits (16):
c491326
Update to Python3; update README to improve instructions, incl. for U…
ElectricRCAircraftGuy Jun 27, 2021
8c3f76c
Update README to show how to find versions
ElectricRCAircraftGuy Jun 27, 2021
3842b23
Update readme w/reference
ElectricRCAircraftGuy Jun 27, 2021
4c910dd
Update notes in Makefile, and benchmark-src/README.md
ElectricRCAircraftGuy Jun 27, 2021
412db70
Update README
ElectricRCAircraftGuy Jun 27, 2021
93d67bd
Update all benchmark-src files to latest from glibc
ElectricRCAircraftGuy Jun 27, 2021
e32c2a0
wip: continue adding more files to benchmark-src dir
ElectricRCAircraftGuy Jun 27, 2021
a459929
Update to use the latest glibc's bench-malloc-thread program
ElectricRCAircraftGuy Jun 27, 2021
51c531b
Update glibc-benchmark-info.md
ElectricRCAircraftGuy Jun 27, 2021
4d127e8
Improve the results dirnames to be unique down to the second, not jus…
ElectricRCAircraftGuy Jun 27, 2021
f0630c8
bench_collect_results.py: improve the cmd output
ElectricRCAircraftGuy Jun 28, 2021
e0b3246
Update scripts to collect hardware info first
ElectricRCAircraftGuy Jun 28, 2021
41a0498
bench_plot_results.py: wip: add a title and subtitle to the figure
ElectricRCAircraftGuy Jun 28, 2021
a920833
bench_plot_results.py: add plot subtitle; improve plot data structures
ElectricRCAircraftGuy Jun 28, 2021
21c17be
bench_plot_results.py: wip: start adding subplots
ElectricRCAircraftGuy Jun 28, 2021
ce35a51
bench_plot_results.py: finish adding RAM usage plot!
ElectricRCAircraftGuy Jun 28, 2021
47 changes: 28 additions & 19 deletions Makefile
@@ -1,17 +1,22 @@
#
# This makefile will build a small benchmarking utility for 'malloc' implementations and will
# run it with different implementations saving results into JSON files.
# run it with different implementations, first saving results into JSON files, and then plotting
# them graphically.
#
# Specifically this makefile downloads, configure and compiles 3 different software packages:
# - GNU libc
# - Google perftools (tcmalloc)
# - jemalloc
# Specifically, this makefile downloads, configures and compiles these different software packages:
# 1. GNU libc
# 2. Google perftools (tcmalloc)
# 3. jemalloc
#
# Tested with versions:
# - GNU libc 2.26
# - Google perftools (tcmalloc) 2.6.3
# - jemalloc 5.0.1
# First tested with these versions:
# 1. GNU libc 2.26
# 2. Google perftools (tcmalloc) 2.6.3
# 3. jemalloc 5.0.1
#
# Most-recently tested on Ubuntu 20.04 with these versions:
# 1. GNU libc 2.31
# 2. Google perftools (tcmalloc) 2.9.1
# 3. jemalloc 5.2.1-742
#

#
@@ -35,8 +40,8 @@ endif
ifdef NUMPROC
parallel_flags := -j$(NUMPROC)
else
# default value
parallel_flags := -j4
# default value: pull from the max number of hardware processes: `nproc` cmd output; ex: 8
parallel_flags := -j$(shell nproc)
endif

ifdef POSTFIX
@@ -50,7 +55,7 @@ ifdef RESULT_DIRNAME
results_dir := $(RESULT_DIRNAME)
else
# default value
results_dir := results/$(shell date +%F)-$(benchmark_postfix)
results_dir := results/$(shell date '+%Y.%m.%d-%H%Mhrs-%Ssec')--$(benchmark_postfix)
endif

ifdef IMPLEMENTATIONS
@@ -69,13 +74,14 @@ endif

topdir=$(shell readlink -f .)

benchmark_result_json := results.json
# the suffix for the json file names:
benchmark_result_json := results.json
benchmark_result_png := results.png

glibc_url := git://sourceware.org/git/glibc.git
tcmalloc_url := https://github.com/gperftools/gperftools.git
jemalloc_url := https://github.com/jemalloc/jemalloc.git

# Alternate download version and source if not using the git repo above
glibc_version := 2.26
glibc_alt_wget_url := https://ftpmirror.gnu.org/libc/glibc-$(glibc_version).tar.xz

@@ -125,6 +131,7 @@ $(glibc_install_dir)/lib/libc.so.6:
cd $(glibc_build_dir) && \
../glibc/configure --prefix=$(glibc_install_dir) && \
make $(parallel_flags) && \
make bench-build $(parallel_flags) && \
make install
[ -x $(glibc_build_dir)/benchtests/bench-malloc-thread ] && echo "GNU libc benchmarking utility is ready!" || echo "Cannot find GNU libc benchmarking utility! Cannot collect benchmark results"

@@ -143,7 +150,6 @@ $(jemalloc_install_dir)/lib/libjemalloc.so:
( make install || true )

build:
$(MAKE) -C benchmark-src
ifeq ($(findstring glibc,$(implem_list)),glibc)
$(MAKE) $(glibc_install_dir)/lib/libc.so.6
endif
@@ -157,21 +163,24 @@ endif

collect_results:
@mkdir -p $(results_dir)
@echo "Starting to collect performance benchmarks."
./bench_collect_results.py "$(implem_list)" $(results_dir)/$(benchmark_result_json) $(benchmark_nthreads)
@echo "Collecting hardware information in $(results_dir)/hardware-inventory.txt"

@echo "Collecting hardware information (sudo required) in $(results_dir)/hardware-inventory.txt"
@sudo lshw -short -class memory -class processor > $(results_dir)/hardware-inventory.txt
@echo -n "Number of CPU cores: " >>$(results_dir)/hardware-inventory.txt
@grep "processor" /proc/cpuinfo | wc -l >>$(results_dir)/hardware-inventory.txt
@(which numactl >/dev/null 2>&1) && echo "NUMA informations:" >>$(results_dir)/hardware-inventory.txt
# NB: you may need to install `numactl` first with `sudo apt install numactl`.
@(which numactl >/dev/null 2>&1) && echo "NUMA information (from 'numactl -H'):" >>$(results_dir)/hardware-inventory.txt
@(which numactl >/dev/null 2>&1) && numactl -H >>$(results_dir)/hardware-inventory.txt

@echo "Starting to collect performance benchmarks."
./bench_collect_results.py "$(implem_list)" $(results_dir)/$(benchmark_result_json) $(benchmark_nthreads)

plot_results:
./bench_plot_results.py $(results_dir)/$(benchmark_result_png) $(results_dir)/*.json

# the following target is mostly useful only to the maintainer of the github project:
upload_results:
git add -f $(results_dir)/*$(benchmark_result_json) $(results_dir)/$(benchmark_result_png) $(results_dir)/hardware-inventory.txt
git commit -m "Adding results from folder $(results_dir) to the GIT repository"
@echo "Run 'git push' to push online your results (required GIT repo write access)"
@echo "Run 'git push' to push online your results (requires GIT repo write access)"

88 changes: 66 additions & 22 deletions README.md
@@ -1,49 +1,93 @@
See also more glibc source code and build info. here: [glibc-benchmark-info.md](glibc-benchmark-info.md).


# malloc-benchmarks

Simple benchmarking scripts to run on any machine to compare different C/C++ malloc implementations.
The scripts are not meant to face any possible problem, quite the opposite.
Simple benchmarking and plotting scripts to run on any machine to compare different C/C++ malloc implementations.
These scripts are not meant to handle every possible problem; quite the opposite.
They will:
- download and build [GNU libc](https://www.gnu.org/software/libc/), [Google perftools](https://github.com/gperftools/gperftools), [Jemalloc](http://jemalloc.net/)
- use GNU libc malloc multi-thread benchmarking utility to generate JSON results for different combinations
of malloc implementation and number of threads
- use [Python matplotlib](https://matplotlib.org/) to produce a summary figure
1. Download and build [GNU libc](https://www.gnu.org/software/libc/), [Google perftools](https://github.com/gperftools/gperftools), [Jemalloc](http://jemalloc.net/)
1. Use the GNU libc malloc multi-threaded benchmarking utility to generate JSON results for different combinations
of malloc implementations and numbers of threads
1. Use [Python matplotlib](https://matplotlib.org/) to produce a plot of the results

**Other high-quality malloc benchmark tools:**
1. https://github.com/daanx/mimalloc-bench


## Dependencies

If `make` below fails, you may need to install one or more of the following packages (via `sudo apt install`). If you like, just begin by running the installation commands below. Last tested on Ubuntu 20.04.

```bash
sudo apt update && sudo apt install \
numactl g++ clang llvm-dev unzip dos2unix linuxinfo bc libgmp-dev wget \
cmake python python3 ruby ninja-build libtool autoconf
# For Python
pip3 install matplotlib
```
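
As a quick, optional sanity check that the plotting dependency installed correctly, you can try importing it (a sketch; it should print your matplotlib version):

```bash
python3 -c "import matplotlib; print(matplotlib.__version__)"
```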


## How to collect benchmark results and view them

```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make
# OR, also time the process to help you set expectations for how long it will take
time make
```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make

Once you have run `make`, the plot will display. To re-plot the results without rerunning the tests, run the following, specifying the correct dirname:
```bash
RESULT_DIRNAME='results/my_dir_name' make plot_results
```

Note that each time you run `make`, all of the benchmark results will be stored in a folder for your computer within the `results` dir.
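
For example, given the dirname scheme in the Makefile (`date '+%Y.%m.%d-%H%Mhrs-%Ssec'` plus a postfix), a run might produce a folder like the following (hypothetical timestamp and postfix):

```bash
ls results/
# 2021.06.28-0935hrs-12sec--my-hostname/
```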

You can customize the runs by setting environment variables when you call `make`. See the top of the `Makefile` for details, including the default values for `benchmark_nthreads` and `implem_list`.

Examples:
```bash
# Run only 1 and 2 threads, testing only malloc implementations jemalloc and tcmalloc:
NTHREADS="1 2" IMPLEMENTATIONS="jemalloc tcmalloc" time make
```
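
Other variables read at the top of the `Makefile` can be combined the same way; for instance (a sketch using the `NUMPROC` and `RESULT_DIRNAME` variables shown in the Makefile diff above):

```bash
# Limit build parallelism and pin the output directory name:
NUMPROC=4 RESULT_DIRNAME='results/my_test_run' make
```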


## How to collect benchmark results on a machine and plot them from another one

On the machine where you want to collect benchmark results:

```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make download build collect_results
scp -r results IP_OF_OTHER_MACHINE:
```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
make download build collect_results
scp -r results IP_OF_OTHER_MACHINE:
```

On the other machine where you want to plot results:

```
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
mv ../results .
make plot_results
```bash
git clone https://github.com/f18m/malloc-benchmarks.git
cd malloc-benchmarks
mv ../results .
# manually specify the correct `my_dir_name` below
RESULT_DIRNAME='results/my_dir_name' make plot_results
```


## Example benchmarks

The following are some pictures obtained on different HW systems using however the same benchmarking utility written by
GNU libc developers. They give an idea on how much performances can be different on different CPU/memory HW and varying the number of threads.
Of course the closer the curves are to zero, the better they are (the lower the better!).
The following are some plots of results obtained on different hardware systems using the same benchmarking utility written by the
GNU libc developers. They give an idea of how much performance can differ on different CPU/memory hardware and with a varying number of threads.
Of course, the closer the curves are to zero, the better they are (the lower the better!).

**To verify the version numbers for your benchmarks, look in the following places after running `make`; example commands follow this list:**
1. **system_default:** run `apt show libc6` to see your system glibc version ([source: "Determining the Installed glibc Version"](https://www.linode.com/docs/guides/patching-glibc-for-the-ghost-vulnerability/)). Ex: `Version: 2.31-0ubuntu9.2`
1. **glibc:** See this file: `malloc-benchmarks/glibc/version.h`
1. **tcmalloc:** See the `TC_VERSION_STRING` value inside `malloc-benchmarks/tcmalloc-install/include/gperftools/tcmalloc.h`
1. **jemalloc:** See the `JEMALLOC_VERSION` value inside `malloc-benchmarks/jemalloc-install/include/jemalloc/jemalloc.h`
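
A minimal sketch of pulling those version strings from the command line, assuming the default install-dir layout created by the Makefile (the macro names are quoted from the list above; header contents may vary slightly between versions):

```bash
apt show libc6 2>/dev/null | grep '^Version'                             # system_default
grep 'VERSION' glibc/version.h                                           # glibc
grep 'TC_VERSION_STRING' tcmalloc-install/include/gperftools/tcmalloc.h  # tcmalloc
grep 'JEMALLOC_VERSION ' jemalloc-install/include/jemalloc/jemalloc.h    # jemalloc
```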


<table cellpadding="5" width="100%">
<tbody>
44 changes: 26 additions & 18 deletions bench_collect_results.py
@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
"""Generate benchmarking results in JSON form, using GNU libc benchmarking utility;
different allocators are injected into that utility by using LD_PRELOAD trick.
"""
@@ -11,7 +11,7 @@
# Constants
#

internal_benchmark_util = 'benchmark-src/bench-malloc-thread'
internal_benchmark_util = 'glibc-build/benchtests/bench-malloc-thread'

glibc_install_dir = 'glibc-install'
tcmalloc_install_dir = 'tcmalloc-install'
@@ -26,24 +26,27 @@
'jemalloc': jemalloc_install_dir + '/lib/libjemalloc.so'
}

# to successfully preload the tcmalloc,jemalloc libs we will also need to preload the C++ standard lib and gcc_s lib:
# to successfully preload the tcmalloc,jemalloc libs we will also need to preload the C++ standard
# lib and gcc_s lib:
preload_required_libs= [ 'libstdc++.so.6', 'libgcc_s.so.1' ]
preload_required_libs_fullpaths = []

benchmark_util = {
'system_default': internal_benchmark_util,

# to test the latest GNU libc implementation downloaded and compiled locally we use another trick:
# we ask the dynamic linker of the just-built GNU libc to run the benchmarking utility using new GNU libc dyn libs:
'glibc': glibc_install_dir + '/lib/ld-linux-x86-64.so.2 --library-path ' + glibc_install_dir + '/lib ' + internal_benchmark_util,
# to test the latest GNU libc implementation downloaded and compiled locally we use another
# trick: we ask the dynamic linker of the just-built GNU libc to run the benchmarking utility
# using the new GNU libc dynamic libs:
'glibc': (glibc_install_dir + '/lib/ld-linux-x86-64.so.2 --library-path ' + glibc_install_dir +
'/lib ' + internal_benchmark_util),

'tcmalloc': internal_benchmark_util,
'jemalloc': internal_benchmark_util
'jemalloc': internal_benchmark_util,
}

def find(name, paths):
for path in paths:
#print "Searching into: ", path
#print("Searching into: ", path)
for root, dirs, files in os.walk(path, followlinks=False):
if name in files:
return os.path.join(root, name)
@@ -102,25 +105,30 @@ def run_benchmark(outfile,thread_values,impl_name):
last_nthreads = thread_values[len(thread_values)-1]
bm = {}
for nthreads in thread_values:
# run the external benchmark utility with the LD_PRELOAD trick

try:
# 1. Set the `LD_PRELOAD` environment variable
os.environ["LD_PRELOAD"] = impl_preload_libs[impl_name]
if len(os.environ["LD_PRELOAD"])>0:
# the tcmalloc/jemalloc shared libs require in turn C++ libs:
#print "preload_required_libs_fullpaths is:", preload_required_libs_fullpaths
#print("preload_required_libs_fullpaths is:", preload_required_libs_fullpaths)
for lib in preload_required_libs_fullpaths:
os.environ["LD_PRELOAD"] = os.environ["LD_PRELOAD"] + ':' + lib

utility_fname = benchmark_util[impl_name]


# run the external benchmark utility with the LD_PRELOAD trick
print("Running for nthreads={}:\n LD_PRELOAD='{}' {} {}".format(nthreads,os.environ["LD_PRELOAD"],utility_fname,nthreads))

cmd = "{} {} >/tmp/benchmark-output".format(utility_fname, nthreads)
full_cmd = "LD_PRELOAD='{}' {}".format(os.environ["LD_PRELOAD"], cmd)

print("Running this benchmark cmd for nthreads={}:".format(nthreads))
print(" {}".format(full_cmd))

# 2. Call the benchmark cmd
# the subprocess.check_output() method does not seem to work fine when launching
# the ld-linux-x86-64.so.2 with --library-path
#stdout = subprocess.check_output([utility_fname, nthreads])
os.system("{} {} >/tmp/benchmark-output".format(utility_fname,nthreads))
os.system(cmd)
stdout = open('/tmp/benchmark-output', 'r').read()

# produce valid JSON output:
@@ -156,13 +164,13 @@ def main(args):
sys.exit(3)

outfile = os.path.join(outfile_path_prefix, implementations[idx] + '-' + outfile_postfix)
print "----------------------------------------------------------------------------------------------"
print "Testing implementation '{}'. Saving results into '{}'".format(implementations[idx],outfile)
print("----------------------------------------------------------------------------------------------")
print("Testing implementation '{}'. Saving results into '{}'".format(implementations[idx],outfile))

print "Will run tests for {} different number of threads".format(len(thread_values))
print("Will run tests for {} different numbers of threads.".format(len(thread_values)))
success = success + run_benchmark(outfile,thread_values,implementations[idx])

print "----------------------------------------------------------------------------------------------"
print("----------------------------------------------------------------------------------------------")
return success

if __name__ == '__main__':
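
For reference, the `LD_PRELOAD` trick automated by this script can be reproduced by hand. A minimal sketch (the benchmark path and the helper C++ libs are taken from the constants above; the tcmalloc library path is an assumption based on the usual gperftools install layout):

```bash
# Benchmark tcmalloc with 4 threads by preloading it, plus the C++ runtime
# libs it depends on, into the glibc benchmark utility:
LD_PRELOAD='tcmalloc-install/lib/libtcmalloc.so:libstdc++.so.6:libgcc_s.so.1' \
    glibc-build/benchtests/bench-malloc-thread 4 > /tmp/benchmark-output
```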