From 7a5d26bfd5b204fc9a25ce7a326fb720517d0c2d Mon Sep 17 00:00:00 2001 From: wayne Date: Thu, 19 Jun 2014 22:28:59 +0200 Subject: [PATCH] Minor changes and fixes. --- benchmark/self_delimiting_codes/README.md | 4 ++-- .../src/sdc_benchmark.cpp | 20 ++++++++----------- .../self_delimiting_codes/test_case.config | 16 +++++++-------- .../self_delimiting_codes/vectors.config | 12 +++++++++-- .../visualize/self_delimiting_codes.tex | 3 ++- tutorial/expl-02.cpp | 11 ++++------ tutorial/expl-03.cpp | 17 ++++++++-------- 7 files changed, 42 insertions(+), 41 deletions(-) diff --git a/benchmark/self_delimiting_codes/README.md b/benchmark/self_delimiting_codes/README.md index cbe8c3753..a7fe1088a 100644 --- a/benchmark/self_delimiting_codes/README.md +++ b/benchmark/self_delimiting_codes/README.md @@ -21,7 +21,7 @@ Explored dimensions: of free memory (some vectors have very poor compression). * For the visualization you need the following software: - [pdflatex][LT] to generate the pdf reports. - - [pgfplots][PGFP] installed in [LT] to generate plots in pdf reports. + - [pgfplots][PGFP] version 1.10 installed in [LT] to generate plots in pdf reports. ## Usage @@ -32,7 +32,7 @@ Explored dimensions: rates and compression can be found in the file `results/result.csv`. The used test cases can be found in file `results/tc.csv`. The tested vectors can be found in file `results/vat.csv`. - The default benchmark took 14 minutes on my machine (Asus P50IJ + The default benchmark took 81 minutes on my machine (Asus P50IJ Pentium(R) Dual-Core CPU T4500 @ 2.30GHz 2GB). * All created binaries and test results can be deleted by calling `make cleanall`. 
diff --git a/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp b/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp index e28bbd009..2eaf40315 100644 --- a/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp +++ b/benchmark/self_delimiting_codes/src/sdc_benchmark.cpp @@ -127,7 +127,7 @@ bool runSingleTest( const int_vector<> &testcase, iv_testresult &result ) { Vector test( testcase ); auto stop = timer::now(); result.enc_MBperSec = size_in_mega_bytes( testcase ) - / duration_cast(stop-start).count(); + / duration_cast(stop-start).count() * 1000.0; //care for compression rate result.comp_percent = size_in_mega_bytes(test) @@ -139,20 +139,16 @@ bool runSingleTest( const int_vector<> &testcase, iv_testresult &result ) { //entry, so everything between 2 samples has to be decoded. size_t sample_dens = test.get_sample_dens(); start = timer::now(); - //repeat test 5 times to avoid infinite decoding rates - for (size_t j = 0; j < 5; j++) { - size_t i = sample_dens - 1; - for (; i < test.size(); i += sample_dens) { - test[i]; //acess element right before next sample entry - } - //and finally access last element if not done yet - if (i != test.size() + sample_dens - 1) - test[test.size() - 1]; + size_t i = sample_dens - 1; + for (; i < test.size(); i += sample_dens) { + test[i]; //access element right before next sample entry } + //and finally access last element if not done yet + if (i != test.size() + sample_dens - 1) + test[test.size() - 1]; stop = timer::now(); result.dec_MBperSec = size_in_mega_bytes( testcase ) - / duration_cast(stop-start).count() - * 5.0; //multiply with 5 since vector was decoded 5 times + / duration_cast(stop-start).count() * 1000.0; return true; //may use this return type for error detection in future } diff --git a/benchmark/self_delimiting_codes/test_case.config b/benchmark/self_delimiting_codes/test_case.config index 9f759a8a1..a1b83f5ed 100644 --- a/benchmark/self_delimiting_codes/test_case.config +++ 
b/benchmark/self_delimiting_codes/test_case.config @@ -5,15 +5,15 @@ # (4) Download link (if the test is available online) # (5) Test file type(0: serialized int_vector<>, 1: byte sequence, 2: 16-bit word sequence, 4: 32-bit word sequence, 8: 64-bit word sequence, d: Parse decimal numbers) -#ENGLISH;../data/english.200MB;english.200MB;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1 -#DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1 -#DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1 -#PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1 -#SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1 -INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1 -EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1 +ENGLISH;../data/english.200MB;english.200MB;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1 +DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1 +DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1 +PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1 +SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1 +#INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1 +#EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1 #EINSTEIN-en;../data/einstein.en.txt;einstein-en;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz;1 #PARA;../data/para;para;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz;1 
-WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1 +#WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1 #E_COLI;../data/Escherichia_Coli;E.coli;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz;1 #ENWIKISMLINT;../data/enwiki-20130805-pages-articles1.int.sdsl;enwiki-sml-int;http://people.eng.unimelb.edu.au/sgog/data/enwiki-20130805-pages-articles1.int.sdsl.gz;0 diff --git a/benchmark/self_delimiting_codes/vectors.config b/benchmark/self_delimiting_codes/vectors.config index f1b53ccc6..2c90e62c9 100644 --- a/benchmark/self_delimiting_codes/vectors.config +++ b/benchmark/self_delimiting_codes/vectors.config @@ -8,11 +8,19 @@ VLC_ED;vlc_vector;VLC-Elias-Delta VLC_FIB;vlc_vector;VLC-Fibonacci VLC_C2;vlc_vector>;VLC-Comma-Base3 #VLC_C3;vlc_vector>;VLC-Comma-Base7 -#VLC_C8;vlc_vector>;VLC-Comma-Base254 +#VLC_C4;vlc_vector>;VLC-Comma-Base15 +#VLC_C5;vlc_vector>;VLC-Comma-Base31 +#VLC_C6;vlc_vector>;VLC-Comma-Base63 +#VLC_C7;vlc_vector>;VLC-Comma-Base127 +#VLC_C8;vlc_vector>;VLC-Comma-Base255 # ENC Vectors ENC_EG;enc_vector;ENC-Elias-Gamma ENC_ED;enc_vector;ENC-Elias-Delta ENC_FIB;enc_vector;ENC-Fibonacci ENC_C2;enc_vector>;ENC-Comma-Base3 #ENC_C3;enc_vector>;ENC-Comma-Base7 -#ENC_C8;enc_vector>;ENC-Comma-Base254 +#ENC_C4;enc_vector>;ENC-Comma-Base15 +#ENC_C5;enc_vector>;ENC-Comma-Base31 +#ENC_C6;enc_vector>;ENC-Comma-Base63 +#ENC_C7;enc_vector>;ENC-Comma-Base127 +#ENC_C8;enc_vector>;ENC-Comma-Base255 diff --git a/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex b/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex index daa2fe270..59d6020a8 100644 --- a/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex +++ b/benchmark/self_delimiting_codes/visualize/self_delimiting_codes.tex @@ -2,6 +2,7 @@ \usepackage{pgfplots} \usepackage{pgfplotstable} +\usepackage{color} \usepackage{booktabs} 
\usepackage[section]{placeins} \pgfplotsset{compat=1.10} @@ -28,7 +29,7 @@ %background \usetikzlibrary{backgrounds} -\definecolor{graphicbackground}{rgb}{0.96,0.96,0.8} +\definecolor{graphicbackground}{HTML}{F3F3F3} \pgfkeys{/tikz/.cd, background color/.initial=graphicbackground, background color/.get=\backcol, diff --git a/tutorial/expl-02.cpp b/tutorial/expl-02.cpp index cc00bbc68..939a3ffc1 100644 --- a/tutorial/expl-02.cpp +++ b/tutorial/expl-02.cpp @@ -6,13 +6,10 @@ using namespace sdsl; int main() { - int_vector<> v(10*(1<<20)); - for (size_t i=0; i<10; ++i) - for (size_t j=0; j < 1U<<20; ++j) - v[i*(1<<20)+j] = j; - cout << size_in_mega_bytes(v) << endl; + int_vector<> v(10*(1<<20), 0); + v[0] = 1ULL<<63; util::bit_compress(v); cout << size_in_mega_bytes(v) << endl; - enc_vector> ev(v); - cout << size_in_mega_bytes(ev) << endl; + vlc_vector<> vv(v); + cout << size_in_mega_bytes(vv) << endl; } diff --git a/tutorial/expl-03.cpp b/tutorial/expl-03.cpp index de34c89ef..3fe484d6f 100644 --- a/tutorial/expl-03.cpp +++ b/tutorial/expl-03.cpp @@ -1,19 +1,18 @@ #include #include -#include -#include -#include using namespace std; using namespace sdsl; int main() { - int_vector<> v(10*(1<<20), 0); - v[0] = 1ULL<<63; - //util::bit_compress(v); + int_vector<> v(10*(1<<20)); + for (size_t i=0; i<10; ++i) + for (size_t j=0; j < 1U<<20; ++j) + v[i*(1<<20)+j] = j; cout << size_in_mega_bytes(v) << endl; - vlc_vector vv(v); - cout << size_in_mega_bytes(vv) << endl; - cout << "Percentage: " << size_in_mega_bytes(vv) / size_in_mega_bytes(v) * 100 << endl; + util::bit_compress(v); + cout << size_in_mega_bytes(v) << endl; + enc_vector<> ev(v); + cout << size_in_mega_bytes(ev) << endl; }