Skip to content

Commit

Permalink
Minor changes and fixes.
Browse files Browse the repository at this point in the history
  • Loading branch information
wayne authored and wayne committed Jun 19, 2014
1 parent 9de4abc commit 7a5d26b
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 41 deletions.
4 changes: 2 additions & 2 deletions benchmark/self_delimiting_codes/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Explored dimensions:
of free memory (some vectors have very poor compression).
* For the visualization you need the following software:
- [pdflatex][LT] to generate the pdf reports.
- [pgfplots][PGFP] installed in [LT] to generate plots in pdf reports.
- [pgfplots][PGFP] version 1.10 or later installed in [LT] to generate plots in pdf reports.

## Usage

Expand All @@ -32,7 +32,7 @@ Explored dimensions:
rates and compression can be found in the file `results/result.csv`.
The used test cases can be found in file `results/tc.csv`.
The tested vectors can be found in file `results/vat.csv`.
The default benchmark took 14 minutes on my machine (Asus P50IJ
The default benchmark took 81 minutes on my machine (Asus P50IJ
Pentium(R) Dual-Core CPU T4500 @ 2.30GHz 2GB).
* All created binaries and test results can be deleted
by calling `make cleanall`.
Expand Down
20 changes: 8 additions & 12 deletions benchmark/self_delimiting_codes/src/sdc_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ bool runSingleTest( const int_vector<> &testcase, iv_testresult &result ) {
Vector test( testcase );
auto stop = timer::now();
result.enc_MBperSec = size_in_mega_bytes( testcase )
/ duration_cast<seconds>(stop-start).count();
/ duration_cast<milliseconds>(stop-start).count() * 1000.0;
//care for compression rate
result.comp_percent = size_in_mega_bytes(test)
Expand All @@ -139,20 +139,16 @@ bool runSingleTest( const int_vector<> &testcase, iv_testresult &result ) {
//entry, so everything between 2 samples has to be decoded.
size_t sample_dens = test.get_sample_dens();
start = timer::now();
//repeat test 5 times to avoid infinite decoding rates
for (size_t j = 0; j < 5; j++) {
size_t i = sample_dens - 1;
for (; i < test.size(); i += sample_dens) {
test[i]; //access element right before next sample entry
}
//and finally access last element if not done yet
if (i != test.size() + sample_dens - 1)
test[test.size() - 1];
size_t i = sample_dens - 1;
for (; i < test.size(); i += sample_dens) {
test[i]; //access element right before next sample entry
}
//and finally access last element if not done yet
if (i != test.size() + sample_dens - 1)
test[test.size() - 1];
stop = timer::now();
result.dec_MBperSec = size_in_mega_bytes( testcase )
/ duration_cast<seconds>(stop-start).count()
* 5.0; //multiply with 5 since vector was decoded 5 times
/ duration_cast<milliseconds>(stop-start).count() * 1000.0;
return true; //may use this return type for error detection in future
}
Expand Down
16 changes: 8 additions & 8 deletions benchmark/self_delimiting_codes/test_case.config
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@
# (4) Download link (if the test is available online)
# (5) Test file type (0: serialized int_vector<>, 1: byte sequence, 2: 16-bit word sequence, 4: 32-bit word sequence, 8: 64-bit word sequence, d: Parse decimal numbers)

#ENGLISH;../data/english.200MB;english.200MB;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1
#DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1
#DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1
#PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1
#SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1
INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1
EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1
ENGLISH;../data/english.200MB;english.200MB;http://pizzachili.di.unipi.it/texts/nlang/english.200MB.gz;1
DBLPXML;../data/dblp.xml.200MB;dblp.xml.200MB;http://pizzachili.di.unipi.it/texts/xml/dblp.xml.200MB.gz;1
DNA;../data/dna.200MB;dna.200MB;http://pizzachili.di.unipi.it/texts/dna/dna.200MB.gz;1
PROTEINS;../data/proteins.200MB;proteins.200MB;http://pizzachili.di.unipi.it/texts/protein/proteins.200MB.gz;1
SOURCES;../data/sources.200MB;sources.200MB;http://pizzachili.di.unipi.it/texts/code/sources.200MB.gz;1
#INFLUENZA;../data/influenza;influenza;http://pizzachili.dcc.uchile.cl/repcorpus/real/influenza.gz;1
#EINSTEIN-de;../data/einstein.de.txt;einstein-de;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.de.txt.gz;1
#EINSTEIN-en;../data/einstein.en.txt;einstein-en;http://pizzachili.dcc.uchile.cl/repcorpus/real/einstein.en.txt.gz;1
#PARA;../data/para;para;http://pizzachili.dcc.uchile.cl/repcorpus/real/para.gz;1
WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1
#WORLDLEADER;../data/world_leaders;world-leaders;http://pizzachili.dcc.uchile.cl/repcorpus/real/world_leaders.gz;1
#E_COLI;../data/Escherichia_Coli;E.coli;http://pizzachili.dcc.uchile.cl/repcorpus/real/Escherichia_Coli.gz;1
#ENWIKISMLINT;../data/enwiki-20130805-pages-articles1.int.sdsl;enwiki-sml-int;http://people.eng.unimelb.edu.au/sgog/data/enwiki-20130805-pages-articles1.int.sdsl.gz;0
12 changes: 10 additions & 2 deletions benchmark/self_delimiting_codes/vectors.config
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,19 @@ VLC_ED;vlc_vector<coder::elias_delta>;VLC-Elias-Delta
VLC_FIB;vlc_vector<coder::fibonacci>;VLC-Fibonacci
VLC_C2;vlc_vector<coder::comma<2>>;VLC-Comma-Base3
#VLC_C3;vlc_vector<coder::comma<3>>;VLC-Comma-Base7
#VLC_C8;vlc_vector<coder::comma<8>>;VLC-Comma-Base254
#VLC_C4;vlc_vector<coder::comma<4>>;VLC-Comma-Base15
#VLC_C5;vlc_vector<coder::comma<5>>;VLC-Comma-Base31
#VLC_C6;vlc_vector<coder::comma<6>>;VLC-Comma-Base63
#VLC_C7;vlc_vector<coder::comma<7>>;VLC-Comma-Base127
#VLC_C8;vlc_vector<coder::comma<8>>;VLC-Comma-Base255
# ENC Vectors
ENC_EG;enc_vector<coder::elias_gamma>;ENC-Elias-Gamma
ENC_ED;enc_vector<coder::elias_delta>;ENC-Elias-Delta
ENC_FIB;enc_vector<coder::fibonacci>;ENC-Fibonacci
ENC_C2;enc_vector<coder::comma<2>>;ENC-Comma-Base3
#ENC_C3;enc_vector<coder::comma<3>>;ENC-Comma-Base7
#ENC_C8;enc_vector<coder::comma<8>>;ENC-Comma-Base254
#ENC_C4;enc_vector<coder::comma<4>>;ENC-Comma-Base15
#ENC_C5;enc_vector<coder::comma<5>>;ENC-Comma-Base31
#ENC_C6;enc_vector<coder::comma<6>>;ENC-Comma-Base63
#ENC_C7;enc_vector<coder::comma<7>>;ENC-Comma-Base127
#ENC_C8;enc_vector<coder::comma<8>>;ENC-Comma-Base255
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

\usepackage{pgfplots}
\usepackage{pgfplotstable}
\usepackage{color}
\usepackage{booktabs}
\usepackage[section]{placeins}
\pgfplotsset{compat=1.10}
Expand All @@ -28,7 +29,7 @@

%background
\usetikzlibrary{backgrounds}
\definecolor{graphicbackground}{rgb}{0.96,0.96,0.8}
\definecolor{graphicbackground}{HTML}{F3F3F3}
\pgfkeys{/tikz/.cd,
background color/.initial=graphicbackground,
background color/.get=\backcol,
Expand Down
11 changes: 4 additions & 7 deletions tutorial/expl-02.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,10 @@ using namespace sdsl;

int main()
{
int_vector<> v(10*(1<<20));
for (size_t i=0; i<10; ++i)
for (size_t j=0; j < 1U<<20; ++j)
v[i*(1<<20)+j] = j;
cout << size_in_mega_bytes(v) << endl;
int_vector<> v(10*(1<<20), 0);
v[0] = 1ULL<<63;
util::bit_compress(v);
cout << size_in_mega_bytes(v) << endl;
enc_vector<coder::comma<>> ev(v);
cout << size_in_mega_bytes(ev) << endl;
vlc_vector<> vv(v);
cout << size_in_mega_bytes(vv) << endl;
}
17 changes: 8 additions & 9 deletions tutorial/expl-03.cpp
Original file line number Diff line number Diff line change
@@ -1,19 +1,18 @@
#include <iostream>
#include <sdsl/vectors.hpp>
#include <sdsl/coder_elias_gamma.hpp>
#include <sdsl/coder_comma.hpp>
#include <sdsl/coder_fibonacci.hpp>

using namespace std;
using namespace sdsl;

int main()
{
int_vector<> v(10*(1<<20), 0);
v[0] = 1ULL<<63;
//util::bit_compress(v);
int_vector<> v(10*(1<<20));
for (size_t i=0; i<10; ++i)
for (size_t j=0; j < 1U<<20; ++j)
v[i*(1<<20)+j] = j;
cout << size_in_mega_bytes(v) << endl;
vlc_vector<coder::fibonacci> vv(v);
cout << size_in_mega_bytes(vv) << endl;
cout << "Percentage: " << size_in_mega_bytes(vv) / size_in_mega_bytes(v) * 100 << endl;
util::bit_compress(v);
cout << size_in_mega_bytes(v) << endl;
enc_vector<> ev(v);
cout << size_in_mega_bytes(ev) << endl;
}

0 comments on commit 7a5d26b

Please sign in to comment.