Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplified istream handling #367 #764

Merged
merged 18 commits into from
Oct 22, 2017
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
18 commits
Select commit Hold shift + click to select a range
90adf6e
Simplify get_token_string, unnecessary buffering, handle Byte Order Mark
pjkundert Oct 2, 2017
f585fe4
Test to confirm parsing of multiple JSON records in a istream #367
pjkundert Oct 2, 2017
12efead
Further simplify istream handling; use native unget
pjkundert Oct 3, 2017
14ca1f6
Restore istream performance #764
pjkundert Oct 3, 2017
7c52333
Remove unnecessary NUL termination of yytext (as it may contain NULs)
pjkundert Oct 4, 2017
97a3888
Improve performance by constructing yytext as a std::string
pjkundert Oct 4, 2017
e0d890c
Corrected unnecessary const restriction on returned std::string
pjkundert Oct 4, 2017
8665e25
Rename get_string to move_string to imply side-effect
pjkundert Oct 4, 2017
546e148
Further performance improvements, and corrections in get_token_string
pjkundert Oct 4, 2017
f775922
Specify initializers for yytext, token_string using initializer-lists
pjkundert Oct 4, 2017
184dab6
Accelerate access to underlying std::istream streambuf
pjkundert Oct 5, 2017
1b43a45
Implement correct handling of std::streambuf int_type, eof()
pjkundert Oct 5, 2017
5e480b5
Further simplify character type handling
pjkundert Oct 6, 2017
45e1e3d
Revert some unnecessary member initializer changes.
pjkundert Oct 6, 2017
23440eb
Remove outdated commentary about the value of eof(), retain input type
pjkundert Oct 6, 2017
0b803d0
Simplify the json/src/benchmarks.cpp to allow more optimal code gen.
pjkundert Oct 7, 2017
a8cc7a1
Consistently use std::char_traits int_type-->char conversion intrinsics
pjkundert Oct 16, 2017
ef40673
Merge branch 'develop' into develop-simplify-istream
nlohmann Oct 22, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
json_unit
json_benchmarks
json_benchmarks_simple
fuzz-testing

*.dSYM
Expand Down
18 changes: 14 additions & 4 deletions benchmarks/Makefile
Original file line number Diff line number Diff line change
@@ -1,11 +1,21 @@
all: json_benchmarks
./json_benchmarks

json_benchmarks: src/benchmarks.cpp ../src/json.hpp number_jsons
#
# Build/run json.hpp benchmarks, eg. CXX=g++-7 make
#
# The existing json_benchmarks did not allow optimization under some compilers
#
all: json_benchmarks json_benchmarks_simple number_jsons
bash -c 'time ./json_benchmarks'
bash -c 'time ./json_benchmarks_simple'

json_benchmarks: src/benchmarks.cpp ../src/json.hpp
$(CXX) -std=c++11 -pthread $(CXXFLAGS) -DNDEBUG -O3 -flto -I thirdparty/benchpress -I thirdparty/cxxopts -I../src src/benchmarks.cpp $(LDFLAGS) -o $@

json_benchmarks_simple: src/benchmarks_simple.cpp ../src/json.hpp
$(CXX) -std=c++11 $(CXXFLAGS) -DNDEBUG -O3 -flto -I../src $(<) $(LDFLAGS) -o $@

number_jsons:
(test -e files/numbers/floats.json -a -e files/numbers/signed_ints.json -a -e files/numbers/unsigned_ints.json) || (cd files/numbers ; python generate.py)

clean:
rm -f json_benchmarks files/numbers/*.json
rm -f json_benchmarks json_benchmarks_simple files/numbers/*.json
20 changes: 17 additions & 3 deletions benchmarks/src/benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,19 @@ static void bench(benchpress::context& ctx,
{
// using string streams for benchmarking to factor-out cold-cache disk
// access.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );

// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr;
{
// read file into string stream
Expand All @@ -43,11 +56,12 @@ static void bench(benchpress::context& ctx,

// read the stream once
json j;
j << istr;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#endif

switch (mode)
{
Expand All @@ -62,7 +76,7 @@ static void bench(benchpress::context& ctx,
istr.clear();
istr.seekg(0);
json j;
j << istr;
istr >> j;
}

break;
Expand All @@ -74,7 +88,7 @@ static void bench(benchpress::context& ctx,
{
// create JSON value from input
json j;
j << istr;
istr >> j;
std::stringstream ostr;

ctx.reset_timer();
Expand Down
158 changes: 158 additions & 0 deletions benchmarks/src/benchmarks_simple.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
//
// benchmarks_simple.cpp -- a less complex version of benchmarks.cpp, that better reflects actual performance
//
// For some reason, the complexity of benchmarks.cpp doesn't allow
// the compiler to optimize code using json.hpp effectively. The
// exact same tests, without the use of benchpress and cxxopts, produce
// much faster code, at least under g++.
//
#include <chrono>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <list>
#include <sstream>
#include <string>
#include <tuple>

#include <json.hpp>

using json = nlohmann::json;

// Selects which benchmark bench() runs: `input` times parsing a document from
// a stream; `output` and `indent` both time serializing a parsed document and
// differ only in whether the dump is pretty-printed.
enum class EMode { input, output, indent };

static double bench(const EMode mode, size_t iters, const std::string& in_path )
{
// using string streams for benchmarking to factor-out cold-cache disk
// access. Define FROMFILE to use file I/O instead.
#if defined( FROMFILE )
std::ifstream istr;
{
istr.open( in_path, std::ifstream::in );

// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#else
std::stringstream istr;
{
// read file into string stream
std::ifstream input_file(in_path);
istr << input_file.rdbuf();
input_file.close();

// read the stream once
json j;
istr >> j;
// clear flags and rewind
istr.clear();
istr.seekg(0);
}
#endif
double tps = 0;
switch (mode)
{
// benchmarking input
case EMode::input:
{
auto start = std::chrono::system_clock::now();
for (size_t i = 0; i < iters; ++i)
{
// clear flags and rewind
istr.clear();
istr.seekg(0);
json j;
istr >> j;
}
auto ended = std::chrono::system_clock::now();
tps = 1.0 / std::chrono::duration<double>( ended - start ).count();
break;
}

// benchmarking output
case EMode::output:
case EMode::indent:
{
// create JSON value from input
json j;
istr >> j;
std::stringstream ostr;

auto start = std::chrono::system_clock::now();
for (size_t i = 0; i < iters; ++i)
{
if (mode == EMode::indent)
{
ostr << j;
}
else
{
ostr << std::setw(4) << j;
}

// reset data
ostr.str(std::string());
}
auto ended = std::chrono::system_clock::now();
tps = 1.0 / std::chrono::duration<double>( ended - start ).count();

break;
}
}
return tps;
}

// Running arithmetic mean of the values added with operator+=.
//
//   _sum    sum of all samples added so far
//   _count  number of samples added so far
//
// Converting to T yields _sum / _count, or a value-initialized T when no
// sample has been added yet (instead of dividing by zero).
template <typename T>
struct average {
    T _sum {};
    size_t _count { 0 };

    // Add one sample; returns the sample that was just added.
    T operator+=( const T &val_ )
    {
        _sum += val_;
        ++_count;
        return val_;
    }

    // Mean of the samples seen so far.
    operator T() const
    {
        if ( _count == 0 )
        {
            return T{};
        }
        return _sum / _count;
    }
};

// Execute each test approximately enough times to get near 1
// transaction per second, and compute the average; a single aggregate
// number that gives a performance metric representing both parsing
// and output.

int main( int, char ** )
{
std::list<std::tuple<std::string, EMode, size_t, std::string>> tests {
{ "parse jeopardy.json", EMode::input, 2, "files/jeopardy/jeopardy.json" },
{ "parse canada.json", EMode::input, 30, "files/nativejson-benchmark/canada.json" },
{ "parse citm_catalog.json", EMode::input, 120, "files/nativejson-benchmark/citm_catalog.json" },
{ "parse twitter.json", EMode::input, 225, "files/nativejson-benchmark/twitter.json" },
{ "parse floats.json", EMode::input, 5, "files/numbers/floats.json" },
{ "parse signed_ints.json", EMode::input, 6, "files/numbers/signed_ints.json" },
{ "parse unsigned_ints.json", EMode::input, 6, "files/numbers/unsigned_ints.json" },
{ "dump jeopardy.json", EMode::output, 5, "files/jeopardy/jeopardy.json" },
{ "dump jeopardy.json w/ind.", EMode::indent, 5, "files/jeopardy/jeopardy.json" },
{ "dump floats.json", EMode::output, 2, "files/numbers/floats.json" },
{ "dump signed_ints.json", EMode::output, 20, "files/numbers/signed_ints.json" },
};

average<double> avg;
for ( auto t : tests ) {
std::string name, path;
EMode mode;
size_t iters;
std::tie(name, mode, iters, path) = t;
auto tps = bench( mode, iters, path );
avg += tps;
std::cout
<< std::left
<< std::setw( 30 ) << name
<< std::right
<< " x " << std::setw( 3 ) << iters
<< std::left
<< " == " << std::setw( 10 ) << tps
<< std::right
<< " TPS, " << std::setw( 8 ) << std::round( tps * 1e6 / iters )
<< " ms/op"
<< std::endl;
}
std::cout << std::setw( 40 ) << "" << std::string( 10, '-' ) << std::endl;
std::cout << std::setw( 40 ) << "" << std::setw( 10 ) << std::left << avg << " TPS Average" << std::endl;
return 0;
}
Loading