Skip to content

Commit

Permalink
Extract multiple bytes from the source in PerByte. (#4)
Browse files Browse the repository at this point in the history
This is useful when the desired number of bytes is known beforehand (e.g., when
parsing structured binary formats) and we just want to get those bytes without
worrying about the underlying buffers of the readers themselves.
  • Loading branch information
LTLA authored Sep 11, 2023
1 parent 7d878b5 commit d5c9cbd
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 6 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Oldest CMake release that is allowed to configure this project.
cmake_minimum_required(VERSION 3.24)

# Declare the byteme project along with its version, description and language (C++).
project(byteme
VERSION 1.0.1
VERSION 1.1.0
DESCRIPTION "No-frills byte streaming from file"
LANGUAGES CXX)

Expand Down
109 changes: 104 additions & 5 deletions include/byteme/PerByte.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,51 @@ struct PerByte {
size_t position() const {
    // Sum of the running total kept in `overall` and the offset `current` into the active chunk.
    const size_t consumed = overall + current;
    return consumed;
}

/**
 * Copy up to `n` bytes from the `Reader` source into `output`.
 * The effect matches calling `get()` followed by `advance()` up to `n` times,
 * stopping as soon as `advance()` returns false.
 * The first element of the returned pair is the number of successful iterations,
 * and the second element is the value that the last `advance()` would have returned.
 *
 * @param n Number of bytes to extract.
 * @param[out] output Pointer to an output buffer of length `n`.
 * This is filled with up to `n` bytes from the source.
 *
 * @return Pair containing (1) the number of bytes that were successfully written to `output`,
 * and (2) whether any more bytes are available in the source for future `get()` or `extract()` calls.
 */
std::pair<size_t, bool> extract(size_t n, Type_* output) {
    const size_t requested = n;
    bool more = true;

    for (;;) {
        const auto first = ptr + current;
        const auto remaining = available - current;

        if (remaining > n) {
            // The active chunk holds more than is still needed:
            // take `n` bytes and stay inside this chunk.
            std::copy(first, first + n, output);
            current += n;
            n = 0;
            break;
        }

        // Drain whatever is left of the active chunk, then load the next one.
        std::copy(first, first + remaining, output);
        n -= remaining;
        overall += available;
        refill();

        more = (available > 0);
        if (!more || n == 0) {
            break;
        }
        output += remaining;
    }

    return std::pair<size_t, bool>(requested - n, more);
}
};

/**
Expand Down Expand Up @@ -156,6 +201,14 @@ struct PerByteParallel {
current = 0;
}

/**
 * Join the `meanwhile` thread and then load the next chunk through `refill()`.
 * If `thread_err` holds an exception after the join, it is rethrown here
 * and `refill()` is not called.
 */
void join_and_refill() {
    meanwhile.join();

    if (!thread_err) {
        refill();
        return;
    }

    std::rethrow_exception(thread_err);
}

public:
/**
* @copydoc PerByte::PerByte()
Expand Down Expand Up @@ -199,12 +252,8 @@ struct PerByteParallel {
if (!use_meanwhile) {
return false;
}
join_and_refill();

meanwhile.join();
if (thread_err) {
std::rethrow_exception(thread_err);
}
refill();
return available > 0; // confirm there's actually bytes to extract in the next round.
}

Expand All @@ -221,6 +270,56 @@ struct PerByteParallel {
size_t position() const {
    // Sum of the running total kept in `overall` and the offset `current` into the active chunk.
    const size_t consumed = overall + current;
    return consumed;
}

/**
 * Extract up to `n` bytes from the `Reader` source and store them in `output`.
 * This is equivalent to calling `get()` and then `advance()` up to `n` times,
 * only iterating while the return value of `advance()` is still true.
 * The number of successful iterations is returned in the output as the first pair element,
 * while the return value of the final `advance()` is returned as the second pair element.
 *
 * Once the source is exhausted, the internal cursor is cleared so that `position()`
 * reports the total number of bytes consumed, and any further call to `extract()`
 * returns `(0, false)` without copying bytes a second time.
 *
 * @param n Number of bytes to extract.
 * @param[out] output Pointer to an output buffer of length `n`.
 * This is filled with up to `n` bytes from the source.
 *
 * @return Pair containing (1) the number of bytes that were successfully read into `output`,
 * and (2) whether there are any more bytes available in the source for future `get()` or `extract()` calls.
 */
std::pair<size_t, bool> extract(size_t n, Type_* output) {
    const size_t original = n;
    bool okay = true;

    while (true) {
        auto start = buffer.data() + current;
        auto leftover = available - current;

        if (leftover > n) {
            // Enough bytes remain in the current chunk; no reload is needed.
            current += n;
            n = 0;
            std::copy(start, buffer.data() + current, output);
            break;
        }

        // Consume the remainder of the current chunk.
        n -= leftover;
        std::copy(start, buffer.data() + available, output);
        overall += available;

        if (!use_meanwhile) {
            // No follow-up chunk is pending, so the source is treated as exhausted.
            // The whole chunk has just been added to `overall`; clear the cursor and
            // the chunk length so that position() == overall and a repeated call
            // neither re-copies these bytes nor adds them to `overall` again.
            current = 0;
            available = 0;
            okay = false;
            break;
        }
        join_and_refill();

        okay = (available > 0);
        if (n == 0 || !okay) {
            break;
        }
        output += leftover;
    }

    return std::make_pair(original - n, okay);
}
};

}
Expand Down
64 changes: 64 additions & 0 deletions tests/src/PerByte.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,39 @@ TEST_P(PerByteTest, Basic) {
EXPECT_EQ(observed, expected);
}

TEST_P(PerByteTest, Extraction) {
    std::vector<std::string> lines { "asdasdasd", "sd738", "93879sdjfsjdf", "caysctgatctv", "oirtueorpr2312", "09798&A*&^&c", "((&9KKJNJSNAKASd" };
    auto path = dump_file(lines);

    // Reference text: every fixture line followed by a newline.
    std::string expected;
    for (const auto& line : lines) {
        expected.append(line);
        expected.push_back('\n');
    }

    // Some of these widths are meant to match the Reader buffer size,
    // which covers the case where one call drains exactly the buffered content.
    const std::vector<int> widths { 10, 20, 100 };

    for (auto width : widths) {
        byteme::RawFileReader reader(path, GetParam());
        byteme::PerByte extractor(&reader);

        std::string observed;
        std::vector<char> chunk(width);

        // Keep pulling fixed-width chunks until the extractor reports no more bytes.
        for (bool more = true; more; ) {
            const auto result = extractor.extract(width, chunk.data());
            observed.append(chunk.data(), result.first);
            EXPECT_EQ(extractor.position(), observed.size());
            more = result.second;
        }

        EXPECT_EQ(observed, expected);
    }
}

TEST_P(PerByteTest, SmartPointer) {
std::vector<std::string> contents { "asdasdasd", "sd738", "93879sdjfsjdf", "caysctgatctv", "oirtueorpr2312", "09798&A*&^&c", "((&9KKJNJSNAKASd" };
auto path = dump_file(contents);
Expand Down Expand Up @@ -133,6 +166,37 @@ TEST_P(PerByteTest, ParallelSmartPointer) {
EXPECT_EQ(contents, observed);
}

TEST_P(PerByteTest, ParallelExtraction) {
    std::vector<std::string> lines { "asdasdasd", "sd738", "93879sdjfsjdf", "caysctgatctv", "oirtueorpr2312", "09798&A*&^&c", "((&9KKJNJSNAKASd" };
    auto path = dump_file(lines);

    // Reference text: every fixture line followed by a newline.
    std::string expected;
    for (const auto& line : lines) {
        expected.append(line);
        expected.push_back('\n');
    }

    const std::vector<int> widths { 10, 20, 100 };

    for (auto width : widths) {
        byteme::RawFileReader reader(path, GetParam());
        byteme::PerByteParallel extractor(&reader);

        std::string observed;
        std::vector<char> chunk(width);

        // Keep pulling fixed-width chunks until the extractor reports no more bytes.
        for (bool more = true; more; ) {
            const auto result = extractor.extract(width, chunk.data());
            observed.append(chunk.data(), result.first);
            EXPECT_EQ(extractor.position(), observed.size());
            more = result.second;
        }

        EXPECT_EQ(observed, expected);
    }
}


INSTANTIATE_TEST_SUITE_P(
PerByte,
PerByteTest,
Expand Down

0 comments on commit d5c9cbd

Please sign in to comment.