Skip to content

Commit

Permalink
avoid double read from steam from curl when stream is small
Browse files Browse the repository at this point in the history
  • Loading branch information
sbiscigl committed Dec 11, 2024
1 parent 3a4fc06 commit 5704dcd
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 28 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,30 +30,31 @@ class AwsChunkedStream {

size_t BufferedRead(char *dst, size_t amountToRead) {
assert(dst != nullptr);
if (dst == nullptr) {
AWS_LOGSTREAM_ERROR("AwsChunkedStream", "dst is null");
}

// the chunk has ended and cannot be read from
if (m_chunkEnd) {
return 0;
}
// only read and write to chunked stream if the underlying stream
// is still in a valid state
if (m_stream->good()) {
// Try to read in a 64K chunk, if we cant we know the stream is over
size_t bytesRead = 0;
while (m_stream->good() && bytesRead < DataBufferSize) {
m_stream->read(&m_data[bytesRead], DataBufferSize - bytesRead);
bytesRead += static_cast<size_t>(m_stream->gcount());
}

// If we've read all of the underlying stream write the checksum trailing header
// the set that the chunked stream is over.
if (m_stream->eof() && !m_stream->bad() && (m_chunkingStream->eof() || m_chunkingStream->peek() == EOF)) {
return writeTrailer(dst, amountToRead);
}
if (bytesRead > 0) {
writeChunk(bytesRead);
}

// Try to read in a 64K chunk, if we cant we know the stream is over
size_t bytesRead = 0;
while (m_stream->good() && bytesRead < DataBufferSize) {
m_stream->read(&m_data[bytesRead], DataBufferSize - bytesRead);
bytesRead += static_cast<size_t>(m_stream->gcount());
// if we've read everything from the stream, we want to add the trailer
// to the underlying stream
if ((m_stream->peek() == EOF || m_stream->eof()) && !m_stream->bad()) {
writeTrailerToUnderlyingStream();
}
}

if (bytesRead > 0) {
writeChunk(bytesRead);
// if the underlying trailer is empty there is nothing to read
if ((m_chunkingStream->peek() == EOF || m_chunkingStream->eof()) && !m_chunkingStream->bad()) {
return 0;
}

// Read to destination buffer, return how much was read
Expand All @@ -62,7 +63,7 @@ class AwsChunkedStream {
}

private:
size_t writeTrailer(char *dst, size_t amountToRead) {
void writeTrailerToUnderlyingStream() {
Aws::StringStream chunkedTrailerStream;
chunkedTrailerStream << "0\r\n";
if (m_request->GetRequestHash().second != nullptr) {
Expand All @@ -71,13 +72,7 @@ class AwsChunkedStream {
}
chunkedTrailerStream << "\r\n";
const auto chunkedTrailer = chunkedTrailerStream.str();
auto trailerSize = chunkedTrailer.size();
// unreferenced param for assert
AWS_UNREFERENCED_PARAM(amountToRead);
assert(amountToRead >= trailerSize);
memcpy(dst, chunkedTrailer.c_str(), trailerSize);
m_chunkEnd = true;
return trailerSize;
*m_chunkingStream << chunkedTrailer;
}

void writeChunk(size_t bytesRead) {
Expand All @@ -94,7 +89,6 @@ class AwsChunkedStream {

Aws::Utils::Array<char> m_data{DataBufferSize};
std::shared_ptr<Aws::IOStream> m_chunkingStream;
bool m_chunkEnd{false};
Http::HttpRequest *m_request{nullptr};
std::shared_ptr<Aws::IOStream> m_stream;
};
Expand Down
17 changes: 17 additions & 0 deletions tests/aws-cpp-sdk-core-tests/utils/stream/AwsChunkedStreamTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,20 @@ TEST_F(AwsChunkedStreamTest, ChunkedStreamShouldWork) {
auto expectedStreamWithChecksum = "A\r\n1234567890\r\nA\r\n1234567890\r\n5\r\n12345\r\n0\r\nx-amz-checksum-crc32:78DeVw==\r\n\r\n";
EXPECT_EQ(expectedStreamWithChecksum, encodedStr);
}

TEST_F(AwsChunkedStreamTest, ShouldNotRequireTwoReadsOnSmallChunk) {
StandardHttpRequest request{"www.clemar.com/strohl", Http::HttpMethod::HTTP_GET};
auto requestHash = Aws::MakeShared<CRC32>(TEST_LOG_TAG);
request.SetRequestHash("crc32", requestHash);
std::shared_ptr<IOStream> inputStream = Aws::MakeShared<StringStream>(TEST_LOG_TAG, "12345");
AwsChunkedStream<100> chunkedStream{&request, inputStream};
Aws::Utils::Array<char> outputBuffer{100};
Aws::StringStream output;
const auto bufferOffset = chunkedStream.BufferedRead(outputBuffer.GetUnderlyingData(), 100);
std::copy(outputBuffer.GetUnderlyingData(), outputBuffer.GetUnderlyingData() + bufferOffset, std::ostream_iterator<char>(output));
EXPECT_EQ(46ul, bufferOffset);
const auto encodedStr = output.str();
auto expectedStreamWithChecksum = "5\r\n12345\r\n0\r\nx-amz-checksum-crc32:y/U6HA==\r\n\r\n";
EXPECT_EQ(expectedStreamWithChecksum, encodedStr);
}

0 comments on commit 5704dcd

Please sign in to comment.