Skip to content

Commit

Permalink
Merge pull request #889 from openzim/test_reader
Browse files Browse the repository at this point in the history
Add some unittest on get_buffer
  • Loading branch information
kelson42 committed Jun 17, 2024
2 parents 8b241bb + e4cbe41 commit d78acf0
Show file tree
Hide file tree
Showing 9 changed files with 159 additions and 70 deletions.
11 changes: 3 additions & 8 deletions src/buffer_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "buffer_reader.h"
#include "buffer.h"

#include <stdexcept>
#include <cstring>

namespace zim {
Expand Down Expand Up @@ -49,18 +50,12 @@ offset_t BufferReader::offset() const
}


void BufferReader::read(char* dest, offset_t offset, zsize_t size) const {
ASSERT(offset.v, <=, source.size().v);
ASSERT(offset+offset_t(size.v), <=, offset_t(source.size().v));
if (! size ) {
return;
}
void BufferReader::readImpl(char* dest, offset_t offset, zsize_t size) const {
memcpy(dest, source.data(offset), size.v);
}


char BufferReader::read(offset_t offset) const {
ASSERT(offset.v, <, source.size().v);
char BufferReader::readImpl(offset_t offset) const {
char dest;
dest = *source.data(offset);
return dest;
Expand Down
16 changes: 9 additions & 7 deletions src/buffer_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,17 @@ class LIBZIM_PRIVATE_API BufferReader : public Reader {
: source(source) {}
virtual ~BufferReader() {};

zsize_t size() const;
offset_t offset() const;
zsize_t size() const override;
offset_t offset() const override;

void read(char* dest, offset_t offset, zsize_t size) const;
char read(offset_t offset) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const override;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const override;

private:
private: // functions
void readImpl(char* dest, offset_t offset, zsize_t size) const override;
char readImpl(offset_t offset) const override;

private: // data
const Buffer source;
};

Expand Down
54 changes: 26 additions & 28 deletions src/file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <fcntl.h>
#include <system_error>
#include <algorithm>
#include <stdexcept>


#ifndef _WIN32
Expand All @@ -43,6 +44,19 @@
typedef SSIZE_T ssize_t;
#endif

namespace {
[[noreturn]] void throwSystemError(const std::string& errorText)
{
#ifdef _WIN32
// Windows doesn't use errno.
throw std::system_error(std::error_code(), errorText);
#else
std::error_code ec(errno, std::generic_category());
throw std::system_error(ec, errorText);
#endif
}
}

namespace zim {

////////////////////////////////////////////////////////////////////////////////
Expand All @@ -60,8 +74,7 @@ MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> sou
ASSERT(offset.v+size.v, <=, source->fsize().v);
}

char MultiPartFileReader::read(offset_t offset) const {
ASSERT(offset.v, <, _size.v);
char MultiPartFileReader::readImpl(offset_t offset) const {
offset += _offset;
auto part_pair = source->locate(offset);
auto& fhandle = part_pair->second->fhandle();
Expand All @@ -81,19 +94,13 @@ char MultiPartFileReader::read(offset_t offset) const {
fmt << " - Reading offset at " << offset.v << "\n";
fmt << " - logical local offset is " << logical_local_offset.v << "\n";
fmt << " - physical local offset is " << physical_local_offset.v << "\n";
fmt << " - error is " << strerror(errno) << "\n";
std::error_code ec(errno, std::generic_category());
throw std::system_error(ec, fmt);
fmt << " - error is " << e.what() << "\n";
throwSystemError(fmt);
};
return ret;
}

void MultiPartFileReader::read(char* dest, offset_t offset, zsize_t size) const {
ASSERT(offset.v, <=, _size.v);
ASSERT(offset.v+size.v, <=, _size.v);
if (! size ) {
return;
}
void MultiPartFileReader::readImpl(char* dest, offset_t offset, zsize_t size) const {
offset += _offset;
auto found_range = source->locate(offset, size);
for(auto current = found_range.first; current!=found_range.second; current++){
Expand All @@ -116,9 +123,8 @@ void MultiPartFileReader::read(char* dest, offset_t offset, zsize_t size) const
fmt << " - Reading offset at " << offset.v << "\n";
fmt << " - logical local offset is " << logical_local_offset.v << "\n";
fmt << " - physical local offset is " << physical_local_offset.v << "\n";
fmt << " - error is " << strerror(errno) << "\n";
std::error_code ec(errno, std::generic_category());
throw std::system_error(ec, fmt);
fmt << " - error is " << e.what() << "\n";
throwSystemError(fmt);
};
ASSERT(size_to_get, <=, size);
dest += size_to_get.v;
Expand Down Expand Up @@ -244,9 +250,8 @@ FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size)
{
}

char FileReader::read(offset_t offset) const
char FileReader::readImpl(offset_t offset) const
{
ASSERT(offset.v, <, _size.v);
offset += _offset;
char ret;
try {
Expand All @@ -256,20 +261,14 @@ char FileReader::read(offset_t offset) const
Formatter fmt;
fmt << "Cannot read a char.\n";
fmt << " - Reading offset at " << offset.v << "\n";
fmt << " - error is " << strerror(errno) << "\n";
std::error_code ec(errno, std::generic_category());
throw std::system_error(ec, fmt);
fmt << " - error is " << e.what() << "\n";
throwSystemError(fmt);
};
return ret;
}

void FileReader::read(char* dest, offset_t offset, zsize_t size) const
void FileReader::readImpl(char* dest, offset_t offset, zsize_t size) const
{
ASSERT(offset.v, <=, _size.v);
ASSERT(offset.v+size.v, <=, _size.v);
if (! size ) {
return;
}
offset += _offset;
try {
_fhandle->readAt(dest, size, offset);
Expand All @@ -278,9 +277,8 @@ void FileReader::read(char* dest, offset_t offset, zsize_t size) const
fmt << "Cannot read chars.\n";
fmt << " - Reading offset at " << offset.v << "\n";
fmt << " - size is " << size.v << "\n";
fmt << " - error is " << strerror(errno) << "\n";
std::error_code ec(errno, std::generic_category());
throw std::system_error(ec, fmt);
fmt << " - error is " << e.what() << "\n";
throwSystemError(fmt);
};
}

Expand Down
26 changes: 14 additions & 12 deletions src/file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,12 +33,12 @@ class LIBZIM_PRIVATE_API BaseFileReader : public Reader {
BaseFileReader(offset_t offset, zsize_t size)
: _offset(offset), _size(size) {}
~BaseFileReader() = default;
zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };
zsize_t size() const override { return _size; };
offset_t offset() const override { return _offset; };

virtual const Buffer get_mmap_buffer(offset_t offset,
zsize_t size) const = 0;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const override;

protected: // data
offset_t _offset;
Expand All @@ -53,11 +53,12 @@ class LIBZIM_PRIVATE_API FileReader : public BaseFileReader {
FileReader(FileHandle fh, offset_t offset, zsize_t size);
~FileReader() = default;

char read(offset_t offset) const;
void read(char *dest, offset_t offset, zsize_t size) const;
const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const override;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const override;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
private: // functions
char readImpl(offset_t offset) const override;
void readImpl(char *dest, offset_t offset, zsize_t size) const override;

private: // data
// The file handle is stored via a shared pointer so that it can be shared
Expand All @@ -71,13 +72,14 @@ class LIBZIM_PRIVATE_API MultiPartFileReader : public BaseFileReader {
explicit MultiPartFileReader(std::shared_ptr<const FileCompound> source);
~MultiPartFileReader() {};

char read(offset_t offset) const;
void read(char *dest, offset_t offset, zsize_t size) const;
const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const override;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const override;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;
private: // functions
char readImpl(offset_t offset) const override;
void readImpl(char *dest, offset_t offset, zsize_t size) const override;

private:
private: // data
MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size);

std::shared_ptr<const FileCompound> source;
Expand Down
5 changes: 4 additions & 1 deletion src/fs_unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,11 @@ zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const
errno = 0;
while (size_to_read > 0) {
auto size_read = PREAD(m_fd, dest, size_to_read, current_offset);
if (size_read == 0) {
throw std::runtime_error("Cannot read past the end of the file");
}
if (size_read == -1) {
return zsize_t(-1);
throw std::runtime_error("Cannot read file");
}
size_to_read -= size_read;
current_offset += size_read;
Expand Down
29 changes: 22 additions & 7 deletions src/fs_windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,26 +69,41 @@ FD::~FD()
zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const
{
if (!mp_impl)
return zsize_t(-1);
throw std::runtime_error("FD is not open");
EnterCriticalSection(&mp_impl->m_criticalSection);
LARGE_INTEGER off;
off.QuadPart = offset.v;
std::string errorMsg;
auto size_to_read = size.v;

if (!SetFilePointerEx(mp_impl->m_handle, off, NULL, FILE_BEGIN)) {
errorMsg = "Seek fail";
goto err;
}

DWORD size_read;
if (!ReadFile(mp_impl->m_handle, dest, size.v, &size_read, NULL)) {
goto err;
}
if (size_read != size.v) {
goto err;
while (size_to_read > 0) {
// Read by batch < 4GiB
// Lets use a batch of 1GiB
auto batch_to_read = std::min(size_to_read, (size_type)1024*1024*1024);
if (!ReadFile(mp_impl->m_handle, dest, batch_to_read, &size_read, NULL)) {
errorMsg = "Read fail";
goto err;
}

if (size_read == 0) {
errorMsg = "Cannot read past the end of the file";
goto err;
}

size_to_read -= size_read;
dest += size_read;
}
LeaveCriticalSection(&mp_impl->m_criticalSection);
return size;
err:
LeaveCriticalSection(&mp_impl->m_criticalSection);
return zsize_t(-1);
throw std::runtime_error(errorMsg);
}

bool FD::seek(offset_t offset)
Expand Down
32 changes: 29 additions & 3 deletions src/reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#define ZIM_READER_H_

#include <memory>
#include <stdexcept>

#include "zim_types.h"
#include "endian_tools.h"
Expand All @@ -31,13 +32,23 @@

namespace zim {

class Reader {
class LIBZIM_PRIVATE_API Reader {
public:
Reader() {};
virtual zsize_t size() const = 0;
virtual ~Reader() {};

virtual void read(char* dest, offset_t offset, zsize_t size) const = 0;
void read(char* dest, offset_t offset, zsize_t size) const {
if (can_read(offset, size)) {
if (size) {
// Do the actuall read only if we have a size to read
readImpl(dest, offset, size);
}
return;
}
throw std::runtime_error("Cannot read after the end of the reader");
}

template<typename T>
T read_uint(offset_t offset) const {
ASSERT(offset.v, <, size().v);
Expand All @@ -46,7 +57,13 @@ class Reader {
read(tmp_buf, offset, zsize_t(sizeof(T)));
return fromLittleEndian<T>(tmp_buf);
}
virtual char read(offset_t offset) const = 0;

char read(offset_t offset) const {
if (can_read(offset, zsize_t(1))) {
return readImpl(offset);
}
throw std::runtime_error("Cannot read after the end of the reader");
}

virtual const Buffer get_buffer(offset_t offset, zsize_t size) const = 0;
const Buffer get_buffer(offset_t offset) const {
Expand All @@ -59,6 +76,15 @@ class Reader {
virtual offset_t offset() const = 0;

bool can_read(offset_t offset, zsize_t size) const;

private:
// Implementation of the read method.
// Check of the validity of the offset/size has already been done.
virtual void readImpl(char* dest, offset_t offset, zsize_t size) const = 0;

// Implementation of the read method.
// Check of the validity of the offset has already been done.
virtual char readImpl(offset_t offset) const = 0;
};

};
Expand Down
Loading

0 comments on commit d78acf0

Please sign in to comment.