diff --git a/src/buffer_reader.cpp b/src/buffer_reader.cpp index 374d4da5..3b64ca13 100644 --- a/src/buffer_reader.cpp +++ b/src/buffer_reader.cpp @@ -22,6 +22,7 @@ #include "buffer_reader.h" #include "buffer.h" +#include #include namespace zim { @@ -49,18 +50,12 @@ offset_t BufferReader::offset() const } -void BufferReader::read(char* dest, offset_t offset, zsize_t size) const { - ASSERT(offset.v, <=, source.size().v); - ASSERT(offset+offset_t(size.v), <=, offset_t(source.size().v)); - if (! size ) { - return; - } +void BufferReader::readImpl(char* dest, offset_t offset, zsize_t size) const { memcpy(dest, source.data(offset), size.v); } -char BufferReader::read(offset_t offset) const { - ASSERT(offset.v, <, source.size().v); +char BufferReader::readImpl(offset_t offset) const { char dest; dest = *source.data(offset); return dest; diff --git a/src/buffer_reader.h b/src/buffer_reader.h index de346944..f0972c37 100644 --- a/src/buffer_reader.h +++ b/src/buffer_reader.h @@ -30,15 +30,17 @@ class LIBZIM_PRIVATE_API BufferReader : public Reader { : source(source) {} virtual ~BufferReader() {}; - zsize_t size() const; - offset_t offset() const; + zsize_t size() const override; + offset_t offset() const override; - void read(char* dest, offset_t offset, zsize_t size) const; - char read(offset_t offset) const; - const Buffer get_buffer(offset_t offset, zsize_t size) const; - std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; + const Buffer get_buffer(offset_t offset, zsize_t size) const override; + std::unique_ptr sub_reader(offset_t offset, zsize_t size) const override; - private: + private: // functions + void readImpl(char* dest, offset_t offset, zsize_t size) const override; + char readImpl(offset_t offset) const override; + + private: // data const Buffer source; }; diff --git a/src/file_reader.cpp b/src/file_reader.cpp index 6dae460c..0efaeaef 100644 --- a/src/file_reader.cpp +++ b/src/file_reader.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #ifndef _WIN32 @@ -43,6 +44,19 @@ typedef SSIZE_T ssize_t; #endif +namespace { + [[noreturn]] void throwSystemError(const std::string& errorText) + { +#ifdef _WIN32 + // Windows doesn't use errno. + throw std::system_error(std::error_code(), errorText); +#else + std::error_code ec(errno, std::generic_category()); + throw std::system_error(ec, errorText); +#endif + } +} + namespace zim { //////////////////////////////////////////////////////////////////////////////// @@ -60,8 +74,7 @@ MultiPartFileReader::MultiPartFileReader(std::shared_ptr sou ASSERT(offset.v+size.v, <=, source->fsize().v); } -char MultiPartFileReader::read(offset_t offset) const { - ASSERT(offset.v, <, _size.v); +char MultiPartFileReader::readImpl(offset_t offset) const { offset += _offset; auto part_pair = source->locate(offset); auto& fhandle = part_pair->second->fhandle(); @@ -81,19 +94,13 @@ char MultiPartFileReader::read(offset_t offset) const { fmt << " - Reading offset at " << offset.v << "\n"; fmt << " - logical local offset is " << logical_local_offset.v << "\n"; fmt << " - physical local offset is " << physical_local_offset.v << "\n"; - fmt << " - error is " << strerror(errno) << "\n"; - std::error_code ec(errno, std::generic_category()); - throw std::system_error(ec, fmt); + fmt << " - error is " << e.what() << "\n"; + throwSystemError(fmt); }; return ret; } -void MultiPartFileReader::read(char* dest, offset_t offset, zsize_t size) const { - ASSERT(offset.v, <=, _size.v); - ASSERT(offset.v+size.v, <=, _size.v); - if (! size ) { - return; - } +void MultiPartFileReader::readImpl(char* dest, offset_t offset, zsize_t size) const { offset += _offset; auto found_range = source->locate(offset, size); for(auto current = found_range.first; current!=found_range.second; current++){ @@ -116,9 +123,8 @@ void MultiPartFileReader::read(char* dest, offset_t offset, zsize_t size) const fmt << " - Reading offset at " << offset.v << "\n"; fmt << " - logical local offset is " << logical_local_offset.v << "\n"; fmt << " - physical local offset is " << physical_local_offset.v << "\n"; - fmt << " - error is " << strerror(errno) << "\n"; - std::error_code ec(errno, std::generic_category()); - throw std::system_error(ec, fmt); + fmt << " - error is " << e.what() << "\n"; + throwSystemError(fmt); }; ASSERT(size_to_get, <=, size); dest += size_to_get.v; @@ -244,9 +250,8 @@ FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size) { } -char FileReader::read(offset_t offset) const +char FileReader::readImpl(offset_t offset) const { - ASSERT(offset.v, <, _size.v); offset += _offset; char ret; try { @@ -256,20 +261,14 @@ char FileReader::read(offset_t offset) const Formatter fmt; fmt << "Cannot read a char.\n"; fmt << " - Reading offset at " << offset.v << "\n"; - fmt << " - error is " << strerror(errno) << "\n"; - std::error_code ec(errno, std::generic_category()); - throw std::system_error(ec, fmt); + fmt << " - error is " << e.what() << "\n"; + throwSystemError(fmt); }; return ret; } -void FileReader::read(char* dest, offset_t offset, zsize_t size) const +void FileReader::readImpl(char* dest, offset_t offset, zsize_t size) const { - ASSERT(offset.v, <=, _size.v); - ASSERT(offset.v+size.v, <=, _size.v); - if (! size ) { - return; - } offset += _offset; try { _fhandle->readAt(dest, size, offset); @@ -278,9 +277,8 @@ void FileReader::read(char* dest, offset_t offset, zsize_t size) const fmt << "Cannot read chars.\n"; fmt << " - Reading offset at " << offset.v << "\n"; fmt << " - size is " << size.v << "\n"; - fmt << " - error is " << strerror(errno) << "\n"; - std::error_code ec(errno, std::generic_category()); - throw std::system_error(ec, fmt); + fmt << " - error is " << e.what() << "\n"; + throwSystemError(fmt); }; } diff --git a/src/file_reader.h b/src/file_reader.h index 248df152..817e24c3 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -33,12 +33,12 @@ class LIBZIM_PRIVATE_API BaseFileReader : public Reader { BaseFileReader(offset_t offset, zsize_t size) : _offset(offset), _size(size) {} ~BaseFileReader() = default; - zsize_t size() const { return _size; }; - offset_t offset() const { return _offset; }; + zsize_t size() const override { return _size; }; + offset_t offset() const override { return _offset; }; virtual const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const = 0; - const Buffer get_buffer(offset_t offset, zsize_t size) const; + const Buffer get_buffer(offset_t offset, zsize_t size) const override; protected: // data offset_t _offset; @@ -53,11 +53,12 @@ class LIBZIM_PRIVATE_API FileReader : public BaseFileReader { FileReader(FileHandle fh, offset_t offset, zsize_t size); ~FileReader() = default; - char read(offset_t offset) const; - void read(char *dest, offset_t offset, zsize_t size) const; + const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const override; + std::unique_ptr sub_reader(offset_t offset, zsize_t size) const override; - const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const; - std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; + private: // functions + char readImpl(offset_t offset) const override; + void readImpl(char *dest, offset_t offset, zsize_t size) const override; private: // data // The file handle is stored via a shared pointer so that it can be shared @@ -71,13 +72,14 @@ class LIBZIM_PRIVATE_API MultiPartFileReader : public BaseFileReader { explicit MultiPartFileReader(std::shared_ptr source); ~MultiPartFileReader() {}; - char read(offset_t offset) const; - void read(char *dest, offset_t offset, zsize_t size) const; + const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const override; + std::unique_ptr sub_reader(offset_t offset, zsize_t size) const override; - const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const; - std::unique_ptr sub_reader(offset_t offset, zsize_t size) const; + private: // functions + char readImpl(offset_t offset) const override; + void readImpl(char *dest, offset_t offset, zsize_t size) const override; - private: + private: // data MultiPartFileReader(std::shared_ptr source, offset_t offset, zsize_t size); std::shared_ptr source; diff --git a/src/fs_unix.cpp b/src/fs_unix.cpp index 758c5334..315838f7 100644 --- a/src/fs_unix.cpp +++ b/src/fs_unix.cpp @@ -47,8 +47,11 @@ zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const errno = 0; while (size_to_read > 0) { auto size_read = PREAD(m_fd, dest, size_to_read, current_offset); + if (size_read == 0) { + throw std::runtime_error("Cannot read past the end of the file"); + } if (size_read == -1) { - return zsize_t(-1); + throw std::runtime_error("Cannot read file"); } size_to_read -= size_read; current_offset += size_read; diff --git a/src/fs_windows.cpp b/src/fs_windows.cpp index e6f6cfe0..1bc226b4 100644 --- a/src/fs_windows.cpp +++ b/src/fs_windows.cpp @@ -69,26 +69,41 @@ FD::~FD() zsize_t FD::readAt(char* dest, zsize_t size, offset_t offset) const { if (!mp_impl) - return zsize_t(-1); + throw std::runtime_error("FD is not open"); EnterCriticalSection(&mp_impl->m_criticalSection); LARGE_INTEGER off; off.QuadPart = offset.v; + std::string errorMsg; + auto size_to_read = size.v; + if (!SetFilePointerEx(mp_impl->m_handle, off, NULL, FILE_BEGIN)) { + errorMsg = "Seek fail"; goto err; } DWORD size_read; - if (!ReadFile(mp_impl->m_handle, dest, size.v, &size_read, NULL)) { - goto err; - } - if (size_read != size.v) { - goto err; + while (size_to_read > 0) { + // Read by batch < 4GiB + // Lets use a batch of 1GiB + auto batch_to_read = std::min(size_to_read, (size_type)1024*1024*1024); + if (!ReadFile(mp_impl->m_handle, dest, batch_to_read, &size_read, NULL)) { + errorMsg = "Read fail"; + goto err; + } + + if (size_read == 0) { + errorMsg = "Cannot read past the end of the file"; + goto err; + } + + size_to_read -= size_read; + dest += size_read; } LeaveCriticalSection(&mp_impl->m_criticalSection); return size; err: LeaveCriticalSection(&mp_impl->m_criticalSection); - return zsize_t(-1); + throw std::runtime_error(errorMsg); } bool FD::seek(offset_t offset) diff --git a/src/reader.h b/src/reader.h index 767b5e2c..8a4dc655 100644 --- a/src/reader.h +++ b/src/reader.h @@ -22,6 +22,7 @@ #define ZIM_READER_H_ #include +#include #include "zim_types.h" #include "endian_tools.h" @@ -31,13 +32,23 @@ namespace zim { -class Reader { +class LIBZIM_PRIVATE_API Reader { public: Reader() {}; virtual zsize_t size() const = 0; virtual ~Reader() {}; - virtual void read(char* dest, offset_t offset, zsize_t size) const = 0; + void read(char* dest, offset_t offset, zsize_t size) const { + if (can_read(offset, size)) { + if (size) { + // Do the actuall read only if we have a size to read + readImpl(dest, offset, size); + } + return; + } + throw std::runtime_error("Cannot read after the end of the reader"); + } + template T read_uint(offset_t offset) const { ASSERT(offset.v, <, size().v); @@ -46,7 +57,13 @@ class Reader { read(tmp_buf, offset, zsize_t(sizeof(T))); return fromLittleEndian(tmp_buf); } - virtual char read(offset_t offset) const = 0; + + char read(offset_t offset) const { + if (can_read(offset, zsize_t(1))) { + return readImpl(offset); + } + throw std::runtime_error("Cannot read after the end of the reader"); + } virtual const Buffer get_buffer(offset_t offset, zsize_t size) const = 0; const Buffer get_buffer(offset_t offset) const { @@ -59,6 +76,15 @@ class Reader { virtual offset_t offset() const = 0; bool can_read(offset_t offset, zsize_t size) const; + + private: + // Implementation of the read method. + // Check of the validity of the offset/size has already been done. + virtual void readImpl(char* dest, offset_t offset, zsize_t size) const = 0; + + // Implementation of the read method. + // Check of the validity of the offset has already been done. + virtual char readImpl(offset_t offset) const = 0; }; }; diff --git a/test/istreamreader.cpp b/test/istreamreader.cpp index 2a913c62..a96b5915 100644 --- a/test/istreamreader.cpp +++ b/test/istreamreader.cpp @@ -37,20 +37,31 @@ class InfiniteZeroStream : public IStreamReader void readImpl(char* buf, zim::zsize_t nbytes) { memset(buf, 0, nbytes.v); } }; +class InfiniteIncreasingStream: public IStreamReader +{ + zim::offset_type current_offset = 0; + + void readImpl(char* buf, zim::zsize_t nbytes) { + for (size_type i=0; i()); - EXPECT_EQ(0L, ids.read()); + EXPECT_EQ(0, ids.read()); + EXPECT_EQ(0L, ids.read()); // zim::fromLittleEndian() handles only integer types // EXPECT_EQ(0.0, ids.read()); } -TEST(IStreamReader, sub_reader) +TEST(IStreamReader, sub_reader_zero) { const size_t N = 16; const char zerobuf[N] = {0}; @@ -62,5 +73,34 @@ TEST(IStreamReader, sub_reader) EXPECT_EQ(buffer.size().v, N); EXPECT_EQ(0, memcmp(buffer.data(), zerobuf, N)); } + +TEST(IStreamReader, read_increasing) +{ + InfiniteIncreasingStream iis; + IStreamReader& ids = iis; + EXPECT_EQ(0x03020100, ids.read()); + EXPECT_EQ(0x0B0A090807060504, ids.read()); + + // zim::fromLittleEndian() handles only integer types + // EXPECT_EQ(0.0, ids.read()); +} + +TEST(IStreamReader, sub_reader_increasing) +{ + const size_t N = 16; + const char refbuf[N] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; + InfiniteIncreasingStream iis; + IStreamReader& ids = iis; + auto subReader = ids.sub_reader(zim::zsize_t(N)); + EXPECT_EQ(subReader->size().v, N); + auto buffer = subReader->get_buffer(zim::offset_t(0), zim::zsize_t(N)); + EXPECT_EQ(buffer.size().v, N); + EXPECT_EQ(0, memcmp(buffer.data(), refbuf, N)); + + buffer = subReader->get_buffer(zim::offset_t(5), zim::zsize_t(N-5)); + EXPECT_EQ(buffer.size().v, N-5); + EXPECT_EQ(0, memcmp(buffer.data(), refbuf+5, N-5)); +} + } // unnamed namespace diff --git a/test/reader.cpp b/test/reader.cpp index 183d857c..a871389a 100644 --- a/test/reader.cpp +++ b/test/reader.cpp @@ -84,6 +84,10 @@ TEST(FileReader, shouldJustWork) reader->read(out, offset_t(10), zsize_t(4)); ASSERT_EQ(0, memcmp(out, "klmn", 4)); + ASSERT_EQ(0, memcmp(reader->get_buffer(offset_t(0), zsize_t(4)).data(), "abcd", 4)); + ASSERT_EQ(0, memcmp(reader->get_buffer(offset_t(5), zsize_t(4)).data(), "fghi", 4)); + ASSERT_EQ(0, memcmp(reader->get_buffer(offset_t(5), zsize_t(2)).data(), "fg", 2)); + // Can read last bit of the file. ASSERT_EQ('z', reader->read(offset_t(25))); reader->read(out, offset_t(25), zsize_t(1)); @@ -119,6 +123,10 @@ TEST(FileReader, subReader) subReader->read(out, offset_t(5), zsize_t(2)); ASSERT_EQ(0, memcmp(out, "jkgh", 4)); + ASSERT_EQ(0, memcmp(subReader->get_buffer(offset_t(0), zsize_t(4)).data(), "efgh", 4)); + ASSERT_EQ(0, memcmp(subReader->get_buffer(offset_t(5), zsize_t(4)).data(), "jklm", 4)); + ASSERT_EQ(0, memcmp(subReader->get_buffer(offset_t(5), zsize_t(2)).data(), "jk", 2)); + // Can read last bit of the file. ASSERT_EQ('x', subReader->read(offset_t(19))); subReader->read(out, offset_t(19), zsize_t(1));