Skip to content

Commit

Permalink
Merge pull request #867 from openzim/fix_macos_mmap
Browse files Browse the repository at this point in the history
  • Loading branch information
mgautierfr committed Apr 3, 2024
2 parents 48da5b8 + 916afdf commit d7a8cac
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 55 deletions.
82 changes: 45 additions & 37 deletions src/file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,8 @@ MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> sou
: MultiPartFileReader(source, offset_t(0), source->fsize()) {}

MultiPartFileReader::MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size)
: source(source),
_offset(offset),
_size(size)
: BaseFileReader(offset, size),
source(source)
{
ASSERT(offset.v, <=, source->fsize().v);
ASSERT(offset.v+size.v, <=, source->fsize().v);
Expand Down Expand Up @@ -147,11 +146,13 @@ mmapReadOnly(int fd, offset_type offset, size_type size)
#endif

const auto p = (char*)mmap(NULL, size, PROT_READ, MAP_FLAGS, fd, offset);
if (p == MAP_FAILED)
throw std::runtime_error(Formatter()
<< "Cannot mmap size " << size << " at off "
<< offset << " : " << strerror(errno));

if (p == MAP_FAILED) {
// mmap may fails for a lot of reason.
// Most of them (io error, too big size...) may not recoverable but some of
// them may be relative to mmap only and a "simple" read from the file would work.
// Let's throw a MMapException to fallback to read (and potentially fail again there).
throw MMapException();
}
return p;
}

Expand All @@ -178,36 +179,48 @@ makeMmappedBuffer(int fd, offset_t offset, zsize_t size)
} // unnamed namespace
#endif // ENABLE_USE_MMAP

const Buffer MultiPartFileReader::get_buffer(offset_t offset, zsize_t size) const {
const Buffer BaseFileReader::get_buffer(offset_t offset, zsize_t size) const {
ASSERT(size, <=, _size);
#ifdef ENABLE_USE_MMAP
try {
auto found_range = source->locate(_offset+offset, size);
auto first_part_containing_it = found_range.first;
if (++first_part_containing_it != found_range.second) {
throw MMapException();
}

// The range is in only one part
auto range = found_range.first->first;
auto part = found_range.first->second;
auto logical_local_offset = offset + _offset - range.min;
ASSERT(size, <=, part->size());
int fd = part->fhandle().getNativeHandle();
auto physical_local_offset = logical_local_offset + part->offset();
return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size);
return get_mmap_buffer(offset, size);
} catch(MMapException& e)
#endif
{
// The range is several part, or we are on Windows.
// We will have to do some memory copies :/
// We cannot do the mmap, for several possible reasons:
// - Mmap offset is too big (>4GB on 32 bits)
// - The range is several part
// - We are on Windows.
// - Mmap itself has failed
// We will have to do some memory copies (or fail trying to) :/
// [TODO] Use Windows equivalent for mmap.
auto ret_buffer = Buffer::makeBuffer(size);
read(const_cast<char*>(ret_buffer.data()), offset, size);
return ret_buffer;
}
}

const Buffer MultiPartFileReader::get_mmap_buffer(offset_t offset, zsize_t size) const {
#ifdef ENABLE_USE_MMAP
auto found_range = source->locate(_offset + offset, size);
auto first_part_containing_it = found_range.first;
if (++first_part_containing_it != found_range.second) {
throw MMapException();
}

// The range is in only one part
auto range = found_range.first->first;
auto part = found_range.first->second;
auto logical_local_offset = offset + _offset - range.min;
ASSERT(size, <=, part->size());
int fd = part->fhandle().getNativeHandle();
auto physical_local_offset = logical_local_offset + part->offset();
return Buffer::makeBuffer(makeMmappedBuffer(fd, physical_local_offset, size), size);
#else
return Buffer::makeBuffer(size); // unreachable
#endif
}

bool Reader::can_read(offset_t offset, zsize_t size) const
{
return (offset.v <= this->size().v && (offset.v+size.v) <= this->size().v);
Expand All @@ -226,9 +239,8 @@ std::unique_ptr<const Reader> MultiPartFileReader::sub_reader(offset_t offset, z
////////////////////////////////////////////////////////////////////////////////

FileReader::FileReader(FileHandle fh, offset_t offset, zsize_t size)
: _fhandle(fh)
, _offset(offset)
, _size(size)
: BaseFileReader(offset, size)
, _fhandle(fh)
{
}

Expand Down Expand Up @@ -272,17 +284,13 @@ void FileReader::read(char* dest, offset_t offset, zsize_t size) const
};
}

const Buffer FileReader::get_buffer(offset_t offset, zsize_t size) const
{
ASSERT(size, <=, _size);
const Buffer FileReader::get_mmap_buffer(offset_t offset, zsize_t size) const {
#ifdef ENABLE_USE_MMAP
offset += _offset;
auto local_offset = offset + _offset;
int fd = _fhandle->getNativeHandle();
return Buffer::makeBuffer(makeMmappedBuffer(fd, offset, size), size);
#else // We are on Windows. [TODO] Use Windows equivalent for mmap.
auto ret_buffer = Buffer::makeBuffer(size);
read(const_cast<char*>(ret_buffer.data()), offset, size);
return ret_buffer;
return Buffer::makeBuffer(makeMmappedBuffer(fd, local_offset, size), size);
#else
return Buffer::makeBuffer(size); // unreachable
#endif
}

Expand Down
43 changes: 25 additions & 18 deletions src/file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,52 +28,59 @@ namespace zim {

class FileCompound;

class FileReader : public Reader {
class BaseFileReader : public Reader {
public: // functions
BaseFileReader(offset_t offset, zsize_t size)
: _offset(offset), _size(size) {}
~BaseFileReader() = default;
zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

virtual const Buffer get_mmap_buffer(offset_t offset,
zsize_t size) const = 0;
const Buffer get_buffer(offset_t offset, zsize_t size) const;

protected: // data
offset_t _offset;
zsize_t _size;
};

class FileReader : public BaseFileReader {
public: // types
typedef std::shared_ptr<const DEFAULTFS::FD> FileHandle;

public: // functions
explicit FileReader(FileHandle fh, offset_t offset, zsize_t size);
FileReader(FileHandle fh, offset_t offset, zsize_t size);
~FileReader() = default;

zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

char read(offset_t offset) const;
void read(char* dest, offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
void read(char *dest, offset_t offset, zsize_t size) const;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;

private: // data
// The file handle is stored via a shared pointer so that it can be shared
// by a sub_reader (otherwise the file handle would be invalidated by
// FD destructor when the sub-reader is destroyed).
FileHandle _fhandle;
offset_t _offset;
zsize_t _size;
};

class MultiPartFileReader : public Reader {
class MultiPartFileReader : public BaseFileReader {
public:
MultiPartFileReader(std::shared_ptr<const FileCompound> source);
explicit MultiPartFileReader(std::shared_ptr<const FileCompound> source);
~MultiPartFileReader() {};

zsize_t size() const { return _size; };
offset_t offset() const { return _offset; };

char read(offset_t offset) const;
void read(char* dest, offset_t offset, zsize_t size) const;
const Buffer get_buffer(offset_t offset, zsize_t size) const;
void read(char *dest, offset_t offset, zsize_t size) const;

const Buffer get_mmap_buffer(offset_t offset, zsize_t size) const;
std::unique_ptr<const Reader> sub_reader(offset_t offset, zsize_t size) const;

private:
MultiPartFileReader(std::shared_ptr<const FileCompound> source, offset_t offset, zsize_t size);

std::shared_ptr<const FileCompound> source;
offset_t _offset;
zsize_t _size;
};

};
Expand Down

0 comments on commit d7a8cac

Please sign in to comment.