From 98867a2bc99f18a5bdc8db281ca65e5660ff6b5b Mon Sep 17 00:00:00 2001 From: Cole Faust Date: Wed, 28 Feb 2024 10:01:28 -0800 Subject: [PATCH] Fall back to reading instead of mmap on windows Windows doesn't have support for mmap (at least not with the same apis) --- src/file_pool.rs | 174 +++++++++++++++++++++++++++++------------------ 1 file changed, 109 insertions(+), 65 deletions(-) diff --git a/src/file_pool.rs b/src/file_pool.rs index 6c8d1ad..499812c 100644 --- a/src/file_pool.rs +++ b/src/file_pool.rs @@ -1,83 +1,127 @@ use anyhow::bail; use core::slice; -use libc::{ - c_void, mmap, munmap, sysconf, MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, PROT_READ, - PROT_WRITE, _SC_PAGESIZE, -}; -use std::{ - os::fd::{AsFd, AsRawFd}, - path::Path, - ptr::null_mut, - sync::Mutex, -}; +use std::{path::Path, sync::Mutex}; -/// FilePool is a datastructure that is intended to hold onto byte buffers and give out immutable -/// references to them. But it can also accept new byte buffers while old ones are still lent out. -/// This requires interior mutability / unsafe code. Appending to a Vec while references to other -/// elements are held is generally unsafe, because the Vec can reallocate all the prior elements -/// to a new memory location. But if the elements themselves are pointers to stable memory, the -/// contents of those pointers can be referenced safely. This also requires guarding the outer -/// Vec with a Mutex so that two threads don't append to it at the same time. 
-pub struct FilePool { - files: Mutex<Vec<(*mut c_void, usize)>>, -} -impl FilePool { - pub fn new() -> FilePool { - FilePool { - files: Mutex::new(Vec::new()), - } +#[cfg(unix)] +mod mmap { + use super::*; + use libc::{ + c_void, mmap, munmap, sysconf, MAP_ANONYMOUS, MAP_FAILED, MAP_FIXED, MAP_PRIVATE, + PROT_READ, PROT_WRITE, _SC_PAGESIZE, + }; + use std::{ + os::fd::{AsFd, AsRawFd}, + ptr::null_mut, + }; + /// FilePool is a datastructure that is intended to hold onto byte buffers and give out immutable + /// references to them. But it can also accept new byte buffers while old ones are still lent out. + /// This requires interior mutability / unsafe code. Appending to a Vec while references to other + /// elements are held is generally unsafe, because the Vec can reallocate all the prior elements + /// to a new memory location. But if the elements themselves are pointers to stable memory, the + /// contents of those pointers can be referenced safely. This also requires guarding the outer + /// Vec with a Mutex so that two threads don't append to it at the same time. + pub struct FilePool { + files: Mutex<Vec<(*mut c_void, usize)>>, } - - pub fn read_file(&self, path: &Path) -> anyhow::Result<&[u8]> { - let page_size = unsafe { sysconf(_SC_PAGESIZE) } as usize; - let file = std::fs::File::open(path)?; - let fd = file.as_fd().as_raw_fd(); - let file_size = file.metadata()?.len() as usize; - let mapping_size = (file_size + page_size).next_multiple_of(page_size); - unsafe { - // size + 1 to add a null terminator. 
- let addr = mmap(null_mut(), mapping_size, PROT_READ, MAP_PRIVATE, fd, 0); - if addr == MAP_FAILED { - bail!("mmap failed"); + impl FilePool { + pub fn new() -> FilePool { + FilePool { + files: Mutex::new(Vec::new()), } + } + + pub fn read_file(&self, path: &Path) -> anyhow::Result<&[u8]> { + let page_size = unsafe { sysconf(_SC_PAGESIZE) } as usize; + let file = std::fs::File::open(path)?; + let fd = file.as_fd().as_raw_fd(); + let file_size = file.metadata()?.len() as usize; + let mapping_size = (file_size + page_size).next_multiple_of(page_size); + unsafe { + // size + 1 to add a null terminator. + let addr = mmap(null_mut(), mapping_size, PROT_READ, MAP_PRIVATE, fd, 0); + if addr == MAP_FAILED { + bail!("mmap failed"); + } + + let addr2 = mmap( + addr.add(mapping_size).sub(page_size), + page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, + -1, + 0, + ); + if addr2 == MAP_FAILED { + bail!("mmap failed"); + } + *(addr.add(mapping_size).sub(page_size) as *mut u8) = 0; + // The manpages say the extra bytes past the end of the file are + // zero-filled, but just to make sure: + assert!(*(addr.add(file_size) as *mut u8) == 0); + + let files = &mut self.files.lock().unwrap(); + files.push((addr, mapping_size)); - let addr2 = mmap( - addr.add(mapping_size).sub(page_size), - page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, - -1, - 0, - ); - if addr2 == MAP_FAILED { - bail!("mmap failed"); + Ok(slice::from_raw_parts(addr as *mut u8, file_size + 1)) } - *(addr.add(mapping_size).sub(page_size) as *mut u8) = 0; - // The manpages say the extra bytes past the end of the file are - // zero-filled, but just to make sure: - assert!(*(addr.add(file_size) as *mut u8) == 0); + } + } - let files = &mut self.files.lock().unwrap(); - files.push((addr, mapping_size)); + // SAFETY: Sync isn't implemented automatically because we have a *mut pointer, + // but that pointer isn't used at all aside from the drop implementation, so 
+ // we won't have data races. + unsafe impl Sync for FilePool {} + unsafe impl Send for FilePool {} - Ok(slice::from_raw_parts(addr as *mut u8, file_size + 1)) + impl Drop for FilePool { + fn drop(&mut self) { + let files = self.files.lock().unwrap(); + for &(addr, len) in files.iter() { + unsafe { + munmap(addr, len); + } + } } } } -// SAFETY: Sync isn't implemented automatically because we have a *mut pointer, -// but that pointer isn't used at all aside from the drop implementation, so -// we won't have data races. -unsafe impl Sync for FilePool {} -unsafe impl Send for FilePool {} +#[cfg(not(unix))] +mod read { + use crate::scanner::read_file_with_nul; -impl Drop for FilePool { - fn drop(&mut self) { - let files = self.files.lock().unwrap(); - for &(addr, len) in files.iter() { - unsafe { - munmap(addr, len); + use super::*; + + /// FilePool is a datastructure that is intended to hold onto byte buffers and give out immutable + /// references to them. But it can also accept new byte buffers while old ones are still lent out. + /// This requires interior mutability / unsafe code. Appending to a Vec while references to other + /// elements are held is generally unsafe, because the Vec can reallocate all the prior elements + /// to a new memory location. But if the elements themselves are unchanging Vecs, the + /// contents of those Vecs can be referenced safely. This also requires guarding the outer + /// Vec with a Mutex so that two threads don't append to it at the same time. 
+ pub struct FilePool { + files: Mutex<Vec<Vec<u8>>>, + } + + impl FilePool { + pub fn new() -> FilePool { + FilePool { + files: Mutex::new(Vec::new()), } } + + pub fn read_file(&self, path: &Path) -> anyhow::Result<&[u8]> { + let bytes = read_file_with_nul(path)?; + let addr = bytes.as_ptr(); + let len = bytes.len(); + self.files.lock().unwrap().push(bytes); + + unsafe { Ok(slice::from_raw_parts(addr as *mut u8, len)) } + } } } + +#[cfg(unix)] +pub use mmap::FilePool; + +#[cfg(not(unix))] +pub use read::FilePool;