Skip to content

Commit

Permalink
Feat/munmap (#171)
Browse files Browse the repository at this point in the history
* support munmap and multi mmap

Signed-off-by: anti-entropy123 <1348651580@qq.com>
  • Loading branch information
anti-entropy123 authored Nov 25, 2023
1 parent 877a1ba commit 52b0dea
Show file tree
Hide file tree
Showing 11 changed files with 259 additions and 72 deletions.
15 changes: 14 additions & 1 deletion common_service/fatfs/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,20 @@ pub fn fatfs_read(fd: Fd, buf: &mut [u8]) -> Result<Size, ()> {
let mut table = FTABLE.lock().expect("require lock failed.");
let file = table.get_file_mut(fd);

Ok(file.read(buf).expect("fatfs_read failed."))
let mut read_size = 0;
let mut buf = buf;
while !buf.is_empty() {
match file.read(buf) {
Ok(0) => break,
Ok(size) => {
read_size += size;
buf = &mut buf[size..]
}
Err(e) => panic!("fatfs read failed: {}", e),
}
}

Ok(read_size)
}

#[no_mangle]
Expand Down
27 changes: 25 additions & 2 deletions common_service/mm/src/mmap.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,13 @@ use ms_hostcall::{
err::{LibOSErr, LibOSResult},
types::{Fd, ProtFlags},
};
use ms_std::{libos::libos, println};
use ms_std::libos::libos;

/// Page size in bytes (4 KiB). Mapping lengths must be a multiple of this value,
/// and unmapped lengths are rounded up to it.
const PAGE_SIZE: usize = 0x1000;

#[no_mangle]
pub fn libos_mmap(length: usize, prot: ProtFlags, fd: Fd) -> LibOSResult<usize> {
if length % 0x1000 > 0 {
if length % PAGE_SIZE > 0 {
return Err(LibOSErr::BadArgs);
}
let layout = Layout::from_size_align(length, 0x1000).map_err(|_| LibOSErr::BadArgs)?;
Expand Down Expand Up @@ -42,6 +44,27 @@ pub fn libos_mmap(length: usize, prot: ProtFlags, fd: Fd) -> LibOSResult<usize>
Ok(mmap_addr)
}

/// Releases a page-aligned memory region and detaches its file backend.
///
/// Steps, in order:
/// 1. ask the file-backend service to forget the region starting at
///    `mem_region`'s address,
/// 2. make the (page-rounded) region readable and writable again,
/// 3. return the memory to the global allocator with a page-aligned layout.
///
/// `_file_based` is currently ignored: the file backend is always asked to
/// unregister the region. NOTE(review): confirm this is intended for mappings
/// that never had a file backend.
///
/// # Panics
///
/// Panics if unregistering fails, if `mprotect` fails, or if the rounded length
/// does not form a valid layout.
#[no_mangle]
pub fn libos_munmap(mem_region: &mut [u8], _file_based: bool) -> LibOSResult<()> {
    libos!(unregister_file_backend(mem_region.as_ptr() as usize))
        .expect("unregister file backend failed.");

    // Round the length up to a whole number of pages.
    let aligned_length = (mem_region.len() + PAGE_SIZE - 1) & !(PAGE_SIZE - 1);
    let region_ptr = mem_region.as_mut_ptr();

    // SAFETY: `region_ptr` comes from a live `&mut [u8]`; mprotect only changes
    // page permissions and reports invalid ranges through its return value.
    let rc = unsafe {
        libc::mprotect(
            region_ptr as *mut libc::c_void,
            aligned_length,
            libc::PROT_READ | libc::PROT_WRITE,
        )
    };
    // The allocator may write into freed memory, so handing back a region that
    // is still protected must not go unnoticed.
    assert_eq!(rc, 0, "mprotect failed.");

    let layout = Layout::from_size_align(aligned_length, PAGE_SIZE).expect("wrong align.");
    // SAFETY: presumably the region was allocated by `libos_mmap` with this same
    // page-aligned layout — TODO confirm against the allocation site.
    unsafe { alloc::alloc::dealloc(region_ptr, layout) };

    Ok(())
}

pub fn trans_protflag(flags: ProtFlags) -> i32 {
let mut result = Default::default();
if flags.contains(ProtFlags::READ) {
Expand Down
2 changes: 1 addition & 1 deletion common_service/mmap_file_backend/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,6 @@ crate-type = ["dylib"]
ms_std = { path = "../../ms_std" }
ms_hostcall = { path = "../../ms_hostcall" }

nix = { version = "0.27.1", features = ["poll"] }
nix = { version = "0.27.1", features = ["poll", "signal", "fs", "event"] }
userfaultfd = { version = "0.7.0", features = [] }
lazy_static = "1.4.0"
234 changes: 178 additions & 56 deletions common_service/mmap_file_backend/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,25 +1,69 @@
use std::{
ffi::c_void,
mem::{ManuallyDrop, MaybeUninit},
os::raw::c_void,
os::fd::{AsFd, BorrowedFd, RawFd},
slice::from_raw_parts_mut,
sync::Mutex,
sync::{Mutex, MutexGuard, RwLock},
u64,
};

use lazy_static::lazy_static;
use ms_std::{fs::File, io::Read, libos::libos, println};
use nix::poll::{poll, PollFd, PollFlags};
use nix::{
fcntl::{fcntl, FcntlArg, OFlag},
sys::epoll::{Epoll, EpollCreateFlags, EpollEvent, EpollFlags},
};
use userfaultfd::{Event, Uffd, UffdBuilder};

use ms_hostcall::{err::LibOSResult, types::Fd};
pub use ms_std;
use ms_std::{fs::File, io::Read, libos::libos};

/// A page-sized byte buffer whose alignment is forced to 4096 bytes.
/// The fault handler allocates one of these as the staging buffer for page contents.
#[repr(C, align(4096))]
struct Page([u8; PAGE_SIZE]);

/// Page size in bytes (4 KiB): the size of the staging buffer, the alignment
/// granule for fault addresses and file offsets, and the length of each uffd copy.
const PAGE_SIZE: usize = 0x1000;

/// An OS pipe used to wake the page-fault handler while it waits for events.
/// Both descriptors are closed when the value is dropped.
struct NotifyPipe {
// Read end of the pipe; the fault handler waits on it and drains it.
// (The spelling is kept because other code refers to this field name.)
recevier: RawFd,
// Write end of the pipe; set to non-blocking in `new`.
sender: RawFd,
}

impl NotifyPipe {
    /// Creates the pipe and switches its write end to non-blocking mode.
    ///
    /// # Panics
    ///
    /// Panics if the pipe cannot be created or its flags cannot be read/updated.
    fn new() -> Self {
        let (recevier, sender) = nix::unistd::pipe().expect("make os pipe failed");
        let flags = fcntl(sender, FcntlArg::F_GETFL).expect("get flags failed");
        // `from_bits_truncate` instead of `from_bits(..).unwrap()`: the kernel may
        // report status bits that nix's `OFlag` does not model, and that must not
        // turn into a panic here.
        let flags = OFlag::from_bits_truncate(flags) | OFlag::O_NONBLOCK;
        fcntl(sender, FcntlArg::F_SETFL(flags)).expect("set non block failed");

        Self { recevier, sender }
    }

    /// Reads (at most) one byte from the read end, discarding it.
    /// The result of the read is intentionally not inspected.
    fn consume(&self) {
        let mut buf = [0u8];
        // SAFETY: `buf` is a valid, writable one-byte buffer for the duration of the call.
        unsafe { nix::libc::read(self.recevier, buf.as_mut_ptr().cast::<c_void>(), 1) };
    }

    /// Writes one byte to the write end to wake whoever is waiting on the read end.
    /// The write end is non-blocking and the result is not inspected, so a
    /// failed write is silently dropped.
    fn notify(&self) {
        let buf = [0u8];
        // SAFETY: `buf` is a valid, readable one-byte buffer for the duration of the call.
        unsafe { nix::libc::write(self.sender, buf.as_ptr().cast::<c_void>(), 1) };
    }
}

impl Drop for NotifyPipe {
    /// Closes the read end first, then the write end; close results are ignored.
    fn drop(&mut self) {
        for fd in [self.recevier, self.sender] {
            // SAFETY: closing a raw descriptor has no memory-safety preconditions.
            unsafe {
                nix::libc::close(fd);
            }
        }
    }
}

lazy_static! {
// Taken for the whole duration of register/unregister calls so they run one at a time.
static ref REGISTER: Mutex<()> = Default::default();
// Every memory region currently registered with a userfaultfd, in registration order.
static ref REGISTERD_REGIONS: Mutex<Vec<RegisterdMemRegion>> = Default::default();
// Wake-up pipe for the fault handler: `Some` once initialised, reset to `None`
// when the handler exits.
static ref NOTIFY_PIPE: RwLock<Option<NotifyPipe>> = Default::default();
}

#[derive(Debug)]
Expand All @@ -29,70 +73,125 @@ struct RegisterdMemRegion {
src_fd: Fd,
}

/// Fills the staging buffer at `page` with `PAGE_SIZE` bytes of file content
/// starting at `offset`.
///
/// `page` must point to at least `PAGE_SIZE` writable bytes. If the file yields
/// fewer bytes (for example on its last page), the remainder of the buffer is
/// zeroed so that no bytes from an earlier fault end up in the mapping.
///
/// # Panics
///
/// Panics if reading from the file fails.
fn read_at_offset(fd: Fd, offset: u32, page: *mut u8) {
    // `ManuallyDrop` keeps the temporary `File` wrapper from being dropped here;
    // presumably so the descriptor, which the region still owns, is not closed.
    let mut src_file = ManuallyDrop::new(File::from_raw_fd(fd));
    src_file.seek(offset);

    // SAFETY: the caller guarantees `page` addresses PAGE_SIZE writable bytes.
    let page = unsafe { from_raw_parts_mut(page, PAGE_SIZE) };

    let read_size = src_file.read(page).expect("read file failed.");
    // The buffer is reused between faults: clear whatever the read did not overwrite.
    page[read_size..].fill(0);
}

/// Services one pending userfaultfd event for `region`.
///
/// Reads the event, loads the file page that backs the faulting address into the
/// staging buffer `page`, and copies that buffer into the page-aligned faulting
/// address through the region's userfaultfd.
///
/// # Panics
///
/// Panics if no event can be read, if the event is not a page fault, or if the
/// uffd copy fails.
fn do_page_fault(page: *mut u8, region: &RegisterdMemRegion) {
    let event = region
        .uffd
        .read_event()
        .expect("read uffd_msg")
        .expect("uffd_msg ready");

    let Event::Pagefault { addr, .. } = event else {
        panic!("Unexpected event on userfaultfd");
    };

    // Clearing the low bits rounds an address/offset down to its page boundary.
    let page_mask = !(PAGE_SIZE - 1);
    let fault_addr = addr as usize;

    let file_offset = (fault_addr - region.start_addr) & page_mask;
    read_at_offset(region.src_fd, file_offset as u32, page);

    let dst = (fault_addr & page_mask) as *mut c_void;
    let _copy = unsafe {
        region
            .uffd
            .copy(page as *const c_void, dst, PAGE_SIZE, true)
            .expect("uffd copy failed.")
    };
}

/// Creates the global notify pipe and stores it in `NOTIFY_PIPE`.
///
/// # Panics
///
/// Panics if a pipe is already installed.
fn init_notify_pipe() {
    let mut slot = NOTIFY_PIPE.write().unwrap();
    assert!(slot.is_none(), "notify_pipe has exist");
    *slot = Some(NotifyPipe::new());
}

#[no_mangle]
pub fn file_page_fault_handler() -> LibOSResult<()> {
pub fn file_page_fault_handler() {
let notify_fd =
unsafe { BorrowedFd::borrow_raw(NOTIFY_PIPE.read().unwrap().as_ref().unwrap().recevier) };

let mut page: Box<MaybeUninit<Page>> = Box::new(MaybeUninit::uninit());

loop {
let epoll = Epoll::new(EpollCreateFlags::empty()).expect("create epoll failed");

let regions = REGISTERD_REGIONS.lock().unwrap();
let uffds: Vec<_> = regions.iter().map(|region| &region.uffd).collect();
if regions.is_empty() {
break;
}

let mut pollfds: Vec<_> = uffds
let uffd_events: Vec<_> = regions
.iter()
.map(|uffd| PollFd::new(uffd, PollFlags::POLLIN))
.map(|region| region.uffd.as_fd())
.enumerate()
.collect();
let notify_event = [(u64::MAX as usize, notify_fd.as_fd())];

let _nready = poll(pollfds.as_mut_slice(), -1).expect("poll");
for (idx, fd) in uffd_events.iter().chain(notify_event.iter()) {
epoll
.add(fd, EpollEvent::new(EpollFlags::EPOLLIN, *idx as u64))
.expect("add event failed");
}

let mut ready_events = [EpollEvent::empty()];
epoll
.wait(&mut ready_events, -1)
.expect("epoll wait failed");
// let revents = pollfd.revents().unwrap();

let region = regions.get(0).unwrap();
let uffd = &region.uffd;
let event = uffd
.read_event()
.expect("read uffd_msg")
.expect("uffd_msg ready");

if let Event::Pagefault { addr, .. } = event {
// println!(
// "UFFD_EVENT_PAGEFAULT event: {:?}, register_info: {:?}",
// event, region
// );
// Copy the page pointed to by 'page' into the faulting region. Vary the contents that are
// copied in, so that it is more obvious that each fault is handled separately.
let mut src_file = ManuallyDrop::new(File::from_raw_fd(region.src_fd));
let offset = addr as usize - region.start_addr;
let aligned_offset = offset & (!PAGE_SIZE + 1);

src_file.seek(aligned_offset as u32);
let page: &mut [u8] =
unsafe { from_raw_parts_mut(page.as_mut_ptr() as usize as *mut u8, 0x1000) };

let read_size = src_file.read(page).expect("read file failed.");
// println!(
// "src_file aligned_offset={}, read {} bytes",
// aligned_offset, read_size
// );

let dst = (addr as usize & !(PAGE_SIZE - 1)) as *mut c_void;
let copy = unsafe {
uffd.copy(
page.as_mut_ptr() as usize as *mut c_void,
dst,
PAGE_SIZE,
true,
)
.expect("uffd copy")
};

// println!("(uffdio_copy.copy returned {})", copy);
if !ready_events[0].events().contains(EpollFlags::EPOLLIN) {
continue;
}
let event_idx = ready_events[0].data();
if let Some(region) = regions.get(event_idx as usize) {
do_page_fault(page.as_mut_ptr() as usize as *mut u8, region);
} else {
panic!("Unexpected event on userfaultfd");
drop(regions);
let pipe = NOTIFY_PIPE.read().unwrap();
let pipe = pipe.as_ref().expect("pipe not exist?");
pipe.consume()
}
}

let mut notify_pipe = NOTIFY_PIPE.write().unwrap();
*notify_pipe = None;
// println!("page fault handler exit.");
}

/// Locks the list of registered regions, waking the fault handler first if the
/// lock is currently held by someone else.
///
/// The fast path is a plain `try_lock`. When that reports contention, one byte is
/// written to the notify pipe and the call then blocks on the lock.
///
/// # Panics
///
/// Panics if the regions mutex is poisoned, or if contention is detected while no
/// notify pipe has been initialised.
fn acquire_regions_or_notify() -> MutexGuard<'static, std::vec::Vec<RegisterdMemRegion>> {
    match REGISTERD_REGIONS.try_lock() {
        Ok(regions) => return regions,
        // Somebody else holds the lock: fall through and wake them up.
        Err(std::sync::TryLockError::WouldBlock) => {}
        // A poisoned lock is not contention; report it instead of notifying and
        // then locking again while the failed attempt's result is still alive.
        Err(std::sync::TryLockError::Poisoned(err)) => {
            panic!("registered regions lock poisoned: {}", err)
        }
    }

    // The read guard on the pipe lives only for this statement, so it is not held
    // while blocking on the regions lock below.
    NOTIFY_PIPE
        .read()
        .unwrap()
        .as_ref()
        .expect("notify has not init?")
        .notify();

    REGISTERD_REGIONS.lock().unwrap()
}

#[no_mangle]
pub fn register_file_backend(mm_region: &mut [c_void], file_fd: Fd) -> LibOSResult<()> {
let _lock = REGISTER.lock().unwrap();
// If have error: `OpenDevUserfaultfd(Os { code: 13, kind: PermissionDenied, message: "Permission denied" })`
// ,use this command:
// `setfacl -m u:${USER}:rw /dev/userfaultfd`
Expand All @@ -106,17 +205,40 @@ pub fn register_file_backend(mm_region: &mut [c_void], file_fd: Fd) -> LibOSResu
uffd.register(mm_region.as_mut_ptr(), mm_region.len())
.expect("register failed");

REGISTERD_REGIONS.lock().unwrap().push(RegisterdMemRegion {
let mut regions = acquire_regions_or_notify();
regions.push(RegisterdMemRegion {
uffd,
start_addr: mm_region.as_ptr() as usize,
src_fd: file_fd,
});

libos!(spawn_fault_handler(
ms_std::init_context::isolation_ctx().isol_id
))
.expect("spawn_fault_handler failed.");
if NOTIFY_PIPE.read().unwrap().is_none() {
init_notify_pipe();

libos!(spawn_fault_handler(
ms_std::init_context::isolation_ctx().isol_id
))
.expect("spawn_fault_handler failed.");
}
// println!("spawn_fault_handler successfully.");

Ok(())
}

/// Removes the registered region whose start address equals `addr`, if any.
///
/// The fault handler is woken through the notify pipe first, then the region list
/// is locked and the first matching entry is removed. An address that matches no
/// region is not an error.
///
/// # Panics
///
/// Panics if the notify pipe has not been initialised or if a lock is poisoned.
#[no_mangle]
pub fn unregister_file_backend(addr: usize) -> LibOSResult<()> {
    let _lock = REGISTER.lock().unwrap();

    {
        // Keep the read guard confined to this scope: `acquire_regions_or_notify`
        // may take `NOTIFY_PIPE.read()` itself, and taking a read lock again on
        // the same thread can deadlock or panic once a writer is waiting.
        let pipe = NOTIFY_PIPE.read().unwrap();
        pipe.as_ref().expect("notify pipe not exist?").notify();
    }

    let mut regions = acquire_regions_or_notify();
    if let Some(idx) = regions.iter().position(|region| region.start_addr == addr) {
        regions.remove(idx);
    }

    Ok(())
}
13 changes: 9 additions & 4 deletions ms_hostcall/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,13 @@ pub enum CommonHostCall {
BufferDealloc,
#[display(fmt = "libos_mmap")]
Mmap,
#[display(fmt = "libos_munmap")]
Munmap,

#[display(fmt = "register_file_backend")]
RegisterFileBackend,
#[display(fmt = "unregister_file_backend")]
UnregisterFileBackend,
#[display(fmt = "file_page_fault_handler")]
FilePageFaultHandler,

Expand Down Expand Up @@ -137,11 +141,12 @@ impl HostCallID {
CommonHostCall::BufferAlloc
| CommonHostCall::AccessBuffer
| CommonHostCall::BufferDealloc
| CommonHostCall::Mmap => "mm".to_owned(),
| CommonHostCall::Mmap
| CommonHostCall::Munmap => "mm".to_owned(),

CommonHostCall::RegisterFileBackend | CommonHostCall::FilePageFaultHandler => {
"mmap_file_backend".to_owned()
}
CommonHostCall::RegisterFileBackend
| CommonHostCall::FilePageFaultHandler
| CommonHostCall::UnregisterFileBackend => "mmap_file_backend".to_owned(),

CommonHostCall::GetTime => "time".to_owned(),
},
Expand Down
Loading

0 comments on commit 52b0dea

Please sign in to comment.