Skip to content

Commit b1c7804

Browse files
committed
revise interface to read directory entries
The new interface is similar to the Linux system call getdents64. The system call reads several dirent64 structures. The name of the file is stored at the end of each dirent64. The length of the file name is implicitly part of dirent64, because d_reclen contains the size of dirent64 plus the length of the file name.
1 parent aa11bf6 commit b1c7804

File tree

1 file changed

+116
-97
lines changed
  • library/std/src/sys/pal/hermit

1 file changed

+116
-97
lines changed

library/std/src/sys/pal/hermit/fs.rs

+116-97
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
1-
use super::abi::{self, O_APPEND, O_CREAT, O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY};
1+
use super::abi::{
2+
self, dirent64, stat as stat_struct, DT_DIR, DT_LNK, DT_REG, DT_UNKNOWN, O_APPEND, O_CREAT,
3+
O_EXCL, O_RDONLY, O_RDWR, O_TRUNC, O_WRONLY, S_IFDIR, S_IFLNK, S_IFMT, S_IFREG,
4+
};
25
use super::fd::FileDesc;
3-
use crate::ffi::{CStr, OsString};
6+
use crate::ffi::{CStr, OsStr, OsString};
47
use crate::fmt;
58
use crate::io::{self, Error, ErrorKind};
69
use crate::io::{BorrowedCursor, IoSlice, IoSliceMut, SeekFrom};
710
use crate::mem;
811
use crate::os::hermit::ffi::OsStringExt;
912
use crate::os::hermit::io::{AsFd, AsRawFd, BorrowedFd, FromRawFd, IntoRawFd, RawFd};
1013
use crate::path::{Path, PathBuf};
11-
use crate::ptr;
1214
use crate::sync::Arc;
1315
use crate::sys::common::small_c_string::run_path_with_cstr;
1416
use crate::sys::cvt;
@@ -17,7 +19,6 @@ use crate::sys::unsupported;
1719
use crate::sys_common::{AsInner, AsInnerMut, FromInner, IntoInner};
1820

1921
pub use crate::sys_common::fs::{copy, try_exists};
20-
//pub use crate::sys_common::fs::remove_dir_all;
2122

2223
#[derive(Debug)]
2324
pub struct File(FileDesc);
@@ -34,32 +35,38 @@ impl FileAttr {
3435

3536
// all DirEntry's will have a reference to this struct
3637
struct InnerReadDir {
37-
dirp: FileDesc,
3838
root: PathBuf,
39+
dir: Vec<u8>,
40+
}
41+
42+
impl InnerReadDir {
43+
pub fn new(root: PathBuf, dir: Vec<u8>) -> Self {
44+
Self { root, dir }
45+
}
3946
}
4047

4148
pub struct ReadDir {
4249
inner: Arc<InnerReadDir>,
43-
end_of_stream: bool,
50+
pos: i64,
4451
}
4552

4653
impl ReadDir {
4754
fn new(inner: InnerReadDir) -> Self {
48-
Self { inner: Arc::new(inner), end_of_stream: false }
55+
Self { inner: Arc::new(inner), pos: 0 }
4956
}
5057
}
5158

5259
pub struct DirEntry {
53-
dir: Arc<InnerReadDir>,
54-
entry: dirent_min,
60+
/// path to the entry
61+
root: PathBuf,
62+
/// 64-bit inode number
63+
ino: u64,
64+
/// File type
65+
type_: u32,
66+
/// name of the entry
5567
name: OsString,
5668
}
5769

58-
struct dirent_min {
59-
d_ino: u64,
60-
d_type: u32,
61-
}
62-
6370
#[derive(Clone, Debug)]
6471
pub struct OpenOptions {
6572
// generic
@@ -105,15 +112,24 @@ pub struct DirBuilder {
105112

106113
impl FileAttr {
107114
pub fn modified(&self) -> io::Result<SystemTime> {
108-
Ok(SystemTime::new(self.stat_val.st_mtime, self.stat_val.st_mtime_nsec))
115+
Ok(SystemTime::new(
116+
self.stat_val.st_mtime.try_into().unwrap(),
117+
self.stat_val.st_mtime_nsec.try_into().unwrap(),
118+
))
109119
}
110120

111121
pub fn accessed(&self) -> io::Result<SystemTime> {
112-
Ok(SystemTime::new(self.stat_val.st_atime, self.stat_val.st_atime_nsec))
122+
Ok(SystemTime::new(
123+
self.stat_val.st_atime.try_into().unwrap(),
124+
self.stat_val.st_atime_nsec.try_into().unwrap(),
125+
))
113126
}
114127

115128
pub fn created(&self) -> io::Result<SystemTime> {
116-
Ok(SystemTime::new(self.stat_val.st_ctime, self.stat_val.st_ctime_nsec))
129+
Ok(SystemTime::new(
130+
self.stat_val.st_ctime.try_into().unwrap(),
131+
self.stat_val.st_ctime_nsec.try_into().unwrap(),
132+
))
117133
}
118134

119135
pub fn size(&self) -> u64 {
@@ -171,7 +187,7 @@ impl FileType {
171187
impl fmt::Debug for ReadDir {
172188
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
173189
// This will only be called from std::fs::ReadDir, which will add a "ReadDir()" frame.
174-
// Thus the result will be e g 'ReadDir("/home")'
190+
// Thus the result will be e.g. 'ReadDir("/home")'
175191
fmt::Debug::fmt(&*self.inner.root, f)
176192
}
177193
}
@@ -180,101 +196,74 @@ impl Iterator for ReadDir {
180196
type Item = io::Result<DirEntry>;
181197

182198
fn next(&mut self) -> Option<io::Result<DirEntry>> {
183-
if self.end_of_stream {
184-
return None;
185-
}
199+
let mut counter: usize = 0;
200+
let mut offset: i64 = 0;
201+
202+
// loop over all directory entries and search the entry for the current position
203+
loop {
204+
// leave the function if the loop reaches the end of the buffer (with all entries)
205+
if offset >= self.inner.dir.len().try_into().unwrap() {
206+
return None;
207+
}
186208

187-
unsafe {
188-
loop {
189-
// As of POSIX.1-2017, readdir() is not required to be thread safe; only
190-
// readdir_r() is. However, readdir_r() cannot correctly handle platforms
191-
// with unlimited or variable NAME_MAX. Many modern platforms guarantee
192-
// thread safety for readdir() as long an individual DIR* is not accessed
193-
// concurrently, which is sufficient for Rust.
194-
let entry_ptr = match abi::readdir(self.inner.dirp.as_raw_fd()) {
195-
abi::DirectoryEntry::Invalid(e) => {
196-
// We either encountered an error, or reached the end. Either way,
197-
// the next call to next() should return None.
198-
self.end_of_stream = true;
199-
200-
return Some(Err(Error::from_raw_os_error(e)));
201-
}
202-
abi::DirectoryEntry::Valid(ptr) => {
203-
if ptr.is_null() {
204-
return None;
205-
}
206-
207-
ptr
208-
}
209+
let dir = unsafe {
210+
&*(self.inner.dir.as_ptr().offset(offset.try_into().unwrap()) as *const dirent64)
211+
};
212+
213+
if counter == self.pos.try_into().unwrap() {
214+
self.pos += 1;
215+
216+
// After dirent64, the file name is stored. d_reclen represents the length of the dirent64
217+
// plus the length of the file name. Consequently, file name has a size of d_reclen minus
218+
// the size of dirent64. The file name is always a C string and terminated by `\0`.
219+
// Consequently, we are able to ignore the last byte.
220+
let name_bytes = unsafe {
221+
core::slice::from_raw_parts(
222+
&dir.d_name as *const _ as *const u8,
223+
dir.d_reclen as usize - core::mem::size_of::<dirent64>() - 1,
224+
)
225+
.to_vec()
209226
};
210-
211-
macro_rules! offset_ptr {
212-
($entry_ptr:expr, $field:ident) => {{
213-
const OFFSET: isize = {
214-
let delusion = MaybeUninit::<dirent>::uninit();
215-
let entry_ptr = delusion.as_ptr();
216-
unsafe {
217-
ptr::addr_of!((*entry_ptr).$field)
218-
.cast::<u8>()
219-
.offset_from(entry_ptr.cast::<u8>())
220-
}
221-
};
222-
if true {
223-
// Cast to the same type determined by the else branch.
224-
$entry_ptr.byte_offset(OFFSET).cast::<_>()
225-
} else {
226-
#[allow(deref_nullptr)]
227-
{
228-
ptr::addr_of!((*ptr::null::<dirent>()).$field)
229-
}
230-
}
231-
}};
232-
}
233-
234-
// d_name is NOT guaranteed to be null-terminated.
235-
let name_bytes = core::slice::from_raw_parts(
236-
offset_ptr!(entry_ptr, d_name) as *const u8,
237-
*offset_ptr!(entry_ptr, d_namelen) as usize,
238-
)
239-
.to_vec();
240-
241-
if name_bytes == b"." || name_bytes == b".." {
242-
continue;
243-
}
244-
245-
let name = OsString::from_vec(name_bytes);
246-
247-
let entry = dirent_min {
248-
d_ino: *offset_ptr!(entry_ptr, d_ino),
249-
d_type: *offset_ptr!(entry_ptr, d_type),
227+
let entry = DirEntry {
228+
root: self.inner.root.clone(),
229+
ino: dir.d_ino,
230+
type_: dir.d_type as u32,
231+
name: OsString::from_vec(name_bytes),
250232
};
251233

252-
return Some(Ok(DirEntry { entry, name: name, dir: Arc::clone(&self.inner) }));
234+
return Some(Ok(entry));
253235
}
236+
237+
counter += 1;
238+
239+
// move to the next dirent64, which is directly stored after the previous one
240+
offset = offset + dir.d_off;
254241
}
255242
}
256243
}
257244

258245
impl DirEntry {
259246
pub fn path(&self) -> PathBuf {
260-
self.dir.root.join(self.file_name_os_str())
247+
self.root.join(self.file_name_os_str())
261248
}
262249

263250
pub fn file_name(&self) -> OsString {
264251
self.file_name_os_str().to_os_string()
265252
}
266253

267254
pub fn metadata(&self) -> io::Result<FileAttr> {
268-
lstat(&self.path())
255+
let mut path = self.path();
256+
path.set_file_name(self.file_name_os_str());
257+
lstat(&path)
269258
}
270259

271260
pub fn file_type(&self) -> io::Result<FileType> {
272-
Ok(FileType { mode: self.entry.d_type })
261+
Ok(FileType { mode: self.type_ as u32 })
273262
}
274263

275264
#[allow(dead_code)]
276265
pub fn ino(&self) -> u64 {
277-
self.entry.d_ino
266+
self.ino
278267
}
279268

280269
pub fn file_name_os_str(&self) -> &OsStr {
@@ -456,7 +445,7 @@ impl DirBuilder {
456445
}
457446

458447
pub fn mkdir(&self, path: &Path) -> io::Result<()> {
459-
run_path_with_cstr(path, |path| {
448+
run_path_with_cstr(path, &|path| {
460449
cvt(unsafe { abi::mkdir(path.as_ptr(), self.mode) }).map(|_| ())
461450
})
462451
}
@@ -519,11 +508,42 @@ impl FromRawFd for File {
519508
}
520509

521510
pub fn readdir(path: &Path) -> io::Result<ReadDir> {
522-
let fd_raw = run_path_with_cstr(path, |path| cvt(unsafe { abi::opendir(path.as_ptr()) }))?;
511+
let fd_raw = run_path_with_cstr(path, &|path| cvt(unsafe { abi::opendir(path.as_ptr()) }))?;
523512
let fd = unsafe { FileDesc::from_raw_fd(fd_raw as i32) };
524513
let root = path.to_path_buf();
525-
let inner = InnerReadDir { dirp: fd, root };
526-
Ok(ReadDir::new(inner))
514+
515+
// read all directory entries
516+
let mut vec: Vec<u8> = Vec::new();
517+
let mut sz = 512;
518+
loop {
519+
// reserve memory to receive all directory entries
520+
vec.resize(sz, 0);
521+
522+
let readlen =
523+
unsafe { abi::getdents64(fd.as_raw_fd(), vec.as_mut_ptr() as *mut dirent64, sz) };
524+
if readlen > 0 {
525+
// shrink down to the minimal size
526+
vec.resize(readlen.try_into().unwrap(), 0);
527+
break;
528+
}
529+
530+
// if the buffer is too small, getdents64 returns EINVAL
531+
// otherwise, getdents64 returns an error number
532+
if readlen != (-abi::errno::EINVAL).into() {
533+
return Err(Error::from_raw_os_error(readlen.try_into().unwrap()));
534+
}
535+
536+
// we don't have enough memory => try to increase the vector size
537+
sz = sz * 2;
538+
539+
// 1 MB for directory entries should be enough
540+
// stop here to avoid an endless loop
541+
if sz > 0x100000 {
542+
return Err(Error::from(ErrorKind::Uncategorized));
543+
}
544+
}
545+
546+
Ok(ReadDir::new(InnerReadDir::new(root, vec)))
527547
}
528548

529549
pub fn unlink(path: &Path) -> io::Result<()> {
@@ -539,12 +559,11 @@ pub fn set_perm(_p: &Path, _perm: FilePermissions) -> io::Result<()> {
539559
}
540560

541561
pub fn rmdir(path: &Path) -> io::Result<()> {
542-
run_path_with_cstr(path, |path| cvt(unsafe { abi::rmdir(path.as_ptr()) }).map(|_| ()))
562+
run_path_with_cstr(path, &|path| cvt(unsafe { abi::rmdir(path.as_ptr()) }).map(|_| ()))
543563
}
544564

545565
pub fn remove_dir_all(_path: &Path) -> io::Result<()> {
546-
//unsupported()
547-
Ok(())
566+
unsupported()
548567
}
549568

550569
pub fn readlink(_p: &Path) -> io::Result<PathBuf> {
@@ -560,15 +579,15 @@ pub fn link(_original: &Path, _link: &Path) -> io::Result<()> {
560579
}
561580

562581
pub fn stat(path: &Path) -> io::Result<FileAttr> {
563-
run_path_with_cstr(path, |path| {
582+
run_path_with_cstr(path, &|path| {
564583
let mut stat_val: stat_struct = unsafe { mem::zeroed() };
565584
cvt(unsafe { abi::stat(path.as_ptr(), &mut stat_val) })?;
566585
Ok(FileAttr::from_stat(stat_val))
567586
})
568587
}
569588

570589
pub fn lstat(path: &Path) -> io::Result<FileAttr> {
571-
run_path_with_cstr(path, |path| {
590+
run_path_with_cstr(path, &|path| {
572591
let mut stat_val: stat_struct = unsafe { mem::zeroed() };
573592
cvt(unsafe { abi::lstat(path.as_ptr(), &mut stat_val) })?;
574593
Ok(FileAttr::from_stat(stat_val))

0 commit comments

Comments
 (0)