Skip to content

Commit 60c1075

Browse files
committed
Auto merge of #1564 - Aaron1011:readlink, r=RalfJung
Implement `readlink` Due to the truncating behavior of `readlink`, I was not able to directly use any of the existing C-cstring helper functions.
2 parents 9202f7d + 3aaab3d commit 60c1075

File tree

7 files changed

+183
-91
lines changed

7 files changed

+183
-91
lines changed

src/shims/os_str.rs

+74-69
Original file line numberDiff line numberDiff line change
@@ -14,72 +14,48 @@ use rustc_target::abi::LayoutOf;
1414
use crate::*;
1515

1616
/// Represent how path separator conversion should be done.
17-
enum Pathconversion {
17+
pub enum PathConversion {
1818
HostToTarget,
1919
TargetToHost,
2020
}
2121

22-
/// Perform path separator conversion if needed.
23-
fn convert_path_separator<'a>(
24-
os_str: Cow<'a, OsStr>,
25-
target_os: &str,
26-
direction: Pathconversion,
27-
) -> Cow<'a, OsStr> {
28-
#[cfg(windows)]
29-
return if target_os == "windows" {
30-
// Windows-on-Windows, all fine.
31-
os_str
32-
} else {
33-
// Unix target, Windows host.
34-
let (from, to) = match direction {
35-
Pathconversion::HostToTarget => ('\\', '/'),
36-
Pathconversion::TargetToHost => ('/', '\\'),
37-
};
38-
let converted = os_str
39-
.encode_wide()
40-
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
41-
.collect::<Vec<_>>();
42-
Cow::Owned(OsString::from_wide(&converted))
43-
};
44-
#[cfg(unix)]
45-
return if target_os == "windows" {
46-
// Windows target, Unix host.
47-
let (from, to) = match direction {
48-
Pathconversion::HostToTarget => ('/', '\\'),
49-
Pathconversion::TargetToHost => ('\\', '/'),
50-
};
51-
let converted = os_str
52-
.as_bytes()
53-
.iter()
54-
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
55-
.collect::<Vec<_>>();
56-
Cow::Owned(OsString::from_vec(converted))
57-
} else {
58-
// Unix-on-Unix, all is fine.
59-
os_str
60-
};
22+
#[cfg(unix)]
23+
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
24+
Ok(os_str.as_bytes())
25+
}
26+
27+
#[cfg(not(unix))]
28+
pub fn os_str_to_bytes<'a, 'tcx>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
29+
// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
30+
// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
31+
// valid.
32+
os_str
33+
.to_str()
34+
.map(|s| s.as_bytes())
35+
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
36+
}
37+
38+
#[cfg(unix)]
39+
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
40+
Ok(OsStr::from_bytes(bytes))
41+
}
42+
#[cfg(not(unix))]
43+
pub fn bytes_to_os_str<'a, 'tcx>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
44+
let s = std::str::from_utf8(bytes)
45+
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
46+
Ok(OsStr::new(s))
6147
}
6248

6349
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
6450
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
51+
6552
/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
6653
/// the Unix APIs usually handle.
6754
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
6855
where
6956
'tcx: 'a,
7057
'mir: 'a,
7158
{
72-
#[cfg(unix)]
73-
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
74-
Ok(OsStr::from_bytes(bytes))
75-
}
76-
#[cfg(not(unix))]
77-
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
78-
let s = std::str::from_utf8(bytes)
79-
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
80-
Ok(OsStr::new(s))
81-
}
82-
8359
let this = self.eval_context_ref();
8460
let bytes = this.memory.read_c_str(scalar)?;
8561
bytes_to_os_str(bytes)
@@ -118,20 +94,6 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
11894
scalar: Scalar<Tag>,
11995
size: u64,
12096
) -> InterpResult<'tcx, (bool, u64)> {
121-
#[cfg(unix)]
122-
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
123-
Ok(os_str.as_bytes())
124-
}
125-
#[cfg(not(unix))]
126-
fn os_str_to_bytes<'tcx, 'a>(os_str: &'a OsStr) -> InterpResult<'tcx, &'a [u8]> {
127-
// On non-unix platforms the best we can do to transform bytes from/to OS strings is to do the
128-
// intermediate transformation into strings. Which invalidates non-utf8 paths that are actually
129-
// valid.
130-
os_str
131-
.to_str()
132-
.map(|s| s.as_bytes())
133-
.ok_or_else(|| err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into())
134-
}
13597

13698
let bytes = os_str_to_bytes(os_str)?;
13799
// If `size` is smaller or equal than `bytes.len()`, writing `bytes` plus the required null
@@ -226,7 +188,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
226188
let this = self.eval_context_ref();
227189
let os_str = this.read_os_str_from_c_str(scalar)?;
228190

229-
Ok(match convert_path_separator(Cow::Borrowed(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost) {
191+
Ok(match this.convert_path_separator(Cow::Borrowed(os_str), PathConversion::TargetToHost) {
230192
Cow::Borrowed(x) => Cow::Borrowed(Path::new(x)),
231193
Cow::Owned(y) => Cow::Owned(PathBuf::from(y)),
232194
})
@@ -237,7 +199,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
237199
let this = self.eval_context_ref();
238200
let os_str = this.read_os_str_from_wide_str(scalar)?;
239201

240-
Ok(convert_path_separator(Cow::Owned(os_str), &this.tcx.sess.target.target.target_os, Pathconversion::TargetToHost).into_owned().into())
202+
Ok(this.convert_path_separator(Cow::Owned(os_str), PathConversion::TargetToHost).into_owned().into())
241203
}
242204

243205
/// Write a Path to the machine memory (as a null-terminated sequence of bytes),
@@ -249,7 +211,7 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
249211
size: u64,
250212
) -> InterpResult<'tcx, (bool, u64)> {
251213
let this = self.eval_context_mut();
252-
let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
214+
let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
253215
this.write_os_str_to_c_str(&os_str, scalar, size)
254216
}
255217

@@ -262,7 +224,50 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
262224
size: u64,
263225
) -> InterpResult<'tcx, (bool, u64)> {
264226
let this = self.eval_context_mut();
265-
let os_str = convert_path_separator(Cow::Borrowed(path.as_os_str()), &this.tcx.sess.target.target.target_os, Pathconversion::HostToTarget);
227+
let os_str = this.convert_path_separator(Cow::Borrowed(path.as_os_str()), PathConversion::HostToTarget);
266228
this.write_os_str_to_wide_str(&os_str, scalar, size)
267229
}
230+
231+
fn convert_path_separator<'a>(
232+
&self,
233+
os_str: Cow<'a, OsStr>,
234+
direction: PathConversion,
235+
) -> Cow<'a, OsStr> {
236+
let this = self.eval_context_ref();
237+
let target_os = &this.tcx.sess.target.target.target_os;
238+
#[cfg(windows)]
239+
return if target_os == "windows" {
240+
// Windows-on-Windows, all fine.
241+
os_str
242+
} else {
243+
// Unix target, Windows host.
244+
let (from, to) = match direction {
245+
PathConversion::HostToTarget => ('\\', '/'),
246+
PathConversion::TargetToHost => ('/', '\\'),
247+
};
248+
let converted = os_str
249+
.encode_wide()
250+
.map(|wchar| if wchar == from as u16 { to as u16 } else { wchar })
251+
.collect::<Vec<_>>();
252+
Cow::Owned(OsString::from_wide(&converted))
253+
};
254+
#[cfg(unix)]
255+
return if target_os == "windows" {
256+
// Windows target, Unix host.
257+
let (from, to) = match direction {
258+
PathConversion::HostToTarget => ('/', '\\'),
259+
PathConversion::TargetToHost => ('\\', '/'),
260+
};
261+
let converted = os_str
262+
.as_bytes()
263+
.iter()
264+
.map(|&wchar| if wchar == from as u8 { to as u8 } else { wchar })
265+
.collect::<Vec<_>>();
266+
Cow::Owned(OsString::from_vec(converted))
267+
} else {
268+
// Unix-on-Unix, all is fine.
269+
os_str
270+
};
271+
}
268272
}
273+

src/shims/posix/foreign_items.rs

+5
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,11 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
123123
let result = this.fdatasync(fd)?;
124124
this.write_scalar(Scalar::from_i32(result), dest)?;
125125
}
126+
"readlink" => {
127+
let &[pathname, buf, bufsize] = check_arg_count(args)?;
128+
let result = this.readlink(pathname, buf, bufsize)?;
129+
this.write_scalar(Scalar::from_machine_isize(result, this), dest)?;
130+
}
126131

127132
// Allocation
128133
"posix_memalign" => {

src/shims/posix/fs.rs

+36
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ use std::fs::{read_dir, remove_dir, remove_file, rename, DirBuilder, File, FileT
44
use std::io::{self, Read, Seek, SeekFrom, Write};
55
use std::path::Path;
66
use std::time::SystemTime;
7+
use std::borrow::Cow;
78

89
use log::trace;
910

@@ -1353,6 +1354,41 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
13531354
this.handle_not_found()
13541355
}
13551356
}
1357+
1358+
fn readlink(
1359+
&mut self,
1360+
pathname_op: OpTy<'tcx, Tag>,
1361+
buf_op: OpTy<'tcx, Tag>,
1362+
bufsize_op: OpTy<'tcx, Tag>
1363+
) -> InterpResult<'tcx, i64> {
1364+
let this = self.eval_context_mut();
1365+
1366+
this.check_no_isolation("readlink")?;
1367+
1368+
let pathname = this.read_path_from_c_str(this.read_scalar(pathname_op)?.check_init()?)?;
1369+
let buf = this.read_scalar(buf_op)?.check_init()?;
1370+
let bufsize = this.read_scalar(bufsize_op)?.to_machine_usize(this)?;
1371+
1372+
let result = std::fs::read_link(pathname);
1373+
match result {
1374+
Ok(resolved) => {
1375+
let resolved = this.convert_path_separator(Cow::Borrowed(resolved.as_ref()), crate::shims::os_str::PathConversion::HostToTarget);
1376+
let mut path_bytes = crate::shims::os_str::os_str_to_bytes(resolved.as_ref())?;
1377+
let bufsize: usize = bufsize.try_into().unwrap();
1378+
if path_bytes.len() > bufsize {
1379+
path_bytes = &path_bytes[..bufsize]
1380+
}
1381+
// 'readlink' truncates the resolved path if
1382+
// the provided buffer is not large enough.
1383+
this.memory.write_bytes(buf, path_bytes.iter().copied())?;
1384+
Ok(path_bytes.len().try_into().unwrap())
1385+
}
1386+
Err(e) => {
1387+
this.set_last_error_from_io_error(e)?;
1388+
Ok(-1)
1389+
}
1390+
}
1391+
}
13561392
}
13571393

13581394
/// Extracts the number of seconds and nanoseconds elapsed between `time` and the unix epoch when

tests/run-pass/fs.rs

+68-2
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,18 @@
11
// ignore-windows: File handling is not implemented yet
22
// compile-flags: -Zmiri-disable-isolation
33

4+
#![feature(rustc_private)]
5+
46
use std::fs::{
57
File, create_dir, OpenOptions, read_dir, remove_dir, remove_dir_all, remove_file, rename,
68
};
7-
use std::io::{Read, Write, ErrorKind, Result, Seek, SeekFrom};
9+
use std::ffi::CString;
10+
use std::io::{Read, Write, Error, ErrorKind, Result, Seek, SeekFrom};
811
use std::path::{PathBuf, Path};
912

13+
extern crate libc;
14+
15+
1016
fn main() {
1117
test_file();
1218
test_file_clone();
@@ -19,10 +25,23 @@ fn main() {
1925
test_errors();
2026
test_rename();
2127
test_directory();
28+
test_dup_stdout_stderr();
2229
}
2330

2431
fn tmp() -> PathBuf {
25-
std::env::var("MIRI_TEMP").map(PathBuf::from).unwrap_or_else(|_| std::env::temp_dir())
32+
std::env::var("MIRI_TEMP")
33+
.map(|tmp| {
34+
// MIRI_TEMP is set outside of our emulated
35+
// program, so it may have path separators that don't
36+
// correspond to our target platform. We normalize them here
37+
// before constructing a `PathBuf`
38+
39+
#[cfg(windows)]
40+
return PathBuf::from(tmp.replace("/", "\\"));
41+
42+
#[cfg(not(windows))]
43+
return PathBuf::from(tmp.replace("\\", "/"));
44+
}).unwrap_or_else(|_| std::env::temp_dir())
2645
}
2746

2847
/// Prepare: compute filename and make sure the file does not exist.
@@ -215,6 +234,43 @@ fn test_symlink() {
215234
let mut contents = Vec::new();
216235
symlink_file.read_to_end(&mut contents).unwrap();
217236
assert_eq!(bytes, contents.as_slice());
237+
238+
239+
#[cfg(unix)]
240+
{
241+
use std::os::unix::ffi::OsStrExt;
242+
243+
let expected_path = path.as_os_str().as_bytes();
244+
245+
// Test that the expected string gets written to a buffer of proper
246+
// length, and that a trailing null byte is not written.
247+
let symlink_c_str = CString::new(symlink_path.as_os_str().as_bytes()).unwrap();
248+
let symlink_c_ptr = symlink_c_str.as_ptr();
249+
250+
// Make the buf one byte larger than it needs to be,
251+
// and check that the last byte is not overwritten.
252+
let mut large_buf = vec![0xFF; expected_path.len() + 1];
253+
let res = unsafe { libc::readlink(symlink_c_ptr, large_buf.as_mut_ptr().cast(), large_buf.len()) };
254+
// Check that the resovled path was properly written into the buf.
255+
assert_eq!(&large_buf[..(large_buf.len() - 1)], expected_path);
256+
assert_eq!(large_buf.last(), Some(&0xFF));
257+
assert_eq!(res, large_buf.len() as isize - 1);
258+
259+
// Test that the resolved path is truncated if the provided buffer
260+
// is too small.
261+
let mut small_buf = [0u8; 2];
262+
let res = unsafe { libc::readlink(symlink_c_ptr, small_buf.as_mut_ptr().cast(), small_buf.len()) };
263+
assert_eq!(small_buf, &expected_path[..small_buf.len()]);
264+
assert_eq!(res, small_buf.len() as isize);
265+
266+
// Test that we report a proper error for a missing path.
267+
let bad_path = CString::new("MIRI_MISSING_FILE_NAME").unwrap();
268+
let res = unsafe { libc::readlink(bad_path.as_ptr(), small_buf.as_mut_ptr().cast(), small_buf.len()) };
269+
assert_eq!(res, -1);
270+
assert_eq!(Error::last_os_error().kind(), ErrorKind::NotFound);
271+
}
272+
273+
218274
// Test that metadata of a symbolic link is correct.
219275
check_metadata(bytes, &symlink_path).unwrap();
220276
// Test that the metadata of a symbolic link is correct when not following it.
@@ -292,3 +348,13 @@ fn test_directory() {
292348
// Reading the metadata of a non-existent directory should fail with a "not found" error.
293349
assert_eq!(ErrorKind::NotFound, check_metadata(&[], &dir_path).unwrap_err().kind());
294350
}
351+
352+
fn test_dup_stdout_stderr() {
353+
let bytes = b"hello dup fd\n";
354+
unsafe {
355+
let new_stdout = libc::fcntl(1, libc::F_DUPFD, 0);
356+
let new_stderr = libc::fcntl(2, libc::F_DUPFD, 0);
357+
libc::write(new_stdout, bytes.as_ptr() as *const libc::c_void, bytes.len());
358+
libc::write(new_stderr, bytes.as_ptr() as *const libc::c_void, bytes.len());
359+
}
360+
}
File renamed without changes.
File renamed without changes.

tests/run-pass/fs_libc.rs

-20
This file was deleted.

0 commit comments

Comments
 (0)