Skip to content

Commit

Permalink
Auto merge of #1225 - JOE1994:rw_widestr, r=<try>
Browse files Browse the repository at this point in the history
Add helper functions and shims for env var emulation in Windows

This PR attempts to implement the final step of the instructions laid out in #707 (comment), and is still a work in progress.

### STATUS
- [x] Add general **_target_** methods for **read_str/alloc_str** that dispatch to either **c_str** or **wide_str** variants
(**helpers.rs**)
- [x] Implement shims `fn getenvironmentvariablew`/`fn setenvironmentvariablew`
(`std::env::var()`, `std::env::set_var()`)
- [x] Implement shim `GetEnvironmentStringsW` (`std::env::vars()`)
- [x] Implement shim `FreeEnvironmentStringsW`

### ISSUES (updated on 03/21/2020)
- Miri reports an error when running `std::env::remove_var()` on Windows:
    it complains about raw pointer usage in the
Rust standard library, in [*src/libstd/sys/windows/os.rs*](#1225 (comment)).

### TODO (probably on a separate PR)
  - Move string helpers into a new file to avoid bloating **src/helpers.rs** too much. (**shims/os_str.rs**)
  • Loading branch information
bors committed Mar 23, 2020
2 parents aaa16a5 + 3bb5d85 commit 5735ce5
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 33 deletions.
112 changes: 108 additions & 4 deletions src/helpers.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::ffi::OsStr;
use std::ffi::{OsStr, OsString};
use std::{iter, mem};
use std::convert::TryFrom;

Expand Down Expand Up @@ -456,6 +456,17 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
}
}

/// Reads an `OsString` out of interpreted memory, choosing the string representation that
/// matches the interpretation target: a null-terminated byte string on Linux and macOS, a
/// 0x0000-terminated `u16` string on Windows. Any other target OS is reported as unsupported.
fn read_os_str_from_target_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
    let this = self.eval_context_ref();
    match this.tcx.sess.target.target.target_os.as_str() {
        "windows" => self.read_os_str_from_wide_str(scalar),
        "linux" | "macos" => {
            // The C-string reader hands back a borrowed `OsStr`; callers of this function
            // get an owned copy so both target flavours share one return type.
            let borrowed = self.read_os_str_from_c_str(scalar)?;
            Ok(borrowed.to_os_string())
        }
        _ => throw_unsup_format!("OsString support for target OS not yet available"),
    }
}

/// Helper function to read an OsString from a null-terminated sequence of bytes, which is what
/// the Unix APIs usually handle.
fn read_os_str_from_c_str<'a>(&'a self, scalar: Scalar<Tag>) -> InterpResult<'tcx, &'a OsStr>
Expand All @@ -471,14 +482,22 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
fn bytes_to_os_str<'tcx, 'a>(bytes: &'a [u8]) -> InterpResult<'tcx, &'a OsStr> {
let s = std::str::from_utf8(bytes)
.map_err(|_| err_unsup_format!("{:?} is not a valid utf-8 string", bytes))?;
Ok(&OsStr::new(s))
Ok(OsStr::new(s))
}

let this = self.eval_context_ref();
let bytes = this.memory.read_c_str(scalar)?;
bytes_to_os_str(bytes)
}

/// Reads a 0x0000-terminated sequence of `u16` (the string form Windows APIs usually
/// handle) from interpreted memory and converts it into an `OsString`.
fn read_os_str_from_wide_str(&self, scalar: Scalar<Tag>) -> InterpResult<'tcx, OsString> {
    let this = self.eval_context_ref();
    let wide_chars = this.memory.read_wide_str(scalar)?;
    u16vec_to_osstring(wide_chars)
}


/// Helper function to write an OsStr as a null-terminated sequence of bytes, which is what
/// the Unix APIs usually handle. This function returns `Ok((false, length))` without trying
/// to write if `size` is not large enough to fit the contents of `os_string` plus a null
Expand Down Expand Up @@ -518,21 +537,106 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx
Ok((true, string_length))
}

/// Writes `os_str` into `mplace` as a 0x0000-terminated sequence of `u16`, the string form
/// Windows APIs usually handle.
///
/// `size` is the capacity of the destination, counted in `u16` elements. When the string
/// plus its terminator does not fit, nothing is written and `Ok((false, length))` is
/// returned; otherwise the string is stored and `Ok((true, length))` is returned. In both
/// cases `length` is the number of `u16` units of the string, terminator excluded.
fn write_os_str_to_wide_str(
    &mut self,
    os_str: &OsStr,
    mplace: MPlaceTy<'tcx, Tag>,
    size: u64,
) -> InterpResult<'tcx, (bool, u64)> {
    #[cfg(target_os = "windows")]
    fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
        use std::os::windows::ffi::OsStrExt;
        Ok(os_str.encode_wide().collect())
    }
    #[cfg(not(target_os = "windows"))]
    fn os_str_to_u16vec<'tcx>(os_str: &OsStr) -> InterpResult<'tcx, Vec<u16>> {
        // On non-Windows hosts the only way to obtain UTF-16 from an OS string is to go
        // through `str` first, so host strings that are not valid UTF-8 are rejected here.
        match os_str.to_str() {
            Some(utf8) => Ok(utf8.encode_utf16().collect()),
            None => Err(err_unsup_format!("{:?} is not a valid utf-8 string", os_str).into()),
        }
    }

    let wide_chars = os_str_to_u16vec(os_str)?;
    let string_length = wide_chars.len() as u64;
    // The terminator needs one extra slot, so a destination of exactly `string_length`
    // elements is already too small; writing anyway would be an out-of-bounds access.
    if string_length >= size {
        return Ok((false, string_length));
    }

    let this = self.eval_context_mut();

    // Store the UTF-16 string, one 2-byte scalar per element, followed by the terminator.
    let char_size = Size::from_bytes(2);
    let terminated = wide_chars.into_iter().chain(iter::once(0x0000));
    for (idx, unit) in terminated.enumerate() {
        let place = this.mplace_field(mplace, idx as u64)?;
        this.write_scalar(Scalar::from_uint(unit, char_size), place.into())?;
    }
    Ok((true, string_length))
}

/// Allocates `os_str` in interpreted memory using the string representation of the
/// interpretation target: a C string on Linux and macOS, a wide (`u16`) string on Windows.
/// Any other target OS is reported as unsupported.
fn alloc_os_str_as_target_str(
    &mut self,
    os_str: &OsStr,
    memkind: MemoryKind<MiriMemoryKind>,
) -> InterpResult<'tcx, Pointer<Tag>> {
    let target_os_name = self.eval_context_ref().tcx.sess.target.target.target_os.as_str();
    match target_os_name {
        "windows" => self.alloc_os_str_as_wide_str(os_str, memkind),
        "linux" | "macos" => self.alloc_os_str_as_c_str(os_str, memkind),
        _ => throw_unsup_format!("OsString support for target OS not yet available"),
    }
}

/// Allocates a `u8` array of `os_str.len() + 1` elements with the given memory kind, writes
/// `os_str` into it as a null-terminated byte string via `write_os_str_to_c_str`, and
/// returns the pointer to the allocation.
fn alloc_os_str_as_c_str(
&mut self,
os_str: &OsStr,
memkind: MemoryKind<MiriMemoryKind>,
) -> Pointer<Tag> {
) -> InterpResult<'tcx, Pointer<Tag>> {
let size = u64::try_from(os_str.len()).unwrap().checked_add(1).unwrap(); // Make space for `0` terminator.
let this = self.eval_context_mut();

let arg_type = this.tcx.mk_array(this.tcx.types.u8, size);
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
// The allocation was sized above to hold the string plus its terminator.
self.write_os_str_to_c_str(os_str, arg_place.ptr, size).unwrap();
arg_place.ptr.assert_ptr()
Ok(arg_place.ptr.assert_ptr())
}

fn alloc_os_str_as_wide_str(
&mut self,
os_str: &OsStr,
memkind: MemoryKind<MiriMemoryKind>,
) -> InterpResult<'tcx, Pointer<Tag>> {
let size = os_str.len() as u64 + 1; // Make space for `0x0000` terminator.
let this = self.eval_context_mut();

let arg_type = this.tcx.mk_array(this.tcx.types.u16, size);
let arg_place = this.allocate(this.layout_of(arg_type).unwrap(), memkind);
self.write_os_str_to_wide_str(os_str, arg_place, size).unwrap();
Ok(arg_place.ptr.assert_ptr())
}
}

/// Converts a sequence of `u16` units into an `OsString` using the Windows-native
/// `OsStringExt::from_wide` conversion of the host.
#[cfg(target_os = "windows")]
pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
    use std::os::windows::ffi::OsStringExt;
    Ok(OsString::from_wide(&u16_vec))
}
/// Converts a sequence of `u16` units into an `OsString` by decoding it as UTF-16.
/// Input that is not valid UTF-16 is reported as an unsupported-operation error.
#[cfg(not(target_os = "windows"))]
pub fn u16vec_to_osstring<'tcx>(u16_vec: Vec<u16>) -> InterpResult<'tcx, OsString> {
    match String::from_utf16(&u16_vec) {
        Ok(decoded) => Ok(OsString::from(decoded)),
        Err(_) => Err(err_unsup_format!("{:?} is not a valid utf-16 string", u16_vec).into()),
    }
}

pub fn immty_from_int_checked<'tcx>(
int: impl Into<i128>,
layout: TyLayout<'tcx>,
Expand Down
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub use crate::diagnostics::{
TerminationInfo, NonHaltingDiagnostic,
};
pub use crate::eval::{create_ecx, eval_main, MiriConfig};
pub use crate::helpers::EvalContextExt as HelpersEvalContextExt;
pub use crate::helpers::{EvalContextExt as HelpersEvalContextExt, u16vec_to_osstring};
pub use crate::machine::{
AllocExtra, Evaluator, FrameData, MemoryExtra, MiriEvalContext, MiriEvalContextExt,
MiriMemoryKind, NUM_CPUS, PAGE_SIZE, STACK_ADDR, STACK_SIZE,
Expand Down
98 changes: 83 additions & 15 deletions src/shims/env.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::ffi::{OsString, OsStr};
use std::env;
use std::convert::TryFrom;
use std::collections::hash_map::Values;

use crate::stacked_borrows::Tag;
use crate::rustc_target::abi::LayoutOf;
Expand All @@ -13,7 +14,7 @@ use rustc_mir::interpret::Pointer;
#[derive(Default)]
pub struct EnvVars<'tcx> {
/// Stores pointers to the environment variables. These variables must be stored as
/// null-terminated C strings with the `"{name}={value}"` format.
/// null-terminated target strings (c_str or wide_str) with the `"{name}={value}"` format.
map: FxHashMap<OsString, Pointer<Tag>>,

/// Place where the `environ` static is stored. Lazily initialized, but then never changes.
Expand All @@ -29,42 +30,100 @@ impl<'tcx> EnvVars<'tcx> {
for (name, value) in env::vars() {
if !excluded_env_vars.contains(&name) {
let var_ptr =
alloc_env_var_as_c_str(name.as_ref(), value.as_ref(), ecx);
alloc_env_var_as_target_str(name.as_ref(), value.as_ref(), ecx)?;
ecx.machine.env_vars.map.insert(OsString::from(name), var_ptr);
}
}
}
ecx.update_environ()
}

/// Returns an iterator over the pointers stored in the environment variable map, one per
/// emulated variable. The result is always `Ok`.
pub(super) fn values(&self) -> InterpResult<'tcx, Values<'_, OsString, Pointer<Tag>>> {
    let var_ptrs = self.map.values();
    Ok(var_ptrs)
}
}

/// Builds the `"{name}={value}"` string for one environment variable and allocates it in
/// interpreted memory with `MiriMemoryKind::Machine`, returning the pointer to it.
fn alloc_env_var_as_c_str<'mir, 'tcx>(
fn alloc_env_var_as_target_str<'mir, 'tcx>(
name: &OsStr,
value: &OsStr,
ecx: &mut InterpCx<'mir, 'tcx, Evaluator<'tcx>>,
) -> Pointer<Tag> {
) -> InterpResult<'tcx, Pointer<Tag>> {
// Assemble `name`, `=`, `value` in one owned OS string before allocating it.
let mut name_osstring = name.to_os_string();
name_osstring.push("=");
name_osstring.push(value);
ecx.alloc_os_str_as_c_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())
Ok(ecx.alloc_os_str_as_target_str(name_osstring.as_os_str(), MiriMemoryKind::Machine.into())?)
}

impl<'mir, 'tcx> EvalContextExt<'mir, 'tcx> for crate::MiriEvalContext<'mir, 'tcx> {}
pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx> {
/// Looks up the environment variable whose name is the string pointed to by `name_op`.
/// Returns a pointer to the value part of the stored `"{name}={value}"` string when the
/// variable is present in the emulated environment, and a null pointer otherwise.
fn getenv(&mut self, name_op: OpTy<'tcx, Tag>) -> InterpResult<'tcx, Scalar<Tag>> {
let this = self.eval_context_mut();
fn getenv(&self, name_op: OpTy<'tcx, Tag>) -> InterpResult<'tcx, Scalar<Tag>> {
let this = self.eval_context_ref();

let name_ptr = this.read_scalar(name_op)?.not_undef()?;
let name = this.read_os_str_from_c_str(name_ptr)?;
Ok(match this.machine.env_vars.map.get(name) {
// The offset is used to strip the "{name}=" part of the string.
let name = this.read_os_str_from_target_str(name_ptr)?;
Ok(match this.machine.env_vars.map.get(&name) {
Some(var_ptr) => {
// The offset is used to strip the "{name}=" part of the string.
Scalar::from(var_ptr.offset(Size::from_bytes(u64::try_from(name.len()).unwrap().checked_add(1).unwrap()), this)?)
}
// Variable is not set: report that with a null pointer.
None => Scalar::ptr_null(&*this.tcx),
})
}


fn getenvironmentvariablew(
&mut self,
name_op: OpTy<'tcx, Tag>, // LPCWSTR lpName
buf_op: OpTy<'tcx, Tag>, // LPWSTR lpBuffer
size_op: OpTy<'tcx, Tag>, // DWORD nSize
) -> InterpResult<'tcx, u32> {
let this = self.eval_context_mut();

let name_ptr = this.read_scalar(name_op)?.not_undef()?;
let name = this.read_os_str_from_target_str(name_ptr)?;
Ok(match this.machine.env_vars.map.get(&name) {
Some(var_ptr) => {
// The offset is used to strip the "{name}=" part of the string.
let var_ptr = Scalar::from(var_ptr.offset(Size::from_bytes((name.len() as u64 + 1) * 2), this)?);
let buf_size = this.read_scalar(size_op)?.to_i32()? as u64;
let buf_ptr = this.read_scalar(buf_op)?.not_undef()?;
let size_u16 = Size::from_bytes(2);

// The following loop attempts to figure out the length of env_var (`var_size`)
let mut var_size = 0u64;
loop {
let temp_var_ptr = var_ptr.ptr_offset(Size::from_bytes(var_size * 2), this)?;
let bytes = this.memory.read_bytes(temp_var_ptr, size_u16)?;
var_size += 1;
// encountered 0x0000 terminator
if bytes[0] == 0 && bytes[1] == 0 { break; }
}

let return_val = if var_size > buf_size {
// If lpBuffer is not large enough to hold the data, the return value is the buffer size, in characters,
// required to hold the string and its terminating null character and the contents of lpBuffer are undefined.
var_size
} else {
for i in 0..var_size {
this.memory.copy(
this.force_ptr(var_ptr.ptr_offset(Size::from_bytes(i * 2), this)?)?,
this.force_ptr(buf_ptr.ptr_offset(Size::from_bytes(i * 2), this)?)?,
size_u16,
true,
)?;
}
// If the function succeeds, the return value is the number of characters stored in the buffer pointed to by lpBuffer,
// not including the terminating null character.
var_size - 1
};
assert_eq!(return_val as u32 as u64, return_val);
return_val as u32
}
// return zero upon failure
None => 0u32
})
}

fn setenv(
&mut self,
name_op: OpTy<'tcx, Tag>,
Expand All @@ -74,34 +133,43 @@ pub trait EvalContextExt<'mir, 'tcx: 'mir>: crate::MiriEvalContextExt<'mir, 'tcx

let name_ptr = this.read_scalar(name_op)?.not_undef()?;
let value_ptr = this.read_scalar(value_op)?.not_undef()?;
let value = this.read_os_str_from_c_str(value_ptr)?;
let value = this.read_os_str_from_target_str(value_ptr)?;
let mut new = None;
if !this.is_null(name_ptr)? {
let name = this.read_os_str_from_c_str(name_ptr)?;
let name = this.read_os_str_from_target_str(name_ptr)?;
if !name.is_empty() && !name.to_string_lossy().contains('=') {
new = Some((name.to_owned(), value.to_owned()));
}
}
if let Some((name, value)) = new {
let var_ptr = alloc_env_var_as_c_str(&name, &value, &mut this);
let var_ptr = alloc_env_var_as_target_str(&name, &value, &mut this)?;
if let Some(var) = this.machine.env_vars.map.insert(name.to_owned(), var_ptr) {
this.memory
.deallocate(var, None, MiriMemoryKind::Machine.into())?;
}
this.update_environ()?;
Ok(0)
Ok(0) // return zero on success
} else {
Ok(-1)
}
}

/// Shim for the Windows `SetEnvironmentVariableW` function, implemented on top of `setenv`.
/// `setenv` yields `0` on success and `-1` on failure; adding one turns that into the
/// non-zero-on-success / zero-on-failure result returned here.
fn setenvironmentvariablew(
    &mut self,
    name_op: OpTy<'tcx, Tag>,  // LPCWSTR lpName,
    value_op: OpTy<'tcx, Tag>, // LPCWSTR lpValue,
) -> InterpResult<'tcx, i32> {
    let posix_status = self.setenv(name_op, value_op)?;
    // return non-zero on success
    Ok(posix_status + 1)
}

fn unsetenv(&mut self, name_op: OpTy<'tcx, Tag>) -> InterpResult<'tcx, i32> {
let this = self.eval_context_mut();

let name_ptr = this.read_scalar(name_op)?.not_undef()?;
let mut success = None;
if !this.is_null(name_ptr)? {
let name = this.read_os_str_from_c_str(name_ptr)?.to_owned();
let name = this.read_os_str_from_target_str(name_ptr)?.to_owned();
if !name.is_empty() && !name.to_string_lossy().contains('=') {
success = Some(this.machine.env_vars.map.remove(&name));
}
Expand Down
Loading

0 comments on commit 5735ce5

Please sign in to comment.