Skip to content

Fix some UTF-8 issues on Windows. #9812

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 20, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 10 additions & 4 deletions src/libstd/libc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1208,7 +1208,7 @@ pub mod consts {
pub static ERROR_ALREADY_EXISTS : c_int = 183;
pub static ERROR_INVALID_ADDRESS : c_int = 487;
pub static ERROR_FILE_INVALID : c_int = 1006;
pub static INVALID_HANDLE_VALUE: c_int = -1;
pub static INVALID_HANDLE_VALUE : c_int = -1;

pub static DELETE : DWORD = 0x00010000;
pub static READ_CONTROL : DWORD = 0x00020000;
Expand Down Expand Up @@ -3352,11 +3352,14 @@ pub mod funcs {
LPSECURITY_ATTRIBUTES)
-> BOOL;
pub fn CopyFileW(lpExistingFileName: LPCWSTR,
lpNewFileName: LPCWSTR,
bFailIfExists: BOOL)
-> BOOL;
lpNewFileName: LPCWSTR,
bFailIfExists: BOOL)
-> BOOL;
pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL;
pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL;
pub fn GetCurrentDirectoryW(nBufferLength: DWORD,
lpBuffer: LPWSTR)
-> DWORD;
pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL;
pub fn GetLastError() -> DWORD;
pub fn FindFirstFileW(fileName: *u16, findFileData: HANDLE)
Expand Down Expand Up @@ -3462,6 +3465,9 @@ pub mod funcs {
-> BOOL;
pub fn DeleteFileW(lpPathName: LPCWSTR) -> BOOL;
pub fn RemoveDirectoryW(lpPathName: LPCWSTR) -> BOOL;
pub fn GetCurrentDirectoryW(nBufferLength: DWORD,
lpBuffer: LPWSTR)
-> DWORD;
pub fn SetCurrentDirectoryW(lpPathName: LPCWSTR) -> BOOL;
pub fn GetLastError() -> DWORD;
pub fn FindFirstFileW(fileName: *u16, findFileData: HANDLE)
Expand Down
87 changes: 86 additions & 1 deletion src/libstd/os.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@

#[allow(missing_doc)];

use c_str::{CString, ToCStr};
#[cfg(unix)]
use c_str::CString;
use clone::Clone;
use container::Container;
use io;
Expand Down Expand Up @@ -56,6 +57,11 @@ pub fn close(fd: c_int) -> c_int {
}
}

// On Windows, the wide-character versions of system functions must be used
// to support Unicode, so functions should be split into at least two
// versions where necessary: one for Windows and one for non-Windows platforms.
// See https://github.com/mozilla/rust/issues/9822 for more information.

pub mod rustrt {
use libc::{c_char, c_int};
use libc;
Expand All @@ -64,11 +70,19 @@ pub mod rustrt {
pub fn rust_path_is_dir(path: *libc::c_char) -> c_int;
pub fn rust_path_exists(path: *libc::c_char) -> c_int;
}

// Wide-character (`*u16`) counterparts of rust_path_is_dir and
// rust_path_exists, declared only when building for Windows.
// Their runtime implementations use _wstat instead of stat.
#[cfg(windows)]
extern {
pub fn rust_path_is_dir_u16(path: *u16) -> c_int;
pub fn rust_path_exists_u16(path: *u16) -> c_int;
}
}

// Public buffer-size constant (1000).
// NOTE(review): its users are not visible in this excerpt -- confirm what it sizes.
pub static TMPBUF_SZ : uint = 1000u;
// Number of elements in the fixed buffers used by getcwd(). Despite the
// name this is an element count, not a byte count: the Windows getcwd()
// uses it as the length of a u16 array.
static BUF_BYTES : uint = 2048u;

#[cfg(unix)]
pub fn getcwd() -> Path {
#[fixed_stack_segment]; #[inline(never)];
let mut buf = [0 as libc::c_char, ..BUF_BYTES];
Expand All @@ -83,6 +97,22 @@ pub fn getcwd() -> Path {
}
}

#[cfg(windows)]
/// Returns the current working directory as a `Path` (Windows version).
///
/// Uses the wide-character `GetCurrentDirectoryW` so that the directory
/// name is obtained as UTF-16 rather than in the local code page.
/// Fails (via `fail2!()`) if `GetCurrentDirectoryW` returns 0.
pub fn getcwd() -> Path {
#[fixed_stack_segment]; #[inline(never)];
use libc::DWORD;
use libc::GetCurrentDirectoryW;
// Zero-initialized buffer of BUF_BYTES u16 elements that receives the path.
let mut buf = [0 as u16, ..BUF_BYTES];
do buf.as_mut_buf |buf, len| {
unsafe {
// Only a return value of 0 is treated as an error here.
// NOTE(review): the Win32 documentation says a return value larger
// than the buffer length means the buffer was too small; that case
// is not detected -- confirm whether it needs handling.
if libc::GetCurrentDirectoryW(len as DWORD, buf) == 0 as DWORD {
fail2!();
}
}
}
// The whole buffer (including any unused trailing zeros) is handed to
// from_utf16. NOTE(review): presumably decoding stops at the first NUL --
// verify against str::from_utf16.
Path::new(str::from_utf16(buf))
}

#[cfg(windows)]
pub mod win32 {
use libc;
Expand Down Expand Up @@ -613,6 +643,7 @@ pub fn walk_dir(p: &Path, f: &fn(&Path) -> bool) -> bool {
})
}

#[cfg(unix)]
/// Indicates whether a path represents a directory
pub fn path_is_dir(p: &Path) -> bool {
#[fixed_stack_segment]; #[inline(never)];
Expand All @@ -623,6 +654,18 @@ pub fn path_is_dir(p: &Path) -> bool {
}
}


#[cfg(windows)]
/// Indicates whether a path represents a directory (Windows version).
///
/// Passes the path string through `os::win32::as_utf16_p` and hands the
/// resulting `*u16` pointer to the runtime helper `rust_path_is_dir_u16`.
/// Returns `true` when the helper returns a nonzero value.
pub fn path_is_dir(p: &Path) -> bool {
#[fixed_stack_segment]; #[inline(never)];
unsafe {
// `unwrap()` fails if `as_str()` yields `None`.
// NOTE(review): presumably that happens when the path is not valid
// UTF-8 -- confirm against `Path::as_str`.
do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| {
rustrt::rust_path_is_dir_u16(buf) != 0 as c_int
}
}
}

#[cfg(unix)]
/// Indicates whether a path exists
pub fn path_exists(p: &Path) -> bool {
#[fixed_stack_segment]; #[inline(never)];
Expand All @@ -633,6 +676,16 @@ pub fn path_exists(p: &Path) -> bool {
}
}

#[cfg(windows)]
/// Indicates whether a path exists (Windows version).
///
/// Passes the path string through `os::win32::as_utf16_p` and hands the
/// resulting `*u16` pointer to the runtime helper `rust_path_exists_u16`.
/// Returns `true` when the helper returns a nonzero value.
pub fn path_exists(p: &Path) -> bool {
#[fixed_stack_segment]; #[inline(never)];
unsafe {
// `unwrap()` fails if `as_str()` yields `None`.
// NOTE(review): presumably that happens when the path is not valid
// UTF-8 -- confirm against `Path::as_str`.
do os::win32::as_utf16_p(p.as_str().unwrap()) |buf| {
rustrt::rust_path_exists_u16(buf) != 0 as c_int
}
}
}

/**
* Convert a relative path to an absolute path
*
Expand Down Expand Up @@ -1922,15 +1975,47 @@ mod tests {

// Checks os::path_is_dir against ASCII paths and against freshly created
// directory and file paths whose names contain non-ASCII characters.
#[test]
fn path_is_dir() {
use rt::io::file::open;
use rt::io::{OpenOrCreate, Read};

// Baseline ASCII cases: "." is a directory, the second path is not.
assert!((os::path_is_dir(&Path::new("."))));
assert!((!os::path_is_dir(&Path::new("test/stdtest/fs.rs"))));

// Build a randomly named directory path under the temp dir whose last
// component contains Hangul, CJK and Katakana characters.
let mut dirpath = os::tmpdir();
dirpath.push(format!("rust-test-{}/test-\uac00\u4e00\u30fc\u4f60\u597d",
rand::random::<u32>())); // 가一ー你好
debug2!("path_is_dir dirpath: {}", dirpath.display());

// Create the directory; the result is only logged, not asserted.
let mkdir_result = os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32);
debug2!("path_is_dir mkdir_result: {}", mkdir_result);

// The newly created non-ASCII directory must be reported as a directory.
assert!((os::path_is_dir(&dirpath)));

// Reuse the directory path as the parent of a non-ASCII file name.
let mut filepath = dirpath;
filepath.push("unicode-file-\uac00\u4e00\u30fc\u4f60\u597d.rs");
debug2!("path_is_dir filepath: {}", filepath.display());

open(&filepath, OpenOrCreate, Read); // ignore return; touch only
// A regular file must not be reported as a directory.
assert!((!os::path_is_dir(&filepath)));

// A non-ASCII path that was never created must not be a directory.
assert!((!os::path_is_dir(&Path::new(
"test/unicode-bogus-dir-\uac00\u4e00\u30fc\u4f60\u597d"))));
}

// Checks os::path_exists against ASCII paths and against a freshly created
// directory whose name contains non-ASCII characters.
#[test]
fn path_exists() {
// Baseline ASCII cases: "." exists, the bogus path does not.
assert!((os::path_exists(&Path::new("."))));
assert!((!os::path_exists(&Path::new(
"test/nonexistent-bogus-path"))));

// Build a randomly named directory path under the temp dir whose last
// component contains non-ASCII characters.
let mut dirpath = os::tmpdir();
dirpath.push(format!("rust-test-{}/test-\uac01\u4e01\u30fc\u518d\u89c1",
rand::random::<u32>())); // 각丁ー再见

// Create the directory (return value ignored) and expect it to exist.
os::mkdir_recursive(&dirpath, (S_IRUSR | S_IWUSR | S_IXUSR) as i32);
assert!((os::path_exists(&dirpath)));
// A non-ASCII path that was never created must not exist.
assert!((!os::path_exists(&Path::new(
"test/unicode-bogus-path-\uac01\u4e01\u30fc\u518d\u89c1"))));
}

#[test]
Expand Down
46 changes: 42 additions & 4 deletions src/rt/rust_builtin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ rust_list_dir_wfd_fp_buf(void* wfd) {
#endif

extern "C" CDECL int
rust_path_is_dir(char *path) {
rust_path_is_dir(const char *path) {
struct stat buf;
if (stat(path, &buf)) {
return 0;
Expand All @@ -116,14 +116,48 @@ rust_path_is_dir(char *path) {
}

extern "C" CDECL int
rust_path_exists(char *path) {
#if defined(__WIN32__)
rust_path_is_dir_u16(const wchar_t *path) {
struct _stat buf;
// Don't use GetFileAttributesW, it cannot get attributes of
// some system files (e.g. pagefile.sys).
if (_wstat(path, &buf)) {
return 0;
}
return S_ISDIR(buf.st_mode);
}
#else
rust_path_is_dir_u16(const void *path) {
// Wide version of function is only used on Windows.
return 0;
}
#endif

// Reports whether `path` can be stat()'ed.
// Returns 1 if stat() succeeds on `path`, 0 if it fails for any reason.
extern "C" CDECL int
rust_path_exists(const char *path) {
struct stat buf;
// stat() returns nonzero on failure; every failure is reported as 0.
if (stat(path, &buf)) {
return 0;
}
return 1;
}

// Wide-character variant of rust_path_exists.
// On Windows builds: returns 1 if _wstat() succeeds on `path`, 0 otherwise.
// On other builds: a stub that always returns 0, so the symbol is still
// defined on every platform.
extern "C" CDECL int
#if defined(__WIN32__)
rust_path_exists_u16(const wchar_t *path) {
struct _stat buf;
// _wstat() returns nonzero on failure; every failure is reported as 0.
if (_wstat(path, &buf)) {
return 0;
}
return 1;
}
#else
rust_path_exists_u16(const void *path) {
// Wide version of function is only used on Windows.
return 0;
}
#endif

extern "C" CDECL FILE* rust_get_stdin() {return stdin;}
extern "C" CDECL FILE* rust_get_stdout() {return stdout;}
extern "C" CDECL FILE* rust_get_stderr() {return stderr;}
Expand Down Expand Up @@ -294,8 +328,12 @@ rust_localtime(int64_t sec, int32_t nsec, rust_tm *timeptr) {
const char* zone = NULL;
#if defined(__WIN32__)
int32_t gmtoff = -timezone;
char buffer[64];
if (strftime(buffer, sizeof(buffer), "%Z", &tm) > 0) {
wchar_t wbuffer[64];
char buffer[256];
// strftime("%Z") can contain non-UTF-8 characters on non-English locale (issue #9418),
// so time zone should be converted from UTF-16 string set by wcsftime.
if (wcsftime(wbuffer, sizeof(wbuffer) / sizeof(wchar_t), L"%Z", &tm) > 0) {
WideCharToMultiByte(CP_UTF8, 0, wbuffer, -1, buffer, sizeof(buffer), NULL, NULL);
zone = buffer;
}
#else
Expand Down
8 changes: 8 additions & 0 deletions src/rt/rust_globals.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@
#include <assert.h>

#if defined(__WIN32__)
// Prevent unnecessary #include's from <windows.h>
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
// Prevent defining min and max macro
#ifndef NOMINMAX
#define NOMINMAX
#endif
extern "C" {
#include <windows.h>
#include <tchar.h>
Expand Down
2 changes: 2 additions & 0 deletions src/rt/rustrt.def.in
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@ rust_timegm
rust_mktime
precise_time_ns
rust_path_is_dir
rust_path_is_dir_u16
rust_path_exists
rust_path_exists_u16
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this will fail to compile on osx and linux because these symbols aren't defined in rust_builtin.cpp. Feel free to just make them empty functions, that's been the pattern for platform-specific C++ helpers so far.

rust_get_stdin
rust_get_stdout
rust_get_stderr
Expand Down