|
| 1 | +use super::{c, fill_utf16_buf, to_u16s}; |
1 | 2 | use crate::ffi::OsStr;
|
| 3 | +use crate::io; |
2 | 4 | use crate::mem;
|
| 5 | +use crate::path::Path; |
3 | 6 | use crate::path::Prefix;
|
| 7 | +use crate::ptr; |
4 | 8 |
|
5 | 9 | #[cfg(test)]
|
6 | 10 | mod tests;
|
@@ -141,3 +145,96 @@ fn parse_next_component(path: &OsStr, verbatim: bool) -> (&OsStr, &OsStr) {
|
141 | 145 | None => (path, OsStr::new("")),
|
142 | 146 | }
|
143 | 147 | }
|
| 148 | + |
| 149 | +/// Returns a UTF-16 encoded path capable of bypassing the legacy `MAX_PATH` limits. |
| 150 | +/// |
| 151 | +/// This path may or may not have a verbatim prefix. |
| 152 | +pub(crate) fn maybe_verbatim(path: &Path) -> io::Result<Vec<u16>> { |
| 153 | + const LEGACY_MAX_PATH: usize = 260; |
| 154 | + // UTF-16 encoded code points, used in parsing and building UTF-16 paths. |
| 155 | + // All of these are in the ASCII range so they can be cast directly to `u16`. |
| 156 | + const SEP: u16 = b'\\' as _; |
| 157 | + const ALT_SEP: u16 = b'/' as _; |
| 158 | + const QUERY: u16 = b'?' as _; |
| 159 | + const COLON: u16 = b':' as _; |
| 160 | + const DOT: u16 = b'.' as _; |
| 161 | + const U: u16 = b'U' as _; |
| 162 | + const N: u16 = b'N' as _; |
| 163 | + const C: u16 = b'C' as _; |
| 164 | + |
| 165 | + // \\?\ |
| 166 | + const VERBATIM_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP]; |
| 167 | + // \??\ |
| 168 | + const NT_PREFIX: &[u16] = &[SEP, QUERY, QUERY, SEP]; |
| 169 | + // \\?\UNC\ |
| 170 | + const UNC_PREFIX: &[u16] = &[SEP, SEP, QUERY, SEP, U, N, C, SEP]; |
| 171 | + |
| 172 | + let mut path = to_u16s(path)?; |
| 173 | + if path.starts_with(VERBATIM_PREFIX) || path.starts_with(NT_PREFIX) { |
| 174 | + // Early return for paths that are already verbatim. |
| 175 | + return Ok(path); |
| 176 | + } else if path.len() < LEGACY_MAX_PATH { |
| 177 | + // Early return if an absolute path is less < 260 UTF-16 code units. |
| 178 | + // This is an optimization to avoid calling `GetFullPathNameW` unnecessarily. |
| 179 | + match path.as_slice() { |
| 180 | + // Starts with `D:`, `D:\`, `D:/`, etc. |
| 181 | + // Does not match if the path starts with a `\` or `/`. |
| 182 | + [drive, COLON, 0] | [drive, COLON, SEP | ALT_SEP, ..] |
| 183 | + if *drive != SEP && *drive != ALT_SEP => |
| 184 | + { |
| 185 | + return Ok(path); |
| 186 | + } |
| 187 | + // Starts with `\\`, `//`, etc |
| 188 | + [SEP | ALT_SEP, SEP | ALT_SEP, ..] => return Ok(path), |
| 189 | + _ => {} |
| 190 | + } |
| 191 | + } |
| 192 | + |
| 193 | + // Firstly, get the absolute path using `GetFullPathNameW`. |
| 194 | + // https://docs.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-getfullpathnamew |
| 195 | + let lpfilename = path.as_ptr(); |
| 196 | + fill_utf16_buf( |
| 197 | + // SAFETY: `fill_utf16_buf` ensures the `buffer` and `size` are valid. |
| 198 | + // `lpfilename` is a pointer to a null terminated string that is not |
| 199 | + // invalidated until after `GetFullPathNameW` returns successfully. |
| 200 | + |buffer, size| unsafe { |
| 201 | + // While the docs for `GetFullPathNameW` have the standard note |
| 202 | + // about needing a `\\?\` path for a long lpfilename, this does not |
| 203 | + // appear to be true in practice. |
| 204 | + // See: |
| 205 | + // https://stackoverflow.com/questions/38036943/getfullpathnamew-and-long-windows-file-paths |
| 206 | + // https://googleprojectzero.blogspot.com/2016/02/the-definitive-guide-on-win32-to-nt.html |
| 207 | + c::GetFullPathNameW(lpfilename, size, buffer, ptr::null_mut()) |
| 208 | + }, |
| 209 | + |mut absolute| { |
| 210 | + path.clear(); |
| 211 | + |
| 212 | + // Secondly, add the verbatim prefix. This is easier here because we know the |
| 213 | + // path is now absolute and fully normalized (e.g. `/` has been changed to `\`). |
| 214 | + let prefix = match absolute { |
| 215 | + // C:\ => \\?\C:\ |
| 216 | + [_, COLON, SEP, ..] => VERBATIM_PREFIX, |
| 217 | + // \\.\ => \\?\ |
| 218 | + [SEP, SEP, DOT, SEP, ..] => { |
| 219 | + absolute = &absolute[4..]; |
| 220 | + VERBATIM_PREFIX |
| 221 | + } |
| 222 | + // Leave \\?\ and \??\ as-is. |
| 223 | + [SEP, SEP, QUERY, SEP, ..] | [SEP, QUERY, QUERY, SEP, ..] => &[], |
| 224 | + // \\ => \\?\UNC\ |
| 225 | + [SEP, SEP, ..] => { |
| 226 | + absolute = &absolute[2..]; |
| 227 | + UNC_PREFIX |
| 228 | + } |
| 229 | + // Anything else we leave alone. |
| 230 | + _ => &[], |
| 231 | + }; |
| 232 | + |
| 233 | + path.reserve_exact(prefix.len() + absolute.len() + 1); |
| 234 | + path.extend_from_slice(prefix); |
| 235 | + path.extend_from_slice(absolute); |
| 236 | + path.push(0); |
| 237 | + }, |
| 238 | + )?; |
| 239 | + Ok(path) |
| 240 | +} |
0 commit comments