diff --git a/benches/memory_fs.rs b/benches/memory_fs.rs index 54ade755..750c9752 100644 --- a/benches/memory_fs.rs +++ b/benches/memory_fs.rs @@ -218,11 +218,10 @@ impl FileSystem for BenchMemoryFS { Self::default() } - fn read_to_string(&self, path: &Path) -> io::Result { + fn read(&self, path: &Path) -> io::Result> { // Try direct lookup first if let Some(bytes) = self.files.get(path) { - return String::from_utf8(bytes.clone()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)); + return Ok(bytes.clone()); } // Try following symlinks @@ -241,14 +240,18 @@ impl FileSystem for BenchMemoryFS { }; if let Some(bytes) = self.files.get(¤t) { - return String::from_utf8(bytes.clone()) - .map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)); + return Ok(bytes.clone()); } } Err(io::Error::new(io::ErrorKind::NotFound, format!("File not found: {}", path.display()))) } + fn read_to_string(&self, path: &Path) -> io::Result { + let bytes = self.read(path)?; + String::from_utf8(bytes).map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e)) + } + fn metadata(&self, path: &Path) -> io::Result { // Check if it's a file (direct) if self.files.contains_key(path) { diff --git a/benches/resolver.rs b/benches/resolver.rs index 634f04be..01a217fe 100644 --- a/benches/resolver.rs +++ b/benches/resolver.rs @@ -371,7 +371,7 @@ fn bench_package_json_deserialization(c: &mut Criterion) { for (name, json) in data { group.bench_function(name, |b| { b.iter_with_setup_wrapper(|runner| { - let json = json.clone(); + let json = json.clone().into_bytes(); runner.run(|| { PackageJson::parse(&fs, test_path.clone(), test_realpath.clone(), json) .expect("Failed to parse JSON"); diff --git a/src/cache/cache_impl.rs b/src/cache/cache_impl.rs index 5718158f..9a45117b 100644 --- a/src/cache/cache_impl.rs +++ b/src/cache/cache_impl.rs @@ -109,9 +109,7 @@ impl Cache { .package_json .get_or_try_init(|| { let package_json_path = path.path.join("package.json"); - let Ok(package_json_string) = - self.fs.read_to_string_bypass_system_cache(&package_json_path) - else { + let Ok(package_json_bytes) = self.fs.read(&package_json_path) else { return Ok(None); }; @@ -120,7 +118,7 @@ impl Cache { } else { package_json_path.clone() }; - PackageJson::parse(&self.fs, package_json_path, real_path, package_json_string) + PackageJson::parse(&self.fs, package_json_path, real_path, package_json_bytes) .map(|package_json| Some(Arc::new(package_json))) .map_err(ResolveError::Json) }) @@ -167,7 +165,7 @@ impl Cache { }; let mut tsconfig_string = self .fs - .read_to_string_bypass_system_cache(&tsconfig_path) + .read_to_string(&tsconfig_path) .map_err(|_| ResolveError::TsconfigNotFound(path.to_path_buf()))?; let mut tsconfig = TsConfig::parse(root, &tsconfig_path, &mut tsconfig_string).map_err(|error| { diff --git a/src/file_system.rs b/src/file_system.rs index d139da2d..92535e6e 100644 --- a/src/file_system.rs +++ b/src/file_system.rs @@ -17,29 +17,19 @@ pub trait FileSystem: Send + Sync { #[cfg(not(feature = "yarn_pnp"))] fn new() -> Self; - /// See [std::fs::read_to_string] + /// See [std::fs::read] /// /// # Errors /// - /// * See [std::fs::read_to_string] - /// ## Warning - /// Use `&Path` instead of a generic `P: AsRef` here, - /// because object safety requirements, it is especially useful, when - /// you want to store multiple `dyn FileSystem` in a `Vec` or use a `ResolverGeneric` in - /// napi env. - fn read_to_string(&self, path: &Path) -> io::Result; + /// * See [std::fs::read] + fn read(&self, path: &Path) -> io::Result>; - /// Reads a file while bypassing the system cache. - /// - /// This is useful in scenarios where the file content is already cached in memory - /// and you want to avoid the overhead of using the system cache. + /// See [std::fs::read_to_string] /// /// # Errors /// /// * See [std::fs::read_to_string] - fn read_to_string_bypass_system_cache(&self, path: &Path) -> io::Result { - self.read_to_string(path) - } + fn read_to_string(&self, path: &Path) -> io::Result; /// See [std::fs::metadata] /// @@ -236,51 +226,26 @@ impl FileSystem for FileSystemOs { Self } - fn read_to_string(&self, path: &Path) -> io::Result { + fn read(&self, path: &Path) -> io::Result> { cfg_if! { if #[cfg(feature = "yarn_pnp")] { if self.yarn_pnp { return match VPath::from(path)? { VPath::Zip(info) => { - self.pnp_lru.read_to_string(info.physical_base_path(), info.zip_path) + self.pnp_lru.read(info.physical_base_path(), info.zip_path) } - VPath::Virtual(info) => Self::read_to_string(&info.physical_base_path()), - VPath::Native(path) => Self::read_to_string(&path), + VPath::Virtual(info) => fs::read(info.physical_base_path()), + VPath::Native(path) => fs::read(path), } } } } - Self::read_to_string(path) + fs::read(path) } - #[allow(clippy::items_after_statements)] - fn read_to_string_bypass_system_cache(&self, path: &Path) -> io::Result { - #[cfg(feature = "yarn_pnp")] - if self.yarn_pnp { - return match VPath::from(path)? { - VPath::Zip(info) => { - self.pnp_lru.read_to_string(info.physical_base_path(), info.zip_path) - } - VPath::Virtual(info) => Self::read_to_string(&info.physical_base_path()), - VPath::Native(path) => Self::read_to_string(&path), - }; - } - - cfg_if! { - if #[cfg(target_os = "macos")] { - use std::io::Read; - let mut fd = fs::OpenOptions::new().read(true).open(path)?; - // Apply F_NOCACHE to bypass filesystem cache - rustix::fs::fcntl_nocache(&fd, true)?; - let meta = fd.metadata()?; - #[allow(clippy::cast_possible_truncation)] - let mut buffer = Vec::with_capacity(meta.len() as usize); - fd.read_to_end(&mut buffer)?; - Self::validate_string(buffer) - } else { - Self::read_to_string(path) - } - } + fn read_to_string(&self, path: &Path) -> io::Result { + let bytes = self.read(path)?; + Self::validate_string(bytes) } fn metadata(&self, path: &Path) -> io::Result { diff --git a/src/package_json/serde.rs b/src/package_json/serde.rs index 92492b05..17f87cd5 100644 --- a/src/package_json/serde.rs +++ b/src/package_json/serde.rs @@ -217,27 +217,23 @@ impl PackageJson { Ok(None) } - /// Parse a package.json file from JSON string + /// Parse a package.json file from JSON bytes /// /// # Errors pub fn parse( _fs: &Fs, path: PathBuf, realpath: PathBuf, - json: String, + json: Vec, ) -> Result { - // Strip BOM - let json_string = if json.starts_with('\u{FEFF}') { - json.trim_start_matches('\u{FEFF}') - } else { - json.as_str() - }; + // Strip BOM - UTF-8 BOM is 3 bytes: 0xEF, 0xBB, 0xBF + let json_bytes = if json.starts_with(b"\xEF\xBB\xBF") { &json[3..] } else { &json[..] }; // Check if empty after BOM stripping - super::check_if_empty(json_string.as_bytes(), path.clone())?; + super::check_if_empty(json_bytes, path.clone())?; - // Parse JSON - let value = serde_json::from_str::(json_string).map_err(|error| JSONError { + // Parse JSON directly from bytes + let value = serde_json::from_slice::(json_bytes).map_err(|error| JSONError { path: path.clone(), message: error.to_string(), line: error.line(), diff --git a/src/package_json/simd.rs b/src/package_json/simd.rs index 526f33eb..92b8c7da 100644 --- a/src/package_json/simd.rs +++ b/src/package_json/simd.rs @@ -250,7 +250,7 @@ impl PackageJson { Ok(None) } - /// Parse a package.json file from JSON string + /// Parse a package.json file from JSON bytes /// /// # Panics /// # Errors @@ -258,10 +258,10 @@ impl PackageJson { fs: &Fs, path: PathBuf, realpath: PathBuf, - json: String, + json: Vec, ) -> Result { // Strip BOM in place by replacing with spaces (no reallocation) - let mut json_bytes = json.into_bytes(); + let mut json_bytes = json; if json_bytes.starts_with(b"\xEF\xBB\xBF") { json_bytes[0] = b' '; json_bytes[1] = b' '; @@ -281,15 +281,15 @@ impl PackageJson { // We re-read because simd_json may have mutated the buffer during its failed parse attempt // simd_json doesn't provide line/column info, so we use serde_json for better error messages let fallback_result = fs - .read_to_string(&realpath) + .read(&realpath) .map_err(|io_error| JSONError { path: path.clone(), message: format!("Failed to re-read file for error reporting: {io_error}"), line: 0, column: 0, }) - .and_then(|content| { - serde_json::from_str::(&content).map_err(|serde_error| { + .and_then(|bytes| { + serde_json::from_slice::(&bytes).map_err(|serde_error| { JSONError { path: path.clone(), message: serde_error.to_string(), diff --git a/src/tests/memory_fs.rs b/src/tests/memory_fs.rs index 3d5ab09f..6044d8a6 100644 --- a/src/tests/memory_fs.rs +++ b/src/tests/memory_fs.rs @@ -52,17 +52,22 @@ impl FileSystem for MemoryFS { Self::default() } - fn read_to_string(&self, path: &Path) -> io::Result { + fn read(&self, path: &Path) -> io::Result> { use vfs::FileSystem; let mut file = self .fs .open_file(path.to_string_lossy().as_ref()) .map_err(|err| io::Error::new(io::ErrorKind::NotFound, err))?; - let mut buffer = String::new(); - file.read_to_string(&mut buffer).unwrap(); + let mut buffer = Vec::new(); + file.read_to_end(&mut buffer).unwrap(); Ok(buffer) } + fn read_to_string(&self, path: &Path) -> io::Result { + let bytes = self.read(path)?; + crate::FileSystemOs::validate_string(bytes) + } + fn metadata(&self, path: &Path) -> io::Result { use vfs::FileSystem; let metadata = self