From 7a65911f9e8b7361b07accc0dbdcd357aa8a4b78 Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Mon, 13 May 2024 20:32:33 +0200 Subject: [PATCH 1/2] Optimize `Seek::stream_len` impl for `File` It uses the file metadata on Unix with a fallback for files incorrectly reported as zero-sized. It uses `GetFileSizeEx` on Windows. This reduces the number of syscalls needed for determining the file size of an open file from 3 to 1. --- library/std/src/fs.rs | 36 +++++++++++++++++++ library/std/src/io/mod.rs | 24 +++++++------ library/std/src/sys/fs/hermit.rs | 4 +++ library/std/src/sys/fs/solid.rs | 4 +++ library/std/src/sys/fs/unix.rs | 9 +++++ library/std/src/sys/fs/unsupported.rs | 4 +++ library/std/src/sys/fs/wasi.rs | 4 +++ library/std/src/sys/fs/windows.rs | 8 +++++ .../std/src/sys/pal/windows/c/bindings.txt | 1 + .../std/src/sys/pal/windows/c/windows_sys.rs | 1 + 10 files changed, 85 insertions(+), 10 deletions(-) diff --git a/library/std/src/fs.rs b/library/std/src/fs.rs index fa98db693066a..73c36e1970c86 100644 --- a/library/std/src/fs.rs +++ b/library/std/src/fs.rs @@ -1247,9 +1247,39 @@ impl Write for &File { } #[stable(feature = "rust1", since = "1.0.0")] impl Seek for &File { + /// Seek to an offset, in bytes in a file. + /// + /// See [`Seek::seek`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `lseek64` function on Unix + /// and the `SetFilePointerEx` function on Windows. Note that this [may + /// change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior fn seek(&mut self, pos: SeekFrom) -> io::Result { self.inner.seek(pos) } + + /// Returns the length of this file (in bytes). + /// + /// See [`Seek::stream_len`] docs for more info. + /// + /// # Platform-specific behavior + /// + /// This function currently corresponds to the `statx` function on Linux + /// (with fallbacks) and the `GetFileSizeEx` function on Windows. Note that + /// this [may change in the future][changes]. + /// + /// [changes]: io#platform-specific-behavior + fn stream_len(&mut self) -> io::Result { + if let Some(result) = self.inner.size() { + return result; + } + io::stream_len_default(self) + } + fn stream_position(&mut self) -> io::Result { self.inner.tell() } @@ -1299,6 +1329,9 @@ impl Seek for File { fn seek(&mut self, pos: SeekFrom) -> io::Result { (&*self).seek(pos) } + fn stream_len(&mut self) -> io::Result { + (&*self).stream_len() + } fn stream_position(&mut self) -> io::Result { (&*self).stream_position() } @@ -1348,6 +1381,9 @@ impl Seek for Arc { fn seek(&mut self, pos: SeekFrom) -> io::Result { (&**self).seek(pos) } + fn stream_len(&mut self) -> io::Result { + (&**self).stream_len() + } fn stream_position(&mut self) -> io::Result { (&**self).stream_position() } diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index 6579b6887aaae..f3b23de973c64 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2062,16 +2062,7 @@ pub trait Seek { /// ``` #[unstable(feature = "seek_stream_len", issue = "59359")] fn stream_len(&mut self) -> Result { - let old_pos = self.stream_position()?; - let len = self.seek(SeekFrom::End(0))?; - - // Avoid seeking a third time when we were already at the end of the - // stream. The branch is usually way cheaper than a seek operation. - if old_pos != len { - self.seek(SeekFrom::Start(old_pos))?; - } - - Ok(len) + stream_len_default(self) } /// Returns the current seek position from the start of the stream. @@ -2132,6 +2123,19 @@ pub trait Seek { } } +pub(crate) fn stream_len_default(self_: &mut T) -> Result { + let old_pos = self_.stream_position()?; + let len = self_.seek(SeekFrom::End(0))?; + + // Avoid seeking a third time when we were already at the end of the + // stream. The branch is usually way cheaper than a seek operation. + if old_pos != len { + self_.seek(SeekFrom::Start(old_pos))?; + } + + Ok(len) +} + /// Enumeration of possible methods to seek within an I/O object. /// /// It is used by the [`Seek`] trait. diff --git a/library/std/src/sys/fs/hermit.rs b/library/std/src/sys/fs/hermit.rs index f83a2f90ed22a..5d350e3038afb 100644 --- a/library/std/src/sys/fs/hermit.rs +++ b/library/std/src/sys/fs/hermit.rs @@ -421,6 +421,10 @@ impl File { self.0.seek(pos) } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { self.0.tell() } diff --git a/library/std/src/sys/fs/solid.rs b/library/std/src/sys/fs/solid.rs index 39de933b7248b..2e879842ad636 100644 --- a/library/std/src/sys/fs/solid.rs +++ b/library/std/src/sys/fs/solid.rs @@ -458,6 +458,10 @@ impl File { self.tell() } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { unsafe { let mut out_offset = MaybeUninit::uninit(); diff --git a/library/std/src/sys/fs/unix.rs b/library/std/src/sys/fs/unix.rs index 7c3ed8029f7dd..cabbffe19293a 100644 --- a/library/std/src/sys/fs/unix.rs +++ b/library/std/src/sys/fs/unix.rs @@ -1450,6 +1450,15 @@ impl File { Ok(n as u64) } + pub fn size(&self) -> Option> { + match self.file_attr().map(|attr| attr.size()) { + // Fall back to default implementation if the returned size is 0, + // we might be in a proc mount. + Ok(0) => None, + result => Some(result), + } + } + pub fn tell(&self) -> io::Result { self.seek(SeekFrom::Current(0)) } diff --git a/library/std/src/sys/fs/unsupported.rs b/library/std/src/sys/fs/unsupported.rs index 45e93deffa3a4..0bb44b0d3234a 100644 --- a/library/std/src/sys/fs/unsupported.rs +++ b/library/std/src/sys/fs/unsupported.rs @@ -258,6 +258,10 @@ impl File { self.0 } + pub fn size(&self) -> Option> { + self.0 + } + pub fn tell(&self) -> io::Result { self.0 } diff --git a/library/std/src/sys/fs/wasi.rs b/library/std/src/sys/fs/wasi.rs index 773040571bc97..52d74b811bbef 100644 --- a/library/std/src/sys/fs/wasi.rs +++ b/library/std/src/sys/fs/wasi.rs @@ -515,6 +515,10 @@ impl File { self.fd.seek(pos) } + pub fn size(&self) -> Option> { + None + } + pub fn tell(&self) -> io::Result { self.fd.tell() } diff --git a/library/std/src/sys/fs/windows.rs b/library/std/src/sys/fs/windows.rs index 362e64abf1ac3..a554a61a9929b 100644 --- a/library/std/src/sys/fs/windows.rs +++ b/library/std/src/sys/fs/windows.rs @@ -619,6 +619,14 @@ impl File { Ok(newpos as u64) } + pub fn size(&self) -> Option> { + let mut result = 0; + Some( + cvt(unsafe { c::GetFileSizeEx(self.handle.as_raw_handle(), &mut result) }) + .map(|_| result as u64), + ) + } + pub fn tell(&self) -> io::Result { self.seek(SeekFrom::Current(0)) } diff --git a/library/std/src/sys/pal/windows/c/bindings.txt b/library/std/src/sys/pal/windows/c/bindings.txt index e2c2163327968..d8ef709f8cf56 100644 --- a/library/std/src/sys/pal/windows/c/bindings.txt +++ b/library/std/src/sys/pal/windows/c/bindings.txt @@ -2156,6 +2156,7 @@ GetExitCodeProcess GetFileAttributesW GetFileInformationByHandle GetFileInformationByHandleEx +GetFileSizeEx GetFileType GETFINALPATHNAMEBYHANDLE_FLAGS GetFinalPathNameByHandleW diff --git a/library/std/src/sys/pal/windows/c/windows_sys.rs b/library/std/src/sys/pal/windows/c/windows_sys.rs index 1d0e89f5d0f0e..489210cc503c5 100644 --- a/library/std/src/sys/pal/windows/c/windows_sys.rs +++ b/library/std/src/sys/pal/windows/c/windows_sys.rs @@ -44,6 +44,7 @@ windows_targets::link!("kernel32.dll" "system" fn GetExitCodeProcess(hprocess : windows_targets::link!("kernel32.dll" "system" fn GetFileAttributesW(lpfilename : PCWSTR) -> u32); windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandle(hfile : HANDLE, lpfileinformation : *mut BY_HANDLE_FILE_INFORMATION) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn GetFileInformationByHandleEx(hfile : HANDLE, fileinformationclass : FILE_INFO_BY_HANDLE_CLASS, lpfileinformation : *mut core::ffi::c_void, dwbuffersize : u32) -> BOOL); +windows_targets::link!("kernel32.dll" "system" fn GetFileSizeEx(hfile : HANDLE, lpfilesize : *mut i64) -> BOOL); windows_targets::link!("kernel32.dll" "system" fn GetFileType(hfile : HANDLE) -> FILE_TYPE); windows_targets::link!("kernel32.dll" "system" fn GetFinalPathNameByHandleW(hfile : HANDLE, lpszfilepath : PWSTR, cchfilepath : u32, dwflags : GETFINALPATHNAMEBYHANDLE_FLAGS) -> u32); windows_targets::link!("kernel32.dll" "system" fn GetFullPathNameW(lpfilename : PCWSTR, nbufferlength : u32, lpbuffer : PWSTR, lpfilepart : *mut PWSTR) -> u32); From cbe64ce349e29f1a4a39186e63d51ee54a56c63f Mon Sep 17 00:00:00 2001 From: Tobias Bucher Date: Thu, 9 Jan 2025 10:31:03 +0100 Subject: [PATCH 2/2] Clarify description of `Seek::stream_len` It can only describe the inner workings of the default implementation, other implementations might not be implemented using seeks at all. --- library/std/src/io/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/std/src/io/mod.rs b/library/std/src/io/mod.rs index f3b23de973c64..6e225eed8c418 100644 --- a/library/std/src/io/mod.rs +++ b/library/std/src/io/mod.rs @@ -2028,7 +2028,7 @@ pub trait Seek { /// Returns the length of this stream (in bytes). /// - /// This method is implemented using up to three seek operations. If this + /// The default implementation uses up to three seek operations. If this /// method returns successfully, the seek position is unchanged (i.e. the /// position before calling this method is the same as afterwards). /// However, if this method returns an error, the seek position is