switch to unbuffered I/O for reads

Signed-off-by: Lance-Drane <ldraneutk@gmail.com>
Lance-Drane · Nov 16, 2023 · ff687e6 · ff687e6
1 parent 1361021
commit ff687e6
Show file tree

Hide file tree

Showing 26 changed files with 685 additions and 626 deletions.
diff --git a/README.md b/README.md
@@ -8,7 +8,7 @@ The CSES environment is defined [here](https://cses.fi/howto/). At time of writi
 
 `cargo install` should get you started.
 
-## running a problem 
+## running a problem
 
 Since only single-file submissions are accepted on CSES, everything gets stored in the `src/bin` directory per [Cargo conventions](https://doc.rust-lang.org/cargo/reference/cargo-targets.html?highlight=src%2Fbin#binaries). You can then run your file with `cargo run --bin <bin-name> < <STDIN_FILE>`. (Add the `--release` flag if testing for performance.)
 
@@ -23,6 +23,10 @@ To save yourself from having to write the same I/O code for every single problem
 ## design decisions
 
 - Competitive programming in general de-emphasizes error handling, particularly regarding stdin and stdout. It's always assumed that stdin will match the constraints, focus on speed and clarity over safety here. In a real world application, obviously don't neglect safety.
+  - All entries on CSES use ASCII, so you can optimize slightly by skipping Rust's UTF-8 checks. DON'T do this in a real-world application.
+  - Since CSES does not seem to have interactive problems (from what I've seen), and since input size doesn't seem to go over 2MB, you can read all of stdin at once to minimize I/O calls. Additionally, you can get away with just parsing generic whitespace instead of handling newlines as a special case; you can predict what the next line will be either from the problem statement or from the first "tokens" parsed, so there's never really a reason to read line-by-line. In a real-world application, you'll likely be processing arbitrary files, so you'd want to use one of the `BufRead` trait's functions instead.
+  - Similarly, you definitely don't want to be calling `unwrap_unchecked()` at any point in a real-world application during I/O. Handle the `Err` from the `Result` properly.
+  - In certain cases, you can allocate huge arrays on the stack, though be warned that you have a limit of 2MB!
 - Strong typing and powerful compilers are great at catching errors as you're writing the code, instead of after you've run the code.
 - Be concise in how much code is written. Minimal expressions and statements have an appealing aesthetic on their own.
 - General goal is to be among the fastest applications, but not _the_ fastest. **NOTE**: in proper competitive programming environments, you really should try to be the fastest (in both code completion speed and execution speed).
@@ -31,6 +35,10 @@ To save yourself from having to write the same I/O code for every single problem
 - I try to use functional-style programming (using Rust iterator patterns) as much as possible, but sometimes imperative programming (or, rarely, OO programming) is just the best way to solve a problem.
 - Don't cheat with lookup tables (or small lookup tables which could potentially be invalidated by a new submission under the listed constraints). Obviously, there are many problems where that's the fastest case, and in a real-world application you would of course use the pre-computed values. But where's the fun in that?
 
+## linting
+
+`cargo clippy -- -Wclippy::pedantic` should catch all of the important lints.
+
 ## testing
 
 Run `cargo test`, all files should have basic unit tests by default. Note that successful test execution does not guarantee an accepted code submission on CSES, as there's a maximum time limit (usually 1 second) and memory usage.
@@ -42,3 +50,4 @@ Note that these tests are NOT representative of the actual test cases on CSES -
 ## Credits
 
 - [EbTech](https://github.com/EbTech/rust-algorithms/commit/6198cf16f667859ca60babb4b2264b9b9d039ade) : scanner boilerplate, well-designed algorithm implementations
+- [The Rust Performance Book](https://nnethercote.github.io/perf-book/introduction.html) : some great tips on how to improve performance
diff --git a/src/bin/0_cses_template.rs b/src/bin/0_cses_template.rs
@@ -1,37 +1,39 @@
 // I/O boilerplate //
 
-pub struct UnsafeScanner<R> {
-    reader: R,
+pub struct UnsafeScanner {
+    // not actually dead code, needed for buf_iter to work
+    #[allow(dead_code)]
     buf_str: Vec<u8>,
     buf_iter: std::str::SplitAsciiWhitespace<'static>,
 }
 
-impl<R: std::io::BufRead> UnsafeScanner<R> {
-    pub fn new(reader: R) -> Self {
-        Self {
-            reader,
-            buf_str: vec![],
-            buf_iter: "".split_ascii_whitespace(),
+impl UnsafeScanner {
+    pub fn new<R: std::io::BufRead>(mut reader: R) -> Self {
+        let mut buf_str = vec![];
+        unsafe {
+            reader.read_to_end(&mut buf_str).unwrap_unchecked();
         }
+        let buf_iter = unsafe {
+            let slice = std::str::from_utf8_unchecked(&buf_str);
+            std::mem::transmute(slice.split_ascii_whitespace())
+        };
+        // only resets len; the allocation (which buf_iter points into) stays, so nothing is freed
+        buf_str.clear();
+
+        Self { buf_str, buf_iter }
     }
 
     /// Use "turbofish" syntax `token::<T>()` to select data type of next token.
     ///
     /// # Panics
     /// Panics if there's an I/O error or if the token cannot be parsed as T.
     pub fn token<T: std::str::FromStr>(&mut self) -> T {
-        loop {
-            if let Some(token) = self.buf_iter.next() {
-                return token.parse().ok().expect("Failed parse");
-            }
-            self.buf_str.clear();
-            self.reader
-                .read_until(b'\n', &mut self.buf_str)
-                .expect("Failed read");
-            self.buf_iter = unsafe {
-                let slice = std::str::from_utf8_unchecked(&self.buf_str);
-                std::mem::transmute(slice.split_ascii_whitespace())
-            }
+        unsafe {
+            self.buf_iter
+                .next()
+                .unwrap_unchecked()
+                .parse()
+                .unwrap_unchecked()
         }
     }
 }
@@ -53,7 +55,7 @@ impl<R: std::io::BufRead> UnsafeScanner<R> {
 /// <ul>
 /// <li>−10<sup>6</sup> ≤ A,B ≤ 10<sup>6</sup></li>
 /// </ul>
-fn solve<R: std::io::BufRead, W: std::io::Write>(scan: &mut UnsafeScanner<R>, out: &mut W) {
+fn solve<W: std::io::Write>(mut scan: UnsafeScanner, out: &mut W) {
     let a = scan.token::<i32>();
     let b = scan.token::<i32>();
     writeln!(out, "{}", a + b).ok();
@@ -62,19 +64,19 @@ fn solve<R: std::io::BufRead, W: std::io::Write>(scan: &mut UnsafeScanner<R>, ou
 // entrypoints //
 
 fn main() {
-    let mut scan = UnsafeScanner::new(std::io::stdin().lock());
+    let scan = UnsafeScanner::new(std::io::stdin().lock());
     let mut out = std::io::BufWriter::new(std::io::stdout().lock());
-    solve(&mut scan, &mut out);
+    solve(scan, &mut out);
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
 
     fn test(input: &[u8], target: &[u8]) {
-        let mut scan = UnsafeScanner::new(input);
+        let scan = UnsafeScanner::new(input);
         let mut out = Vec::with_capacity(target.len());
-        solve(&mut scan, &mut out);
+        solve(scan, &mut out);
 
         assert_eq!(out, target);
     }

diff --git a/src/bin/dynamic_coin_combinations_1.rs b/src/bin/dynamic_coin_combinations_1.rs
@@ -1,37 +1,39 @@
 // I/O boilerplate //
 
-pub struct UnsafeScanner<R> {
-    reader: R,
+pub struct UnsafeScanner {
+    // not actually dead code, needed for buf_iter to work
+    #[allow(dead_code)]
     buf_str: Vec<u8>,
     buf_iter: std::str::SplitAsciiWhitespace<'static>,
 }
 
-impl<R: std::io::BufRead> UnsafeScanner<R> {
-    pub fn new(reader: R) -> Self {
-        Self {
-            reader,
-            buf_str: vec![],
-            buf_iter: "".split_ascii_whitespace(),
+impl UnsafeScanner {
+    pub fn new<R: std::io::BufRead>(mut reader: R) -> Self {
+        let mut buf_str = vec![];
+        unsafe {
+            reader.read_to_end(&mut buf_str).unwrap_unchecked();
         }
+        let buf_iter = unsafe {
+            let slice = std::str::from_utf8_unchecked(&buf_str);
+            std::mem::transmute(slice.split_ascii_whitespace())
+        };
+        // only resets len; the allocation (which buf_iter points into) stays, so nothing is freed
+        buf_str.clear();
+
+        Self { buf_str, buf_iter }
     }
 
     /// Use "turbofish" syntax `token::<T>()` to select data type of next token.
     ///
     /// # Panics
     /// Panics if there's an I/O error or if the token cannot be parsed as T.
     pub fn token<T: std::str::FromStr>(&mut self) -> T {
-        loop {
-            if let Some(token) = self.buf_iter.next() {
-                return token.parse().ok().expect("Failed parse");
-            }
-            self.buf_str.clear();
-            self.reader
-                .read_until(b'\n', &mut self.buf_str)
-                .expect("Failed read");
-            self.buf_iter = unsafe {
-                let slice = std::str::from_utf8_unchecked(&self.buf_str);
-                std::mem::transmute(slice.split_ascii_whitespace())
-            }
+        unsafe {
+            self.buf_iter
+                .next()
+                .unwrap_unchecked()
+                .parse()
+                .unwrap_unchecked()
         }
     }
 }
@@ -71,7 +73,7 @@ const MODULO: u64 = 1_000_000_007;
 /// <li>1 ≤ x ≤ 10<sup>6</sup></li>
 /// <li>1 ≤ c<sub>i</sub> ≤ 10<sup>6</sup></li>
 /// </ul>
-fn solve<R: std::io::BufRead, W: std::io::Write>(scan: &mut UnsafeScanner<R>, out: &mut W) {
+fn solve<W: std::io::Write>(mut scan: UnsafeScanner, out: &mut W) {
     let capacity: u8 = scan.token();
     let target: usize = scan.token();
     let mut coins: Vec<usize> = (0..capacity).map(|_| scan.token::<usize>()).collect();
@@ -94,19 +96,19 @@ fn solve<R: std::io::BufRead, W: std::io::Write>(scan: &mut UnsafeScanner<R>, ou
 // entrypoints //
 
 fn main() {
-    let mut scan = UnsafeScanner::new(std::io::stdin().lock());
+    let scan = UnsafeScanner::new(std::io::stdin().lock());
     let mut out = std::io::BufWriter::new(std::io::stdout().lock());
-    solve(&mut scan, &mut out);
+    solve(scan, &mut out);
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
 
     fn test(input: &[u8], target: &[u8]) {
-        let mut scan = UnsafeScanner::new(input);
+        let scan = UnsafeScanner::new(input);
         let mut out = Vec::with_capacity(target.len());
-        solve(&mut scan, &mut out);
+        solve(scan, &mut out);
 
         assert_eq!(out, target);
     }

diff --git a/src/bin/dynamic_dice_combinations.rs b/src/bin/dynamic_dice_combinations.rs
@@ -1,37 +1,39 @@
 // I/O boilerplate //
 
-pub struct UnsafeScanner<R> {
-    reader: R,
+pub struct UnsafeScanner {
+    // not actually dead code, needed for buf_iter to work
+    #[allow(dead_code)]
     buf_str: Vec<u8>,
     buf_iter: std::str::SplitAsciiWhitespace<'static>,
 }
 
-impl<R: std::io::BufRead> UnsafeScanner<R> {
-    pub fn new(reader: R) -> Self {
-        Self {
-            reader,
-            buf_str: vec![],
-            buf_iter: "".split_ascii_whitespace(),
+impl UnsafeScanner {
+    pub fn new<R: std::io::BufRead>(mut reader: R) -> Self {
+        let mut buf_str = vec![];
+        unsafe {
+            reader.read_to_end(&mut buf_str).unwrap_unchecked();
         }
+        let buf_iter = unsafe {
+            let slice = std::str::from_utf8_unchecked(&buf_str);
+            std::mem::transmute(slice.split_ascii_whitespace())
+        };
+        // only resets len; the allocation (which buf_iter points into) stays, so nothing is freed
+        buf_str.clear();
+
+        Self { buf_str, buf_iter }
     }
 
     /// Use "turbofish" syntax `token::<T>()` to select data type of next token.
     ///
     /// # Panics
     /// Panics if there's an I/O error or if the token cannot be parsed as T.
     pub fn token<T: std::str::FromStr>(&mut self) -> T {
-        loop {
-            if let Some(token) = self.buf_iter.next() {
-                return token.parse().ok().expect("Failed parse");
-            }
-            self.buf_str.clear();
-            self.reader
-                .read_until(b'\n', &mut self.buf_str)
-                .expect("Failed read");
-            self.buf_iter = unsafe {
-                let slice = std::str::from_utf8_unchecked(&self.buf_str);
-                std::mem::transmute(slice.split_ascii_whitespace())
-            }
+        unsafe {
+            self.buf_iter
+                .next()
+                .unwrap_unchecked()
+                .parse()
+                .unwrap_unchecked()
         }
     }
 }
@@ -65,7 +67,7 @@ type Matrix = [[usize; 6]; 6];
 /// <ul>
 /// <li>1 ≤ n ≤ 10<sup>6</sup></li>
 /// </ul>
-fn solve<R: std::io::BufRead, W: std::io::Write>(scan: &mut UnsafeScanner<R>, out: &mut W) {
+fn solve<W: std::io::Write>(mut scan: UnsafeScanner, out: &mut W) {
     let mut exponent = scan.token::<u32>();
 
     let mut base: Matrix = [
@@ -112,19 +114,19 @@ fn multiply_matrix(a: &Matrix, b: &Matrix) -> Matrix {
 // entrypoints //
 
 fn main() {
-    let mut scan = UnsafeScanner::new(std::io::stdin().lock());
+    let scan = UnsafeScanner::new(std::io::stdin().lock());
     let mut out = std::io::BufWriter::new(std::io::stdout().lock());
-    solve(&mut scan, &mut out);
+    solve(scan, &mut out);
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
 
     fn test(input: &[u8], target: &[u8]) {
-        let mut scan = UnsafeScanner::new(input);
+        let scan = UnsafeScanner::new(input);
         let mut out = Vec::with_capacity(target.len());
-        solve(&mut scan, &mut out);
+        solve(scan, &mut out);
 
         assert_eq!(out, target);
     }