From 0f7d8d7dbbdc07c4660468a82d1ab712ca2e095d Mon Sep 17 00:00:00 2001 From: David Raznick Date: Fri, 18 Oct 2024 09:40:22 +0100 Subject: [PATCH] change to rust_xlsxwriter --- Cargo.toml | 6 ++--- src/lib.rs | 69 ++++++++++++++---------------------------------------- 2 files changed, 21 insertions(+), 54 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 8c8b583..bb2e5a1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libflatterer" -version = "0.19.18" +version = "0.20.0" authors = ["David Raznick "] edition = "2021" description = "Lib to make JSON flatterer" @@ -28,7 +28,7 @@ lazy_static = "1" typed-builder = "0.18.0" num_cpus = "1.13.1" flate2 = "1.0.24" -csvs_convert = { version = "0.8.12", default-features = false, features = ["converters"] } +csvs_convert = { version = "0.9.0", default-features = false, features = ["converters"] } bytes = "1.4.0" url = "2.3.1" jsonpath-rust = "0.3.0" @@ -42,9 +42,9 @@ jsonref = "0.4" object_store = { version = "0.9.0", features = ["aws", "http"] } #object_store = { git = "https://github.com/apache/arrow-rs.git", features = ["aws", "http"]} +rust_xlsxwriter = { version = "0.79.0", features = ["constant_memory"] } crossbeam-channel = "0.5" tempfile = "3" -xlsxwriter = "0.6" nanoid = "0.4.0" tokio = { version = "1.37.0", features = ["rt"] } csv-async = { version = "1.2.6", features = ["tokio"] } diff --git a/src/lib.rs b/src/lib.rs index d7c56f6..2dc82ca 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -83,8 +83,6 @@ mod yajlparser; use std::convert::TryInto; #[cfg(not(target_family = "wasm"))] use std::io::BufRead; -#[cfg(not(target_family = "wasm"))] -use std::io::{self}; use std::{ fmt, fs::{create_dir_all, remove_dir_all, File}, @@ -140,7 +138,7 @@ use tokio::runtime; use tokio::sync::mpsc; use typed_builder::TypedBuilder; #[cfg(not(target_family = "wasm"))] -use xlsxwriter::Workbook; +use rust_xlsxwriter::Workbook; #[cfg(not(target_family = "wasm"))] use yajlish::Parser; #[cfg(not(target_family = "wasm"))] @@ -248,7 +246,7 @@ pub enum Error { FlattererOSError { message: String }, #[cfg(not(target_family = "wasm"))] #[snafu(display("Error with writing XLSX file"))] - FlattererXLSXError { source: xlsxwriter::XlsxError }, + FlattererXLSXError { source: rust_xlsxwriter::XlsxError }, #[snafu(display("Could not convert usize to int"))] FlattererIntError { source: std::num::TryFromIntError }, #[snafu(display("YAJLish parse error: {}", error))] @@ -558,37 +556,6 @@ struct TablesRecord { table_title: String, } -#[cfg(not(target_family = "wasm"))] -struct JLWriter { - pub buf: Vec, - pub buf_sender: Sender<(Vec, bool)>, -} - -#[cfg(not(target_family = "wasm"))] -impl Write for JLWriter { - fn write(&mut self, buf: &[u8]) -> io::Result { - if buf == [b'\n'] { - if self.buf_sender.send((self.buf.clone(), false)).is_err() { - log::error!( - "Unable to process any data, most likely caused by termination of worker" - ); - return Err(io::Error::new( - io::ErrorKind::Other, - "Unable to process any data, most likely caused by termination of worker", - )); - } - self.buf.clear(); - Ok(buf.len()) - } else { - self.buf.extend_from_slice(buf); - Ok(buf.len()) - } - } - fn flush(&mut self) -> io::Result<()> { - Ok(()) - } -} - impl FlatFiles { pub fn new_with_defaults(output_dir: String) -> Result { let options = Options::builder().build(); @@ -2523,13 +2490,16 @@ impl FlatFiles { .output_dir .join(if self.direct { "csv" } else { "tmp" }); - let workbook = Workbook::new_with_options( - &self.output_dir.join("output.xlsx").to_string_lossy(), - true, - Some(&csv_path.to_string_lossy()), - false, - ) - .context(FlattererXLSXSnafu)?; + let mut workbook = Workbook::new(); + + // let workbook = Workbook::new_with_options( + // &self.output_dir.join("output.xlsx").to_string_lossy(), + // true, + // Some(&csv_path.to_string_lossy()), + // false, + // ) + //.context(FlattererXLSXSnafu)?; + for (table_name, metadata) in self.table_metadata.iter() { if metadata.rows > 1048576 { @@ -2574,9 +2544,9 @@ impl FlatFiles { let metadata = self.table_metadata.get_mut(table_name).unwrap(); //key known - let mut worksheet = workbook - .add_worksheet(Some(&new_table_title)) - .context(FlattererXLSXSnafu {})?; + let worksheet = workbook.add_worksheet_with_low_memory(); + worksheet.set_name(&new_table_title).context(FlattererXLSXSnafu {})?; + let filepath = csv_path.join(format!("{}.csv", table_name)); let csv_reader = ReaderBuilder::new() @@ -2600,7 +2570,7 @@ impl FlatFiles { } worksheet - .write_string(0, col_index, &title, None) + .write(0, col_index, &title) .context(FlattererXLSXSnafu {})?; col_index += 1; } @@ -2648,8 +2618,7 @@ impl FlatFiles { .write_number( (row_num + 1).try_into().context(FlattererIntSnafu {})?, col_index, - number, - None, + number ) .context(FlattererXLSXSnafu {})?; } else { @@ -2661,7 +2630,6 @@ impl FlatFiles { (row_num + 1).try_into().context(FlattererIntSnafu {})?, col_index, &cell, - None, ) .context(FlattererXLSXSnafu {})?; }; @@ -2671,7 +2639,6 @@ impl FlatFiles { (row_num + 1).try_into().context(FlattererIntSnafu {})?, col_index, &cell, - None, ) .context(FlattererXLSXSnafu {})?; } @@ -2679,7 +2646,7 @@ impl FlatFiles { } } } - workbook.close().context(FlattererXLSXSnafu {})?; + workbook.save(&self.output_dir.join("output.xlsx").to_string_lossy().to_string()).context(FlattererXLSXSnafu {})?; Ok(()) }