Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Oom alloc #463

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions src/cfb.rs
Original file line number Diff line number Diff line change
Expand Up @@ -273,12 +273,9 @@ impl Sectors {
r: &mut R,
len: usize,
) -> Result<Vec<u8>, CfbError> {
let mut chain = if len > 0 {
Vec::with_capacity(len)
} else {
Vec::new()
};
let mut chain = Vec::with_capacity(len);
while sector_id != ENDOFCHAIN {
// FIXME: see tests/OOM_alloc3.xls, this call OOMs
chain.extend_from_slice(self.get(sector_id, r)?);
sector_id = fats[sector_id as usize];
}
Expand Down Expand Up @@ -421,6 +418,11 @@ pub struct XlsEncoding {
}

impl XlsEncoding {
pub fn unicode() -> XlsEncoding {
XlsEncoding {
encoding: encoding_rs::UTF_16LE,
}
}
pub fn from_codepage(codepage: u16) -> Result<XlsEncoding, CfbError> {
let e = codepage::to_encoding(codepage).ok_or(CfbError::CodePageNotFound(codepage))?;
Ok(XlsEncoding { encoding: e })
Expand Down
72 changes: 32 additions & 40 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,48 +471,40 @@ impl<T: CellType> Range<T> {
/// Coordinate list (COO) is the natural way cells are stored
/// Inner size is defined only by non empty.
///
/// cells: `Vec` of non empty `Cell`s, sorted by row
///
/// # Panics
///
/// panics when a `Cell` row is lower than the first `Cell` row or
/// bigger than the last `Cell` row.
pub fn from_sparse(cells: Vec<Cell<T>>) -> Range<T> {
if cells.is_empty() {
Range::empty()
} else {
// search bounds
let row_start = cells.first().unwrap().pos.0;
let row_end = cells.last().unwrap().pos.0;
let mut col_start = u32::MAX;
let mut col_end = 0;
for c in cells.iter().map(|c| c.pos.1) {
if c < col_start {
col_start = c;
}
if c > col_end {
col_end = c
}
}
let cols = (col_end - col_start + 1) as usize;
let rows = (row_end - row_start + 1) as usize;
let len = cols.saturating_mul(rows);
let mut v = vec![T::default(); len];
v.shrink_to_fit();
for c in cells {
let row = (c.pos.0 - row_start) as usize;
let col = (c.pos.1 - col_start) as usize;
let idx = row.saturating_mul(cols) + col;
if let Some(v) = v.get_mut(idx) {
*v = c.val;
}
}
Range {
start: (row_start, col_start),
end: (row_end, col_end),
inner: v,
/// cells: `Vec` of non empty `Cell`s
pub fn from_sparse(mut cells: Vec<Cell<T>>) -> Range<T> {
// cells do not always appear in (row, col) order
cells.sort_by_key(|cell| (cell.pos.0, cell.pos.1));
let (row_start, row_end) = match &cells[..] {
[] => return Range::empty(),
[first] => (first.pos.0, first.pos.0),
[first, .., last] => (first.pos.0, last.pos.0),
};
// search bounds
let mut col_start = u32::MAX;
let mut col_end = 0;
for c in cells.iter().map(|c| c.pos.1) {
col_start = min(col_start, c);
col_end = max(col_end, c);
}
let cols = (col_end - col_start + 1) as usize;
let rows = (row_end - row_start + 1) as usize;
let len = cols.saturating_mul(rows);
let mut v = vec![T::default(); len];
v.shrink_to_fit();
for c in cells {
let row = (c.pos.0 - row_start) as usize;
let col = (c.pos.1 - col_start) as usize;
let idx = row.saturating_mul(cols) + col;
if let Some(v) = v.get_mut(idx) {
*v = c.val;
}
}
Range {
start: (row_start, col_start),
end: (row_end, col_end),
inner: v,
}
}

/// Set inner value from absolute position
Expand Down
93 changes: 52 additions & 41 deletions src/xls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ impl<RS: Read + Seek> Xls<RS> {
let cch = r.data[3] as usize;
let cce = read_u16(&r.data[4..]) as usize;
let mut name = String::new();
read_unicode_string_no_cch(&encoding, &r.data[14..], &cch, &mut name);
read_unicode_string_no_cch(&r.data[14..], &cch, &mut name);
let rgce = &r.data[r.data.len() - cce..];
let formula = parse_defined_names(rgce)?;
defined_names.push((name, formula));
Expand Down Expand Up @@ -442,8 +442,8 @@ impl<RS: Read + Seek> Xls<RS> {
}
//0x0201 => cells.push(parse_blank(r.data)?), // 513: Blank
0x0203 => cells.push(parse_number(r.data, &self.formats, self.is_1904)?), // 515: Number
0x0204 => cells.extend(parse_label(r.data, &encoding, biff)?), // 516: Label [MS-XLS 2.4.148]
0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr
0x0204 => cells.push(parse_label(r.data, &encoding, biff)?), // 516: Label [MS-XLS 2.4.148]
0x0205 => cells.push(parse_bool_err(r.data)?), // 517: BoolErr
0x0207 => {
// 519 String (formula value)
let val = Data::String(parse_string(r.data, &encoding, biff)?);
Expand Down Expand Up @@ -471,24 +471,27 @@ impl<RS: Read + Seek> Xls<RS> {
// it will appear in 0x0207 record coming next
cells.push(Cell::new(fmla_pos, val));
}
let fmla = parse_formula(
&r.data[20..],
&fmla_sheet_names,
&defined_names,
&xtis,
&encoding,
)
.unwrap_or_else(|e| {
debug!("{}", e);
format!(
"Unrecognised formula \
let fmla =
parse_formula(&r.data[20..], &fmla_sheet_names, &defined_names, &xtis)
.unwrap_or_else(|e| {
debug!("{}", e);
format!(
"Unrecognised formula \
for cell ({}, {}): {:?}",
row, col, e
)
});
row, col, e
)
});
formulas.push(Cell::new(fmla_pos, fmla));
}
_ => (),
// tests/high_byte_string.xls contains a record type that
// cannot be found in the "By Number" 2.3.2 table
0x00D6 => {
let Ok(s) = parse_label(r.data, &encoding, biff) else {
continue;
};
cells.push(s);
}
_ => {}
}
}
let range = Range::from_sparse(cells);
Expand Down Expand Up @@ -771,36 +774,37 @@ fn parse_short_string(
}

let mut s = String::with_capacity(cch);
let _ = encoding.decode_to(r.data, cch, &mut s, high_byte);
encoding.decode_to(r.data, cch, &mut s, high_byte);
Ok(s)
}

/// XLUnicodeString [MS-XLS 2.5.294]
fn parse_string(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<String, XlsError> {
if r.len() < 4 {
let (mut high_byte, expected) = match biff {
Biff::Biff2 | Biff::Biff3 | Biff::Biff4 | Biff::Biff5 => (None, 2),
_ => (Some(false), 3),
};
if r.len() < expected {
if 2 == r.len() && read_u16(r) == 0 {
// tests/high_byte_string.xls
return Ok(String::new());
}
return Err(XlsError::Len {
typ: "string",
expected: 4,
expected,
found: r.len(),
});
}
let cch = read_u16(r) as usize;

let (high_byte, start) = match biff {
Biff::Biff2 | Biff::Biff3 | Biff::Biff4 | Biff::Biff5 => (None, 2),
_ => (Some(r[2] & 0x1 != 0), 3),
};
// delay populating Some(_) variant until length checks guarantee r[2] can't crash
high_byte = high_byte.map(|_| r[2] & 0x1 != 0);

let cch = read_u16(r) as usize;
let mut s = String::with_capacity(cch);
let _ = encoding.decode_to(&r[start..], cch, &mut s, high_byte);
encoding.decode_to(&r[expected..], cch, &mut s, high_byte);
Ok(s)
}

fn parse_label(
r: &[u8],
encoding: &XlsEncoding,
biff: Biff,
) -> Result<Option<Cell<Data>>, XlsError> {
fn parse_label(r: &[u8], encoding: &XlsEncoding, biff: Biff) -> Result<Cell<Data>, XlsError> {
if r.len() < 6 {
return Err(XlsError::Len {
typ: "label",
Expand All @@ -811,10 +815,10 @@ fn parse_label(
let row = read_u16(r);
let col = read_u16(&r[2..]);
let _ixfe = read_u16(&r[4..]);
Ok(Some(Cell::new(
Ok(Cell::new(
(row as u32, col as u32),
Data::String(parse_string(&r[6..], encoding, biff)?),
)))
))
}

fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<Data>>, XlsError> {
Expand All @@ -840,7 +844,7 @@ fn parse_label_sst(r: &[u8], strings: &[String]) -> Result<Option<Cell<Data>>, X
}

fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
let (rf, rl, cf, cl) = match r.len() {
let (rf, rl, mut cf, cl) = match r.len() {
10 => (
read_u16(&r[0..2]) as u32,
read_u16(&r[2..4]) as u32,
Expand All @@ -861,6 +865,12 @@ fn parse_dimensions(r: &[u8]) -> Result<Dimensions, XlsError> {
});
}
};
// 2.5.53 ColU must be <= 0xFF, if larger, reasonable to assume
// starts at 0
// tests/OOM_alloc2.xls
if 0xFF < cf || cl < cf {
cf = 0;
}
if 1 <= rl && 1 <= cl {
Ok(Dimensions {
start: (rf, cf),
Expand Down Expand Up @@ -1005,8 +1015,10 @@ fn read_dbcs(
Ok(s)
}

fn read_unicode_string_no_cch(encoding: &XlsEncoding, buf: &[u8], len: &usize, s: &mut String) {
encoding.decode_to(&buf[1..=*len], *len, s, Some(buf[0] & 0x1 != 0));
fn read_unicode_string_no_cch(buf: &[u8], len: &usize, s: &mut String) -> usize {
XlsEncoding::unicode()
.decode_to(&buf[1..], *len, s, Some(buf[0] & 0x1 != 0))
.1
}

struct Record<'a> {
Expand Down Expand Up @@ -1147,7 +1159,6 @@ fn parse_formula(
sheets: &[String],
names: &[(String, String)],
xtis: &[Xti],
encoding: &XlsEncoding,
) -> Result<String, XlsError> {
let mut stack = Vec::new();
let mut formula = String::with_capacity(rgce.len());
Expand Down Expand Up @@ -1266,9 +1277,9 @@ fn parse_formula(
stack.push(formula.len());
formula.push('\"');
let cch = rgce[0] as usize;
read_unicode_string_no_cch(encoding, &rgce[1..], &cch, &mut formula);
let l = read_unicode_string_no_cch(&rgce[1..], &cch, &mut formula);
formula.push('\"');
rgce = &rgce[2 + cch..];
rgce = &rgce[2 + l..];
}
0x18 => {
rgce = &rgce[5..];
Expand Down
Binary file added tests/OOM_alloc.xls
Binary file not shown.
Binary file added tests/OOM_alloc2.xls
Binary file not shown.
Binary file added tests/OOM_alloc3.xls
Binary file not shown.
Binary file added tests/high_byte_string.xls
Binary file not shown.
47 changes: 39 additions & 8 deletions tests/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -714,7 +714,7 @@ fn date_xls() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -749,7 +749,7 @@ fn date_xls_1904() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -784,7 +784,7 @@ fn date_xlsx() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -819,7 +819,7 @@ fn date_xlsx_1904() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -850,7 +850,7 @@ fn date_xlsx_iso() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));
assert_eq!(range.get_value((0, 0)).unwrap().as_time(), None);
assert_eq!(range.get_value((0, 0)).unwrap().as_datetime(), None);
Expand Down Expand Up @@ -894,7 +894,7 @@ fn date_ods() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let time = chrono::NaiveTime::from_hms_opt(10, 10, 10).unwrap();
Expand Down Expand Up @@ -942,7 +942,7 @@ fn date_xlsb() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -977,7 +977,7 @@ fn date_xlsb_1904() {

#[cfg(feature = "dates")]
{
let date = chrono::NaiveDate::from_ymd_opt(2021, 01, 01).unwrap();
let date = chrono::NaiveDate::from_ymd_opt(2021, 1, 1).unwrap();
assert_eq!(range.get_value((0, 0)).unwrap().as_date(), Some(date));

let duration = chrono::Duration::seconds(255 * 60 * 60 + 10 * 60 + 10);
Expand Down Expand Up @@ -2138,3 +2138,34 @@ fn test_string_ref() {
// second sheet is the same with a cell reference to the first sheet
range_eq!(xlsx.worksheet_range_at(1).unwrap().unwrap(), expected_range);
}

#[test]
fn test_high_byte_strings_and_unicode_strings_without_reserved_tags() {
// file contains XLUnicodeString with cch = 0 and do not have a reserved byte tag
// as well as record types that do not seem to be present in the spec
let mut xls: Xls<_> = wb("high_byte_string.xls");
for (_name, ws) in xls.worksheets() {
for (row, _col, cell) in ws.used_cells() {
if row == 3 {
assert_eq!(
cell.as_string().unwrap(),
"Inside FERC's Gas Market Report monthly bidweek price file. "
);
}
}
}
// FIXME: Libreoffice recognizes a REPT("O", I44) formula
let formulas = xls.worksheet_formula("Sheet1").unwrap();
assert_eq!(
"Unrecognised formula for cell (43, 9): Unrecognized { typ: \"ptg\", val: 192 }",
formulas.get_value((43, 9)).unwrap()
);
}

#[test]
fn test_oom_allocation() {
let _xls: Xls<_> = wb("OOM_alloc.xls");
let _xls: Xls<_> = wb("OOM_alloc2.xls");
// FIXME: kills all tests with abort unless unstable set_alloc_error_hook is used
// let _xls: Xls<_> = wb("OOM_alloc3.xls");
}