Read BIFF5 rich-text string (RSTRING) cells in .xls files.

XLS_RECORD_RSTRING joins LABEL and LABELSST in XlsCell's text-cell branch and
is added to the record ids accepted by XlsCellSet's cell_is_readable(). A
sample sheet and a compatibility test for tidyverse/readxl#611 are added;
debug/debug.R records the record ids seen while investigating
(214 = 0x00D6 RSTRING, 516 = 0x0204 LABEL).

NOTE(review): this patch was recovered from a whitespace-collapsed copy. Line
breaks follow the hunk headers' line counts; indentation and the position of
the two blank context lines in the debug/debug.R hunk are reconstructed --
confirm against the working tree before applying.

diff --git a/debug/debug.R b/debug/debug.R
index 565ad9db..4c58160e 100644
--- a/debug/debug.R
+++ b/debug/debug.R
@@ -1,7 +1,20 @@
 devtools::clean_dll()
 devtools::load_all()
 
-files <- fs::dir_ls("investigations/Data", recurse = TRUE, glob = "*.XLS")
-#files
-read_xls(files[[1]])
+# success <- read_excel(
+#   "investigations/sample_data/success.xls",
+#   col_names = FALSE,
+#   range = "A1"
+# )
 
+failure <- read_excel("investigations/sample_data/failure.xls", col_names = FALSE)
+# cell_is_readable
+# cell->id is 214 for the failure
+# 214 in hexadecimal is 0xD6
+# in xlsstruct.h:
+# #define XLS_RECORD_RSTRING 0x00D6
+
+# cell->id is 516 for the success
+# 516 in hexadecimal is 0x204
+# in xlsstruct.h:
+# #define XLS_RECORD_LABEL 0x0204
diff --git a/src/XlsCell.h b/src/XlsCell.h
index f41f8a40..0321f017 100644
--- a/src/XlsCell.h
+++ b/src/XlsCell.h
@@ -130,6 +130,7 @@ class XlsCell {
     switch(cell_->id) {
     case XLS_RECORD_LABELSST:
     case XLS_RECORD_LABEL:
+    case XLS_RECORD_RSTRING:
       {
         std::string s = cell_->str == NULL ? "" : cell_->str;
         ct = na.contains(s, trimWs) ? CELL_BLANK : CELL_TEXT;
diff --git a/src/XlsCellSet.h b/src/XlsCellSet.h
index 603c2fed..12cda78b 100644
--- a/src/XlsCellSet.h
+++ b/src/XlsCellSet.h
@@ -152,6 +152,7 @@ class XlsCellSet {
   bool cell_is_readable(const xls::xlsCell* cell) {
     return cell && (
         cell->id == XLS_RECORD_MULRK ||
+        cell->id == XLS_RECORD_RSTRING ||
         cell->id == XLS_RECORD_NUMBER ||
         cell->id == XLS_RECORD_RK ||
         cell->id == XLS_RECORD_LABELSST ||
diff --git a/tests/testthat/sheets/biff5-rich-text-string.xls b/tests/testthat/sheets/biff5-rich-text-string.xls
new file mode 100755
index 00000000..ed8963fc
Binary files /dev/null and b/tests/testthat/sheets/biff5-rich-text-string.xls differ
diff --git a/tests/testthat/test-compatibility.R b/tests/testthat/test-compatibility.R
index 728f5102..94e9d192 100644
--- a/tests/testthat/test-compatibility.R
+++ b/tests/testthat/test-compatibility.R
@@ -63,3 +63,20 @@ test_that("we can read LAPD arrest sheets", {
   expect_match(lapd$ARR_LOC[9], "HOLLYWOOD")
   expect_identical(lapd$CHG_DESC[27], "EX CON W/ A GUN")
 })
+
+# https://github.com/tidyverse/readxl/issues/611
+# xls file produced by ABBYY FineReader (OCR of PDFs)
+# inspired libxls to add support for rich-text strings in BIFF5
+# https://github.com/libxls/libxls/commit/b6d9d872756f69780b743dbaec9cd2ec30c37740
+test_that("we can read xls from ABBYY FineReader", {
+  expect_error_free(
+    abbyy <- read_excel(
+      test_sheet("biff5-rich-text-string.xls"),
+      col_names = FALSE,
+      n_max = 1
+    )
+  )
+  expect_equal(nrow(abbyy), 1)
+  expect_equal(ncol(abbyy), 1)
+  expect_match(abbyy[[1,1]], "^ELECTORAL")
+})