@@ -82,6 +82,8 @@ dir_manifest <- function(
82
82
# ' @param sep grep term to match as column delimiters within the file
83
83
# ' @param values_to_match values in \code{col} to match given as a vector
84
84
# ' @param drop Vector of column names or numbers to drop, keep the rest.
85
+ # ' @param schema_detect_nrow numeric. Number of lines read from the top of
86
+ # ' the file to infer the arrow schema (default 1000).
85
87
# ' @param verbose be verbose
86
88
# ' @param ... additional parameters to pass to [arrow::read_delim_arrow()]
87
89
# ' @keywords internal
@@ -103,6 +105,7 @@ read_colmatch <- function(file,
103
105
sep = NULL ,
104
106
values_to_match ,
105
107
drop = NULL ,
108
+ schema_detect_nrow = 1000 ,
106
109
verbose = FALSE ,
107
110
... ) {
108
111
# check dependencies
@@ -123,8 +126,8 @@ read_colmatch <- function(file,
123
126
}
124
127
}
125
128
126
- a <- arrow :: open_delim_dataset (file ,
127
- schema = .arrow_infer_schema(file ),
129
+ a <- arrow :: read_delim_arrow (file ,
130
+ schema = .arrow_infer_schema(file , n_rows = schema_detect_nrow ),
128
131
skip = 1L ,
129
132
delim = sep ,
130
133
...
@@ -231,7 +234,7 @@ fread_colmatch <- function(...) {
231
234
}
232
235
233
236
# Use data.table to get a sample and infer schema
234
- .arrow_infer_schema <- function (file , n_rows = 10 ) {
237
+ .arrow_infer_schema <- function (file , n_rows = 1000 ) {
235
238
lines <- readLines(file , n = n_rows )
236
239
# Parse with fread as string input
237
240
sample_dt <- data.table :: fread(paste(lines , collapse = " \n " ))
0 commit comments