% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tokenizer.R
\name{Tokenizers}
\alias{Tokenizers}
\alias{tokenizer_delim}
\alias{tokenizer_csv}
\alias{tokenizer_tsv}
\alias{tokenizer_line}
\alias{tokenizer_log}
\alias{tokenizer_fwf}
\alias{tokenizer_ws}
\title{Tokenizers.}
\usage{
tokenizer_delim(
  delim,
  quote = "\\"",
  na = "NA",
  quoted_na = TRUE,
  comment = "",
  trim_ws = TRUE,
  escape_double = TRUE,
  escape_backslash = FALSE,
  skip_empty_rows = TRUE
)

tokenizer_csv(
  na = "NA",
  quoted_na = TRUE,
  quote = "\\"",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_tsv(
  na = "NA",
  quoted_na = TRUE,
  quote = "\\"",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_line(na = character(), skip_empty_rows = TRUE)

tokenizer_log(trim_ws)

tokenizer_fwf(
  begin,
  end,
  na = "NA",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_ws(na = "NA", comment = "", skip_empty_rows = TRUE)
}
\arguments{
\item{delim}{Single character used to separate fields within a record.}

\item{quote}{Single character used to quote strings.}

\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}

\item{quoted_na}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Should missing values
inside quotes be treated as missing values (the default) or strings. This
parameter is soft deprecated as of readr 2.0.0.}

\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}

\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs)
be trimmed from each field before parsing it?}

\item{escape_double}{Does the file escape quotes by doubling them?
i.e. If this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}

\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}

\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}

\item{begin,end}{Begin and end offsets for each field. These are C++
offsets, so the first column is column zero and the ranges are
[begin, end) (i.e. inclusive-exclusive).}
}
\description{
Explicitly create tokenizer objects. Usually you will not call these
functions directly, but will instead use one of the user-friendly wrappers
like \code{\link[=read_csv]{read_csv()}}.
}
\examples{
tokenizer_csv()
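
# A minimal sketch of the remaining constructors, using the defaults and
# required arguments from the usage section above. These calls only build
# tokenizer objects; they do not read any data by themselves.
tokenizer_delim(delim = "|")
tokenizer_tsv()
tokenizer_line()
tokenizer_log(trim_ws = TRUE)
tokenizer_ws()

# Fixed-width fields: begin/end are zero-based, inclusive-exclusive C++
# offsets, so this describes two fields spanning columns 0-4 and 5-9.
tokenizer_fwf(begin = c(0, 5), end = c(5, 10))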
}
\keyword{internal}