% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/tokenizer.R
\name{Tokenizers}
\alias{Tokenizers}
\alias{tokenizer_delim}
\alias{tokenizer_csv}
\alias{tokenizer_tsv}
\alias{tokenizer_line}
\alias{tokenizer_log}
\alias{tokenizer_fwf}
\alias{tokenizer_ws}
\title{Tokenizers.}
\usage{
tokenizer_delim(
  delim,
  quote = "\\"",
  na = "NA",
  quoted_na = TRUE,
  comment = "",
  trim_ws = TRUE,
  escape_double = TRUE,
  escape_backslash = FALSE,
  skip_empty_rows = TRUE
)

tokenizer_csv(
  na = "NA",
  quoted_na = TRUE,
  quote = "\\"",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_tsv(
  na = "NA",
  quoted_na = TRUE,
  quote = "\\"",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_line(na = character(), skip_empty_rows = TRUE)

tokenizer_log(trim_ws)

tokenizer_fwf(
  begin,
  end,
  na = "NA",
  comment = "",
  trim_ws = TRUE,
  skip_empty_rows = TRUE
)

tokenizer_ws(na = "NA", comment = "", skip_empty_rows = TRUE)
}
\arguments{
\item{delim}{Single character used to separate fields within a record.}

\item{quote}{Single character used to quote strings.}

\item{na}{Character vector of strings to interpret as missing values. Set this
option to \code{character()} to indicate no missing values.}

\item{quoted_na}{\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#deprecated}{\figure{lifecycle-deprecated.svg}{options: alt='[Deprecated]'}}}{\strong{[Deprecated]}} Should missing values
inside quotes be treated as missing values (the default) or strings. This
parameter is soft deprecated as of readr 2.0.0.}

\item{comment}{A string used to identify comments. Any text after the
comment characters will be silently ignored.}

\item{trim_ws}{Should leading and trailing whitespace (ASCII spaces and tabs)
be trimmed from each field before parsing it?}

\item{escape_double}{Does the file escape quotes by doubling them?
i.e. If this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}

\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}

\item{skip_empty_rows}{Should blank rows be ignored altogether? i.e. If this
option is \code{TRUE} then blank rows will not be represented at all. If it is
\code{FALSE} then they will be represented by \code{NA} values in all the columns.}

\item{begin,end}{Begin and end offsets for each field. These are C++
offsets, so the first column is column zero and the ranges are
[begin, end) (i.e. inclusive-exclusive).}
}
\description{
Explicitly create tokenizer objects. Usually you will not call these
functions directly, but will instead use one of the user-friendly wrappers
like \code{\link[=read_csv]{read_csv()}}.
}
\examples{
tokenizer_csv()
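
# A minimal sketch of the remaining constructors, using the defaults and
# required arguments from the usage section above. These calls only build
# tokenizer objects; they do not read any data by themselves.
tokenizer_delim(delim = "|")
tokenizer_tsv()
tokenizer_line()
tokenizer_log(trim_ws = TRUE)
tokenizer_ws()

# Fixed-width fields: begin/end are zero-based, inclusive-exclusive C++
# offsets, so this describes two fields spanning columns 0-4 and 5-9.
tokenizer_fwf(begin = c(0, 5), end = c(5, 10))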
}
\keyword{internal}