diff --git a/DESCRIPTION b/DESCRIPTION index 8791981..7d8c184 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,6 +14,8 @@ LinkingTo: Rcpp Suggests: knitr (>= 1.12.3),testthat, rmarkdown (>= 0.9.5),microbenchmark,rex +Enhances: + directlabels, ggplot2 ,stringi URL: https://github.com/qinwf/re2r/ BugReports: https://github.com/qinwf/re2r/issues VignetteBuilder: knitr diff --git a/NAMESPACE b/NAMESPACE index 677c3b2..f08dfc7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,6 @@ # Generated by roxygen2: do not edit by hand S3method(print,re2exp) -S3method(re2_extract,character) -S3method(re2_extract,re2exp) -S3method(re2_match,character) -S3method(re2_match,re2exp) -S3method(re2_replace,character) -S3method(re2_replace,re2exp) export("%!~%") export("%<~%") export("%=~%") diff --git a/R/RcppExports.R b/R/RcppExports.R index 49790af..a84c3d0 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -1,8 +1,8 @@ # This file was generated by Rcpp::compileAttributes # Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 -cpp_match <- function(pattern, input, value, anchor, all) { - .Call('re2r_cpp_match', PACKAGE = 're2r', pattern, input, value, anchor, all) +cpp_match <- function(input, pattern, value, anchor, all) { + .Call('re2r_cpp_match', PACKAGE = 're2r', input, pattern, value, anchor, all) } cpp_re2_compile <- function(pattern, log_errors_value, utf_8_value, posix_syntax_value, case_sensitive_value, dot_nl_value, literal_value, longest_match_value, never_nl_value, never_capture_value, one_line_value, perl_classes_value, word_boundary_value, max_mem_value) { @@ -52,12 +52,12 @@ cpp_quote_meta <- function(input) { .Call('re2r_cpp_quote_meta', PACKAGE = 're2r', input) } -cpp_replace <- function(regexp, rewrite, input, global_) { - .Call('re2r_cpp_replace', PACKAGE = 're2r', regexp, rewrite, input, global_) +cpp_replace <- function(input, regexp, rewrite, global_) { + .Call('re2r_cpp_replace', PACKAGE = 're2r', input, regexp, rewrite, global_) } -cpp_extract <- 
function(regexp, rewrite, input) { - .Call('re2r_cpp_extract', PACKAGE = 're2r', regexp, rewrite, input) +cpp_extract <- function(input, regexp, rewrite) { + .Call('re2r_cpp_extract', PACKAGE = 're2r', input, regexp, rewrite) } cpp_get_program_fanout <- function(regexp) { diff --git a/R/aaa.R b/R/aaa.R index 0f2a192..85881e9 100644 --- a/R/aaa.R +++ b/R/aaa.R @@ -30,19 +30,19 @@ -`%==%` = function(x1,x2){ - identical(x1,x2) +`%==%` = function(x1, x2) { + identical(x1, x2) } -`%!==%` = function(x1,x2){ - !identical(x1,x2) +`%!==%` = function(x1, x2) { + !identical(x1, x2) } -check_windows_strings = function(strings){ +check_windows_strings = function(strings) { .Platform$OS.type %==% "windows" && all(Encoding(strings) == "UTF-8") } -update_windows_strings = function(){ +update_windows_strings = function() { .Platform$OS.type %==% "windows" } diff --git a/R/compile.R b/R/compile.R index 21553ea..ced04ce 100644 --- a/R/compile.R +++ b/R/compile.R @@ -91,36 +91,38 @@ #' regexp #' @export re2 = function(pattern, - utf_8 = TRUE, - case_sensitive = TRUE, - posix_syntax = FALSE, - dot_nl = FALSE, - literal = FALSE, - longest_match = FALSE, - never_nl = FALSE, - never_capture = FALSE, - one_line= FALSE, - perl_classes = FALSE, - word_boundary = FALSE, - max_mem = 8388608){ + utf_8 = TRUE, + case_sensitive = TRUE, + posix_syntax = FALSE, + dot_nl = FALSE, + literal = FALSE, + longest_match = FALSE, + never_nl = FALSE, + never_capture = FALSE, + one_line = FALSE, + perl_classes = FALSE, + word_boundary = FALSE, + max_mem = 8388608) { # if ( .Platform$OS.type %==% "windows" && # Encoding(pattern[1]) %!==% "UTF-8" ) { # pattern = enc2utf8(pattern) # } - regexp = cpp_re2_compile(pattern, - log_errors_value = FALSE, - utf_8_value = utf_8, - case_sensitive_value = case_sensitive, - posix_syntax_value = posix_syntax, - dot_nl_value = dot_nl, - literal_value = literal, - longest_match_value = longest_match, - never_nl_value = never_nl, - never_capture_value = never_capture, - 
one_line_value = one_line, - perl_classes_value = perl_classes, - word_boundary_value = word_boundary, - max_mem_value = max_mem) + regexp = cpp_re2_compile( + pattern, + log_errors_value = FALSE, + utf_8_value = utf_8, + case_sensitive_value = case_sensitive, + posix_syntax_value = posix_syntax, + dot_nl_value = dot_nl, + literal_value = literal, + longest_match_value = longest_match, + never_nl_value = never_nl, + never_capture_value = never_capture, + one_line_value = one_line, + perl_classes_value = perl_classes, + word_boundary_value = word_boundary, + max_mem_value = max_mem + ) class(regexp) = "re2exp" regexp @@ -134,7 +136,7 @@ re2 = function(pattern, #' get_pattern(regexp) #' @return a string #' @export -get_pattern = function(regexp){ +get_pattern = function(regexp) { res = cpp_get_pattern(regexp) # if (.Platform$OS.type %==% "windows") { # Encoding(res) = "UTF-8" @@ -152,7 +154,7 @@ get_pattern = function(regexp){ #' (res = get_named_groups(regexp)) #' names(res) #' @export -get_named_groups = function(regexp){ +get_named_groups = function(regexp) { res = cpp_get_named_groups(regexp) # if (.Platform$OS.type %==% "windows") { # Encoding(names(res)) = "UTF-8" @@ -175,7 +177,7 @@ get_named_groups = function(regexp){ #' quote_meta(c("1.2","abc")) #' @return quoted string #' @export -quote_meta = function(unquoted){ +quote_meta = function(unquoted) { # if (check_windows_strings(unquoted)) { # unquoted = enc2utf8(unquoted) # } diff --git a/R/extract.R b/R/extract.R index 1deca85..d571b35 100644 --- a/R/extract.R +++ b/R/extract.R @@ -41,28 +41,12 @@ #' @param input a character vector #' @param ... further arguments passed to or from other methods. #' @examples -#' re2_extract("(.)","yabba dabba doo") -#' re2_extract("(.*)@([^.]*)","test@me.com","\\2!\\1") +#' re2_extract("yabba dabba doo", "(.)") +#' re2_extract("test@me.com", "(.*)@([^.]*)", "\\2!\\1") #' @export -re2_extract = function(pattern, input, rewrite = "\\1", ...) 
UseMethod("re2_extract") - -#' @rdname re2_extract -#' @export -re2_extract.re2exp = function(pattern, input, rewrite = "\\1", ...){ - # if (check_windows_strings(input)) input = enc2utf8(input) - # if (check_windows_strings(rewrite)) rewrite = enc2utf8(rewrite) - - res = cpp_extract(pattern, rewrite, input) - - # if (update_windows_strings()) { - # Encoding(res) = "UTF-8" - # } - return(res) -} - -#' @rdname re2_extract -#' @export -re2_extract.character = function(pattern, input, rewrite = "\\1", ...){ - pattern = re2(pattern, ...) - re2_extract.re2exp(pattern, input, rewrite) +re2_extract = function(input, pattern, rewrite = "\\1", ...) { + if (!inherits(pattern, "re2exp")) { + pattern = re2(pattern, ...) + } + cpp_extract(input, pattern, rewrite) } diff --git a/R/fanout.R b/R/fanout.R index 85b7922..8c9b595 100644 --- a/R/fanout.R +++ b/R/fanout.R @@ -1,12 +1,41 @@ +## This file is part of the 're2r' package for R. +## Copyright (C) 2016, Qin Wenfeng +## All rights reserved. +## +## Redistribution and use in source and binary forms, with or without +## modification, are permitted provided that the following conditions are met: +## +## 1. Redistributions of source code must retain the above copyright notice, +## this list of conditions and the following disclaimer. +## +## 2. Redistributions in binary form must reproduce the above copyright notice, +## this list of conditions and the following disclaimer in the documentation +## and/or other materials provided with the distribution. +## +## 3. Neither the name of the copyright holder nor the names of its +## contributors may be used to endorse or promote products derived from +## this software without specific prior written permission. +## +## THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +## "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, +## BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +## FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +## HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +## PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +## OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +## WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +## OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +## EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + #' Get program fanout #' Outputs the program fanout as a histogram bucketed by powers of 2. #' Returns the number of the largest non-empty bucket. #' @param regexp a pre-compiled regular expression #' @export -get_program_fanout = function(regexp){ +get_program_fanout = function(regexp) { res = cpp_get_program_fanout(regexp) - return(data.frame( - index = as.numeric(names(res)), - value = as.numeric(res)) - ) + return(data.frame(index = as.numeric(names(res)), + value = as.numeric(res))) } diff --git a/R/match.R b/R/match.R index 10533fc..83c5f2f 100644 --- a/R/match.R +++ b/R/match.R @@ -33,23 +33,41 @@ #' Match patterns in a string. #' #' @param pattern a pre-compiled regular expression or a string -#' @param string a character vector +#' @param input a character vector #' @param value return value instead of bool result #' @param anchor "start": anchor match at the beginning of the string, "both": anchor match at the beginning and the end of the string, "none": no anchor. #' @param all find all matches instead of the first match. When result = "value", a matched character matrix will be returned. #' @param ... further arguments passed to or from other methods. 
+#' @examples +#' +#' test_string = "this is just one test"; +#' re2_match(test_string, "(o.e)") +#' +#' (res = re2_match(test_string, "(o.e)", value = TRUE)) +#' str(res) +#' +#' (res = re2_match(test_string, "(?P<testname>this)( is)", value = TRUE)) +#' str(res) +#' +#' (res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) +#' +#' test_string = c("this is just one test", "the second test"); +#' (res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) +#' +#' test_string = c("this is just one test", "the second test") +#' (res = re2_match(test_string, "is", value = TRUE)) +#' +#' regexp = re2("test",case_sensitive = FALSE) +#' re2_match("TEST", regexp) #' @export -re2_match = function(pattern, string, value = FALSE, anchor = "none", all = FALSE, ...) UseMethod("re2_match") - -#' @rdname re2_match -#' @export -re2_match.re2exp = function(pattern, string, value = FALSE, anchor = "none", all = FALSE, ...){ - cpp_match(pattern, string, value, anchor, all) -} - -#' @rdname re2_match -#' @export -re2_match.character = function(pattern, string, value = FALSE, anchor = "none", all = FALSE, ...){ - pattern = re2(pattern, ...) - re2_match.re2exp(pattern, string, value, anchor, all) +re2_match = function(input, + pattern, + value = FALSE, + anchor = "none", + all = FALSE, + ...) { + if (!inherits(pattern, "re2exp")) { + pattern = re2(pattern, ...) + } + cpp_match(input, pattern, value, anchor, all) } diff --git a/R/ops.R b/R/ops.R index cbfa0fe..f8412bf 100644 --- a/R/ops.R +++ b/R/ops.R @@ -39,7 +39,7 @@ #' regexp #' @seealso more options with \code{\link{re2}} #' @export -`%<~%` = function(varible, pattern){ +`%<~%` = function(varible, pattern) { parent <- parent.frame() rhss <- substitute(pattern) # the right-hand sides lhs <- substitute(varible) # the left-hand side. 
@@ -56,12 +56,12 @@ #' c("pt","sd") %=~% "sd" #' c("pt","sd") %!~% "sd" #' @export -`%=~%` = function(string, pattern){ - re2_match(pattern, string, value = FALSE, anchor = "none") +`%=~%` = function(string, pattern) { + re2_match(string, pattern, value = FALSE, anchor = "none") } #' @rdname grapes-equals-twiddle-grapes #' @export -`%!~%` = function(string, pattern){ - !re2_match(pattern, string, value = FALSE, anchor = "none") +`%!~%` = function(string, pattern) { + !re2_match(string, pattern, value = FALSE, anchor = "none") } diff --git a/R/print.R b/R/print.R index 646017c..4cd460a 100644 --- a/R/print.R +++ b/R/print.R @@ -37,11 +37,18 @@ #' re2("(.*)@([^.]*)") #' re2("(?Psd)") #' @export -print.re2exp = function(x, ...){ +print.re2exp = function(x, ...) { cat("re2 pre-compiled regular expression\n\n") - cat("pattern: "); cat(get_pattern(x)); cat("\n") - cat("number of capturing subpatterns: "); cat(get_number_of_groups(x)) ; cat("\n") - cat("capturing names with indices: \n"); print(get_named_groups(x)) ; - cat("expression size: "); cat(get_expression_size(x)) + cat("pattern: ") + cat(get_pattern(x)) + cat("\n") + cat("number of capturing subpatterns: ") + cat(get_number_of_groups(x)) + cat("\n") + cat("capturing names with indices: \n") + print(get_named_groups(x)) + + cat("expression size: ") + cat(get_expression_size(x)) invisible() } diff --git a/R/replace.R b/R/replace.R index 56344a2..75e1d10 100644 --- a/R/replace.R +++ b/R/replace.R @@ -45,28 +45,12 @@ #' @return a character vector #' @examples #' regexp = re2("b+") -#' re2_replace(regexp,"d", "yabba dabba doo") == "yada dada doo" -#' re2_replace("b+","d", "yabba dabba doo", all = FALSE) == "yada dabba doo" +#' re2_replace("yabba dabba doo", regexp, "d", all = TRUE) == "yada dada doo" +#' re2_replace("yabba dabba doo", "b+","d", all = FALSE) == "yada dabba doo" #' @export -re2_replace = function(pattern, rewrite, input, all = FALSE, ...) 
UseMethod("re2_replace") - -#' @rdname re2_replace -#' @export -re2_replace.re2exp = function(pattern, rewrite, input, all = FALSE, ...){ - # if (check_windows_strings(input)) input = enc2utf8(input) - # if (check_windows_strings(rewrite)) rewrite = enc2utf8(rewrite) - - res = cpp_replace(pattern, rewrite, input, all) - - # if (update_windows_strings()) { - # Encoding(res) = "UTF-8" - # } - return(res) -} - -#' @rdname re2_replace -#' @export -re2_replace.character = function(pattern, rewrite, input, all = FALSE, ...){ - pattern = re2(pattern, ...) - re2_replace.re2exp(pattern, rewrite, input, all) +re2_replace = function(input, pattern, rewrite, all = FALSE, ...) { + if (!inherits(pattern, "re2exp")) { + pattern = re2(pattern, ...) + } + cpp_replace(input, pattern, rewrite, all) } diff --git a/README.md b/README.md index 3ce022a..2af1e15 100644 --- a/README.md +++ b/README.md @@ -11,9 +11,7 @@ To install from GitHub: ```r library(devtools) -install_github("rstudio/rmarkdown") -install_github("yihui/knitr") -install_github("qinwf/re2r", build_vignettes = T, force = T) +install_github("qinwf/re2r", build_vignettes = T) ``` To learn how to use, you can check out the [vignettes](vignettes/re2r-intro.Rmd). @@ -28,10 +26,10 @@ To learn how to use, you can check out the [vignettes](vignettes/re2r-intro.Rmd) ```r ## Sys.setlocale(locale = "English") ## for Windows users with non-UTF8 locale -## re2_match(pattern, string) +## re2_match(string, pattern) test_string = "this is just one test"; -re2_match("(o.e)", test_string) +re2_match(test_string, "(o.e)") ``` ```r @@ -43,7 +41,7 @@ Searches the string expression for the occurence(s) of a substring that matches With `value = TRUE` option, function will return the capture groups with `()`. ```r -(res = re2_match("(o.e)", test_string, value = TRUE)) +(res = re2_match(test_string, "(o.e)", value = TRUE)) ## ?1 ##[1,] "one" @@ -61,7 +59,7 @@ The return result is a character matrix. 
`?1` is the first capture group and it We can create named capture group with `(?P<name>pattern)` syntax. ```r -(res = re2_match("(?P<testname>this)( is)", test_string, value = TRUE)) +(res = re2_match(test_string, "(?P<testname>this)( is)", value = TRUE)) ## testname ?2 ##[1,] "this" " is" @@ -74,7 +72,7 @@ str(res) With `all = TRUE` option, function will return the all of patterns in a string instead of just the first one. ```r -(res = re2_match("(is)", test_string, value = TRUE, all = TRUE)) +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) ``` ```r @@ -87,7 +85,7 @@ With `all = TRUE` option, function will return the all of patterns in a string i ```r test_string = c("this is just one test", "the second test"); -(res = re2_match("(is)", test_string, value = TRUE, all = TRUE)) +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) ``` ```r @@ -101,7 +99,7 @@ If there is no capture group, and `value = TRUE`, the matched origin strings wil ```r test_string = c("this is just one test", "the second test"); -(res = re2_match("is", test_string, value = TRUE)) +(res = re2_match(test_string, "is", value = TRUE)) ``` ```r @@ -113,7 +111,7 @@ test_string = c("this is just one test", "the second test"); ### 2. Replace a substring ```r -## re2_replace(pattern, rewrite, input) +## re2_replace(input, pattern, rewrite) ``` Searches the string "input string" for the occurence(s) of a substring that matches 'pattern' and replaces the found substrings with "rewrite text". @@ -121,7 +119,7 @@ Searches the string "input string" for the occurence(s) of a substring that matc ```r input_string = "this is just one test"; new_string = "my" -re2_replace("(o.e)", new_string, input_string) +re2_replace(input_string, "(o.e)", new_string) ``` ```r @@ -131,13 +129,13 @@ re2_replace("(o.e)", new_string, input_string) ### 3. 
Extract a substring ```r -## re2_extract(pattern, input, rewrite = optional) +## re2_extract(input, pattern, rewrite = optional) ``` Searches the string "input string" for the occurence(s) of a substring that matches 'pattern' and return the found substrings with "rewrite text". ```r -re2_extract("(.)","yabba dabba doo") +re2_extract("yabba dabba doo", "(.)") ``` ```r @@ -145,7 +143,7 @@ re2_extract("(.)","yabba dabba doo") ``` ```r -re2_extract("(.*)@([^.]*)","test@me.com","\\2!\\1") +re2_extract("test@me.com", "(.*)@([^.]*)", "\\2!\\1") ``` ```r @@ -187,9 +185,9 @@ regexp ```r regexp = re2("test",case_sensitive = FALSE) -re2_match(regexp, "TEST") +re2_match("TEST", regexp) ## [1] TRUE -re2_replace(regexp, "ops", "TEST") +re2_replace("TEST", regexp, "ops") ## [1] "ops" ``` diff --git a/man/re2_extract.Rd b/man/re2_extract.Rd index 5009f93..be7436c 100644 --- a/man/re2_extract.Rd +++ b/man/re2_extract.Rd @@ -2,21 +2,15 @@ % Please edit documentation in R/extract.R \name{re2_extract} \alias{re2_extract} -\alias{re2_extract.character} -\alias{re2_extract.re2exp} \title{Extract one matched patterns in a string.} \usage{ -re2_extract(pattern, input, rewrite = "\\\\1", ...) - -\method{re2_extract}{re2exp}(pattern, input, rewrite = "\\\\1", ...) - -\method{re2_extract}{character}(pattern, input, rewrite = "\\\\1", ...) +re2_extract(input, pattern, rewrite = "\\\\1", ...) } \arguments{ -\item{pattern}{a pre-compiled regular expression or a string} - \item{input}{a character vector} +\item{pattern}{a pre-compiled regular expression or a string} + \item{rewrite}{replace the first match of "pattern" in "input" with "rewrite"} \item{...}{further arguments passed to or from other methods.} @@ -30,7 +24,7 @@ portions of "text" are ignored. 
Returns true iff a match occurred and the extraction happened } \examples{ -re2_extract("(.)","yabba dabba doo") -re2_extract("(.*)@([^.]*)","test@me.com","\\\\2!\\\\1") +re2_extract("yabba dabba doo", "(.)") +re2_extract("test@me.com", "(.*)@([^.]*)", "\\\\2!\\\\1") } diff --git a/man/re2_match.Rd b/man/re2_match.Rd index 167c78b..4f600ff 100644 --- a/man/re2_match.Rd +++ b/man/re2_match.Rd @@ -2,23 +2,15 @@ % Please edit documentation in R/match.R \name{re2_match} \alias{re2_match} -\alias{re2_match.character} -\alias{re2_match.re2exp} \title{Match patterns in a string.} \usage{ -re2_match(pattern, string, value = FALSE, anchor = "none", all = FALSE, +re2_match(input, pattern, value = FALSE, anchor = "none", all = FALSE, ...) - -\method{re2_match}{re2exp}(pattern, string, value = FALSE, anchor = "none", - all = FALSE, ...) - -\method{re2_match}{character}(pattern, string, value = FALSE, - anchor = "none", all = FALSE, ...) } \arguments{ -\item{pattern}{a pre-compiled regular expression or a string} +\item{input}{a character vector} -\item{string}{a character vector} +\item{pattern}{a pre-compiled regular expression or a string} \item{value}{return value instead of bool result} @@ -31,4 +23,26 @@ re2_match(pattern, string, value = FALSE, anchor = "none", all = FALSE, \description{ Match patterns in a string. 
} +\examples{ + +test_string = "this is just one test"; +re2_match(test_string, "(o.e)") + +(res = re2_match(test_string, "(o.e)", value = TRUE)) +str(res) + +(res = re2_match(test_string, "(?Pthis)( is)", value = TRUE)) +str(res) + +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) + +test_string = c("this is just one test", "the second test"); +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) + +test_string = c("this is just one test", "the second test") +(res = re2_match(test_string, "is", value = TRUE)) + +regexp = re2("test",case_sensitive = FALSE) +re2_match("TEST", regexp) +} diff --git a/man/re2_replace.Rd b/man/re2_replace.Rd index a39bc49..992a4a0 100644 --- a/man/re2_replace.Rd +++ b/man/re2_replace.Rd @@ -2,23 +2,17 @@ % Please edit documentation in R/replace.R \name{re2_replace} \alias{re2_replace} -\alias{re2_replace.character} -\alias{re2_replace.re2exp} \title{Replace matched patterns in a string.} \usage{ -re2_replace(pattern, rewrite, input, all = FALSE, ...) - -\method{re2_replace}{re2exp}(pattern, rewrite, input, all = FALSE, ...) - -\method{re2_replace}{character}(pattern, rewrite, input, all = FALSE, ...) +re2_replace(input, pattern, rewrite, all = FALSE, ...) } \arguments{ +\item{input}{a character vector} + \item{pattern}{a pre-compiled regular expression or a string} \item{rewrite}{replace the first match or all of the match of "pattern" in "input" with "rewrite"} -\item{input}{a character vector} - \item{all}{if it is TRUE, it will replaces successive non-overlapping occurrences} \item{...}{further arguments passed to or from other methods.} @@ -35,7 +29,7 @@ text. 
} \examples{ regexp = re2("b+") -re2_replace(regexp,"d", "yabba dabba doo") == "yada dada doo" -re2_replace("b+","d", "yabba dabba doo", all = FALSE) == "yada dabba doo" +re2_replace("yabba dabba doo", regexp,"d") == "yada dada doo" +re2_replace("yabba dabba doo", "b+","d", all = FALSE) == "yada dabba doo" } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 7f4ac70..c391433 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -7,17 +7,17 @@ using namespace Rcpp; // cpp_match -SEXP cpp_match(XPtr& pattern, vector& input, bool value, string& anchor, bool all); -RcppExport SEXP re2r_cpp_match(SEXP patternSEXP, SEXP inputSEXP, SEXP valueSEXP, SEXP anchorSEXP, SEXP allSEXP) { +SEXP cpp_match(vector& input, XPtr& pattern, bool value, string& anchor, bool all); +RcppExport SEXP re2r_cpp_match(SEXP inputSEXP, SEXP patternSEXP, SEXP valueSEXP, SEXP anchorSEXP, SEXP allSEXP) { BEGIN_RCPP Rcpp::RObject __result; Rcpp::RNGScope __rngScope; - Rcpp::traits::input_parameter< XPtr& >::type pattern(patternSEXP); Rcpp::traits::input_parameter< vector& >::type input(inputSEXP); + Rcpp::traits::input_parameter< XPtr& >::type pattern(patternSEXP); Rcpp::traits::input_parameter< bool >::type value(valueSEXP); Rcpp::traits::input_parameter< string& >::type anchor(anchorSEXP); Rcpp::traits::input_parameter< bool >::type all(allSEXP); - __result = Rcpp::wrap(cpp_match(pattern, input, value, anchor, all)); + __result = Rcpp::wrap(cpp_match(input, pattern, value, anchor, all)); return __result; END_RCPP } @@ -101,29 +101,29 @@ BEGIN_RCPP END_RCPP } // cpp_replace -CharacterVector cpp_replace(XPtr& regexp, string& rewrite, vector& input, bool global_); -RcppExport SEXP re2r_cpp_replace(SEXP regexpSEXP, SEXP rewriteSEXP, SEXP inputSEXP, SEXP global_SEXP) { +CharacterVector cpp_replace(vector& input, XPtr& regexp, string& rewrite, bool global_); +RcppExport SEXP re2r_cpp_replace(SEXP inputSEXP, SEXP regexpSEXP, SEXP rewriteSEXP, SEXP global_SEXP) { BEGIN_RCPP Rcpp::RObject 
__result; Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< vector& >::type input(inputSEXP); Rcpp::traits::input_parameter< XPtr& >::type regexp(regexpSEXP); Rcpp::traits::input_parameter< string& >::type rewrite(rewriteSEXP); - Rcpp::traits::input_parameter< vector& >::type input(inputSEXP); Rcpp::traits::input_parameter< bool >::type global_(global_SEXP); - __result = Rcpp::wrap(cpp_replace(regexp, rewrite, input, global_)); + __result = Rcpp::wrap(cpp_replace(input, regexp, rewrite, global_)); return __result; END_RCPP } // cpp_extract -CharacterVector cpp_extract(XPtr& regexp, string& rewrite, vector& input); -RcppExport SEXP re2r_cpp_extract(SEXP regexpSEXP, SEXP rewriteSEXP, SEXP inputSEXP) { +CharacterVector cpp_extract(vector& input, XPtr& regexp, string& rewrite); +RcppExport SEXP re2r_cpp_extract(SEXP inputSEXP, SEXP regexpSEXP, SEXP rewriteSEXP) { BEGIN_RCPP Rcpp::RObject __result; Rcpp::RNGScope __rngScope; + Rcpp::traits::input_parameter< vector& >::type input(inputSEXP); Rcpp::traits::input_parameter< XPtr& >::type regexp(regexpSEXP); Rcpp::traits::input_parameter< string& >::type rewrite(rewriteSEXP); - Rcpp::traits::input_parameter< vector& >::type input(inputSEXP); - __result = Rcpp::wrap(cpp_extract(regexp, rewrite, input)); + __result = Rcpp::wrap(cpp_extract(input, regexp, rewrite)); return __result; END_RCPP } diff --git a/src/re2r_match.cpp b/src/re2r_match.cpp index e4e0439..3bc5f33 100644 --- a/src/re2r_match.cpp +++ b/src/re2r_match.cpp @@ -155,8 +155,8 @@ RE2::Anchor get_anchor_type(const string& anchor){ } // [[Rcpp::export]] -SEXP cpp_match(XPtr& pattern, - vector& input, +SEXP cpp_match(vector& input, + XPtr& pattern, bool value, string& anchor, bool all){ diff --git a/src/re2r_wrap.cpp b/src/re2r_wrap.cpp index 2424596..d8de795 100644 --- a/src/re2r_wrap.cpp +++ b/src/re2r_wrap.cpp @@ -162,7 +162,7 @@ CharacterVector cpp_quote_meta(vector& input){ } // [[Rcpp::export]] -CharacterVector cpp_replace(XPtr& regexp, string& 
rewrite, vector& input, bool global_){ +CharacterVector cpp_replace(vector& input, XPtr& regexp, string& rewrite, bool global_){ string errmsg; if(!regexp->CheckRewriteString(rewrite, &errmsg)){ @@ -184,7 +184,7 @@ CharacterVector cpp_replace(XPtr& regexp, string& rewrite, vector& } // [[Rcpp::export]] -CharacterVector cpp_extract(XPtr& regexp, string& rewrite, vector& input){ +CharacterVector cpp_extract(vector& input, XPtr& regexp, string& rewrite){ string errmsg; if(!regexp->CheckRewriteString(rewrite, &errmsg)){ diff --git a/tests/testthat/test-check_rewrite.R b/tests/testthat/test-check_rewrite.R index 98bd53d..99cf4f6 100644 --- a/tests/testthat/test-check_rewrite.R +++ b/tests/testthat/test-check_rewrite.R @@ -19,10 +19,10 @@ test_that("check rewrite", { regexp = re2(ind[[1]]) for (exp in 2:length(ind)) { if (ind[[exp]][2] == "FALSE") { - expect_error(re2_replace(ind[[1]], ind[[exp]][1], "a") , + expect_error(re2_replace("a", ind[[1]], ind[[exp]][1] ) , "rewrite string error") } else { # "TRUE" - expect_equal(re2_replace(ind[[1]], ind[[exp]][1], "q"), "q") + expect_equal( re2_replace("q", ind[[1]], ind[[exp]][1]), "q") } } } diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 49e1e72..a675b54 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -2,9 +2,9 @@ context("check extract") test_that("check rewrite", { # from re2_test.cc - expect_identical(re2_extract("(.*)@([^.]*)", "boris@kremvax.ru","\\2!\\1"),"kremvax!boris") - expect_identical(re2_extract(".*", "foo","'\\0'"),"'foo'") - expect_identical(re2_extract("bar", "baz","'\\0'"),"") + expect_identical(re2_extract("boris@kremvax.ru", "(.*)@([^.]*)","\\2!\\1"),"kremvax!boris") + expect_identical(re2_extract("foo", ".*", "'\\0'"),"'foo'") + expect_identical(re2_extract("baz", "bar", "'\\0'"),"") }) diff --git a/tests/testthat/test-match_group.R b/tests/testthat/test-match_group.R index d0e7539..8f747a2 100644 --- a/tests/testthat/test-match_group.R 
+++ b/tests/testthat/test-match_group.R @@ -3,15 +3,15 @@ context("check match group") test_that("check match group 1", { # from re2_test.cc ree1 = re2("\\s*(\\w+)") - res1 = re2_match(ree1, " aaa b!@#$@#$cccc", value = TRUE, anchor = "start", all = TRUE) + res1 = re2_match(" aaa b!@#$@#$cccc", ree1, value = TRUE, anchor = "start", all = TRUE) exp1 = structure(c("1", "1", "aaa", "b"), .Dim = c(2L, 2L), .Dimnames = list(NULL, c("!n", "?1"))) expect_identical(res1, exp1) - res2 = re2_match(ree1, " aaa b!@#$@#$cccc", value = TRUE, anchor = "none", all = TRUE) + res2 = re2_match(" aaa b!@#$@#$cccc", ree1, value = TRUE, anchor = "none", all = TRUE) exp2 = structure(c("1", "1", "1", "aaa", "b", "cccc"), .Dim = c(3L, 2L), .Dimnames = list(NULL, c("!n", "?1"))) expect_identical(res2, exp2) - res3 = re2_match("(\\w+)", " one two three 4", value = TRUE, anchor = "none", all = TRUE) + res3 = re2_match(" one two three 4", "(\\w+)", value = TRUE, anchor = "none", all = TRUE) exp3 = structure(c("1", "1", "1", "1", "one", "two", "three", "4"), .Dim = c(4L, 2L), .Dimnames = list(NULL, c("!n", "?1"))) expect_identical(res3, exp3) @@ -20,23 +20,23 @@ test_that("check match group 1", { test_that("Test Match Number Peculiarity",{ ree1 = re2("(foo)|(bar)|(baz)") - res1 = re2_match(ree1, "foo", value = TRUE) + res1 = re2_match("foo", ree1, value = TRUE) exp1 = structure(c("foo", NA, NA), .Dim = c(1L, 3L), .Dimnames = list(NULL, c("?1", "?2", "?3"))) expect_identical(res1, exp1) - res2 = re2_match(ree1, "baz", value = TRUE) + res2 = re2_match("baz", ree1, value = TRUE) exp2 = structure(c(NA, NA, "baz"), .Dim = c(1L, 3L), .Dimnames = list(NULL, c("?1", "?2", "?3"))) expect_identical(res2, exp2) - res3 = re2_match(ree1, "bar", value = TRUE) + res3 = re2_match("bar", ree1, value = TRUE) exp3 = structure(c(NA, "bar", NA), .Dim = c(1L, 3L), .Dimnames = list(NULL, c("?1", "?2", "?3"))) expect_identical(res3, exp3) - res4 = re2_match(ree1, "f", value = TRUE) + res4 = re2_match("f", ree1, value 
= TRUE) exp4 = structure(c(NA_character_, NA_character_, NA_character_), .Dim = c(1L, 3L), .Dimnames = list(NULL, c("?1", "?2", "?3"))) expect_identical(res4, exp4) - res5 = re2_match("(foo)|hello", "hello", value = TRUE, anchor = "start") + res5 = re2_match("hello", "(foo)|hello", value = TRUE, anchor = "start") exp5 = structure(NA_character_, .Dim = c(1L, 1L), .Dimnames = list(NULL, "?1")) expect_identical(res5, exp5) @@ -45,7 +45,7 @@ test_that("Test Match Number Peculiarity",{ test_that("simple match",{ ree1 = re2("((\\w+):([0-9]+))") expect_false("zyzzyva" %=~% ree1) - res1 = re2_match(ree1, "a chrisr:9000 here", value = TRUE) + res1 = re2_match("a chrisr:9000 here", ree1, value = TRUE) exp1 = structure(c("chrisr:9000", "chrisr", "9000"), .Dim = c(1L, 3L ), .Dimnames = list(NULL, c("?1", "?2", "?3"))) expect_identical(res1, exp1) diff --git a/tests/testthat/test-replace.R b/tests/testthat/test-replace.R index d515e47..b3df981 100644 --- a/tests/testthat/test-replace.R +++ b/tests/testthat/test-replace.R @@ -74,9 +74,9 @@ test_that("re2_replace", { for (ind in replace_list) { - expect_identical(re2_replace(ind[1],ind[2],ind[3], all = FALSE), ind[4]) + expect_identical(re2_replace(ind[3], ind[1], ind[2], all = FALSE), ind[4]) - res = re2_replace(ind[1],ind[2],ind[3], all = TRUE) + res = re2_replace(ind[3], ind[1],ind[2], all = TRUE) expect_identical(as.character(res), ind[5]) expect_identical(attr(res, "count"), as.numeric(ind[6])) diff --git a/vignettes/disabled/re2r-benchmark.Rmd b/vignettes/disabled/re2r-benchmark.Rmd index cc5cc6e..3b10449 100644 --- a/vignettes/disabled/re2r-benchmark.Rmd +++ b/vignettes/disabled/re2r-benchmark.Rmd @@ -41,7 +41,7 @@ for(N in 1:max.N){ ICU = stri_match(subject, regex = pattern), PCRE = regexpr(pattern, subject, perl = TRUE), TRE = regexpr(pattern, subject, perl = FALSE), - RE2 = re2_match(regexp, subject), + RE2 = re2_match(subject, regexp), times = 10) times.list[[N]] <- data.frame(N, N.times) } diff --git 
a/vignettes/re2r-intro.Rmd b/vignettes/re2r-intro.Rmd index 2e111ed..6de5181 100644 --- a/vignettes/re2r-intro.Rmd +++ b/vignettes/re2r-intro.Rmd @@ -81,14 +81,14 @@ Sys.setlocale(locale = "English") Here a quick overview over the most common methods on how to execute a regular expression. ```{r, eval=FALSE, include=TRUE} -re2_match(pattern, string) +re2_match(string, pattern) ``` Searches the string expression for the occurence(s) of a substring that matches 'pattern' and returns boolean result. ```{r} test_string = "this is just one test"; -re2_match("(o.e)", test_string) +re2_match(test_string, "(o.e)") ``` `.` means `any character, possibly including newline` . For more syntax, you can check out the `RE2 Syntax` vignette. @@ -96,7 +96,7 @@ re2_match("(o.e)", test_string) With `value = TRUE` option, function will return the capture groups with `()`. ```{r,collapse=TRUE} -(res = re2_match("(o.e)", test_string, value = TRUE)) +(res = re2_match(test_string, "(o.e)", value = TRUE)) str(res) ``` @@ -105,14 +105,14 @@ The return result is a character matrix. `?1` is the first capture group and it We can create named capture group with `(?P<name>pattern)` syntax. ```{r,collapse=TRUE} -(res = re2_match("(?P<testname>this)( is)", test_string, value = TRUE)) +(res = re2_match(test_string, "(?P<testname>this)( is)", value = TRUE)) str(res) ``` With `all = TRUE` option, function will return the all of patterns in a string instead of just the first one. 
```{r,collapse=TRUE} -(res = re2_match("(is)", test_string, value = TRUE, all = TRUE)) +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) str(res) ``` @@ -120,7 +120,7 @@ str(res) ```{r,collapse=TRUE} test_string = c("this is just one test", "the second test"); -(res = re2_match("(is)", test_string, value = TRUE, all = TRUE)) +(res = re2_match(test_string, "(is)", value = TRUE, all = TRUE)) str(res) ``` @@ -128,14 +128,14 @@ If there is no capture group, and `value = TRUE`, the matched origin strings wil ```{r,collapse=TRUE} test_string = c("this is just one test", "the second test"); -(res = re2_match("is", test_string, value = TRUE)) +(res = re2_match(test_string, "is", value = TRUE)) str(res) ``` ### 2. Replace a substring ```{r, eval=FALSE, include=TRUE} -re2_replace(pattern, rewrite, input) +re2_replace(input, pattern, rewrite) ``` Searches the string "input string" for the occurence(s) of a substring that matches 'pattern' and replaces the found substrings with "rewrite text". @@ -143,20 +143,20 @@ Searches the string "input string" for the occurence(s) of a substring that matc ```{r} input_string = "this is just one test"; new_string = "my" -re2_replace("(o.e)", new_string, input_string) +re2_replace(input_string, "(o.e)", new_string) ``` ### 3. Extract a substring ```{r, eval=FALSE, include=TRUE} -re2_extract(pattern, input, rewrite = optional) +re2_extract(input, pattern, rewrite = optional) ``` Searches the string "input string" for the occurence(s) of a substring that matches 'pattern' and return the found substrings with "rewrite text". ```{r} -re2_extract("(.)","yabba dabba doo") -re2_extract("(.*)@([^.]*)","test@me.com","\\2!\\1") +re2_extract("yabba dabba doo", "(.)") +re2_extract("test@me.com", "(.*)@([^.]*)", "\\2!\\1") ``` `\\1` and `\\2` are the first and second capture groups. @@ -168,7 +168,7 @@ We can create a regular expression object (RE2 object) from a string. It will re And this will also give us more option for the pattern. 
run `help(re2)` to get more detials. ```{r,collapse=TRUE} -regexp = re2("test",case_sensitive = FALSE) +regexp = re2("test", case_sensitive = FALSE) print(regexp) regexp %<~% "(?P1*)" regexp @@ -178,8 +178,8 @@ regexp ```{r,collapse=TRUE} regexp = re2("test",case_sensitive = FALSE) -re2_match(regexp, "TEST") -re2_replace(regexp, "ops", "TEST") +re2_match("TEST", regexp) +re2_replace( "TEST", regexp, "ops") ``` If you come from a `Perl` world, you may be insterested in `%=~%` `%!~%`.