From c9eb21c478594bd362c7ca7fd3efb04a4a985adc Mon Sep 17 00:00:00 2001 From: Gittaca Date: Fri, 31 Jul 2020 17:26:07 +0200 Subject: [PATCH] Add regression test against error in rt_get_fields 'names' attribute [2] must be the same length as the vector [0] --- inst/robotstxts/robots_commented_token.txt | 7 +++++++ tests/testthat/test_parser.R | 14 ++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 inst/robotstxts/robots_commented_token.txt diff --git a/inst/robotstxts/robots_commented_token.txt b/inst/robotstxts/robots_commented_token.txt new file mode 100644 index 0000000..6a1d208 --- /dev/null +++ b/inst/robotstxts/robots_commented_token.txt @@ -0,0 +1,7 @@ +User-agent: bot_1 +Disallow: /path_1 + +# User-agent: bot_2 +# Disallow: /path_2 + +# Sitemap: /sitemap.php diff --git a/tests/testthat/test_parser.R b/tests/testthat/test_parser.R index fcd71e6..1591804 100644 --- a/tests/testthat/test_parser.R +++ b/tests/testthat/test_parser.R @@ -25,6 +25,7 @@ rtxt_fb_nsp <- rt_get_rtxt("robots_facebook_unsupported.txt") rtxt_cdc <- rt_get_rtxt("robots_cdc.txt") rtxt_cdc2 <- paste(rt_get_rtxt("robots_cdc2.txt"), collapse = "\r\n") rtxt_rbloggers <- rt_get_rtxt("rbloggers.txt") +rtxt_ct <- rt_get_rtxt("robots_commented_token.txt") test_that( "all robots.txt files are valid", { @@ -118,6 +119,10 @@ test_that( expect_true( is_valid_robotstxt( rtxt_cdc ) ) + + expect_true( + is_valid_robotstxt( rtxt_ct ) + ) }) @@ -213,6 +218,15 @@ test_that( } ) +context("Commented-out tokens get parsed correctly") + +test_that( + "Commented-out tokens get ignored", { + expect_true( + nrow(parse_robotstxt(rtxt_ct)$permissions) == 1 + ) + } +)