From f648b5e247b76295630aabd21f79719b89fedcf8 Mon Sep 17 00:00:00 2001
From: Shivan Kaul Sahib
Date: Wed, 27 Apr 2022 17:45:07 -0700
Subject: [PATCH] =?UTF-8?q?Add=20De-AMP=20check=20for=20empty=20attribute-?=
 =?UTF-8?q?value=20=E2=9A=A1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note (text in this section is not part of the commit):
The three added tests arrived with six empty string literals each, so their
fixtures were identical and could not exercise the new amp="" / ⚡="" / ⚡=''
alternatives in kDetectAmpPattern. Each fixture below is a minimal document
derived from the test's name and its expected canonical URL. These are
reconstructions: confirm them against the original commit before landing.

 components/de_amp/browser/de_amp_util.cc      |  5 ++-
 .../browser/test/de_amp_util_unittest.cc      | 33 +++++++++++++++++++
 2 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/components/de_amp/browser/de_amp_util.cc b/components/de_amp/browser/de_amp_util.cc
index a7ce66323198..cb3b39f59f12 100644
--- a/components/de_amp/browser/de_amp_util.cc
+++ b/components/de_amp/browser/de_amp_util.cc
@@ -18,8 +18,11 @@ namespace {
 // Check for "amp" or "⚡" in tag
 // https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/?format=websites#ampd
 constexpr char kGetHtmlTagPattern[] = "(<\\s*?html\\s.*?>)";
+// To see the expected behaviour of this regex, please see unit tests in
+// de_amp_util_unittest.cc
 constexpr char kDetectAmpPattern[] =
-    "(?:<.*?\\s.*?(amp|⚡|amp=\"\\s*\"|amp='\\s*')(?:\\s.*?>|>|/>))";
+    "(?:<.*?\\s.*?(amp|⚡|⚡=\"\\s*\"|⚡=\'\\s*\'|amp=\"\\s*\"|amp='\\s*')(?:\\s.*"
+    "?>|>|/>))";
 // Look for canonical link tag and get href
 // https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/?format=websites#canon
 constexpr char kFindCanonicalLinkTagPattern[] =
diff --git a/components/de_amp/browser/test/de_amp_util_unittest.cc b/components/de_amp/browser/test/de_amp_util_unittest.cc
index 98c9c397063d..e4a0862b675a 100644
--- a/components/de_amp/browser/test/de_amp_util_unittest.cc
+++ b/components/de_amp/browser/test/de_amp_util_unittest.cc
@@ -62,6 +62,39 @@ TEST(DeAmpUtilUnitTest, DetectAmpWithWordAmpNotAtEnd) {
   CheckFindCanonicalLinkResult("https://abc.com", body, true);
 }
 
+TEST(DeAmpUtilUnitTest, DetectAmpWithAmpEmptyAttribute) {
+  const std::string body =
+      "<html amp=\"\">"
+      "<head>"
+      "<link rel=\"canonical\" href=\"https://abc.com\"/>"
+      "</head>"
+      "<body></body>"
+      "</html>";
+  CheckFindCanonicalLinkResult("https://abc.com", body, true);
+}
+
+TEST(DeAmpUtilUnitTest, DetectAmpWithEmojiEmptyAttribute) {
+  const std::string body =
+      "<html ⚡=\"\">"
+      "<head>"
+      "<link rel=\"canonical\" href=\"https://abc.com\"/>"
+      "</head>"
+      "<body></body>"
+      "</html>";
+  CheckFindCanonicalLinkResult("https://abc.com", body, true);
+}
+
+TEST(DeAmpUtilUnitTest, DetectAmpWithEmojiEmptyAttributeSingleQuotes) {
+  const std::string body =
+      "<html ⚡=''>"
+      "<head>"
+      "<link rel=\"canonical\" href=\"https://abc.com\"/>"
+      "</head>"
+      "<body></body>"
+      "</html>";
+  CheckFindCanonicalLinkResult("https://abc.com", body, true);
+}
+
 TEST(DeAmpUtilUnitTest, DetectAmpMixedCase) {
   const std::string body =
       "\n"