diff --git a/components/de_amp/browser/de_amp_util.cc b/components/de_amp/browser/de_amp_util.cc index a7ce66323198..cb3b39f59f12 100644 --- a/components/de_amp/browser/de_amp_util.cc +++ b/components/de_amp/browser/de_amp_util.cc @@ -18,8 +18,11 @@ namespace { // Check for "amp" or "⚡" in tag // https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/?format=websites#ampd constexpr char kGetHtmlTagPattern[] = "(<\\s*?html\\s.*?>)"; +// To see the expected behaviour of this regex, please see unit tests in +// de_amp_util_unittest.cc constexpr char kDetectAmpPattern[] = - "(?:<.*?\\s.*?(amp|⚡|amp=\"\\s*\"|amp='\\s*')(?:\\s.*?>|>|/>))"; + "(?:<.*?\\s.*?(amp|⚡|⚡=\"\\s*\"|⚡=\'\\s*\'|amp=\"\\s*\"|amp='\\s*')(?:\\s.*" + "?>|>|/>))"; // Look for canonical link tag and get href // https://amp.dev/documentation/guides-and-tutorials/learn/spec/amphtml/?format=websites#canon constexpr char kFindCanonicalLinkTagPattern[] = diff --git a/components/de_amp/browser/test/de_amp_util_unittest.cc b/components/de_amp/browser/test/de_amp_util_unittest.cc index 98c9c397063d..e4a0862b675a 100644 --- a/components/de_amp/browser/test/de_amp_util_unittest.cc +++ b/components/de_amp/browser/test/de_amp_util_unittest.cc @@ -62,6 +62,39 @@ TEST(DeAmpUtilUnitTest, DetectAmpWithWordAmpNotAtEnd) { CheckFindCanonicalLinkResult("https://abc.com", body, true); } +TEST(DeAmpUtilUnitTest, DetectAmpWithAmpEmptyAttribute) { + const std::string body = + "" + "" + "" + "" + "" + ""; + CheckFindCanonicalLinkResult("https://abc.com", body, true); +} + +TEST(DeAmpUtilUnitTest, DetectAmpWithEmojiEmptyAttribute) { + const std::string body = + "" + "" + "" + "" + "" + ""; + CheckFindCanonicalLinkResult("https://abc.com", body, true); +} + +TEST(DeAmpUtilUnitTest, DetectAmpWithEmojiEmptyAttributeSingleQuotes) { + const std::string body = + "" + "" + "" + "" + "" + ""; + CheckFindCanonicalLinkResult("https://abc.com", body, true); +} + TEST(DeAmpUtilUnitTest, DetectAmpMixedCase) { const std::string body = "\n"