diff --git a/bin/amphtml-update.py b/bin/amphtml-update.py index 54656f7724b..d5634490249 100755 --- a/bin/amphtml-update.py +++ b/bin/amphtml-update.py @@ -517,6 +517,7 @@ def GetTagSpec(tag_spec, attr_lists): raise Exception( 'Missing error_message for disallowed_cdata_regex.' ); if entry['error_message'] not in ( 'contents', 'html comments', 'CSS i-amphtml- name prefix' ): raise Exception( 'Unexpected error_message "%s" for disallowed_cdata_regex.' % entry['error_message'] ); + entry['regex'] = EscapeRegex( entry['regex'] ) tag_spec_dict['cdata'] = cdata_dict if 'spec_name' not in tag_spec_dict['tag_spec']: @@ -726,7 +727,7 @@ def GetValues(attr_spec): # Add disallowed value regex if attr_spec.HasField('disallowed_value_regex'): - value_dict['disallowed_value_regex'] = attr_spec.disallowed_value_regex + value_dict['disallowed_value_regex'] = EscapeRegex( attr_spec.disallowed_value_regex ) # dispatch_key is an int if attr_spec.HasField('dispatch_key'): @@ -746,11 +747,11 @@ def GetValues(attr_spec): # value_regex if attr_spec.HasField('value_regex'): - value_dict['value_regex'] = attr_spec.value_regex + value_dict['value_regex'] = EscapeRegex( attr_spec.value_regex ) # value_regex_casei if attr_spec.HasField('value_regex_casei'): - value_dict['value_regex_casei'] = attr_spec.value_regex_casei + value_dict['value_regex_casei'] = EscapeRegex( attr_spec.value_regex_casei ) #value_properties is a dictionary of dictionaries if attr_spec.HasField('value_properties'): @@ -799,6 +800,9 @@ def UnicodeEscape(string): """ return ('' + string).encode('unicode-escape') +def EscapeRegex(string): + return re.sub( r'(? array( 'mandatory' => true, - 'value_regex' => '[^=/?:]+', + 'value_regex' => '[^=\\/?:]+', ), 'data-mode' => array( 'value' => array( @@ -7747,10 +7747,10 @@ class AMP_Allowed_Tags_Generated { ), 'data-amp-bind-data-videoid' => array(), 'data-live-channelid' => array( - 'value_regex' => '[^=/?:]+', + 'value_regex' => '[^=\\/?:]+', ), 'data-videoid' => array( - 'value_regex' => '[^=/?:]+', + 'value_regex' => '[^=\\/?:]+', ), 'dock' => array( 'requires_extension' => array( @@ -12257,7 +12257,7 @@ class AMP_Allowed_Tags_Generated { 'crossorigin' => array(), 'href' => array( 'mandatory' => true, - 'value_regex' => 'https://cdn\\.materialdesignicons\\.com/([0-9]+\\.?)+/css/materialdesignicons\\.min\\.css|https://cloud\\.typography\\.com/[0-9]*/[0-9]*/css/fonts\\.css|https://fast\\.fonts\\.net/.*|https://fonts\\.googleapis\\.com/css2?\\?.*|https://fonts\\.googleapis\\.com/icon\\?.*|https://fonts\\.googleapis\\.com/earlyaccess/.*\\.css|https://maxcdn\\.bootstrapcdn\\.com/font-awesome/([0-9]+\\.?)+/css/font-awesome\\.min\\.css(\\?.*)?|https://(use|pro|kit)\\.fontawesome\\.com/releases/v([0-9]+\\.?)+/css/[0-9a-zA-Z-]+\\.css|https://(use|pro|kit)\\.fontawesome\\.com/[0-9a-zA-Z-]+\\.css|https://use\\.typekit\\.net/[\\w\\p{L}\\p{N}_]+\\.css', + 'value_regex' => 'https:\\/\\/cdn\\.materialdesignicons\\.com\\/([0-9]+\\.?)+\\/css\\/materialdesignicons\\.min\\.css|https:\\/\\/cloud\\.typography\\.com\\/[0-9]*\\/[0-9]*\\/css\\/fonts\\.css|https:\\/\\/fast\\.fonts\\.net\\/.*|https:\\/\\/fonts\\.googleapis\\.com\\/css2?\\?.*|https:\\/\\/fonts\\.googleapis\\.com\\/icon\\?.*|https:\\/\\/fonts\\.googleapis\\.com\\/earlyaccess\\/.*\\.css|https:\\/\\/maxcdn\\.bootstrapcdn\\.com\\/font-awesome\\/([0-9]+\\.?)+\\/css\\/font-awesome\\.min\\.css(\\?.*)?|https:\\/\\/(use|pro|kit)\\.fontawesome\\.com\\/releases\\/v([0-9]+\\.?)+\\/css\\/[0-9a-zA-Z-]+\\.css|https:\\/\\/(use|pro|kit)\\.fontawesome\\.com\\/[0-9a-zA-Z-]+\\.css|https:\\/\\/use\\.typekit\\.net\\/[\\w\\p{L}\\p{N}_]+\\.css', ), 'integrity' => array(), 'media' => array(), diff --git a/includes/sanitizers/class-amp-style-sanitizer.php b/includes/sanitizers/class-amp-style-sanitizer.php index 6660588b58a..8e82f4d0990 100644 --- a/includes/sanitizers/class-amp-style-sanitizer.php +++ b/includes/sanitizers/class-amp-style-sanitizer.php @@ -426,7 +426,7 @@ public function __construct( $dom, array $args = [] ) { $spec_name = 'link rel=stylesheet for fonts'; // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet foreach ( AMP_Allowed_Tags_Generated::get_allowed_tag( 'link' ) as $spec_rule ) { if ( isset( $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) && $spec_name === $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) { - $this->allowed_font_src_regex = '@^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$@'; + $this->allowed_font_src_regex = '/^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$/'; break; } } diff --git a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php index 2086180565f..3105ce9ee76 100644 --- a/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php +++ b/includes/sanitizers/class-amp-tag-and-attribute-sanitizer.php @@ -356,7 +356,7 @@ private function get_rule_spec_list_to_validate( DOMElement $node, $rule_spec ) '', [ '^', - preg_quote( 'https://cdn.ampproject.org/v0/' . $extension_spec['name'] . '-' ), // phpcs:ignore WordPress.PHP.PregQuoteDelimiter.Missing + preg_quote( 'https://cdn.ampproject.org/v0/' . $extension_spec['name'] . '-', '/' ), '(' . implode( '|', array_merge( $extension_spec['version'], [ 'latest' ] ) ) . ')', '\.js$', ] @@ -1784,7 +1784,6 @@ private function check_attr_spec_rule_value_regex( DOMElement $node, $attr_name, // Check 'value_regex' - case sensitive regex match. if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ] ) && $node->hasAttribute( $attr_name ) ) { $rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ]; - $rule_value = str_replace( '/', '\\/', $rule_value ); /* * The regex pattern has '^' and '$' though they are not in the AMP spec. @@ -1818,7 +1817,6 @@ private function check_attr_spec_rule_value_regex_casei( DOMElement $node, $attr // Check 'value_regex_casei' - case insensitive regex match. if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ] ) && $node->hasAttribute( $attr_name ) ) { $rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ]; - $rule_value = str_replace( '/', '\\/', $rule_value ); // See note above regarding the '^' and '$' that are added here. if ( preg_match( '/^(' . $rule_value . ')$/ui', $node->getAttribute( $attr_name ) ) ) {