Skip to content

Commit

Permalink
Escape slashes in spec regexes at ingestion time
Browse files Browse the repository at this point in the history
  • Loading branch information
westonruter committed May 28, 2021
1 parent 85e22e9 commit afd1172
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 11 deletions.
10 changes: 7 additions & 3 deletions bin/amphtml-update.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,7 @@ def GetTagSpec(tag_spec, attr_lists):
raise Exception( 'Missing error_message for disallowed_cdata_regex.' );
if entry['error_message'] not in ( 'contents', 'html comments', 'CSS i-amphtml- name prefix' ):
raise Exception( 'Unexpected error_message "%s" for disallowed_cdata_regex.' % entry['error_message'] );
entry['regex'] = EscapeRegex( entry['regex'] )
tag_spec_dict['cdata'] = cdata_dict

if 'spec_name' not in tag_spec_dict['tag_spec']:
Expand Down Expand Up @@ -726,7 +727,7 @@ def GetValues(attr_spec):

# Add disallowed value regex
if attr_spec.HasField('disallowed_value_regex'):
value_dict['disallowed_value_regex'] = attr_spec.disallowed_value_regex
value_dict['disallowed_value_regex'] = EscapeRegex( attr_spec.disallowed_value_regex )

# dispatch_key is an int
if attr_spec.HasField('dispatch_key'):
Expand All @@ -746,11 +747,11 @@ def GetValues(attr_spec):

# value_regex
if attr_spec.HasField('value_regex'):
value_dict['value_regex'] = attr_spec.value_regex
value_dict['value_regex'] = EscapeRegex( attr_spec.value_regex )

# value_regex_casei
if attr_spec.HasField('value_regex_casei'):
value_dict['value_regex_casei'] = attr_spec.value_regex_casei
value_dict['value_regex_casei'] = EscapeRegex( attr_spec.value_regex_casei )

#value_properties is a dictionary of dictionaries
if attr_spec.HasField('value_properties'):
Expand Down Expand Up @@ -799,6 +800,9 @@ def UnicodeEscape(string):
"""
return ('' + string).encode('unicode-escape')

def EscapeRegex(string):
return re.sub( r'(?<!\\)/', r'\\/', string )

def GetMandatoryOf( attr, constraint ):
"""Gets the attributes with the passed mandatory_*of constraint, if there are any.
Expand Down
8 changes: 4 additions & 4 deletions includes/sanitizers/class-amp-allowed-tags-generated.php

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion includes/sanitizers/class-amp-style-sanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ public function __construct( $dom, array $args = [] ) {
$spec_name = 'link rel=stylesheet for fonts'; // phpcs:ignore WordPress.WP.EnqueuedResources.NonEnqueuedStylesheet
foreach ( AMP_Allowed_Tags_Generated::get_allowed_tag( 'link' ) as $spec_rule ) {
if ( isset( $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) && $spec_name === $spec_rule[ AMP_Rule_Spec::TAG_SPEC ]['spec_name'] ) {
$this->allowed_font_src_regex = '@^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$@';
$this->allowed_font_src_regex = '/^(' . $spec_rule[ AMP_Rule_Spec::ATTR_SPEC_LIST ]['href']['value_regex'] . ')$/';
break;
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,7 @@ private function get_rule_spec_list_to_validate( DOMElement $node, $rule_spec )
'',
[
'^',
preg_quote( 'https://cdn.ampproject.org/v0/' . $extension_spec['name'] . '-' ), // phpcs:ignore WordPress.PHP.PregQuoteDelimiter.Missing
preg_quote( 'https://cdn.ampproject.org/v0/' . $extension_spec['name'] . '-', '/' ),
'(' . implode( '|', array_merge( $extension_spec['version'], [ 'latest' ] ) ) . ')',
'\.js$',
]
Expand Down Expand Up @@ -1784,7 +1784,6 @@ private function check_attr_spec_rule_value_regex( DOMElement $node, $attr_name,
// Check 'value_regex' - case sensitive regex match.
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ] ) && $node->hasAttribute( $attr_name ) ) {
$rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX ];
$rule_value = str_replace( '/', '\\/', $rule_value );

/*
* The regex pattern has '^' and '$' though they are not in the AMP spec.
Expand Down Expand Up @@ -1818,7 +1817,6 @@ private function check_attr_spec_rule_value_regex_casei( DOMElement $node, $attr
// Check 'value_regex_casei' - case insensitive regex match.
if ( isset( $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ] ) && $node->hasAttribute( $attr_name ) ) {
$rule_value = $attr_spec_rule[ AMP_Rule_Spec::VALUE_REGEX_CASEI ];
$rule_value = str_replace( '/', '\\/', $rule_value );

// See note above regarding the '^' and '$' that are added here.
if ( preg_match( '/^(' . $rule_value . ')$/ui', $node->getAttribute( $attr_name ) ) ) {
Expand Down

0 comments on commit afd1172

Please sign in to comment.