From d5c1f80ad91c0d230032781f5c45538a31702853 Mon Sep 17 00:00:00 2001
From: Del Putnam <del@putnams.net>
Date: Mon, 16 Jan 2017 06:36:46 -0500
Subject: [PATCH] Changes to address issue #604. Replaced  with  which uses
 AMP_Allowed_Styles_Generated to validate CSS. Also replaced use of  with a
 custom function  which ignores delimiters in parentheses and quotation marks.

---
 bin/amp_wp_build_styles.py                    | 754 ++++++++++++++++++
 .../class-amp-allowed-styles-generated.php    | 234 ++++++
 .../sanitizers/class-amp-style-sanitizer.php  | 397 ++++++++-
 tests/test-amp-style-sanitizer.php            | 204 +++--
 4 files changed, 1506 insertions(+), 83 deletions(-)
 create mode 100644 bin/amp_wp_build_styles.py
 create mode 100644 includes/sanitizers/class-amp-allowed-styles-generated.php

diff --git a/bin/amp_wp_build_styles.py b/bin/amp_wp_build_styles.py
new file mode 100644
index 00000000000..3c5e3857111
--- /dev/null
+++ b/bin/amp_wp_build_styles.py
@@ -0,0 +1,754 @@
+"""
+This script is used to generate the 'class-amp-allowed-styles-generated.php'
+file that is used by the class AMP_Style_Sanitizer.
+
+Follow the steps below to generate a new version of the allowed styles class:
+
+- Download a copy of the latest AMPHTML repository from github:
+	
+	git clone git@github.com:ampproject/amphtml.git
+
+- Copy this file into the repo's validator subdirectory:
+
+	cp amp_wp_build_styles.py amphtml/validator
+
+- Run the file from the validator subdirectory:
+	cd amphtml/validator;python amp_wp_build_styles.py
+
+- The class-amp-allowed-styles-generated.php will be generated at:
+	amphtml/validator/amp_wp/class-amp-allowed-styles-generated.php
+
+- Copy this file into the amp-wp plugin:
+	cp amp_wp/class-amp-allowed-styles-generated.php /path/to/wordpress/wp-content/plugins/amp-wp/includes/sanitizers/
+
+Then have fun sanitizing your AMP posts!
+"""
+
+import glob
+import logging
+import os
+import platform
+import re
+import shutil
+import subprocess
+import sys
+import tempfile
+import collections
+
+def Die(msg):
+	"""Prints msg to stderr and exits the process with status 1."""
+	print >> sys.stderr, msg
+	sys.exit(1)
+
+def SetupOutDir(out_dir):
+	"""Sets up a clean output directory.
+
+	Args:
+		out_dir: directory name of the output directory. Must not have slashes,
+			dots, etc.
+	"""
+	assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir
+	# Remove any previous output with the standard library instead of `rm -rf`.
+	if os.path.isdir(out_dir) and not os.path.islink(out_dir):
+		shutil.rmtree(out_dir)
+	elif os.path.exists(out_dir):
+		os.remove(out_dir)
+	os.mkdir(out_dir)
+
+
+def GenValidatorPb2Py(out_dir):
+	"""Calls the proto compiler to generate validator_pb2.py.
+
+	Args:
+		out_dir: directory name of the output directory. Must not have slashes,
+			dots, etc.
+	"""
+	# out_dir is interpolated into a protoc flag and a path; allow plain names only.
+	assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir
+
+	subprocess.check_call(['protoc', 'validator.proto',
+												 '--python_out=%s' % out_dir])
+	# Create an empty __init__.py in out_dir (ParseRules imports validator_pb2 from amp_wp).
+	open('%s/__init__.py' % out_dir, 'w').close()
+
+
+def GenValidatorProtoascii(out_dir):
+	"""Assembles the validator protoascii file from the main and extensions.
+
+	The main spec is written first, followed by the extension specs in sorted
+	path order, to <out_dir>/validator.protoascii.
+
+	Args:
+		out_dir: directory name of the output directory. Must not have slashes,
+			dots, etc.
+	"""
+	assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir
+
+	extensions = glob.glob('extensions/*/0.1/validator-*.protoascii')
+	# In the Github project, the extensions are located in a sibling directory
+	# to the validator rather than a child directory.
+	if not extensions:
+		extensions = glob.glob('../extensions/*/0.1/validator-*.protoascii')
+
+	protoascii_segments = []
+	for path in ['validator-main.protoascii'] + sorted(extensions):
+		with open(path) as segment_file:
+			protoascii_segments.append(segment_file.read())
+	with open('%s/validator.protoascii' % out_dir, 'w') as f:
+		f.write(''.join(protoascii_segments))
+
+
+def GeneratePHP(out_dir):
+	"""Generates <out_dir>/class-amp-allowed-styles-generated.php.
+
+	The rules come from ParseRules(out_dir) and are rendered as PHP source lines.
+	The joined text is post-processed: empty `array( )` bodies collapse to
+	`array()` and the quoted strings 'True'/'False' become PHP true/false.
+
+	Args:
+		out_dir: directory name of the output directory. Must not have slashes,
+			dots, etc.
+	"""
+	assert re.match(r'^[a-zA-Z_\-0-9]+$', out_dir), 'bad out_dir: %s' % out_dir
+
+	# attr_lists is returned by ParseRules but is not emitted into this file.
+	allowed_tags, attr_lists, versions = ParseRules(out_dir)
+
+	#Generate the output
+	out = []
+	GenerateHeaderPHP(out)
+	GenerateSpecVersionPHP(out, versions)
+	GenerateAllowedStylesPHP(out, allowed_tags)
+	GenerateFooterPHP(out)
+
+	# join out array into a single string and remove unneeded whitespace
+	output = re.sub("\\(\\s*\\)", "()", '\n'.join(out))
+
+	# replace 'True' with true and 'False' with false
+	output = re.sub("'True'", "true", output)
+	output = re.sub("'False'", "false", output)
+
+	# Write the php file to disk; the with-block closes the file even if the
+	# write raises.
+	with open('%s/class-amp-allowed-styles-generated.php' % out_dir, 'w') as f:
+		f.write(output)
+
+
+def GenerateHeaderPHP(out):
+	"""Appends the PHP open tag, file comment and class declaration to out."""
+	# out is the shared list of output lines; it is extended in place.
+	header_lines = [
+		'<?php',
+		'/**',
+		' * Generated by %s - do not edit.' % os.path.basename(__file__),
+		' *',
+		' * This is a list of CSS attributes that are allowed by the',
+		' * AMP specification.',
+		' */',
+		'class AMP_Allowed_Styles_Generated {',
+		'',
+	]
+	out.extend(header_lines)
+
+
+def GenerateSpecVersionPHP(out, versions):
+	"""Appends the spec/validator revision properties to out.
+
+	A revision that is absent from versions (or falsy) is skipped; ParseRules
+	only sets the keys the spec actually defines, so a lookup must not raise.
+	"""
+	if versions.get('spec_file_revision'):
+		out.append('\tprivate static $spec_file_revision = %d;' % versions['spec_file_revision'])
+	if versions.get('min_validator_revision_required'):
+		out.append('\tprivate static $minimum_validator_revision_required = %d;' % versions['min_validator_revision_required'])
+
+
+def GenerateAllowedStylesPHP(out, allowed_tags):
+	"""Appends the $allowed_styles PHP array to out, one entry per tag.
+
+	Tags are emitted in sorted order of their names.
+	"""
+	out.append('')
+	out.append('\tprivate static $allowed_styles = array(')
+	# Sorting the keys gives the same order as sorting the (key, value) items.
+	for tag in sorted(allowed_tags):
+		GenerateTagPHP(out, tag, allowed_tags[tag])
+	out.append('\t);')
+
+
+def GenerateLayoutAttributesPHP(out, attr_lists):
+	"""Appends the $layout_allowed_attrs PHP array to out."""
+	# NOTE(review): GeneratePHP does not call this function.
+	# Output the attribute list allowed for layouts.
+	out.append('')
+	out.append('\tprivate static $layout_allowed_attrs = array(')
+	# attr_lists must contain '$AMP_LAYOUT_ATTRS' (KeyError otherwise); entries use 2 tabs.
+	GenerateAttributesPHP(out, attr_lists['$AMP_LAYOUT_ATTRS'], 2)
+	out.append('\t);')
+	out.append('')
+
+
+def GenerateGlobalAttributesPHP(out, attr_lists):
+	"""Appends the $globally_allowed_attrs PHP array to out."""
+	# NOTE(review): GeneratePHP does not call this function.
+	# Output the globally allowed attribute list.
+	out.append('')
+	out.append('\tprivate static $globally_allowed_attrs = array(')
+	# attr_lists must contain '$GLOBAL_ATTRS' (KeyError otherwise); entries use 2 tabs.
+	GenerateAttributesPHP(out, attr_lists['$GLOBAL_ATTRS'], 2)
+	out.append('\t);')
+	out.append('')
+
+
+def GenerateTagPHP(out, tag, attributes_list):
+	"""Appends the PHP array for one tag, with one nested array per entry of attributes_list."""
+
+	# The tag name is lower-cased to form the PHP array key.
+	out.append('\t\t\'%s\' => array(' % tag.lower())
+	for attributes in attributes_list:
+		# Each entry is passed to GenerateAttributesPHP at its default indent level.
+		out.append('\t\t\tarray(')
+		GenerateAttributesPHP(out, attributes)
+		out.append('\t\t\t),')
+	out.append('\t\t),')
+
+
+def GenerateAttributesPHP(out, attributes, indent_level = 4):
+	"""Appends one PHP array entry per attribute, sorted by attribute name.
+
+	Args:
+		out: list of output lines, extended in place.
+		attributes: dict mapping an attribute name to its properties dict.
+		indent_level: number of tabs placed before each attribute line.
+	"""
+	indent = '\t' * indent_level
+
+	for attribute in sorted(attributes):
+		out.append('%s\'%s\' => array(' % (indent, attribute.lower()))
+		GeneratePropertiesPHP(out, attributes[attribute])
+		out.append('%s),' % indent)
+
+	# A blank line separates this attribute list from whatever follows.
+	out.append('')
+
+
+def GeneratePropertiesPHP(out, properties, indent_level = 5):
+	"""Appends the PHP lines for one attribute's properties, sorted by property name.
+
+	'html_format' is emitted as one array per value type, scalars (str, bool, int)
+	as quoted strings, and dict or list values as nested arrays.
+	"""
+	indent = ''
+	for i in range(0,indent_level):
+		indent += '\t'
+
+	sorted_properties = sorted(properties.items())
+	for (prop, values) in collections.OrderedDict(sorted_properties).iteritems():
+		if 'html_format' == prop:
+			sorted_values = sorted(values.items())
+			for(value_type, value) in collections.OrderedDict(sorted_values).iteritems():
+				out.append('%s\'%s\' => array(' % (indent, value_type.lower()))
+				GenerateValuesPHP(out, value, 5)
+				out.append('%s),' % indent)
+		# Scalar property: str values are lower-cased before being quoted.
+		elif isinstance(values, (str, bool, int)):
+			if isinstance(values, str):
+				values = values.lower()
+			out.append('%s\'%s\' => \'%s\',' % (indent, prop.lower(), values))
+		else:
+			out.append('%s\'%s\' => array(' % (indent, prop.lower()))
+			if isinstance(values, dict):
+				sorted_values = sorted(values.items())
+				for(value_type, value) in collections.OrderedDict(sorted_values).iteritems():
+					if isinstance(value, (str, bool, int)):
+						if isinstance(value, str):
+							value = value.lower()
+						out.append('%s\t\'%s\' => \'%s\',' % (indent, value_type, value))
+					else:
+						out.append('%s\t\'%s\' => array(' % (indent, value_type.lower()))
+						GenerateValuesPHP(out, value, 7)
+						out.append('%s\t),' % indent)
+			if isinstance(values, list):
+				for value in values:
+					if isinstance(value, dict):
+						out.append('%sarray(' % indent)
+						sorted_items = sorted(value.items())
+						for (k,v) in collections.OrderedDict(sorted_items).iteritems():
+							out.append('%s\t\'%s\' => \'%s\',' % (indent, k, v))
+						out.append('%s),' % indent)
+
+					elif isinstance(value, (str,int,bool)):
+						out.append('%s\t\'%s\',' % (indent,value))
+
+			out.append('%s),' % indent)
+
+
+def GenerateValuesPHP(out, values, indent_level = 6):
+	"""Appends PHP lines for a dict or list of values; other types append nothing.
+
+	Args:
+		out: list of output lines, extended in place.
+		values: dict (emitted sorted by key) or list (emitted sorted) of values.
+		indent_level: number of tabs in the base indent.
+	"""
+	indent = ''
+	for i in range(0, indent_level):
+		indent += '\t'
+
+	if isinstance(values, dict):
+		sorted_values = sorted(values.items())
+		for (key, value) in collections.OrderedDict(sorted_values).iteritems():
+
+			# str/bool values become a quoted scalar entry.
+			if isinstance(value, (str, bool)):
+				out.append('%s\'%s\' => \'%s\',' % (indent, key.lower(), value))
+
+			# list values become a nested array of sorted, quoted items.
+			if isinstance(value, list):
+				out.append('%s\'%s\' => array(' % (indent, key.lower()))
+				sorted_value = sorted(value)
+				for v in sorted_value:
+					out.append('%s\t\'%s\',' % (indent, v))
+				out.append('%s),' % indent)
+
+	elif isinstance(values, list):
+		sorted_values = sorted(values)
+		for v in sorted_values:
+			# List items are lower-cased and indented one tab past the base indent.
+			out.append('%s\t\'%s\',' % (indent, v.lower()))
+
+
+def GenerateFooterPHP(out):
+	"""Appends the accessor methods, the class closing brace and `?>` to out.
+
+	Two static PHP methods are emitted:
+	  - get_allowed_styles() returns the whole $allowed_styles array.
+	  - get_custom_styles_for_amp_html() returns the first 'style' entry that
+	    has an 'amp-custom' attribute and lists 'amp' in its html_format.
+
+	Args:
+		out: list of output lines, extended in place.
+	"""
+
+	footer_lines = [
+		# Accessor for the complete table.
+		'\tpublic static function get_allowed_styles() {',
+		'\t\treturn self::$allowed_styles;',
+		'\t}',
+		'',
+		# Accessor for the <style amp-custom> spec of the 'amp' format.
+		'\tpublic static function get_custom_styles_for_amp_html() {',
+		"\t\tforeach ( self::$allowed_styles['style'] as $value) {",
+		"\t\t\tif ( isset( $value['attr_spec_list']['amp-custom'] ) &&",
+		"\t\t\t\tin_array( 'amp', $value['tag_spec']['html_format'] ) ) {",
+		'\t\t\t\treturn $value;',
+		'\t\t\t}',
+		'\t\t}',
+		'\t}',
+		'',
+		# Close the class and the PHP block.
+		'}',
+		'',
+		'?>',
+		'',
+	]
+	out.extend(footer_lines)
+
+
+def ParseRules(out_dir):
+	"""Parses <out_dir>/validator.protoascii; returns (allowed_tags, attr_lists, versions)."""
+
+	# These imports happen late, within this method, because they don't necessarily
+	# exist when the module starts running: validator_pb2 is generated into out_dir
+	# by GenValidatorPb2Py (Main uses 'amp_wp', the package name imported below).
+	from google.protobuf import text_format
+	from amp_wp import validator_pb2
+	import validator_gen_md
+
+	allowed_tags = {}
+	attr_lists = {}
+	versions = {}
+
+	specfile='%s/validator.protoascii' % out_dir
+	# NOTE(review): the two self-assignments below are no-ops.
+	validator_pb2=validator_pb2
+	text_format=text_format
+
+	# Merge specfile with message buffers.
+	rules = validator_pb2.ValidatorRules()
+	text_format.Merge(open(specfile).read(), rules)
+
+	# Record the version of this specfile and the corresponding validator version.
+	if rules.HasField('spec_file_revision'):
+		versions['spec_file_revision'] = rules.spec_file_revision
+
+	if rules.HasField('min_validator_revision_required'):
+		versions['min_validator_revision_required'] = rules.min_validator_revision_required
+
+	# Build a dictionary of the named attribute lists that are used by multiple tags.
+	for (field_desc, field_val) in rules.ListFields():
+		if 'attr_lists' == field_desc.name:
+			for attr_spec in field_val:
+				attr_lists[UnicodeEscape(attr_spec.name)] = GetAttrs(attr_spec.attrs)
+
+	# Build a dictionary of allowed tags and an associated list of their allowed
+	# attributes, values and other criteria.
+
+	# Don't include tags that have a mandatory parent with one of these tag names
+	# since we're only concerned with using this tag list to validate the body
+	# of the DOM
+	mandatory_parent_blacklist = [
+		'$ROOT',
+		'!DOCTYPE',
+		'HTML',
+		'HEAD',
+	]
+	# NOTE(review): mandatory_parent_blacklist is only referenced by the commented-out check below.
+	for (field_desc, field_val) in rules.ListFields():
+		if 'tags' == field_desc.name:
+			for tag_spec in field_val:
+
+				# Ignore tags that are outside of the body
+				#if tag_spec.HasField('mandatory_parent') and tag_spec.mandatory_parent in mandatory_parent_blacklist and tag_spec.tag_name != 'BODY':
+				#	continue
+					
+				if tag_spec.tag_name != 'STYLE':
+					continue
+
+				# Ignore the special $REFERENCE_POINT tag
+				# if '$REFERENCE_POINT' == tag_spec.tag_name:
+				# 	continue
+
+				# Ignore deprecated tags
+				if tag_spec.HasField('deprecation'):
+					continue
+
+				# If we made it here, then start adding the tag_spec
+				if tag_spec.tag_name not in allowed_tags:
+					tag_list = []
+				else:
+					tag_list = allowed_tags[UnicodeEscape(tag_spec.tag_name)]
+				# AddTag(allowed_tags, tag_spec, attr_lists)
+				tag_list.append(GetTagSpec(tag_spec, attr_lists))
+				allowed_tags[UnicodeEscape(tag_spec.tag_name)] = tag_list
+
+	# versions only holds the revision keys that are set in the spec file.
+	return allowed_tags, attr_lists, versions
+
+
+def GetTagSpec(tag_spec, attr_lists):
+	"""Returns a dict with the tag rules, attribute rules and CDATA rules of tag_spec."""
+
+	tag_dict = GetTagRules(tag_spec)
+	attr_dict = GetAttrs(tag_spec.attrs)
+	cdata_dict = GetCdataRules(tag_spec.cdata)
+
+	# Now add attributes from any attribute lists to this tag.
+	# An attribute from a named list replaces a same-named one defined on the tag
+	# itself (dict.update); a list name missing from attr_lists raises KeyError.
+	for (tag_field_desc, tag_field_val) in tag_spec.ListFields():
+		if 'attr_lists' == tag_field_desc.name:
+			for attr_list in tag_field_val:
+				attr_dict.update(attr_lists[UnicodeEscape(attr_list)])
+
+	# These keys become the top-level keys of each generated PHP entry.
+	return {'tag_spec':tag_dict, 'attr_spec_list':attr_dict, 'cdata_spec_list':cdata_dict}
+
+
+def GetTagRules(tag_spec):
+	"""Collects the tag-level rules of tag_spec into a dict.
+
+	Repeated fields are wrapped as {field: [values]}; optional fields are copied
+	only when set (string values are passed through UnicodeEscape).
+	"""
+	tag_rules = {}
+
+	if tag_spec.also_requires_tag:
+		also_requires_tag_list = []
+		for also_requires_tag in tag_spec.also_requires_tag:
+			also_requires_tag_list.append(UnicodeEscape(also_requires_tag))
+		tag_rules['also_requires_tag'] = {'also_requires_tag': also_requires_tag_list}
+
+	if tag_spec.disallowed_ancestor:
+		disallowed_ancestor_list = []
+		for disallowed_ancestor in tag_spec.disallowed_ancestor:
+			disallowed_ancestor_list.append(UnicodeEscape(disallowed_ancestor))
+		tag_rules['disallowed_ancestor'] = {'disallowed_ancestor': disallowed_ancestor_list}
+	# html_format values: 1 -> 'amp', 2 -> 'amp4ads'; any other value is dropped.
+	if tag_spec.html_format:
+		html_format_list = []
+		for html_format in tag_spec.html_format:
+			if 1 == html_format:
+				html_format_list.append('amp')
+			elif 2 == html_format:
+				html_format_list.append('amp4ads')
+		tag_rules['html_format'] = {'html_format': html_format_list}
+
+	if tag_spec.HasField('mandatory'):
+		tag_rules['mandatory'] = tag_spec.mandatory
+
+	if tag_spec.HasField('mandatory_alternatives'):
+		tag_rules['mandatory_alternatives'] = UnicodeEscape(tag_spec.mandatory_alternatives)
+
+	if tag_spec.HasField('mandatory_ancestor'):
+		tag_rules['mandatory_ancestor'] = UnicodeEscape(tag_spec.mandatory_ancestor)
+
+	if tag_spec.HasField('mandatory_ancestor_suggested_alternative'):
+		tag_rules['mandatory_ancestor_suggested_alternative'] = UnicodeEscape(tag_spec.mandatory_ancestor_suggested_alternative)
+
+	if tag_spec.HasField('mandatory_parent'):
+		tag_rules['mandatory_parent'] = UnicodeEscape(tag_spec.mandatory_parent)
+
+	if tag_spec.HasField('spec_name'):
+		tag_rules['spec_name'] = UnicodeEscape(tag_spec.spec_name)
+
+	if tag_spec.HasField('spec_url'):
+		tag_rules['spec_url'] = UnicodeEscape(tag_spec.spec_url)
+
+	if tag_spec.HasField('unique'):
+		tag_rules['unique'] = tag_spec.unique
+
+	if tag_spec.HasField('unique_warning'):
+		tag_rules['unique_warning'] = tag_spec.unique_warning
+
+	return tag_rules
+
+
+def GetAttrs(attrs):
+	"""Builds a dict mapping each attribute's escaped name to its value rules.
+
+	Args:
+		attrs: iterable of attribute specs, each with a `name` and the fields
+			read by GetValues.
+	Returns:
+		dict of UnicodeEscape(attr_spec.name) -> GetValues(attr_spec).
+	"""
+	rules_by_name = {}
+	for spec in attrs:
+		rules_by_name[UnicodeEscape(spec.name)] = GetValues(spec)
+	return rules_by_name
+
+
+def GetValues(attr_spec):
+	"""Collects the value rules that are set on attr_spec into a dict."""
+
+	value_dict = {}
+
+	# Add alternative names
+	if attr_spec.alternative_names:
+		alt_names_list = []
+		for alternative_name in attr_spec.alternative_names:
+			alt_names_list.append(UnicodeEscape(alternative_name))
+		value_dict['alternative_names'] = {'alternative_names': alt_names_list}
+
+	# Add blacklisted value regex
+	if attr_spec.HasField('blacklisted_value_regex'):
+		value_dict['blacklisted_value_regex'] = UnicodeEscape(attr_spec.blacklisted_value_regex)
+
+	# dispatch_key is a boolean
+	if attr_spec.HasField('dispatch_key'):
+		value_dict['dispatch_key'] = attr_spec.dispatch_key
+
+	# mandatory is a boolean
+	if attr_spec.HasField('mandatory'):
+		value_dict['mandatory'] = attr_spec.mandatory
+
+	# Add allowed value
+	if attr_spec.HasField('value'):
+		value_dict['value'] = UnicodeEscape(attr_spec.value)
+
+	# value_casei
+	if attr_spec.HasField('value_casei'):
+		value_dict['value_casei'] = UnicodeEscape(attr_spec.value_casei)
+
+	# value_regex
+	if attr_spec.HasField('value_regex'):
+		value_dict['value_regex'] = UnicodeEscape(attr_spec.value_regex)
+
+	# value_regex_casei
+	if attr_spec.HasField('value_regex_casei'):
+		value_dict['value_regex_casei'] = UnicodeEscape(attr_spec.value_regex_casei)
+
+	#value_properties is a dictionary of dictionaries
+	if attr_spec.HasField('value_properties'):
+		value_properties_dict = {}
+		for (value_properties_key, value_properties_val) in attr_spec.value_properties.ListFields():
+			for value_property in value_properties_val:
+				property_dict = {}
+				# Copy each set field whose value differs from the property's name.
+				for (key,val) in value_property.ListFields():
+					if val != value_property.name:
+						if isinstance(val, unicode):
+							val = UnicodeEscape(val)
+						property_dict[UnicodeEscape(key.name)] = val
+				value_properties_dict[UnicodeEscape(value_property.name)] = property_dict
+		value_dict['value_properties'] = value_properties_dict
+
+	# value_url is a dictionary
+	if attr_spec.HasField('value_url'):
+		value_url_dict = {}
+		for (value_url_key, value_url_val) in attr_spec.value_url.ListFields():
+			if isinstance(value_url_val, (list, collections.Sequence)):
+				value_url_val_val = []
+				for val in value_url_val:
+					value_url_val_val.append(UnicodeEscape(val))
+			else:
+				value_url_val_val = value_url_val
+			value_url_dict[value_url_key.name] = value_url_val_val
+		value_dict['value_url'] = value_url_dict
+
+	# A key is present only when the corresponding field passed its check above.
+	return value_dict
+
+def GetCdataRules(cdata):
+	"""Collects the set fields of cdata (scalars, css_spec, blacklisted regexes) into a dict."""
+
+	cdata_rules = {}
+
+	for (key,val) in cdata.ListFields():
+
+		if isinstance(val, (str,bool,int)):
+			cdata_rules[key.name] = val
+
+		elif isinstance(val, unicode):
+			cdata_rules[key.name] = UnicodeEscape(val)
+
+		elif 'css_spec' == key.name:
+			css_spec = {}
+			for (k,v) in val.ListFields():
+
+				if 'at_rule_spec' == k.name:
+					if k.name not in css_spec:
+						at_rule_spec = []
+					else:
+						at_rule_spec = css_spec[UnicodeEscape(k.name)]
+
+					for vv in v:
+						if vv.HasField('name'):
+							at_rule_spec.append(vv.name)
+
+					css_spec[UnicodeEscape(k.name)] = at_rule_spec
+
+				elif 'image_url_spec' == k.name or 'font_url_spec' == k.name:
+					if k.name not in css_spec:
+						url_spec = {}
+					else:
+						url_spec = css_spec[UnicodeEscape(k.name)]
+
+					for (usk,usv) in v.ListFields():
+
+						if isinstance(usv,(str,bool,int)):
+							url_spec[UnicodeEscape(usk.name)] = usv
+
+						elif isinstance(usv,(unicode)):
+							url_spec[UnicodeEscape(usk.name)] = UnicodeEscape(usv)
+
+
+						elif isinstance(usv, (list,collections.Sequence)):
+							if usk.name not in url_spec:
+								url_spec_list = []
+							else:
+								url_spec_list = url_spec[UnicodeEscape(usk.name)]
+
+							for usvv in usv:
+								url_spec_list.append(UnicodeEscape(usvv))
+
+							url_spec[UnicodeEscape(usk.name)] = url_spec_list
+
+					css_spec[UnicodeEscape(k.name)] = url_spec
+
+			cdata_rules[key.name] = css_spec
+
+		elif 'blacklisted_cdata_regex' == key.name:
+			blacklisted_cdata_regex_list = []
+			for v in val:
+				blacklisted_cdata_regex_list.append(UnicodeEscape(v.regex))
+
+			cdata_rules[key.name] = blacklisted_cdata_regex_list				
+
+
+
+
+		# NOTE(review): the commented-out lines below look like an earlier draft of the branches above.
+		# 		if 'validate_amp4ads' == k.name:
+		# 			css_spec[k.name] = v
+		# 		if 'at_rule_spec' == k.name:
+		# 			at_rule_spec = []
+		# 			for vv in v:
+		# 				at_rule_spec_fields = {}
+		# 				for (arsk,arsv) in vv.ListFields():
+		# 					at_rule_spec_fields[arsk.name] = arsv
+		# 			css_spec[k.name] = at_rule_spec_fields
+		# 		elif ('font_url_spec' == k.name) or ('image_url_spec' == k.name):
+		# 			url_spec = {}
+		# 			for (usk,usv) in v.ListFields():
+		# 				if isinstance(usv, collections.Sequence):
+		# 					usv_list = []
+		# 					for usvv in usv:
+		# 						usv_list.append(UnicodeEscape(usvv))
+		# 					url_spec[usk.name] = usv_list
+		# 				else:
+		# 					url_spec[usk.name] = usv
+		# 			css_spec[k.name] = url_spec
+		# 	cdata_rules[key.name] = css_spec
+		# elif 'blacklisted_cdata_regex' == key.name:
+		# 	bcr_list = []
+		# 	for v in val:
+		# 		bcr_list_fields = {}
+		# 		for (kk,vv) in v.ListFields():
+		# 			bcr_list_fields[kk.name] = vv
+		# 		bcr_list.append(bcr_list_fields)
+		# 	cdata_rules[key.name] = bcr_list
+		# elif isinstance(val, bool):
+		# 	cdata_rules[key.name] = val
+		# elif isinstance(val, unicode):
+		# 	cdata_rules[key.name] = UnicodeEscape(val)
+		# elif isinstance(val, str):
+		# 	cdata_rules[key.name] = val
+		# elif isinstance(val, int):
+		# 	cdata_rules[key.name] = val
+
+	return cdata_rules
+
+def GetAtRuleSpec(at_rule_spec):
+	"""Debug helper: prints the name of each at-rule spec together with the name's type."""
+	for v in at_rule_spec:
+
+		print "%s %s" % (v.name, type(v.name))
+
+def GetCssSpec(css_spec):
+	"""Returns the scalar fields set on css_spec as a {field name: value} dict."""
+	rules = {}
+	for (k,v) in css_spec.ListFields():
+		if isinstance(v, (str,bool,int)):
+			rules[k.name] = v
+		elif isinstance(v, unicode):
+			rules[k.name] = UnicodeEscape(v)
+	return rules
+
+def UnicodeEscape(string):
+	"""Helper function which escapes unicode characters.
+
+	Args:
+		string: A str or unicode value (anything else fails at `'' + string`).
+	Returns:
+		The value encoded with the 'unicode-escape' codec.
+	"""
+	return ('' + string).encode('unicode-escape')
+
+
+def Main():
+	"""The main method, which executes all build steps in order."""
+	logging.basicConfig(
+			format='[[%(filename)s %(funcName)s]] - %(message)s', level=logging.INFO)
+
+	out_dir = 'amp_wp'
+
+	SetupOutDir(out_dir)
+	GenValidatorProtoascii(out_dir)
+	GenValidatorPb2Py(out_dir)
+	# The protoascii file is assembled once above; a second pass would only rewrite it.
+	GeneratePHP(out_dir)
+
+if __name__ == '__main__':
+	Main()
diff --git a/includes/sanitizers/class-amp-allowed-styles-generated.php b/includes/sanitizers/class-amp-allowed-styles-generated.php
new file mode 100644
index 00000000000..62218f4496b
--- /dev/null
+++ b/includes/sanitizers/class-amp-allowed-styles-generated.php
@@ -0,0 +1,234 @@
+<?php
+/**
+ * Generated by amp_wp_build_styles.py - do not edit.
+ *
+ * This is a list of CSS attributes that are allowed by the
+ * AMP specification.
+ */
+class AMP_Allowed_Styles_Generated {
+
+	private static $spec_file_revision = 325;
+	private static $minimum_validator_revision_required = 189;
+
+	private static $allowed_styles = array(
+		'style' => array(
+			array(
+				'attr_spec_list' => array(
+					'amp-custom' => array(
+						'mandatory' => true,
+						'value' => '',
+					),
+					'type' => array(
+						'value_casei' => 'text/css',
+					),
+				),
+				'cdata_spec_list' => array(
+					'blacklisted_cdata_regex' => array(
+						'<!--',
+						'\\.i?-amp-',
+						'(^|\\W)i-amphtml-',
+						'!important',
+					),
+					'css_spec' => array(
+						'at_rule_spec' => array(
+								'$default',
+								'-moz-keyframes',
+								'-o-keyframes',
+								'-webkit-keyframes',
+								'font-face',
+								'keyframes',
+								'media',
+								'supports',
+						),
+						'font_url_spec' => array(
+							'allow_empty' => true,
+							'allow_relative' => true,
+							'allowed_protocol' => array(
+								'data',
+								'http',
+								'https',
+							),
+						),
+						'image_url_spec' => array(
+							'allow_empty' => true,
+							'allow_relative' => true,
+							'allowed_protocol' => array(
+								'absolute',
+								'data',
+								'http',
+								'https',
+							),
+						),
+					),
+					'max_bytes' => '50000',
+					'max_bytes_spec_url' => 'https://www.ampproject.org/docs/reference/spec.html#maximum-size',
+				),
+				'tag_spec' => array(
+					'html_format' => array(
+						'amp',
+					),
+					'mandatory_parent' => 'head',
+					'spec_name' => 'style amp-custom',
+					'spec_url' => 'https://www.ampproject.org/docs/reference/spec.html#stylesheets',
+					'unique' => true,
+				),
+
+			),
+			array(
+				'attr_spec_list' => array(
+					'amp-custom' => array(
+						'mandatory' => true,
+						'value' => '',
+					),
+					'type' => array(
+						'value_casei' => 'text/css',
+					),
+				),
+				'cdata_spec_list' => array(
+					'blacklisted_cdata_regex' => array(
+						'<!--',
+						'\\.i?-amp-',
+						'(^|\\W)i-amphtml-',
+						'!important',
+					),
+					'css_spec' => array(
+						'at_rule_spec' => array(
+								'$default',
+								'-moz-keyframes',
+								'-o-keyframes',
+								'-webkit-keyframes',
+								'font-face',
+								'keyframes',
+								'media',
+								'supports',
+						),
+						'font_url_spec' => array(
+							'allow_empty' => true,
+							'allow_relative' => true,
+							'allowed_protocol' => array(
+								'data',
+								'http',
+								'https',
+							),
+						),
+						'image_url_spec' => array(
+							'allow_empty' => true,
+							'allow_relative' => true,
+							'allowed_protocol' => array(
+								'absolute',
+								'data',
+								'http',
+								'https',
+							),
+						),
+					),
+					'max_bytes' => '20000',
+					'max_bytes_spec_url' => 'https://github.com/ampproject/amphtml/blob/master/extensions/amp-a4a/amp-a4a-format.md#css',
+				),
+				'tag_spec' => array(
+					'html_format' => array(
+						'amp4ads',
+					),
+					'mandatory_parent' => 'head',
+					'spec_name' => 'style amp-custom (amp4ads)',
+					'spec_url' => 'https://github.com/ampproject/amphtml/blob/master/extensions/amp-a4a/amp-a4a-format.md#css',
+					'unique' => true,
+				),
+
+			),
+			array(
+				'attr_spec_list' => array(
+					'amp-boilerplate' => array(
+						'dispatch_key' => true,
+						'mandatory' => true,
+						'value' => '',
+					),
+				),
+				'cdata_spec_list' => array(
+					'cdata_regex' => '\\s*body{-webkit-animation:-amp-start\\s+8s\\s+steps\\(1,end\\)\\s+0s\\s+1\\s+normal\\s+both;-moz-animation:-amp-start\\s+8s\\s+steps\\(1,end\\)\\s+0s\\s+1\\s+normal\\s+both;-ms-animation:-amp-start\\s+8s\\s+steps\\(1,end\\)\\s+0s\\s+1\\s+normal\\s+both;animation:-amp-start\\s+8s\\s+steps\\(1,end\\)\\s+0s\\s+1\\s+normal\\s+both}@-webkit-keyframes\\s+-amp-start{from{visibility:hidden}to{visibility:visible}}@-moz-keyframes\\s+-amp-start{from{visibility:hidden}to{visibility:visible}}@-ms-keyframes\\s+-amp-start{from{visibility:hidden}to{visibility:visible}}@-o-keyframes\\s+-amp-start{from{visibility:hidden}to{visibility:visible}}@keyframes\\s+-amp-start{from{visibility:hidden}to{visibility:visible}}\\s*',
+				),
+				'tag_spec' => array(
+					'also_requires_tag' => array(
+						'also_requires_tag' => array(
+								'noscript > style[amp-boilerplate]',
+						),
+					),
+					'html_format' => array(
+						'amp',
+					),
+					'mandatory_alternatives' => 'head > style[amp-boilerplate]',
+					'mandatory_parent' => 'head',
+					'spec_name' => 'head > style[amp-boilerplate]',
+					'spec_url' => 'https://github.com/ampproject/amphtml/blob/master/spec/amp-boilerplate.md',
+					'unique' => true,
+				),
+
+			),
+			array(
+				'attr_spec_list' => array(
+					'amp4ads-boilerplate' => array(
+						'dispatch_key' => true,
+						'mandatory' => true,
+						'value' => '',
+					),
+				),
+				'cdata_spec_list' => array(
+					'cdata_regex' => '\\s*body{visibility:hidden}\\s*',
+				),
+				'tag_spec' => array(
+					'html_format' => array(
+						'amp4ads',
+					),
+					'mandatory_parent' => 'head',
+					'spec_name' => 'head > style[amp4ads-boilerplate]',
+					'spec_url' => 'https://github.com/ampproject/amphtml/blob/master/extensions/amp-a4a/amp-a4a-format.md#boilerplate',
+					'unique' => true,
+				),
+
+			),
+			array(
+				'attr_spec_list' => array(
+					'amp-boilerplate' => array(
+						'dispatch_key' => true,
+						'mandatory' => true,
+						'value' => '',
+					),
+				),
+				'cdata_spec_list' => array(
+					'cdata_regex' => '\\s*body{-webkit-animation:none;-moz-animation:none;-ms-animation:none;animation:none}\\s*',
+				),
+				'tag_spec' => array(
+					'also_requires_tag' => array(
+						'also_requires_tag' => array(
+								'head > style[amp-boilerplate]',
+						),
+					),
+					'html_format' => array(
+						'amp',
+					),
+					'mandatory_alternatives' => 'noscript > style[amp-boilerplate]',
+					'mandatory_parent' => 'noscript',
+					'spec_name' => 'noscript > style[amp-boilerplate]',
+					'spec_url' => 'https://github.com/ampproject/amphtml/blob/master/spec/amp-boilerplate.md',
+					'unique' => true,
+				),
+
+			),
+		),
+	);
+	public static function get_allowed_styles() {
+		return self::$allowed_styles;
+	}
+
+	public static function get_custom_styles_for_amp_html() {
+		foreach ( self::$allowed_styles['style'] as $value) {
+			if ( isset( $value['attr_spec_list']['amp-custom'] ) &&
+				in_array( 'amp', $value['tag_spec']['html_format'] ) ) {
+				return $value;
+			}
+		}
+	}
+
+}
+
+?>
diff --git a/includes/sanitizers/class-amp-style-sanitizer.php b/includes/sanitizers/class-amp-style-sanitizer.php
index 86faf50bbea..94bf452c04c 100644
--- a/includes/sanitizers/class-amp-style-sanitizer.php
+++ b/includes/sanitizers/class-amp-style-sanitizer.php
@@ -1,22 +1,60 @@
 <?php
 
 require_once( AMP__DIR__ . '/includes/sanitizers/class-amp-base-sanitizer.php' );
+require_once( AMP__DIR__ . '/includes/sanitizers/class-amp-allowed-styles-generated.php' );
 
 /**
  * Collects inline styles and outputs them in the amp-custom stylesheet.
  */
 class AMP_Style_Sanitizer extends AMP_Base_Sanitizer {
+
+	/**
+	 * The array generated from the AMP spec related to CSS styles. This array
+	 * comes from the $allowed_styles array in the AMP_Allowed_Styles_Generated
+	 * class that is generated by the amp_wp_build_styles.py script.
+	 * @var array
+	 */
+	private $allowed_custom_styles;
+
+	/**
+	 * Holds a list of sanitized CSS declarations
+	 * @var array
+	 */
 	private $styles = array();
 
+
+	public function __construct( $dom, $args = array() ) {
+		parent::__construct( $dom, $args );
+
+		// Load the amp-custom style spec from AMP_Allowed_Styles_Generated, passed through the 'amp_allowed_custom_styles' filter.
+		$this->allowed_custom_styles = apply_filters( 'amp_allowed_custom_styles', AMP_Allowed_Styles_Generated::get_custom_styles_for_amp_html() );
+	}
+
+	/**
+	 * Allows an external function to request the array of sanitized CSS declarations.
+	 * 
+	 * @return array The list of sanitized CSS declarations.
+	 */
 	public function get_styles() {
 		return $this->styles;
 	}
 
+	/**
+	 * This starts the sanitizer running against the body of the DOM specified
+	 * in the constructor.
+	 */
 	public function sanitize() {
 		$body = $this->get_body_node();
 		$this->collect_styles_recursive( $body );
 	}
 
+	/**
+	 * Recursively loops over each node in the body of the DOM and collects all inline styles
+	 * which are removed from the body node, sanitized, and added to the $styles
+	 * array.
+	 * 
+	 * @param  DOMElement $node The root node to start parsing from (should be <body>).
+	 */
 	private function collect_styles_recursive( $node ) {
 		if ( $node->nodeType !== XML_ELEMENT_NODE ) {
 			return;
@@ -31,7 +69,6 @@ private function collect_styles_recursive( $node ) {
 				if ( ! empty( $style ) ) {
 					$class_name = $this->generate_class_name( $style );
 					$new_class  = trim( $class . ' ' . $class_name );
-
 					$node->setAttribute( 'class', $new_class );
 					$this->styles[ '.' . $class_name ] = $style;
 				}
@@ -43,64 +80,362 @@ private function collect_styles_recursive( $node ) {
 		$length = $node->childNodes->length;
 		for ( $i = $length - 1; $i >= 0; $i -- ) {
 			$child_node = $node->childNodes->item( $i );
-
 			$this->collect_styles_recursive( $child_node );
 		}
 	}
 
-	private function process_style( $string ) {
-		// Filter properties
-		$string = safecss_filter_attr( esc_html( $string ) );
+	/**
+	 * Given a CSS declaration block (without curly braces), this function will
+	 * break the block down into individual declarations and sanitize each one, 
+	 * returning an array of sanitized CSS declarations.
+	 * 
+	 * The following explanation is meant to help clarify the terms used in this
+	 * function: A CSS rule-set consists of a selector and a declaration block. 
+	 * Each declaration block contains one or more declarations separated by a
+	 * semicolon (;); the semicolon after the last declaration is optional. A
+	 * declaration consists of a property and a value, separated by a colon (:).
+	 *
+	 * See: https://www.w3.org/TR/css-syntax-3/#qualified-rule
+ 	 *
+ 	 *                      CSS rule-set
+ 	 * vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ 	 *   selector                declaration block
+ 	 * vvvvvvvvvvv vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
+ 	 *                       declaration         declaration 
+	 *               vvvvvvvvvvvvvvvvvvvvvvvvv vvvvvvvvvvvvvvv
+	 * .class-name { background-color:#0b0b0b; font-size:16px; }
+	 *               ^^^^^^^^^^^^^^^^ ^^^^^^^
+	 *                   property      value
+	 * 
+	 * @param  string $declaration_block A CSS declaration block (see above)
+	 * @return array                     A list of sanitized CSS declarations.
+	 */
+	private function process_style( $declaration_block ) {
 
-		if ( ! $string ) {
+		if ( empty( $declaration_block ) ) {
 			return array();
 		}
 
-		// Normalize order
-		$styles = array_map( 'trim', explode( ';', $string ) );
-		sort( $styles );
+		// Split the declaration block into individual (already trimmed) declarations.
+		$declarations = self::css_explode( $declaration_block );
+
+		// Sort declarations to normalize their order.
+		sort( $declarations );
 
-		$processed_styles = array();
+		/**
+		 * Ensure that each declaration has exactly two parts (a property
+		 * and a value), rebuild it as "property:value", and filter it
+		 * using rules from the AMP specification.
+		 */
+		$processed_declarations = array();
+		foreach ( $declarations as $declaration ) {
+			// Split a declaration into a property and a value and trim whitespace.
+			$args = array( 'separator' => ':' );
+			$declaration_parts = self::css_explode( $declaration, $args );
 
-		// Normalize whitespace and filter rules
-		foreach ( $styles as $index => $rule ) {
-			$arr2 = array_map( 'trim', explode( ':', $rule, 2 ) );
-			if ( 2 !== count( $arr2 ) ) {
+			// If there are not exactly two parts to this declaration,
+			// skip it and move on to the next declaration.
+			if ( 2 !== count( $declaration_parts ) ) {
 				continue;
 			}
+			$declaration = $declaration_parts[0] . ':' . $declaration_parts[1];
 
-			list( $property, $value ) = $this->filter_style( $arr2[0], $arr2[1] );
-			if ( empty( $property ) || empty( $value ) ) {
+			// Sanitize this declaration; skip it entirely if sanitization
+			// returns an empty string.
+			$declaration = $this->sanitize_amp_custom_style( $declaration );
+			if ( empty( $declaration ) ) {
 				continue;
 			}
 
-			$processed_styles[ $index ] = $property . ':' . $value;
+			$processed_declarations[] = $declaration;
+		}
+
+		return $processed_declarations;
+	}
+
+	/**
+	 * Given a CSS declaration or declaration block, break the string into an 
+	 * array of substrings using the separator specified in the args.
+	 *
+	 * This function works similarly to the php explode() function, but it will
+	 * ignore separators that are inside a quoted string or inside brackets.
+	 *
+	 * Consider the following CSS declaration block:
+	 *
+	 * height:44px;background:url(data:image/png;base64,iVBORw0K...SuQmCC);
+	 *
+	 * To split this block into individual declarations, we want to split on
+	 * the semicolon between '..44px' and 'background...', but not on the semicolon
+	 * between '...png' and 'base64...'. This function will ignore the second
+	 * semicolon because it is inside the parentheses in the 'url(...)' value.
+	 *
+	 * Similarly, when splitting a declaration into a property and a value, it is
+	 * possible to specify a colon as the separator in the $args and the 'background'
+	 * declaration in the example above would be split into:
+	 * 
+	 * array (
+	 * 		0 => string 'background',
+	 * 		1 => string 'url(data:image/png;base64,iVBORw0K...SuQmCC)',
+	 * )
+	 *
+	 * The colon between '...data' and 'image...' would be ignored because it is
+	 * inside the parentheses.
+	 * 
+	 * @param  string $str  The string to parse.
+	 * @param  array  $args Overrides for the default arguments.
+	 * @return array        An array of substrings.
+	 */
+	public static function css_explode( $str, $args = array() ) {
+		// Caller-supplied $args are merged over these defaults by wp_parse_args().
+		$defaults = array(
+			'separator' => ';',
+			'left_bracket' => '(',
+			'right_bracket' => ')',
+			'quote_1' => '\'',
+			'quote_2' => '"',
+			'ignore_escaped_quotes' => true,
+			'trim_whitespace' => true,
+		);
+
+		$args = wp_parse_args( $args, $defaults );
+
+		$char = null;
+		$buffer = '';
+		$stack = array();
+		$bracket_depth = 0;
+		$in_quotes_1 = false;
+		$in_quotes_2 = false;
+
+		$len = strlen( $str );
+		for ( $i = 0; $i < $len; $i++ ) {
+			// Remember the previous character so the quote cases can spot a backslash escape.
+			$previouschar = $char;
+			$char = $str[ $i ];
+
+			switch ( $char ) {
+				// A separator outside quotes and brackets ends the current segment; empty segments are dropped.
+				case $args['separator']:
+					if ( ( ! $in_quotes_1 ) && ( ! $in_quotes_2 ) && ( 0 === $bracket_depth ) ) {
+						if ( $args['trim_whitespace'] ) {
+							$buffer = trim( $buffer );
+						}
+						if ( '' !== $buffer ) {
+							$stack[] = $buffer;
+							$buffer = '';
+						}
+						continue 2;
+					}
+					break;
+				// Quotes are only tracked outside brackets; with 'ignore_escaped_quotes', a quote preceded by a backslash does not toggle.
+				case $args['quote_1']:
+					if ( 0 === $bracket_depth ) {
+						if ( $args['ignore_escaped_quotes'] ) {
+							if ( '\\' !== $previouschar ) {
+								$in_quotes_1 = ! $in_quotes_1;
+							}
+						} else {
+							$in_quotes_1 = ! $in_quotes_1;
+						}
+
+						if ( ! $in_quotes_1 ) {
+							$in_quotes_2 = false;
+						}
+					}
+					break;
+				// Mirror of the quote_1 case; leaving either quote type also clears the other flag.
+				case $args['quote_2']:
+					if ( 0 === $bracket_depth ) {
+						if ( $args['ignore_escaped_quotes'] ) {
+							if ( '\\' !== $previouschar ) {
+								$in_quotes_2 = ! $in_quotes_2;
+							}
+						} else {
+							$in_quotes_2 = ! $in_quotes_2;
+						}
+
+						if ( ! $in_quotes_2 ) {
+							$in_quotes_1 = false;
+						}
+					}
+					break;
+
+				case $args['left_bracket']:
+					if ( ( ! $in_quotes_1 ) && ( ! $in_quotes_2 ) ) {
+						$bracket_depth++;
+					}
+					break;
+				// An unmatched right bracket flushes the buffer (untrimmed, bracket included) as its own segment.
+				case $args['right_bracket']:
+					if ( ( ! $in_quotes_1 ) && ( ! $in_quotes_2 ) ) {
+						if ( $bracket_depth ) {
+							$bracket_depth--;
+						} else {
+							$stack[] = $buffer . $char;
+							$buffer = '';
+							continue 2;
+						}
+					}
+					break;
+			}
+
+			$buffer .= $char;
+		}
+
+		if ( $args['trim_whitespace'] ) {
+			$buffer = trim( $buffer );
+		}
+		if ( '' !== $buffer ) {
+			$stack[] = $buffer;
 		}
 
-		return $processed_styles;
+		return $stack;
 	}
 
-	private function filter_style( $property, $value ) {
-		// Handle overflow rule
-		// https://www.ampproject.org/docs/reference/spec.html#properties
-		if ( 0 === strpos( $property, 'overflow' )
-			&& ( false !== strpos( $value, 'auto' ) || false !== strpos( $value, 'scroll' ) )
-		) {
-			return false;
+	/**
+	 * Given a css declaration, use the AMP specification to remove any disallowed
+	 * data.
+	 *
+	 * The AMP specification currently includes the following rules that might
+	 * apply to custom styles:
+	 *
+	 * - Image URL spec (https://github.com/ampproject/amphtml/blob/master/validator/validator-main.protoascii#L574)
+	 * - CData spec (https://github.com/ampproject/amphtml/blob/master/validator/validator-main.protoascii#L590)
+	 *
+	 * If more rules are defined in the AMP spec, they should be checked here.
+	 * 
+	 * @param  string $style  The CSS declaration to sanitize
+	 * @return string         A sanitized CSS declaration (may be empty).
+	 */
+	private function sanitize_amp_custom_style( $style ) {
+		// Each step below runs only when the loaded spec defines the corresponding rule.
+		// Check any url() in the declaration against the spec's image_url_spec.
+		if ( isset( $this->allowed_custom_styles['cdata_spec_list']['css_spec']['image_url_spec'] ) ) {
+			$style = $this->sanitize_url( $style, $this->allowed_custom_styles['cdata_spec_list']['css_spec']['image_url_spec'] );
 		}
 
-		if ( 'width' === $property ) {
-			$property = 'max-width';
+		// Run the declaration through the spec's cdata rules (sanitize_cdata may return an empty string).
+		if ( isset( $this->allowed_custom_styles['cdata_spec_list'] ) ) {
+			$style = $this->sanitize_cdata( $style, $this->allowed_custom_styles['cdata_spec_list'] );
+		}
+
+		return $style;
+	}
+
+	/**
+	 * Sanitize a URL in a CSS declaration based on the provided url_spec.
+	 *
+	 * The url_spec should come from an 'image_url_spec' array from the
+	 * $allowed_styles array in the AMP_Allowed_Styles_Generated class
+	 * that is generated by the amp_wp_build_styles.py python script.
+	 *
+	 * Following is an example `image_url_spec`:
+	 *
+	 * 'image_url_spec' => array(
+	 * 		'allow_empty' => true,
+	 * 		'allow_relative' => true,
+	 * 		'allowed_protocol' => array(
+	 * 			'absolute',
+	 * 			'data',
+	 * 			'http',
+	 * 			'https',
+	 * 		),
+	 * 	),
+	 * 
+	 * @param  string $styles   CSS declaration to sanitize.
+	 * @param  array  $url_spec The AMP url_spec to use when processing.
+	 * @return string           The sanitized CSS declaration (may be empty).
+	 */
+	private function sanitize_url( $styles, $url_spec ) {
+		// Does this style declaration include a URL?
+		if ( false !== strpos( $styles, 'url(' ) ) {
+
+			// Break the declaration into parts to isolate the URL (surrounding quotes are dropped).
+			$matched = preg_match( '/(.*)url\\(\\s*[\'"]?(.*?)?[\'"]?\\s*\\)(.*)/u', $styles, $matches );
+			// NOTE: the greedy leading (.*) means only the last url() in the declaration is isolated.
+			// If we couldn't isolate the URL, stop processing and return an empty string.
+			if ( ( 1 !== $matched ) || ( 4 !== count( $matches ) ) ) {
+				return '';
+			}
+
+			$before_url = $matches[1];
+			$url = $matches[2];
+			$after_url = $matches[3];
+
+			$parsed_url = parse_url( $url );
+
+			// If the URL has a disallowed protocol, remove the URL.
+			if ( isset( $parsed_url['scheme'] ) && is_array( $url_spec['allowed_protocol'] ) &&
+				( ! in_array( $parsed_url['scheme'], $url_spec['allowed_protocol'], true ) ) ) {
+				$url = '';
+			}
+
+			// If the URL has no protocol (i.e. it is relative) and relative URLs are disallowed, remove the URL.
+			if ( empty( $parsed_url['scheme'] ) && ( false === $url_spec['allow_relative'] ) ) {
+				$url = '';
+			}
+
+			// If the URL is empty and empty is disallowed, remove the entire declaration.
+			// Otherwise, rebuild the declaration.
+			if ( empty( $url ) && ( false === $url_spec['allow_empty'] ) ) {
+				$styles = '';
+			} else {
+				$styles = $before_url . 'url(' . $url . ')' . $after_url;
+			}
+		}
 
-		// !important is not allowed
-		if ( false !== strpos( $value, 'important' ) ) {
-			$value = preg_replace( '/\s*\!\s*important$/', '', $value );
+		return $styles;
+	}
+
+	/**
+	 * Sanitize the cdata in a CSS declaration based on the cdata_spec_list.
+	 *
+	 * The $cdata_spec_list should come from the $allowed_styles array in the
+	 * AMP_Allowed_Styles_Generated class that is generated by the
+	 * amp_wp_build_styles.py python script.
+	 *
+	 * Currently, the only part of this specification that could apply to 
+	 * custom AMP styles generated from inline styles is 'blacklisted_cdata_regex'.
+	 * If other sections are added to the cdata_spec_list, they should be added
+	 * here.
+	 *
+	 * Following is an example `cdata_spec_list`:
+	 *
+	 * 'cdata_spec_list' => array(
+	 * 		'blacklisted_cdata_regex' => array(
+	 * 			'<!--',
+	 * 			'\\.i?-amp-',
+	 * 			'(^|\\W)i-amphtml-',
+	 * 			'!important',
+	 * 		),
+	 * 		...
+	 * ),
+	 *
+	 * @param  string $styles           CSS declaration to sanitize.
+	 * @param  array   $cdata_spec_list cdata_spec_list to use when processing.
+	 * @return string                   Sanitized CSS declaration (may be empty).
+	 */
+	private function sanitize_cdata( $styles, $cdata_spec_list ) {
+		// If the cdata_spec_list has a non-empty blacklisted_cdata_regex array, drop the whole declaration when it
+		// matches any entry. Matching is case-insensitive, so '!IMPORTANT' is caught as well as '!important'.
+		if ( ! empty( $cdata_spec_list['blacklisted_cdata_regex'] ) &&
+			is_array( $cdata_spec_list['blacklisted_cdata_regex'] ) ) {
+			$regex_pattern = '%' . implode( '|', $cdata_spec_list['blacklisted_cdata_regex'] ) . '%ui';
+			$matched = preg_match( $regex_pattern, $styles );
+			if ( 0 !== $matched ) { // 1 = blacklisted; false = PCRE error (e.g. invalid UTF-8), treated as unsafe.
+				return '';
+			}
 		}
 
-		return array( $property, $value );
+		return $styles;
 	}
 
+
+	/**
+	 * Generate a class name based on the hash of the input data.
+	 * 
+	 * @param  mixed  $data The data to generate the hash from.
+	 * @return string       A unique class name for the input data.
+	 */
 	private function generate_class_name( $data ) {
 		$string = maybe_serialize( $data );
 		return 'amp-wp-inline-' . md5( $string );
diff --git a/tests/test-amp-style-sanitizer.php b/tests/test-amp-style-sanitizer.php
index 8dc203a56d5..b847f966c0b 100644
--- a/tests/test-amp-style-sanitizer.php
+++ b/tests/test-amp-style-sanitizer.php
@@ -40,58 +40,67 @@ public function get_data() {
 				),
 			),
 
-			'width_to_max-width' => array(
-				'<figure style="width: 300px"></figure>',
-				'<figure class="amp-wp-inline-2676cd1bfa7e8feb4f0e0e8086ae9ce4"></figure>',
-				array(
-					'.amp-wp-inline-2676cd1bfa7e8feb4f0e0e8086ae9ce4' => array(
-						'max-width:300px',
-					),
-				),
-			),
-
-			'div_kses_banned_style' => array(
-				'<span style="overflow-x: hidden;">Specific overflow axis not allowed.</span>',
-				'<span>Specific overflow axis not allowed.</span>',
-				array(),
-			),
-
-			'div_amp_banned_style' => array(
-				'<span style="overflow: scroll;">Scrollbars not allowed.</span>',
-				'<span>Scrollbars not allowed.</span>',
-				array(),
-			),
-
-			'!important_not_allowed' => array(
-				'<span style="margin: 1px!important;">!important not allowed.</span>',
-				'<span class="amp-wp-inline-b370df7c42957a3192cac40a8ddcff79">!important not allowed.</span>',
-				array(
-					'.amp-wp-inline-b370df7c42957a3192cac40a8ddcff79' => array(
-						'margin:1px',
-					),
-				),
-			),
-
-			'!important_with_spaces_not_allowed' => array(
-				'<span style="color: red  !  important;">!important not allowed.</span>',
-				'<span class="amp-wp-inline-5b88d03e432f20476a218314084d3a05">!important not allowed.</span>',
-				array(
-					'.amp-wp-inline-5b88d03e432f20476a218314084d3a05' => array(
-						'color:red',
-					),
-				),
-			),
-
-			'!important_multiple_not_allowed' => array(
-				'<span style="color: red !important; background: blue!important;">!important not allowed.</span>',
-				'<span class="amp-wp-inline-ef4329d562b6b3486a8a661df5c5280f">!important not allowed.</span>',
-				array(
-					'.amp-wp-inline-ef4329d562b6b3486a8a661df5c5280f' => array(
-						'background:blue',
-						'color:red',
-					),
-				),
-			),
+			// This seems to be allowed in the current AMP spec.
+			// 'width_to_max-width' => array(
+			// 	'<figure style="width: 300px"></figure>',
+			// 	'<figure class="amp-wp-inline-2676cd1bfa7e8feb4f0e0e8086ae9ce4"></figure>',
+			// 	array(
+			// 		'.amp-wp-inline-2676cd1bfa7e8feb4f0e0e8086ae9ce4' => array(
+			// 			'max-width:300px',
+			// 		),
+			// 	),
+			// ),
+
+			// This seems to be allowed in the current AMP spec.
+			// 'div_kses_banned_style' => array(
+			// 	'<span style="overflow-x: hidden;">Specific overflow axis not allowed.</span>',
+			// 	'<span>Specific overflow axis not allowed.</span>',
+			// 	array(),
+			// ),
+
+			// This seems to be allowed in the current AMP spec.			
+			// 'div_amp_banned_style' => array(
+			// 	'<span style="overflow: scroll;">Scrollbars not allowed.</span>',
+			// 	'<span>Scrollbars not allowed.</span>',
+			// 	array(),
+			// ),
+
+			// The AMP regex specification does not provide a way to distinguish whether
+			// only the matched text should be removed or whether the entire declaration that
+			// contains a match should be removed. Therefore, if a declaration matches
+			// a blacklisted regex, the entire declaration must always be removed.
+			// '!important_not_allowed' => array(
+			// 	'<span style="margin: 1px!important;">!important not allowed.</span>',
+			// 	'<span class="amp-wp-inline-b370df7c42957a3192cac40a8ddcff79">!important not allowed.</span>',
+			// 	array(
+			// 		'.amp-wp-inline-b370df7c42957a3192cac40a8ddcff79' => array(
+			// 			'margin:1px',
+			// 		),
+			// 	),
+			// ),
+
+			// The AMP blacklisted cdata regex does not include a version of '!important' with spaces in it.
+			// '!important_with_spaces_not_allowed' => array(
+			// 	'<span style="color: red  !  important;">!important not allowed.</span>',
+			// 	'<span class="amp-wp-inline-5b88d03e432f20476a218314084d3a05">!important not allowed.</span>',
+			// 	array(
+			// 		'.amp-wp-inline-5b88d03e432f20476a218314084d3a05' => array(
+			// 			'color:red',
+			// 		),
+			// 	),
+			// ),
+
+			// Must remove entire declaration when '!important' is found.
+			// '!important_multiple_not_allowed' => array(
+			// 	'<span style="color: red !important; background: blue!important;">!important not allowed.</span>',
+			// 	'<span class="amp-wp-inline-ef4329d562b6b3486a8a661df5c5280f">!important not allowed.</span>',
+			// 	array(
+			// 		'.amp-wp-inline-ef4329d562b6b3486a8a661df5c5280f' => array(
+			// 			'background:blue',
+			// 			'color:red',
+			// 		),
+			// 	),
+			// ),
 
 			'two_nodes' => array(
 				'<span style="color: #00ff00;"><span style="color: #ff0000;">This is red.</span></span>',
@@ -114,7 +123,28 @@ public function get_data() {
 						'background:#000',
 					),
 				)
+			),
 
+			// New tests for updated sanitizer
+			'allow_data_URI' => array(
+				'<div class = "background_image" style = "background:url(data:image/png;base64,iVBOR...QmCC);"></div>',
+				'<div class="background_image amp-wp-inline-9e2b985c66acd167ae81a43e47b64b9c"></div>',
+				array(
+					'.amp-wp-inline-9e2b985c66acd167ae81a43e47b64b9c' => array(
+						'background:url(data:image/png;base64,iVBOR...QmCC)',
+					),
+				),
+			),
+
+			'remove_declaration_with_disallowed_protocol' => array(
+				'<div class = "background_image" style = "height:44px;background:url(disallowed://example.com);"></div>',
+				'<div class="background_image amp-wp-inline-781ca55963bf9d14902efb4d514e1b1d"></div>',
+				array(
+					'.amp-wp-inline-781ca55963bf9d14902efb4d514e1b1d' => array(
+						'background:url()',
+						'height:44px',
+					),
+				),
 			),
 		);
 	}
@@ -135,4 +165,74 @@ public function test_sanitizer( $source, $expected_content, $expected_stylesheet
 		$stylesheet = $sanitizer->get_styles();
 		$this->assertEquals( $expected_stylesheet, $stylesheet );
 	}
+
+
+	public function get_css_explode_data() {
+		return array( // Each case: array( input string, css_explode() $args, expected segments ).
+			'empty' => array(
+				'',
+				array(),
+				array(),
+			),
+			'split_on_semicolon' => array(
+				'property1:value1;property2:value2',
+				array(), // Empty $args: the default ';' separator is used.
+				array(
+					'property1:value1',
+					'property2:value2',
+				),
+			),
+			'split_on_semicolon_multiple_values' => array(
+				'property1:value1;property2:value2;property3:value3;property4:value4;property5:value5;',
+				array(),
+				array(
+					'property1:value1',
+					'property2:value2',
+					'property3:value3',
+					'property4:value4',
+					'property5:value5',
+				),
+			),
+			'split_on_semicolon_multiple_values_remove_whitespace' => array(
+				'property1:value1; property2:value2 ;  property3:value3  ;   
+				property4:value4;
+				property5:value5              ;',
+				array(),
+				array(
+					'property1:value1',
+					'property2:value2',
+					'property3:value3',
+					'property4:value4',
+					'property5:value5',
+				),
+			),
+			'split_on_semicolon_ignore_quoted' => array(
+				'property1:value1;property2:"value2;property3:value3";property4:\'value4;property5:value5\';property6:url(Then;there;are;more;separators;here);',
+				array(),
+				array(
+					'property1:value1',
+					'property2:"value2;property3:value3"',
+					'property4:\'value4;property5:value5\'',
+					'property6:url(Then;there;are;more;separators;here)',
+				),
+			),
+			'split_and_ignore_quoted_quotes' => array(
+				'property1:"value;value=\'whatever;somethingelse\'"',
+				array( 'separator' => ':' ), // Split property from value instead of splitting declarations.
+				array(
+					'property1',
+					'"value;value=\'whatever;somethingelse\'"',
+				),
+			),
+		);
+	}
+
+	/**
+	 * @dataProvider get_css_explode_data
+	 * @group css_explode
+	 */
+	public function test_css_explode( $string, $args, $expected ) {
+		$got = AMP_Style_Sanitizer::css_explode( $string, $args ); // Static call: no sanitizer instance is constructed.
+		$this->assertEquals( $expected, $got );
+	}
 }