ampproject · DavidCramer · Jan 17, 2018 · Jan 17, 2018 · Jan 17, 2018 · Jan 18, 2018
diff --git a/includes/class-amp-autoloader.php b/includes/class-amp-autoloader.php
@@ -64,6 +64,7 @@ class AMP_Autoloader {
 		'AMP_Blacklist_Sanitizer'                     => 'includes/sanitizers/class-amp-blacklist-sanitizer',
 		'AMP_Iframe_Sanitizer'                        => 'includes/sanitizers/class-amp-iframe-sanitizer',
 		'AMP_Img_Sanitizer'                           => 'includes/sanitizers/class-amp-img-sanitizer',
+		'AMP_Form_Sanitizer'                          => 'includes/sanitizers/class-amp-form-sanitizer',
 		'AMP_Playbuzz_Sanitizer'                      => 'includes/sanitizers/class-amp-playbuzz-sanitizer',
 		'AMP_Style_Sanitizer'                         => 'includes/sanitizers/class-amp-style-sanitizer',
 		'AMP_Tag_And_Attribute_Sanitizer'             => 'includes/sanitizers/class-amp-tag-and-attribute-sanitizer',

diff --git a/includes/class-amp-theme-support.php b/includes/class-amp-theme-support.php
@@ -347,7 +347,14 @@ public static function finish_output_buffering( $output ) {
 			1
 		);
 
+		$dom = AMP_DOM_Utils::get_dom( $output );
+		// Sanitize forms in the document.
+		$sanitizer = new AMP_Form_Sanitizer( $dom );
+		$sanitizer->sanitize();
+
 		// @todo Add more validation checking and potentially the whitelist sanitizer.
+		$output = $dom->saveHTML();
+
 		return $output;
 	}
 }
diff --git a/includes/sanitizers/class-amp-form-sanitizer.php b/includes/sanitizers/class-amp-form-sanitizer.php
@@ -0,0 +1,74 @@
+<?php
+/**
+ * Class AMP_Form_Sanitizer.
+ *
+ * @package AMP
+ */
+
+/**
+ * Class AMP_Form_Sanitizer
+ *
+ * Strips and corrects attributes in forms.
+ */
+class AMP_Form_Sanitizer extends AMP_Base_Sanitizer {
+
+	/**
+	 * Tag.
+	 *
+	 * @var string HTML <form> tag to identify and process.
+	 *
+	 * @since 0.2
+	 */
+	public static $tag = 'form';
+
+	/**
+	 * Sanitize the <form> elements from the HTML contained in this instance's DOMDocument.
+	 *
+	 * Forms with a missing or empty action are removed. A leading "http:" scheme is stripped
+	 * from the action (making it protocol-relative), POST forms get their action moved to
+	 * action-xhr, and a target of "_blank" is added when none is present.
+	 *
+	 * @since 0.2
+	 */
+	public function sanitize() {
+		/**
+		 * Node list.
+		 *
+		 * @var DOMNodeList $nodes
+		 */
+		$nodes = $this->dom->getElementsByTagName( self::$tag );
+
+		// Walk backwards: the node list is live, so removing a form shifts later indexes.
+		for ( $i = $nodes->length - 1; $i >= 0; $i-- ) {
+			$node = $nodes->item( $i );
+			if ( ! $node instanceof DOMElement ) {
+				continue;
+			}
+
+			// getAttribute() returns '' for a missing attribute, so this covers both cases.
+			$action_url = $node->getAttribute( 'action' );
+			if ( '' === $action_url ) {
+				$node->parentNode->removeChild( $node );
+				continue;
+			}
+
+			// Correct action, unless the form already declares an XHR endpoint.
+			if ( ! $node->hasAttribute( 'action-xhr' ) ) {
+				// Strip only a leading scheme; "http:" inside a query string must survive.
+				$action_url = preg_replace( '#^http:(?=//)#i', '', $action_url );
+				$node->setAttribute( 'action', $action_url );
+
+				// Method values are case-insensitive in HTML (e.g. method="POST").
+				if ( 'post' === strtolower( $node->getAttribute( 'method' ) ) ) {
+					$node->setAttribute( 'action-xhr', $action_url );
+					$node->removeAttribute( 'action' );
+				}
+			}
+
+			// Set a target if needed.
+			if ( ! $node->hasAttribute( 'target' ) ) {
+				$node->setAttribute( 'target', '_blank' );
+			}
+		}
+	}
+}
diff --git a/includes/utils/class-amp-dom-utils.php b/includes/utils/class-amp-dom-utils.php
@@ -13,17 +13,15 @@
 class AMP_DOM_Utils {
 
 	/**
-	 * Return a valid DOMDocument representing arbitrary HTML content passed as a parameter.
-	 *
-	 * @see Reciprocal function get_content_from_dom()
+	 * Return a valid DOMDocument representing HTML document passed as a parameter.
 	 *
-	 * @since 0.2
+	 * @since 0.7
 	 *
-	 * @param string $content Valid HTML content to be represented by a DOMDocument.
+	 * @param string $document Valid HTML document to be represented by a DOMDocument.
 	 *
 	 * @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
 	 */
-	public static function get_dom_from_content( $content ) {
+	public static function get_dom( $document ) {
 		$libxml_previous_state = libxml_use_internal_errors( true );
 
 		$dom = new DOMDocument();
@@ -35,7 +33,7 @@ public static function get_dom_from_content( $content ) {
 		 * Add utf-8 charset so loadHTML does not have problems parsing it.
 		 * See: http://php.net/manual/en/domdocument.loadhtml.php#78243
 		 */
-		$result = $dom->loadHTML( '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>' . $content . '</body></html>' );
+		$result = $dom->loadHTML( $document );
 
 		libxml_clear_errors();
 		libxml_use_internal_errors( $libxml_previous_state );
@@ -47,6 +45,31 @@ public static function get_dom_from_content( $content ) {
 		return $dom;
 	}
 
+	/**
+	 * Build a DOMDocument from an arbitrary HTML fragment.
+	 *
+	 * @see Reciprocal function get_content_from_dom()
+	 *
+	 * @since 0.2
+	 *
+	 * @param string $content Valid HTML content to be represented by a DOMDocument.
+	 *
+	 * @return DOMDocument|false Returns DOMDocument, or false if conversion failed.
+	 */
+	public static function get_dom_from_content( $content ) {
+		/*
+		 * The fragment is placed inside a minimal html/head/body shell so the parser
+		 * receives a full document with a single root, which also gives callers a
+		 * known <body> node to walk or extract from later. The utf-8 meta tag keeps
+		 * loadHTML from mis-parsing the content.
+		 * See: http://php.net/manual/en/domdocument.loadhtml.php#78243
+		 */
+		$shell_open  = '<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8"></head><body>';
+		$shell_close = '</body></html>';
+
+		return self::get_dom( $shell_open . $content . $shell_close );
+	}
+
 	/**
 	 * Return valid HTML content extracted from the DOMDocument passed as a parameter.
 	 *