diff --git a/composer.json b/composer.json index 6e1124e..b3b556c 100644 --- a/composer.json +++ b/composer.json @@ -13,13 +13,12 @@ } }, "require": { - "php": ">=5.3.3", - "zendframework/zend-stdlib": "self.version" + "php": ">=5.3.23" }, "extra": { "branch-alias": { - "dev-master": "2.2-dev", - "dev-develop": "2.3-dev" + "dev-master": "2.3-dev", + "dev-develop": "2.4-dev" } }, "autoload-dev": { diff --git a/src/Css2Xpath.php b/src/Css2Xpath.php index 53c2fd3..7aa7d50 100644 --- a/src/Css2Xpath.php +++ b/src/Css2Xpath.php @@ -11,119 +11,23 @@ /** * Transform CSS selectors to XPath + * + * @deprecated + * @see Document\Query */ class Css2Xpath { /** * Transform CSS expression to XPath * + * @deprecated + * @see Document\Query * @param string $path * @return string */ public static function transform($path) { - $path = (string) $path; - if (strstr($path, ',')) { - $paths = explode(',', $path); - $expressions = array(); - foreach ($paths as $path) { - $xpath = self::transform(trim($path)); - if (is_string($xpath)) { - $expressions[] = $xpath; - } elseif (is_array($xpath)) { - $expressions = array_merge($expressions, $xpath); - } - } - return implode('|', $expressions); - } - - $paths = array('//'); - $path = preg_replace('|\s+>\s+|', '>', $path); - $segments = preg_split('/\s+/', $path); - foreach ($segments as $key => $segment) { - $pathSegment = static::_tokenize($segment); - if (0 == $key) { - if (0 === strpos($pathSegment, '[contains(')) { - $paths[0] .= '*' . ltrim($pathSegment, '*'); - } else { - $paths[0] .= $pathSegment; - } - continue; - } - if (0 === strpos($pathSegment, '[contains(')) { - foreach ($paths as $pathKey => $xpath) { - $paths[$pathKey] .= '//*' . ltrim($pathSegment, '*'); - $paths[] = $xpath . $pathSegment; - } - } else { - foreach ($paths as $pathKey => $xpath) { - $paths[$pathKey] .= '//' . $pathSegment; - } - } - } - - if (1 == count($paths)) { - return $paths[0]; - } - return implode('|', $paths); - } - - /** - * Tokenize CSS expressions to XPath - * - * @param string $expression - * @return string - */ - protected static function _tokenize($expression) - { - // Child selectors - $expression = str_replace('>', '/', $expression); - - // IDs - $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression); - $expression = preg_replace('|(?errors = array(null); + + set_error_handler(array($this, 'addError'), \E_WARNING); + $nodeList = $this->query($expression); + restore_error_handler(); + + $exception = array_pop($this->errors); + if ($exception) { + throw $exception; + } + + return $nodeList; + } + + /** + * Adds an error to the stack of errors + * + * @param int $errno + * @param string $errstr + * @param string $errfile + * @param int $errline + * @return void + */ + public function addError($errno, $errstr = '', $errfile = '', $errline = 0) + { + $last_error = end($this->errors); + $this->errors[] = new ErrorException( + $errstr, + 0, + $errno, + $errfile, + $errline, + $last_error + ); + } +} diff --git a/src/Document.php b/src/Document.php new file mode 100644 index 0000000..8ece0b4 --- /dev/null +++ b/src/Document.php @@ -0,0 +1,311 @@ +setStringDocument($document, $type, $encoding); + } + + /** + * Get raw set document + * + * @return string|null + */ + public function getStringDocument() + { + return $this->stringDocument; + } + + /** + * Set raw document + * + * @param string|null $document + * @param string|null $forcedType Type for the provided document (see constants) + * @param string|null $forcedEncoding Encoding for the provided document + * @return self + */ + protected function setStringDocument($document, $forcedType = null, $forcedEncoding = null) + { + $type = static::DOC_HTML; + if (strstr($document, 'DTD XHTML')) { + $type = static::DOC_XHTML; + } + + // Breaking XML declaration to make syntax highlighting work + if ('<' . '?xml' == substr(trim($document), 0, 5)) { + $type = static::DOC_XML; + if (preg_match('/]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) { + $this->xpathNamespaces[] = $matches[1]; + $type = static::DOC_XHTML; + } + } + + // Unsetting previously registered DOMDocument + $this->domDocument = null; + $this->stringDocument = !empty($document) ? $document : null; + + $this->setType($forcedType ?: (!empty($document) ? $type : null)); + $this->setEncoding($forcedEncoding); + $this->setErrors(array()); + + return $this; + } + + /** + * Get raw document type + * + * @return string|null + */ + public function getType() + { + return $this->type; + } + + /** + * Set raw document type + * + * @param string $type + * @return self + */ + protected function setType($type) + { + $this->type = $type; + + return $this; + } + + /** + * Get DOMDocument generated from set raw document + * + * @return DOMDocument + * @throws Exception\RuntimeException If cannot get DOMDocument; no document registered + */ + public function getDomDocument() + { + if (null === ($stringDocument = $this->getStringDocument())) { + throw new Exception\RuntimeException('Cannot get DOMDocument; no document registered'); + } + + if (null === $this->domDocument) { + $this->domDocument = $this->getDomDocumentFromString($stringDocument); + } + + return $this->domDocument; + } + + /** + * Set DOMDocument + * + * @param DOMDocument $domDocument + * @return self + */ + protected function setDomDocument(DOMDocument $domDocument) + { + $this->domDocument = $domDocument; + + return $this; + } + + /** + * Get set document encoding + * + * @return string|null + */ + public function getEncoding() + { + return $this->encoding; + } + + /** + * Set raw document encoding for DOMDocument generation + * + * @param string|null $encoding + * @return self + */ + public function setEncoding($encoding) + { + $this->encoding = $encoding; + + return $this->encoding; + } + + /** + * Get DOMDocument generation errors + * + * @return array + */ + public function getErrors() + { + return $this->errors; + } + + /** + * Set document errors from DOMDocument generation + * + * @param array $errors + * @return self + */ + protected function setErrors($errors) + { + $this->errors = $errors; + + return $this; + } + + /** + * Get DOMDocument from set raw document + * + * @return DOMDocument + * @throws Exception\RuntimeException + */ + protected function getDomDocumentFromString($stringDocument) + { + libxml_use_internal_errors(true); + libxml_disable_entity_loader(true); + + $encoding = $this->getEncoding(); + $domDoc = null === $encoding ? new DOMDocument('1.0') : new DOMDocument('1.0', $encoding); + $type = $this->getType(); + + switch ($type) { + case static::DOC_XML: + $success = $domDoc->loadXML($stringDocument); + foreach ($domDoc->childNodes as $child) { + if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { + throw new Exception\RuntimeException( + 'Invalid XML: Detected use of illegal DOCTYPE' + ); + } + } + break; + case static::DOC_HTML: + case static::DOC_XHTML: + default: + $success = $domDoc->loadHTML($stringDocument); + break; + } + + $errors = libxml_get_errors(); + if (!empty($errors)) { + $this->setErrors($errors); + libxml_clear_errors(); + } + + libxml_disable_entity_loader(false); + libxml_use_internal_errors(false); + + if (!$success) { + throw new Exception\RuntimeException(sprintf('Error parsing document (type == %s)', $type)); + } + + return $domDoc; + } + + /** + * Get Document's registered XPath namespaces + * + * @return array + */ + public function getXpathNamespaces() + { + return $this->xpathNamespaces; + } + + /** + * Register XPath namespaces + * + * @param array $xpathNamespaces + * @return void + */ + public function registerXpathNamespaces($xpathNamespaces) + { + $this->xpathNamespaces = $xpathNamespaces; + } + + + /** + * Get Document's registered XPath PHP Functions + * + * @return string|null + */ + public function getXpathPhpFunctions() + { + return $this->xpathPhpFunctions; + } + /** + * Register PHP Functions to use in internal DOMXPath + * + * @param bool $xpathPhpFunctions + * @return void + */ + public function registerXpathPhpFunctions($xpathPhpFunctions = true) + { + $this->xpathPhpFunctions = $xpathPhpFunctions; + } +} diff --git a/src/Document/NodeList.php b/src/Document/NodeList.php new file mode 100644 index 0000000..352326f --- /dev/null +++ b/src/Document/NodeList.php @@ -0,0 +1,160 @@ +list = $list; + } + + /** + * Iterator: rewind to first element + * + * @return DOMNode + */ + public function rewind() + { + $this->position = 0; + + return $this->list->item(0); + } + + /** + * Iterator: is current position valid? + * + * @return bool + */ + public function valid() + { + if (in_array($this->position, range(0, $this->list->length - 1)) && $this->list->length > 0) { + return true; + } + + return false; + } + + /** + * Iterator: return current element + * + * @return DOMNode + */ + public function current() + { + return $this->list->item($this->position); + } + + /** + * Iterator: return key of current element + * + * @return int + */ + public function key() + { + return $this->position; + } + + /** + * Iterator: move to next element + * + * @return DOMNode + */ + public function next() + { + ++$this->position; + + return $this->list->item($this->position); + } + + /** + * Countable: get count + * + * @return int + */ + public function count() + { + return $this->list->length; + } + + /** + * ArrayAccess: offset exists + * + * @param int $key + * @return bool + */ + public function offsetExists($key) + { + if (in_array($key, range(0, $this->list->length - 1)) && $this->list->length > 0) { + return true; + } + return false; + } + + /** + * ArrayAccess: get offset + * + * @param int $key + * @return mixed + */ + public function offsetGet($key) + { + return $this->list->item($key); + } + + /** + * ArrayAccess: set offset + * + * @param mixed $key + * @param mixed $value + * @throws Exception\BadMethodCallException when attempting to write to a read-only item + */ + public function offsetSet($key, $value) + { + throw new Exception\BadMethodCallException('Attempting to write to a read-only list'); + } + + /** + * ArrayAccess: unset offset + * + * @param mixed $key + * @throws Exception\BadMethodCallException when attempting to unset a read-only item + */ + public function offsetUnset($key) + { + throw new Exception\BadMethodCallException('Attempting to unset on a read-only list'); + } +} diff --git a/src/Document/Query.php b/src/Document/Query.php new file mode 100644 index 0000000..a1606c3 --- /dev/null +++ b/src/Document/Query.php @@ -0,0 +1,169 @@ +getDomDocument()); + + $xpathNamespaces = $document->getXpathNamespaces(); + foreach ($xpathNamespaces as $prefix => $namespaceUri) { + $xpath->registerNamespace($prefix, $namespaceUri); + } + + if ($xpathPhpfunctions = $document->getXpathPhpFunctions()) { + $xpath->registerNamespace('php', 'http://php.net/xpath'); + ($xpathPhpfunctions === true) ? $xpath->registerPHPFunctions() : $xpath->registerPHPFunctions($xpathPhpfunctions); + } + + $nodeList = $xpath->queryWithErrorException($expression); + return new NodeList($nodeList); + } + + /** + * Transform CSS expression to XPath + * + * @param string $path + * @return string + */ + public static function cssToXpath($path) + { + $path = (string) $path; + if (strstr($path, ',')) { + $paths = explode(',', $path); + $expressions = array(); + foreach ($paths as $path) { + $xpath = static::cssToXpath(trim($path)); + if (is_string($xpath)) { + $expressions[] = $xpath; + } elseif (is_array($xpath)) { + $expressions = array_merge($expressions, $xpath); + } + } + return implode('|', $expressions); + } + + $paths = array('//'); + $path = preg_replace('|\s+>\s+|', '>', $path); + $segments = preg_split('/\s+/', $path); + foreach ($segments as $key => $segment) { + $pathSegment = static::_tokenize($segment); + if (0 == $key) { + if (0 === strpos($pathSegment, '[contains(')) { + $paths[0] .= '*' . ltrim($pathSegment, '*'); + } else { + $paths[0] .= $pathSegment; + } + continue; + } + if (0 === strpos($pathSegment, '[contains(')) { + foreach ($paths as $pathKey => $xpath) { + $paths[$pathKey] .= '//*' . ltrim($pathSegment, '*'); + $paths[] = $xpath . $pathSegment; + } + } else { + foreach ($paths as $pathKey => $xpath) { + $paths[$pathKey] .= '//' . $pathSegment; + } + } + } + + if (1 == count($paths)) { + return $paths[0]; + } + return implode('|', $paths); + } + + /** + * Tokenize CSS expressions to XPath + * + * @param string $expression + * @return string + */ + protected static function _tokenize($expression) + { + // Child selectors + $expression = str_replace('>', '/', $expression); + + // IDs + $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression); + $expression = preg_replace('|(?queryXpath($xpathQuery, $query); } @@ -298,6 +298,7 @@ public function registerXpathPhpFunctions($xpathPhpFunctions = true) * @param DOMDocument $document * @param string|array $xpathQuery * @return array + * @throws \ErrorException If query cannot be executed */ protected function getNodeList($document, $xpathQuery) { @@ -313,12 +314,7 @@ protected function getNodeList($document, $xpathQuery) } $xpathQuery = (string) $xpathQuery; - ErrorHandler::start(); - $nodeList = $xpath->query($xpathQuery); - $error = ErrorHandler::stop(); - if ($error) { - throw $error; - } + $nodeList = $xpath->queryWithErrorException($xpathQuery); return $nodeList; } } diff --git a/test/Css2XpathTest.php b/test/Document/QueryTest.php similarity index 80% rename from test/Css2XpathTest.php rename to test/Document/QueryTest.php index c792821..a73bc74 100644 --- a/test/Css2XpathTest.php +++ b/test/Document/QueryTest.php @@ -3,29 +3,29 @@ * Zend Framework (http://framework.zend.com/) * * @link http://github.com/zendframework/zf2 for the canonical source repository - * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com) + * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License */ -namespace ZendTest\Dom; +namespace ZendTest\Dom\Document; -use Zend\Dom\Css2Xpath; +use Zend\Dom\Document\Query; /** - * Test class for Css2Xpath. + * Test class for Zend\Dom\Document\Query. * * @group Zend_Dom */ -class Css2XpathTest extends \PHPUnit_Framework_TestCase +class QueryTest extends \PHPUnit_Framework_TestCase { public function testTransformShouldBeCalledStatically() { - Css2Xpath::transform(''); + Query::cssToXpath(''); } public function testTransformShouldReturnStringByDefault() { - $test = Css2Xpath::transform(''); + $test = Query::cssToXpath(''); $this->assertTrue(is_string($test)); } @@ -34,7 +34,7 @@ public function testTransformShouldReturnStringByDefault() */ public function testTransformShouldReturnMultiplePathsWhenExpressionContainsCommas() { - $test = Css2Xpath::transform('#foo, #bar'); + $test = Query::cssToXpath('#foo, #bar'); $this->assertTrue(is_string($test)); $this->assertContains('|', $test); $this->assertEquals(2, count(explode('|', $test))); @@ -42,19 +42,19 @@ public function testTransformShouldReturnMultiplePathsWhenExpressionContainsComm public function testTransformShouldRecognizeHashSymbolAsId() { - $test = Css2Xpath::transform('#foo'); + $test = Query::cssToXpath('#foo'); $this->assertEquals("//*[@id='foo']", $test); } public function testTransformShouldRecognizeDotSymbolAsClass() { - $test = Css2Xpath::transform('.foo'); + $test = Query::cssToXpath('.foo'); $this->assertEquals("//*[contains(concat(' ', normalize-space(@class), ' '), ' foo ')]", $test); } public function testTransformShouldAssumeSpacesToIndicateRelativeXpathQueries() { - $test = Css2Xpath::transform('div#foo .bar'); + $test = Query::cssToXpath('div#foo .bar'); $this->assertContains('|', $test); $expected = array( "//div[@id='foo']//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]", @@ -67,7 +67,7 @@ public function testTransformShouldAssumeSpacesToIndicateRelativeXpathQueries() public function testTransformShouldWriteChildSelectorsAsAbsoluteXpathRelations() { - $test = Css2Xpath::transform('div#foo>span'); + $test = Query::cssToXpath('div#foo>span'); $this->assertEquals("//div[@id='foo']/span", $test); } @@ -76,7 +76,7 @@ public function testTransformShouldWriteChildSelectorsAsAbsoluteXpathRelations() */ public function testMultipleComplexCssSpecificationShouldTransformToExpectedXpath() { - $test = Css2Xpath::transform('div#foo span.bar, #bar li.baz a'); + $test = Query::cssToXpath('div#foo span.bar, #bar li.baz a'); $this->assertTrue(is_string($test)); $this->assertContains('|', $test); $actual = explode('|', $test); @@ -92,7 +92,7 @@ public function testMultipleComplexCssSpecificationShouldTransformToExpectedXpat public function testClassNotationWithoutSpecifiedTagShouldResultInMultipleQueries() { - $test = Css2Xpath::transform('div.foo .bar a .baz span'); + $test = Query::cssToXpath('div.foo .bar a .baz span'); $this->assertContains('|', $test); $segments = array( "//div[contains(concat(' ', normalize-space(@class), ' '), ' foo ')]//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//a//*[contains(concat(' ', normalize-space(@class), ' '), ' baz ')]//span", @@ -107,25 +107,25 @@ public function testClassNotationWithoutSpecifiedTagShouldResultInMultipleQuerie public function testShouldAllowEqualitySelectionOfArbitraryAttributes() { - $test = Css2Xpath::transform('div[foo="bar"]'); + $test = Query::cssToXpath('div[foo="bar"]'); $this->assertEquals("//div[@foo='bar']", $test); } public function testShouldCastAttributeNamesToLowerCase() { - $test = Css2Xpath::transform('div[dojoType="bar"]'); + $test = Query::cssToXpath('div[dojoType="bar"]'); $this->assertEquals("//div[@dojotype='bar']", $test); } public function testShouldAllowContentSubSelectionOfArbitraryAttributes() { - $test = Css2Xpath::transform('div[foo~="bar"]'); + $test = Query::cssToXpath('div[foo~="bar"]'); $this->assertEquals("//div[contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]", $test); } public function testShouldAllowContentMatchingOfArbitraryAttributes() { - $test = Css2Xpath::transform('div[foo*="bar"]'); + $test = Query::cssToXpath('div[foo*="bar"]'); $this->assertEquals("//div[contains(@foo, 'bar')]", $test); } @@ -134,7 +134,7 @@ public function testShouldAllowContentMatchingOfArbitraryAttributes() */ public function testShouldAllowMatchingOfAttributeValues() { - $test = Css2Xpath::transform('tag#id @attribute'); + $test = Query::cssToXpath('tag#id @attribute'); $this->assertEquals("//tag[@id='id']//@attribute", $test); } @@ -143,7 +143,7 @@ public function testShouldAllowMatchingOfAttributeValues() */ public function testShouldAllowWhitespaceInDescendentSelectorExpressions() { - $test = Css2Xpath::transform('child > leaf'); + $test = Query::cssToXpath('child > leaf'); $this->assertEquals("//child/leaf", $test); } @@ -152,7 +152,7 @@ public function testShouldAllowWhitespaceInDescendentSelectorExpressions() */ public function testIdSelectorWithAttribute() { - $test = Css2Xpath::transform('#id[attribute="value"]'); + $test = Query::cssToXpath('#id[attribute="value"]'); $this->assertEquals("//*[@id='id'][@attribute='value']", $test); } @@ -161,7 +161,7 @@ public function testIdSelectorWithAttribute() */ public function testIdSelectorWithLeadingAsterix() { - $test = Css2Xpath::transform('*#id'); + $test = Query::cssToXpath('*#id'); $this->assertEquals("//*[@id='id']", $test); } @@ -170,10 +170,10 @@ public function testIdSelectorWithLeadingAsterix() */ public function testCanTransformWithAttributeAndDot() { - $test = Css2Xpath::transform('a[href="http://example.com"]'); + $test = Query::cssToXpath('a[href="http://example.com"]'); $this->assertEquals("//a[@href='http://example.com']", $test); - $test = Css2Xpath::transform('a[@href="http://example.com"]'); + $test = Query::cssToXpath('a[@href="http://example.com"]'); $this->assertEquals("//a[@href='http://example.com']", $test); } } diff --git a/test/DocumentTest.php b/test/DocumentTest.php new file mode 100644 index 0000000..1681c48 --- /dev/null +++ b/test/DocumentTest.php @@ -0,0 +1,417 @@ +document = new Document(); + } + + public function getHtml() + { + if (null === $this->html) { + $this->html = file_get_contents(__DIR__ . '/_files/sample.xhtml'); + } + return $this->html; + } + + public function loadHtml() + { + $this->document = new Document($this->getHtml()); + } + + public function handleError($msg, $code = 0) + { + $this->error = $msg; + } + + public function testConstructorShouldNotRequireArguments() + { + $document = new Document(); + } + + public function testConstructorShouldAcceptDocumentString() + { + $html = $this->getHtml(); + $document = new Document($html); + $this->assertSame($html, $document->getStringDocument()); + } + + public function testDocShouldBeNullByDefault() + { + $this->assertNull($this->document->getStringDocument()); + } + + public function testDomDocShouldRaiseExceptionByDefault() + { + $this->setExpectedException('\Zend\Dom\Exception\RuntimeException', 'no document'); + $this->document->getDomDocument(); + } + + public function testDocShouldBeNullByEmptyStringConstructor() + { + $emptyStr = ''; + $document = new Document($emptyStr); + $this->assertNull($this->document->getStringDocument()); + } + + public function testDocTypeShouldBeNullByDefault() + { + $this->assertNull($this->document->getType()); + } + + public function testDocEncodingShouldBeNullByDefault() + { + $this->assertNull($this->document->getEncoding()); + } + + public function testShouldAllowSettingDocument() + { + $this->testDocShouldBeNullByDefault(); + $this->loadHtml(); + $this->assertEquals($this->getHtml(), $this->document->getStringDocument()); + } + + public function testDocumentTypeShouldBeAutomaticallyDiscovered() + { + $this->loadHtml(); + $this->assertEquals(Document::DOC_XHTML, $this->document->getType()); + $this->document = new Document(''); + $this->assertEquals(Document::DOC_XML, $this->document->getType()); + $this->document = new Document(''); + $this->assertEquals(Document::DOC_HTML, $this->document->getType()); + } + + public function testQueryingWithoutRegisteringDocumentShouldThrowException() + { + $this->setExpectedException('\Zend\Dom\Exception\RuntimeException', 'no document'); + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + } + + public function testQueryingInvalidDocumentShouldThrowException() + { + set_error_handler(array($this, 'handleError')); + $this->document = new Document('some bogus string', Document::DOC_XML); + try { + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + restore_error_handler(); + $this->fail('Querying invalid document should throw exception'); + } catch (DOMException $e) { + restore_error_handler(); + $this->assertContains('Error parsing', $e->getMessage()); + } + } + + public function testgetDomMethodShouldReturnDomDocumentWithStringDocumentInConstructor() + { + $html = $this->getHtml(); + $document = new Document($html); + $this->assertTrue($document->getDomDocument() instanceof \DOMDocument); + } + + public function testgetDomMethodShouldReturnDomDocumentWithStringDocumentSetFromMethod() + { + $this->loadHtml(); + $this->assertTrue($this->document->getDomDocument() instanceof \DOMDocument); + } + + public function testQueryShouldReturnResultObject() + { + $this->loadHtml(); + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + $this->assertTrue($result instanceof Document\NodeList); + } + + public function testResultShouldIndicateNumberOfFoundNodes() + { + $this->loadHtml(); + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(3, count($result)); + } + + public function testResultShouldAllowIteratingOverFoundNodes() + { + $this->loadHtml(); + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(3, count($result)); + foreach ($result as $node) { + $this->assertTrue($node instanceof \DOMNode, var_export($result, 1)); + } + } + + public function testQueryShouldFindNodesWithMultipleClasses() + { + $this->loadHtml(); + $result = Document\Query::execute('.footerblock .last', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(1, count($result)); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsExactly() + { + $this->loadHtml(); + $result = Document\Query::execute('div[dojoType="FilteringSelect"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(1, count($result)); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAsDiscreteWords() + { + $this->loadHtml(); + $result = Document\Query::execute('li[dojoType~="bar"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(2, count($result)); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAndAttributeValue() + { + $this->loadHtml(); + $result = Document\Query::execute('li[dojoType*="bar"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(2, count($result)); + } + + public function testQueryXpathShouldAllowQueryingArbitraryUsingXpath() + { + $this->loadHtml(); + $result = Document\Query::execute('//li[contains(@dojotype, "bar")]', $this->document); + $this->assertEquals(2, count($result)); + } + + public function testXpathPhpFunctionsShouldBeDisabledByDefault() + { + $this->loadHtml(); + try { + $result = Document\Query::execute('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]', $this->document); + } catch (\Exception $e) { + return; + } + $this->assertTrue(false, 'XPath PHPFunctions should be disabled by default'); + } + + public function testXpathPhpFunctionsShouldBeEnabledWithoutParameter() + { + $this->loadHtml(); + $this->document->registerXpathPhpFunctions(); + $result = Document\Query::execute('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]', $this->document); + $this->assertEquals( + 'content-type', + strtolower($result->current()->getAttribute('http-equiv')) + ); + } + + public function testXpathPhpFunctionsShouldBeNotCalledWhenSpecifiedFunction() + { + $this->loadHtml(); + try { + $this->document->registerXpathPhpFunctions('stripos'); + $result = Document\Query::execute('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]', $this->document); + } catch (\Exception $e) { + // $e->getMessage() - Not allowed to call handler 'strtolower() + return; + } + $this->assertTrue(false, 'Not allowed to call handler strtolower()'); + } + + /** + * @group ZF-9243 + */ + public function testLoadingDocumentWithErrorsShouldNotRaisePhpErrors() + { + $file = file_get_contents(__DIR__ . '/_files/bad-sample.html'); + $this->document = new Document($file); + $result = Document\Query::execute('p', $this->document, Document\Query::TYPE_CSS); + $errors = $this->document->getErrors(); + $this->assertTrue(is_array($errors)); + $this->assertTrue(0 < count($errors)); + } + + /** + * @group ZF-9765 + */ + public function testCssSelectorShouldFindNodesWhenMatchingMultipleAttributes() + { + $html = << + + +
+ + + +
+ + +EOF; + + $this->document = new Document($html); + $result = Document\Query::execute('input[type="hidden"][value="1"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(2, count($result)); + $result = Document\Query::execute('input[value="1"][type~="hidden"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(2, count($result)); + $result = Document\Query::execute('input[type="hidden"][value="0"]', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(1, count($result)); + } + + /** + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingAtConstruction() + { + $doc = new Document($this->getHtml(), null, 'iso-8859-1'); + $this->assertEquals('iso-8859-1', $doc->getEncoding()); + } + + /** + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingWhenSettingDocument() + { + $this->document = new Document($this->getHtml(), null, 'iso-8859-1'); + $this->assertEquals('iso-8859-1', $this->document->getEncoding()); + } + + /** + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingViaSetter() + { + $this->document->setEncoding('iso-8859-1'); + $this->assertEquals('iso-8859-1', $this->document->getEncoding()); + } + + /** + * @group ZF-3938 + */ + public function testSpecifyingEncodingSetsEncodingOnDomDocument() + { + $this->document = new Document($this->getHtml(), null, 'utf-8'); + $result = Document\Query::execute('.foo', $this->document, Document\Query::TYPE_CSS); + $this->assertInstanceof('\\Zend\\Dom\\Document\\NodeList', $result); + $this->assertInstanceof('\\DOMDocument', $this->document->getDomDocument()); + $this->assertEquals('utf-8', $this->document->getEncoding()); + } + + /** + * @group ZF-11376 + */ + public function testXhtmlDocumentWithXmlDeclaration() + { + $xhtmlWithXmlDecl = << + + </head> + <body><p>Test paragraph.</p></body> +</html> +EOB; + $this->document = new Document($xhtmlWithXmlDecl, null, 'utf-8'); + $result = Document\Query::execute('//p', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(1, $result->count()); + } + + /** + * @group ZF-12106 + */ + public function testXhtmlDocumentWithXmlAndDoctypeDeclaration() + { + $xhtmlWithXmlDecl = <<<EOB +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html + PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <title>Virtual Library + + +

Moved to example.org.

+ + +EOB; + $this->document = new Document($xhtmlWithXmlDecl, null, 'utf-8'); + $result = Document\Query::execute('//p', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(1, $result->count()); + } + + public function testLoadingXmlContainingDoctypeShouldFailToPreventXxeAndXeeAttacks() + { + $xml = << +]> + + This result is &harmless; + +XML; + $this->document = new Document($xml); + $this->setExpectedException("\Zend\Dom\Exception\RuntimeException"); + $result = Document\Query::execute('/', $this->document); + } + + public function testOffsetExists() + { + $this->loadHtml(); + $result = Document\Query::execute('input', $this->document, Document\Query::TYPE_CSS); + + $this->assertEquals(3, $result->count()); + $this->assertFalse($result->offsetExists(3)); + $this->assertTrue($result->offsetExists(2)); + } + + public function testOffsetGet() + { + $this->loadHtml(); + $result = Document\Query::execute('input', $this->document, Document\Query::TYPE_CSS); + + $this->assertEquals(3, $result->count()); + $this->assertEquals('login', $result[2]->getAttribute('id')); + } + + /** + * @expectedException Zend\Dom\Exception\BadMethodCallException + */ + public function testOffsetSet() + { + $this->loadHtml(); + $result = Document\Query::execute('input', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(3, $result->count()); + + $result[0] = ''; + } + + + /** + * @expectedException Zend\Dom\Exception\BadMethodCallException + */ + public function testOffsetUnset() + { + $this->loadHtml(); + $result = Document\Query::execute('input', $this->document, Document\Query::TYPE_CSS); + $this->assertEquals(3, $result->count()); + + unset($result[2]); + } +} diff --git a/test/NodeListTest.php b/test/NodeListTest.php index 06667f5..bc4562e 100644 --- a/test/NodeListTest.php +++ b/test/NodeListTest.php @@ -3,13 +3,13 @@ * Zend Framework (http://framework.zend.com/) * * @link http://github.com/zendframework/zf2 for the canonical source repository - * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com) + * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com) * @license http://framework.zend.com/license/new-bsd New BSD License */ namespace ZendTest\Dom; -use Zend\Dom\NodeList; +use Zend\Dom\Document\NodeList; /** * @group Zend_Dom @@ -22,8 +22,8 @@ class NodeListTest extends \PHPUnit_Framework_TestCase public function testEmptyResultDoesNotReturnIteratorValidTrue() { $dom = new \DOMDocument(); - $emptyNodeList = $dom->getElementsByTagName("a"); - $result = new NodeList("", "", $dom, $emptyNodeList); + $emptyNodeList = $dom->getElementsByTagName('a'); + $result = new NodeList($emptyNodeList); $this->assertFalse($result->valid()); } diff --git a/test/QueryTest.php b/test/QueryTest.php deleted file mode 100644 index b67b93c..0000000 --- a/test/QueryTest.php +++ /dev/null @@ -1,412 +0,0 @@ -query = new Query(); - } - - public function getHtml() - { - if (null === $this->html) { - $this->html = file_get_contents(__DIR__ . '/_files/sample.xhtml'); - } - return $this->html; - } - - public function loadHtml() - { - $this->query->setDocument($this->getHtml()); - } - - public function handleError($msg, $code = 0) - { - $this->error = $msg; - } - - public function testConstructorShouldNotRequireArguments() - { - $query = new Query(); - } - - public function testConstructorShouldAcceptDocumentString() - { - $html = $this->getHtml(); - $query = new Query($html); - $this->assertSame($html, $query->getDocument()); - } - - public function testDocShouldBeNullByDefault() - { - $this->assertNull($this->query->getDocument()); - } - - public function testDocShouldBeNullByEmptyStringConstructor() - { - $emptyStr = ""; - $query = new Query($emptyStr); - $this->assertNull($this->query->getDocument()); - } - - public function testDocShouldBeNullByEmptyStringSet() - { - $emptyStr = ""; - $this->query->setDocument($emptyStr); - $this->assertNull($this->query->getDocument()); - } - - public function testDocTypeShouldBeNullByDefault() - { - $this->assertNull($this->query->getDocumentType()); - } - - public function testShouldAllowSettingDocument() - { - $this->testDocShouldBeNullByDefault(); - $this->loadHtml(); - $this->assertEquals($this->getHtml(), $this->query->getDocument()); - } - - public function testDocumentTypeShouldBeAutomaticallyDiscovered() - { - $this->loadHtml(); - $this->assertEquals(Query::DOC_XHTML, $this->query->getDocumentType()); - $this->query->setDocument(''); - $this->assertEquals(Query::DOC_XML, $this->query->getDocumentType()); - $this->query->setDocument(''); - $this->assertEquals(Query::DOC_HTML, $this->query->getDocumentType()); - } - - public function testQueryingWithoutRegisteringDocumentShouldThrowException() - { - $this->setExpectedException('\Zend\Dom\Exception\RuntimeException', 'no document'); - $this->query->execute('.foo'); - } - - public function testQueryingInvalidDocumentShouldThrowException() - { - set_error_handler(array($this, 'handleError')); - $this->query->setDocumentXml('some bogus string'); - try { - $this->query->execute('.foo'); - restore_error_handler(); - $this->fail('Querying invalid document should throw exception'); - } catch (DOMException $e) { - restore_error_handler(); - $this->assertContains('Error parsing', $e->getMessage()); - } - } - - public function testQueryShouldReturnResultObject() - { - $this->loadHtml(); - $test = $this->query->execute('.foo'); - $this->assertTrue($test instanceof NodeList); - } - - public function testResultShouldIndicateNumberOfFoundNodes() - { - $this->loadHtml(); - $result = $this->query->execute('.foo'); - $message = 'Xpath: ' . $result->getXpathQuery() . "\n"; - $this->assertEquals(3, count($result), $message); - } - - public function testResultShouldAllowIteratingOverFoundNodes() - { - $this->loadHtml(); - $result = $this->query->execute('.foo'); - $this->assertEquals(3, count($result)); - foreach ($result as $node) { - $this->assertTrue($node instanceof \DOMNode, var_export($result, 1)); - } - } - - public function testQueryShouldFindNodesWithMultipleClasses() - { - $this->loadHtml(); - $result = $this->query->execute('.footerblock .last'); - $this->assertEquals(1, count($result), $result->getXpathQuery()); - } - - public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsExactly() - { - $this->loadHtml(); - $result = $this->query->execute('div[dojoType="FilteringSelect"]'); - $this->assertEquals(1, count($result), $result->getXpathQuery()); - } - - public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAsDiscreteWords() - { - $this->loadHtml(); - $result = $this->query->execute('li[dojoType~="bar"]'); - $this->assertEquals(2, count($result), $result->getXpathQuery()); - } - - public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAndAttributeValue() - { - $this->loadHtml(); - $result = $this->query->execute('li[dojoType*="bar"]'); - $this->assertEquals(2, count($result), $result->getXpathQuery()); - } - - public function testQueryXpathShouldAllowQueryingArbitraryUsingXpath() - { - $this->loadHtml(); - $result = $this->query->queryXpath('//li[contains(@dojotype, "bar")]'); - $this->assertEquals(2, count($result), $result->getXpathQuery()); - } - - public function testXpathPhpFunctionsShouldBeDisableByDefault() - { - $this->loadHtml(); - try { - $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); - } catch (\Exception $e) { - return; - } - $this->assertFails('XPath PHPFunctions should be disable by default'); - } - - public function testXpathPhpFunctionsShouldBeEnableWithoutParameter() - { - $this->loadHtml(); - $this->query->registerXpathPhpFunctions(); - $result = $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); - $this->assertEquals('content-type', - strtolower($result->current()->getAttribute('http-equiv')), - $result->getXpathQuery()); - } - - public function testXpathPhpFunctionsShouldBeNotCalledWhenSpecifiedFunction() - { - $this->loadHtml(); - try { - $this->query->registerXpathPhpFunctions('stripos'); - $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); - } catch (\Exception $e) { - // $e->getMessage() - Not allowed to call handler 'strtolower() - return; - } - $this->assertFails('Not allowed to call handler strtolower()'); - } - - /** - * @group ZF-9243 - */ - public function testLoadingDocumentWithErrorsShouldNotRaisePhpErrors() - { - $file = file_get_contents(__DIR__ . '/_files/bad-sample.html'); - $this->query->setDocument($file); - $this->query->execute('p'); - $errors = $this->query->getDocumentErrors(); - $this->assertTrue(is_array($errors)); - $this->assertTrue(0 < count($errors)); - } - - /** - * @group ZF-9765 - */ - public function testCssSelectorShouldFindNodesWhenMatchingMultipleAttributes() - { - $html = << - - -
- - - -
- - -EOF; - - $this->query->setDocument($html); - $results = $this->query->execute('input[type="hidden"][value="1"]'); - $this->assertEquals(2, count($results), $results->getXpathQuery()); - $results = $this->query->execute('input[value="1"][type~="hidden"]'); - $this->assertEquals(2, count($results), $results->getXpathQuery()); - $results = $this->query->execute('input[type="hidden"][value="0"]'); - $this->assertEquals(1, count($results)); - } - - /** - * @group ZF-3938 - */ - public function testAllowsSpecifyingEncodingAtConstruction() - { - $doc = new Query($this->getHtml(), 'iso-8859-1'); - $this->assertEquals('iso-8859-1', $doc->getEncoding()); - } - - /** - * @group ZF-3938 - */ - public function testAllowsSpecifyingEncodingWhenSettingDocument() - { - $this->query->setDocument($this->getHtml(), 'iso-8859-1'); - $this->assertEquals('iso-8859-1', $this->query->getEncoding()); - } - - /** - * @group ZF-3938 - */ - public function testAllowsSpecifyingEncodingViaSetter() - { - $this->query->setEncoding('iso-8859-1'); - $this->assertEquals('iso-8859-1', $this->query->getEncoding()); - } - - /** - * @group ZF-3938 - */ - public function testSpecifyingEncodingSetsEncodingOnDomDocument() - { - $this->query->setDocument($this->getHtml(), 'utf-8'); - $test = $this->query->execute('.foo'); - $this->assertInstanceof('\\Zend\\Dom\\NodeList', $test); - $doc = $test->getDocument(); - $this->assertInstanceof('\\DOMDocument', $doc); - $this->assertEquals('utf-8', $doc->encoding); - } - - /** - * @group ZF-11376 - */ - public function testXhtmlDocumentWithXmlDeclaration() - { - $xhtmlWithXmlDecl = << - - </head> - <body><p>Test paragraph.</p></body> -</html> -EOB; - $this->query->setDocument($xhtmlWithXmlDecl, 'utf-8'); - $this->assertEquals(1, $this->query->execute('//p')->count()); - } - - /** - * @group ZF-12106 - */ - public function testXhtmlDocumentWithXmlAndDoctypeDeclaration() - { - $xhtmlWithXmlDecl = <<<EOB -<?xml version="1.0" encoding="UTF-8"?> -<!DOCTYPE html - PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" - "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> -<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> - <head> - <title>Virtual Library - - -

Moved to example.org.

- - -EOB; - $this->query->setDocument($xhtmlWithXmlDecl, 'utf-8'); - $this->assertEquals(1, $this->query->execute('//p')->count()); - } - - public function testLoadingXmlContainingDoctypeShouldFailToPreventXxeAndXeeAttacks() - { - $xml = << -]> - - This result is &harmless; - -XML; - $this->query->setDocumentXml($xml); - $this->setExpectedException("\Zend\Dom\Exception\RuntimeException"); - $this->query->queryXpath('/'); - } - - public function testOffsetExists() - { - $this->loadHtml(); - $results = $this->query->execute('input'); - - $this->assertEquals(3, $results->count()); - $this->assertFalse($results->offsetExists(3)); - $this->assertTrue($results->offsetExists(2)); - } - - public function testOffsetGet() - { - $this->loadHtml(); - $results = $this->query->execute('input'); - - $this->assertEquals(3, $results->count()); - $this->assertEquals('login', $results[2]->getAttribute('id')); - } - - /** - * @expectedException Zend\Dom\Exception\BadMethodCallException - */ - public function testOffsetSet() - { - $this->loadHtml(); - $results = $this->query->execute('input'); - $this->assertEquals(3, $results->count()); - - $results[0] = ''; - } - - - /** - * @expectedException Zend\Dom\Exception\BadMethodCallException - */ - public function testOffsetUnset() - { - $this->loadHtml(); - $results = $this->query->execute('input'); - $this->assertEquals(3, $results->count()); - - unset($results[2]); - } - - /** - * @group ZF-5310 - */ - public function testCssSelectorShouldFindNodesWhenMatchingAttributeValueWithDot() - { - $this->loadHtml(); - $results = $this->query->execute('a[href="http://www.about.com"]'); - - $this->assertEquals(1, $results->count()); - $this->assertEquals('About', $results[0]->nodeValue); - - } -}