From 242a79a7fbb13d63f97db20e4fc8ed8903e7a6c4 Mon Sep 17 00:00:00 2001 From: Matthew Weier O'Phinney Date: Fri, 31 Aug 2012 14:44:59 -0500 Subject: [PATCH] [zendframework/zf2#2284][ZF2-507] Updated README - Notice about Date header --- .coveralls.yml | 3 + .gitattributes | 6 + .gitignore | 14 ++ .php_cs | 43 ++++ .travis.yml | 35 +++ CONTRIBUTING.md | 229 +++++++++++++++++ LICENSE.txt | 27 ++ README.md | 9 + composer.json | 34 +++ phpunit.xml.dist | 35 +++ phpunit.xml.travis | 35 +++ src/Css2Xpath.php | 131 ++++++++++ src/Exception/ExceptionInterface.php | 21 ++ src/Exception/RuntimeException.php | 21 ++ src/NodeList.php | 178 +++++++++++++ src/Query.php | 319 ++++++++++++++++++++++++ test/Css2XpathTest.php | 171 +++++++++++++ test/NodeListTest.php | 34 +++ test/QueryTest.php | 359 +++++++++++++++++++++++++++ test/_files/bad-sample.html | 11 + test/_files/sample.xhtml | 125 ++++++++++ test/bootstrap.php | 34 +++ 22 files changed, 1874 insertions(+) create mode 100644 .coveralls.yml create mode 100644 .gitattributes create mode 100644 .gitignore create mode 100644 .php_cs create mode 100644 .travis.yml create mode 100644 CONTRIBUTING.md create mode 100644 LICENSE.txt create mode 100644 README.md create mode 100644 composer.json create mode 100644 phpunit.xml.dist create mode 100644 phpunit.xml.travis create mode 100644 src/Css2Xpath.php create mode 100644 src/Exception/ExceptionInterface.php create mode 100644 src/Exception/RuntimeException.php create mode 100644 src/NodeList.php create mode 100644 src/Query.php create mode 100644 test/Css2XpathTest.php create mode 100644 test/NodeListTest.php create mode 100644 test/QueryTest.php create mode 100644 test/_files/bad-sample.html create mode 100644 test/_files/sample.xhtml create mode 100644 test/bootstrap.php diff --git a/.coveralls.yml b/.coveralls.yml new file mode 100644 index 0000000..53bda82 --- /dev/null +++ b/.coveralls.yml @@ -0,0 +1,3 @@ +coverage_clover: clover.xml +json_path: coveralls-upload.json +src_dir: 
src diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..85dc9a8 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,6 @@ +/test export-ignore +/vendor export-ignore +.gitattributes export-ignore +.gitignore export-ignore +.travis.yml export-ignore +.php_cs export-ignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4cac0a2 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +.buildpath +.DS_Store +.idea +.project +.settings/ +.*.sw* +.*.un~ +nbproject +tmp/ + +clover.xml +coveralls-upload.json +phpunit.xml +vendor diff --git a/.php_cs b/.php_cs new file mode 100644 index 0000000..bf4b799 --- /dev/null +++ b/.php_cs @@ -0,0 +1,43 @@ +notPath('TestAsset') + ->notPath('_files') + ->filter(function (SplFileInfo $file) { + if (strstr($file->getPath(), 'compatibility')) { + return false; + } + }); +$config = Symfony\CS\Config\Config::create(); +$config->level(null); +$config->fixers( + array( + 'braces', + 'duplicate_semicolon', + 'elseif', + 'empty_return', + 'encoding', + 'eof_ending', + 'function_call_space', + 'function_declaration', + 'indentation', + 'join_function', + 'line_after_namespace', + 'linefeed', + 'lowercase_keywords', + 'parenthesis', + 'multiple_use', + 'method_argument_space', + 'object_operator', + 'php_closing_tag', + 'psr0', + 'remove_lines_between_uses', + 'short_tag', + 'standardize_not_equal', + 'trailing_spaces', + 'unused_use', + 'visibility', + 'whitespacy_lines', + ) +); +$config->finder($finder); +return $config; diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 0000000..fe909ec --- /dev/null +++ b/.travis.yml @@ -0,0 +1,35 @@ +sudo: false + +language: php + +matrix: + fast_finish: true + include: + - php: 5.5 + - php: 5.6 + env: + - EXECUTE_TEST_COVERALLS=true + - EXECUTE_CS_CHECK=true + - php: 7 + - php: hhvm + allow_failures: + - php: 7 + - php: hhvm + +notifications: + irc: "irc.freenode.org#zftalk.dev" + email: false + +before_install: + - if [[ $EXECUTE_TEST_COVERALLS != 
'true' ]]; then phpenv config-rm xdebug.ini || return 0 ; fi + +install: + - composer install --no-interaction --prefer-source + +script: + - if [[ $EXECUTE_TEST_COVERALLS == 'true' ]]; then ./vendor/bin/phpunit -c phpunit.xml.travis --coverage-clover clover.xml ; fi + - if [[ $EXECUTE_TEST_COVERALLS != 'true' ]]; then ./vendor/bin/phpunit -c phpunit.xml.travis ; fi + - if [[ $EXECUTE_CS_CHECK == 'true' ]]; then ./vendor/bin/php-cs-fixer fix -v --diff --dry-run --config-file=.php_cs ; fi + +after_script: + - if [[ $EXECUTE_TEST_COVERALLS == 'true' ]]; then ./vendor/bin/coveralls ; fi diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..82d5a35 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,229 @@ +# CONTRIBUTING + +## RESOURCES + +If you wish to contribute to Zend Framework, please be sure to +read/subscribe to the following resources: + + - [Coding Standards](https://github.com/zendframework/zf2/wiki/Coding-Standards) + - [Contributor's Guide](http://framework.zend.com/participate/contributor-guide) + - ZF Contributor's mailing list: + Archives: http://zend-framework-community.634137.n4.nabble.com/ZF-Contributor-f680267.html + Subscribe: zf-contributors-subscribe@lists.zend.com + - ZF Contributor's IRC channel: + #zftalk.dev on Freenode.net + +If you are working on new features or refactoring [create a proposal](https://github.com/zendframework/zend-dom/issues/new). + +## Reporting Potential Security Issues + +If you have encountered a potential security vulnerability, please **DO NOT** report it on the public +issue tracker: send it to us at [zf-security@zend.com](mailto:zf-security@zend.com) instead. +We will work with you to verify the vulnerability and patch it as soon as possible. 
+ +When reporting issues, please provide the following information: + +- Component(s) affected +- A description indicating how to reproduce the issue +- A summary of the security vulnerability and impact + +We request that you contact us via the email address above and give the project +contributors a chance to resolve the vulnerability and issue a new release prior +to any public exposure; this helps protect users and provides them with a chance +to upgrade and/or update in order to protect their applications. + +For sensitive email communications, please use [our PGP key](http://framework.zend.com/zf-security-pgp-key.asc). + +## RUNNING TESTS + +> ### Note: testing versions prior to 2.4 +> +> This component originates with Zend Framework 2. During the lifetime of ZF2, +> testing infrastructure migrated from PHPUnit 3 to PHPUnit 4. In most cases, no +> changes were necessary. However, due to the migration, tests may not run on +> versions < 2.4. As such, you may need to change the PHPUnit dependency if +> attempting a fix on such a version. + +To run tests: + +- Clone the repository: + + ```console + $ git clone git@github.com:zendframework/zend-dom.git + $ cd zend-dom + ``` + +- Install dependencies via composer: + + ```console + $ curl -sS https://getcomposer.org/installer | php -- + $ ./composer.phar install + ``` + + If you don't have `curl` installed, you can also download `composer.phar` from https://getcomposer.org/ + +- Run the tests via `phpunit` and the provided PHPUnit config, like in this example: + + ```console + $ ./vendor/bin/phpunit + ``` + +You can turn on conditional tests with the phpunit.xml file. +To do so: + + - Copy `phpunit.xml.dist` file to `phpunit.xml` + - Edit `phpunit.xml` to enable any specific functionality you + want to test, as well as to provide test values to utilize. 
+ +## Running Coding Standards Checks + +This component uses [php-cs-fixer](http://cs.sensiolabs.org/) for coding +standards checks, and provides configuration for our selected checks. +`php-cs-fixer` is installed by default via Composer. + +To run checks only: + +```console +$ ./vendor/bin/php-cs-fixer fix . -v --diff --dry-run --config-file=.php_cs +``` + +To have `php-cs-fixer` attempt to fix problems for you, omit the `--dry-run` +flag: + +```console +$ ./vendor/bin/php-cs-fixer fix . -v --diff --config-file=.php_cs +``` + +If you allow php-cs-fixer to fix CS issues, please re-run the tests to ensure +they pass, and make sure you add and commit the changes after verification. + +## Recommended Workflow for Contributions + +Your first step is to establish a public repository from which we can +pull your work into the master repository. We recommend using +[GitHub](https://github.com), as that is where the component is already hosted. + +1. Set up a [GitHub account](http://github.com/), if you haven't yet +2. Fork the repository (http://github.com/zendframework/zend-dom) +3. Clone the canonical repository locally and enter it. + + ```console + $ git clone https://github.com/zendframework/zend-dom.git + $ cd zend-dom + ``` + +4. Add a remote to your fork; substitute your GitHub username in the command + below. + + ```console + $ git remote add {username} git@github.com:{username}/zend-dom.git + $ git fetch {username} + ``` + +### Keeping Up-to-Date + +Periodically, you should update your fork or personal repository to +match the canonical ZF repository. 
Assuming you have setup your local repository +per the instructions above, you can do the following: + + +```console +$ git checkout master +$ git fetch origin +$ git rebase origin/master +# OPTIONALLY, to keep your remote up-to-date - +$ git push {username} master:master +``` + +If you're tracking other branches -- for example, the "develop" branch, where +new feature development occurs -- you'll want to do the same operations for that +branch; simply substitute "develop" for "master". + +### Working on a patch + +We recommend you do each new feature or bugfix in a new branch. This simplifies +the task of code review as well as the task of merging your changes into the +canonical repository. + +A typical workflow will then consist of the following: + +1. Create a new local branch based off either your master or develop branch. +2. Switch to your new local branch. (This step can be combined with the + previous step with the use of `git checkout -b`.) +3. Do some work, commit, repeat as necessary. +4. Push the local branch to your remote repository. +5. Send a pull request. + +The mechanics of this process are actually quite trivial. Below, we will +create a branch for fixing an issue in the tracker. + +```console +$ git checkout -b hotfix/9295 +Switched to a new branch 'hotfix/9295' +``` + +... do some work ... + + +```console +$ git commit +``` + +... write your log message ... + + +```console +$ git push {username} hotfix/9295:hotfix/9295 +Counting objects: 38, done. +Delta compression using up to 2 threads. +Compression objects: 100% (18/18), done. +Writing objects: 100% (20/20), 8.19KiB, done. +Total 20 (delta 12), reused 0 (delta 0) +To ssh://git@github.com/{username}/zend-dom.git + b5583aa..4f51698 HEAD -> master +``` + +To send a pull request, you have two options. + +If using GitHub, you can do the pull request from there. Navigate to +your repository, select the branch you just created, and then select the +"Pull Request" button in the upper right. 
Select the user/organization +"zendframework" as the recipient. + +If using your own repository - or even if using GitHub - you can use `git +format-patch` to create a patchset for us to apply; in fact, this is +**recommended** for security-related patches. If you use `format-patch`, please +send the patches as attachments to: + +- zf-devteam@zend.com for patches without security implications +- zf-security@zend.com for security patches + +#### What branch to issue the pull request against? + +Which branch should you issue a pull request against? + +- For fixes against the stable release, issue the pull request against the + "master" branch. +- For new features, or fixes that introduce new elements to the public API (such + as new public methods or properties), issue the pull request against the + "develop" branch. + +### Branch Cleanup + +As you might imagine, if you are a frequent contributor, you'll start to +get a ton of branches both locally and on your remote. + +Once you know that your changes have been accepted to the master +repository, we suggest doing some cleanup of these branches. + +- Local branch cleanup + + ```console + $ git branch -d <branchname> + ``` + +- Remote branch removal + + ```console + $ git push {username} :<branchname> + ``` diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..6eab5aa --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,27 @@ +Copyright (c) 2005-2015, Zend Technologies USA, Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ + * Neither the name of Zend Technologies USA, Inc. nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..9b9bc78 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +# zend-dom + +The `Zend\Dom` component provides tools for working with DOM documents and +structures. Currently, we offer `Zend\Dom\Query`, which provides a unified +interface for querying DOM documents utilizing both XPath and CSS selectors. 
+ + +- File issues at https://github.com/zendframework/zend-dom/issues +- Documentation is at http://framework.zend.com/manual/current/en/index.html#zend-dom diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..83bbda6 --- /dev/null +++ b/composer.json @@ -0,0 +1,34 @@ +{ + "name": "zendframework/zend-dom", + "description": "provides tools for working with DOM documents and structures", + "license": "BSD-3-Clause", + "keywords": [ + "zf2", + "dom" + ], + "homepage": "https://github.com/zendframework/zend-dom", + "autoload": { + "psr-4": { + "Zend\\Dom\\": "src/" + } + }, + "require": { + "php": ">=5.3.3" + }, + "extra": { + "branch-alias": { + "dev-master": "2.4-dev", + "dev-develop": "2.5-dev" + } + }, + "autoload-dev": { + "psr-4": { + "ZendTest\\Dom\\": "test/" + } + }, + "require-dev": { + "fabpot/php-cs-fixer": "1.7.*", + "satooshi/php-coveralls": "dev-master", + "phpunit/PHPUnit": "~4.0" + } +} \ No newline at end of file diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..ec4b781 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,35 @@ + + + + + ./test/ + + + + + + disable + + + + + + ./src + + + + + + + + + + + + diff --git a/phpunit.xml.travis b/phpunit.xml.travis new file mode 100644 index 0000000..ec4b781 --- /dev/null +++ b/phpunit.xml.travis @@ -0,0 +1,35 @@ + + + + + ./test/ + + + + + + disable + + + + + + ./src + + + + + + + + + + + + diff --git a/src/Css2Xpath.php b/src/Css2Xpath.php new file mode 100644 index 0000000..c3739d4 --- /dev/null +++ b/src/Css2Xpath.php @@ -0,0 +1,131 @@ +\s+|', '>', $path); + $segments = preg_split('/\s+/', $path); + foreach ($segments as $key => $segment) { + $pathSegment = self::_tokenize($segment); + if (0 == $key) { + if (0 === strpos($pathSegment, '[contains(')) { + $paths[0] .= '*' . 
ltrim($pathSegment, '*'); + } else { + $paths[0] .= $pathSegment; + } + continue; + } + if (0 === strpos($pathSegment, '[contains(')) { + foreach ($paths as $key => $xpath) { + $paths[$key] .= '//*' . ltrim($pathSegment, '*'); + $paths[] = $xpath . $pathSegment; + } + } else { + foreach ($paths as $key => $xpath) { + $paths[$key] .= '//' . $pathSegment; + } + } + } + + if (1 == count($paths)) { + return $paths[0]; + } + return implode('|', $paths); + } + + /** + * Tokenize CSS expressions to XPath + * + * @param string $expression + * @return string + */ + protected static function _tokenize($expression) + { + // Child selectors + $expression = str_replace('>', '/', $expression); + + // IDs + $expression = preg_replace('|#([a-z][a-z0-9_-]*)|i', '[@id=\'$1\']', $expression); + $expression = preg_replace('|(?cssQuery = $cssQuery; + $this->xpathQuery = $xpathQuery; + $this->document = $document; + $this->nodeList = $nodeList; + } + + /** + * Retrieve CSS Query + * + * @return string + */ + public function getCssQuery() + { + return $this->cssQuery; + } + + /** + * Retrieve XPath query + * + * @return string + */ + public function getXpathQuery() + { + return $this->xpathQuery; + } + + /** + * Retrieve DOMDocument + * + * @return DOMDocument + */ + public function getDocument() + { + return $this->document; + } + + /** + * Iterator: rewind to first element + * + * @return void + */ + public function rewind() + { + $this->position = 0; + return $this->nodeList->item(0); + } + + /** + * Iterator: is current position valid? 
+ * + * @return bool + */ + public function valid() + { + if (in_array($this->position, range(0, $this->nodeList->length - 1)) && $this->nodeList->length > 0) { + return true; + } + return false; + } + + /** + * Iterator: return current element + * + * @return DOMElement + */ + public function current() + { + return $this->nodeList->item($this->position); + } + + /** + * Iterator: return key of current element + * + * @return int + */ + public function key() + { + return $this->position; + } + + /** + * Iterator: move to next element + * + * @return void + */ + public function next() + { + ++$this->position; + return $this->nodeList->item($this->position); + } + + /** + * Countable: get count + * + * @return int + */ + public function count() + { + return $this->nodeList->length; + } +} diff --git a/src/Query.php b/src/Query.php new file mode 100644 index 0000000..2073130 --- /dev/null +++ b/src/Query.php @@ -0,0 +1,319 @@ +setEncoding($encoding); + $this->setDocument($document); + } + + /** + * Set document encoding + * + * @param string $encoding + * @return Query + */ + public function setEncoding($encoding) + { + $this->encoding = (null === $encoding) ? null : (string) $encoding; + return $this; + } + + /** + * Get document encoding + * + * @return null|string + */ + public function getEncoding() + { + return $this->encoding; + } + + /** + * Set document to query + * + * @param string $document + * @param null|string $encoding Document encoding + * @return Query + */ + public function setDocument($document, $encoding = null) + { + if (0 === strlen($document)) { + return $this; + } + // breaking XML declaration to make syntax highlighting work + if ('<' . 
'?xml' == substr(trim($document), 0, 5)) { + if (preg_match('/]*xmlns="([^"]+)"[^>]*>/i', $document, $matches)) { + $this->xpathNamespaces[] = $matches[1]; + return $this->setDocumentXhtml($document, $encoding); + } + return $this->setDocumentXml($document, $encoding); + } + if (strstr($document, 'DTD XHTML')) { + return $this->setDocumentXhtml($document, $encoding); + } + return $this->setDocumentHtml($document, $encoding); + } + + /** + * Register HTML document + * + * @param string $document + * @param null|string $encoding Document encoding + * @return Query + */ + public function setDocumentHtml($document, $encoding = null) + { + $this->document = (string) $document; + $this->docType = self::DOC_HTML; + if (null !== $encoding) { + $this->setEncoding($encoding); + } + return $this; + } + + /** + * Register XHTML document + * + * @param string $document + * @param null|string $encoding Document encoding + * @return Query + */ + public function setDocumentXhtml($document, $encoding = null) + { + $this->document = (string) $document; + $this->docType = self::DOC_XHTML; + if (null !== $encoding) { + $this->setEncoding($encoding); + } + return $this; + } + + /** + * Register XML document + * + * @param string $document + * @param null|string $encoding Document encoding + * @return Query + */ + public function setDocumentXml($document, $encoding = null) + { + $this->document = (string) $document; + $this->docType = self::DOC_XML; + if (null !== $encoding) { + $this->setEncoding($encoding); + } + return $this; + } + + /** + * Retrieve current document + * + * @return string + */ + public function getDocument() + { + return $this->document; + } + + /** + * Get document type + * + * @return string + */ + public function getDocumentType() + { + return $this->docType; + } + + /** + * Get any DOMDocument errors found + * + * @return false|array + */ + public function getDocumentErrors() + { + return $this->documentErrors; + } + + /** + * Perform a CSS selector query + * + 
* @param string $query + * @return NodeList + */ + public function execute($query) + { + $xpathQuery = Css2Xpath::transform($query); + return $this->queryXpath($xpathQuery, $query); + } + + /** + * Perform an XPath query + * + * @param string|array $xpathQuery + * @param string|null $query CSS selector query + * @throws Exception\RuntimeException + * @return NodeList + */ + public function queryXpath($xpathQuery, $query = null) + { + if (null === ($document = $this->getDocument())) { + throw new Exception\RuntimeException('Cannot query; no document registered'); + } + + $encoding = $this->getEncoding(); + libxml_use_internal_errors(true); + libxml_disable_entity_loader(true); + if (null === $encoding) { + $domDoc = new DOMDocument('1.0'); + } else { + $domDoc = new DOMDocument('1.0', $encoding); + } + $type = $this->getDocumentType(); + switch ($type) { + case self::DOC_XML: + $success = $domDoc->loadXML($document); + foreach ($domDoc->childNodes as $child) { + if ($child->nodeType === XML_DOCUMENT_TYPE_NODE) { + throw new Exception\RuntimeException( + 'Invalid XML: Detected use of illegal DOCTYPE' + ); + } + } + break; + case self::DOC_HTML: + case self::DOC_XHTML: + default: + $success = $domDoc->loadHTML($document); + break; + } + $errors = libxml_get_errors(); + if (!empty($errors)) { + $this->documentErrors = $errors; + libxml_clear_errors(); + } + libxml_disable_entity_loader(false); + libxml_use_internal_errors(false); + + if (!$success) { + throw new Exception\RuntimeException(sprintf('Error parsing document (type == %s)', $type)); + } + + $nodeList = $this->getNodeList($domDoc, $xpathQuery); + return new NodeList($query, $xpathQuery, $domDoc, $nodeList); + } + + /** + * Register XPath namespaces + * + * @param array $xpathNamespaces + * @return void + */ + public function registerXpathNamespaces($xpathNamespaces) + { + $this->xpathNamespaces = $xpathNamespaces; + } + + /** + * Register PHP Functions to use in internal DOMXPath + * + * @param mixed 
$restrict + * @return void + */ + public function registerXpathPhpFunctions($xpathPhpFunctions = true) + { + $this->xpathPhpFunctions = $xpathPhpFunctions; + } + + /** + * Prepare node list + * + * @param DOMDocument $document + * @param string|array $xpathQuery + * @return array + */ + protected function getNodeList($document, $xpathQuery) + { + $xpath = new DOMXPath($document); + foreach ($this->xpathNamespaces as $prefix => $namespaceUri) { + $xpath->registerNamespace($prefix, $namespaceUri); + } + if ($this->xpathPhpFunctions) { + $xpath->registerNamespace("php", "http://php.net/xpath"); + ($this->xpathPhpFunctions === true) ? + $xpath->registerPHPFunctions() + : $xpath->registerPHPFunctions($this->xpathPhpFunctions); + } + $xpathQuery = (string) $xpathQuery; + return $xpath->query($xpathQuery); + } +} diff --git a/test/Css2XpathTest.php b/test/Css2XpathTest.php new file mode 100644 index 0000000..587cc7d --- /dev/null +++ b/test/Css2XpathTest.php @@ -0,0 +1,171 @@ +assertTrue(is_string($test)); + } + + /** + * @group ZF-6281 + */ + public function testTransformShouldReturnMultiplePathsWhenExpressionContainsCommas() + { + $test = Css2Xpath::transform('#foo, #bar'); + $this->assertTrue(is_string($test)); + $this->assertContains('|', $test); + $this->assertEquals(2, count(explode('|', $test))); + } + + public function testTransformShouldRecognizeHashSymbolAsId() + { + $test = Css2Xpath::transform('#foo'); + $this->assertEquals("//*[@id='foo']", $test); + } + + public function testTransformShouldRecognizeDotSymbolAsClass() + { + $test = Css2Xpath::transform('.foo'); + $this->assertEquals("//*[contains(concat(' ', normalize-space(@class), ' '), ' foo ')]", $test); + } + + public function testTransformShouldAssumeSpacesToIndicateRelativeXpathQueries() + { + $test = Css2Xpath::transform('div#foo .bar'); + $this->assertContains('|', $test); + $expected = array( + "//div[@id='foo']//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]", + 
"//div[@id='foo'][contains(concat(' ', normalize-space(@class), ' '), ' bar ')]", + ); + foreach ($expected as $path) { + $this->assertContains($path, $test); + } + } + + public function testTransformShouldWriteChildSelectorsAsAbsoluteXpathRelations() + { + $test = Css2Xpath::transform('div#foo>span'); + $this->assertEquals("//div[@id='foo']/span", $test); + } + + /** + * @group ZF-6281 + */ + public function testMultipleComplexCssSpecificationShouldTransformToExpectedXpath() + { + $test = Css2Xpath::transform('div#foo span.bar, #bar li.baz a'); + $this->assertTrue(is_string($test)); + $this->assertContains('|', $test); + $actual = explode('|', $test); + $expected = array( + "//div[@id='foo']//span[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]", + "//*[@id='bar']//li[contains(concat(' ', normalize-space(@class), ' '), ' baz ')]//a", + ); + $this->assertEquals(count($expected), count($actual)); + foreach ($actual as $path) { + $this->assertContains($path, $expected); + } + } + + public function testClassNotationWithoutSpecifiedTagShouldResultInMultipleQueries() + { + $test = Css2Xpath::transform('div.foo .bar a .baz span'); + $this->assertContains('|', $test); + $segments = array( + "//div[contains(concat(' ', normalize-space(@class), ' '), ' foo ')]//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//a//*[contains(concat(' ', normalize-space(@class), ' '), ' baz ')]//span", + "//div[contains(concat(' ', normalize-space(@class), ' '), ' foo ')]//*[contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//a[contains(concat(' ', normalize-space(@class), ' '), ' baz ')]//span", + "//div[contains(concat(' ', normalize-space(@class), ' '), ' foo ')][contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//a//*[contains(concat(' ', normalize-space(@class), ' '), ' baz ')]//span", + "//div[contains(concat(' ', normalize-space(@class), ' '), ' foo ')][contains(concat(' ', normalize-space(@class), ' '), ' bar ')]//a[contains(concat(' 
', normalize-space(@class), ' '), ' baz ')]//span", + ); + foreach ($segments as $xpath) { + $this->assertContains($xpath, $test); + } + } + + public function testShouldAllowEqualitySelectionOfArbitraryAttributes() + { + $test = Css2Xpath::transform('div[foo="bar"]'); + $this->assertEquals("//div[@foo='bar']", $test); + } + + public function testShouldCastAttributeNamesToLowerCase() + { + $test = Css2Xpath::transform('div[dojoType="bar"]'); + $this->assertEquals("//div[@dojotype='bar']", $test); + } + + public function testShouldAllowContentSubSelectionOfArbitraryAttributes() + { + $test = Css2Xpath::transform('div[foo~="bar"]'); + $this->assertEquals("//div[contains(concat(' ', normalize-space(@foo), ' '), ' bar ')]", $test); + } + + public function testShouldAllowContentMatchingOfArbitraryAttributes() + { + $test = Css2Xpath::transform('div[foo*="bar"]'); + $this->assertEquals("//div[contains(@foo, 'bar')]", $test); + } + + /** + * @group ZF-4010 + */ + public function testShouldAllowMatchingOfAttributeValues() + { + $test = Css2Xpath::transform('tag#id @attribute'); + $this->assertEquals("//tag[@id='id']//@attribute", $test); + } + + /** + * @group ZF-8006 + */ + public function testShouldAllowWhitespaceInDescendentSelectorExpressions() + { + $test = Css2Xpath::transform('child > leaf'); + $this->assertEquals("//child/leaf", $test); + } + + /** + * @group ZF-9764 + */ + public function testIdSelectorWithAttribute() + { + $test = Css2Xpath::transform('#id[attribute="value"]'); + $this->assertEquals("//*[@id='id'][@attribute='value']", $test); + } + + /** + * @group ZF-9764 + */ + public function testIdSelectorWithLeadingAsterix() + { + $test = Css2Xpath::transform('*#id'); + $this->assertEquals("//*[@id='id']", $test); + } +} diff --git a/test/NodeListTest.php b/test/NodeListTest.php new file mode 100644 index 0000000..6c91471 --- /dev/null +++ b/test/NodeListTest.php @@ -0,0 +1,34 @@ +getElementsByTagName("a"); + $result = new NodeList("", "", $dom, 
$emptyNodeList); + + $this->assertFalse($result->valid()); + } +} diff --git a/test/QueryTest.php b/test/QueryTest.php new file mode 100644 index 0000000..071d3d2 --- /dev/null +++ b/test/QueryTest.php @@ -0,0 +1,359 @@ +query = new Query(); + } + + public function getHtml() + { + if (null === $this->html) { + $this->html = file_get_contents(__DIR__ . '/_files/sample.xhtml'); + } + return $this->html; + } + + public function loadHtml() + { + $this->query->setDocument($this->getHtml()); + } + + public function handleError($msg, $code = 0) + { + $this->error = $msg; + } + + public function testConstructorShouldNotRequireArguments() + { + $query = new Query(); + } + + public function testConstructorShouldAcceptDocumentString() + { + $html = $this->getHtml(); + $query = new Query($html); + $this->assertSame($html, $query->getDocument()); + } + + public function testDocShouldBeNullByDefault() + { + $this->assertNull($this->query->getDocument()); + } + + public function testDocShouldBeNullByEmptyStringConstructor() + { + $emptyStr = ""; + $query = new Query($emptyStr); + $this->assertNull($this->query->getDocument()); + } + + public function testDocShouldBeNullByEmptyStringSet() + { + $emptyStr = ""; + $this->query->setDocument($emptyStr); + $this->assertNull($this->query->getDocument()); + } + + public function testDocTypeShouldBeNullByDefault() + { + $this->assertNull($this->query->getDocumentType()); + } + + public function testShouldAllowSettingDocument() + { + $this->testDocShouldBeNullByDefault(); + $this->loadHtml(); + $this->assertEquals($this->getHtml(), $this->query->getDocument()); + } + + public function testDocumentTypeShouldBeAutomaticallyDiscovered() + { + $this->loadHtml(); + $this->assertEquals(Query::DOC_XHTML, $this->query->getDocumentType()); + $this->query->setDocument(''); + $this->assertEquals(Query::DOC_XML, $this->query->getDocumentType()); + $this->query->setDocument(''); + $this->assertEquals(Query::DOC_HTML, $this->query->getDocumentType()); + 
} + + public function testQueryingWithoutRegisteringDocumentShouldThrowException() + { + $this->setExpectedException('\Zend\Dom\Exception\RuntimeException', 'no document'); + $this->query->execute('.foo'); + } + + public function testQueryingInvalidDocumentShouldThrowException() + { + set_error_handler(array($this, 'handleError')); + $this->query->setDocumentXml('some bogus string'); + try { + $this->query->execute('.foo'); + restore_error_handler(); + $this->fail('Querying invalid document should throw exception'); + } catch (DOMException $e) { + restore_error_handler(); + $this->assertContains('Error parsing', $e->getMessage()); + } + } + + public function testQueryShouldReturnResultObject() + { + $this->loadHtml(); + $test = $this->query->execute('.foo'); + $this->assertTrue($test instanceof NodeList); + } + + public function testResultShouldIndicateNumberOfFoundNodes() + { + $this->loadHtml(); + $result = $this->query->execute('.foo'); + $message = 'Xpath: ' . $result->getXpathQuery() . 
"\n"; + $this->assertEquals(3, count($result), $message); + } + + public function testResultShouldAllowIteratingOverFoundNodes() + { + $this->loadHtml(); + $result = $this->query->execute('.foo'); + $this->assertEquals(3, count($result)); + foreach ($result as $node) { + $this->assertTrue($node instanceof \DOMNode, var_export($result, 1)); + } + } + + public function testQueryShouldFindNodesWithMultipleClasses() + { + $this->loadHtml(); + $result = $this->query->execute('.footerblock .last'); + $this->assertEquals(1, count($result), $result->getXpathQuery()); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsExactly() + { + $this->loadHtml(); + $result = $this->query->execute('div[dojoType="FilteringSelect"]'); + $this->assertEquals(1, count($result), $result->getXpathQuery()); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAsDiscreteWords() + { + $this->loadHtml(); + $result = $this->query->execute('li[dojoType~="bar"]'); + $this->assertEquals(2, count($result), $result->getXpathQuery()); + } + + public function testQueryShouldFindNodesWithArbitraryAttributeSelectorsAndAttributeValue() + { + $this->loadHtml(); + $result = $this->query->execute('li[dojoType*="bar"]'); + $this->assertEquals(2, count($result), $result->getXpathQuery()); + } + + public function testQueryXpathShouldAllowQueryingArbitraryUsingXpath() + { + $this->loadHtml(); + $result = $this->query->queryXpath('//li[contains(@dojotype, "bar")]'); + $this->assertEquals(2, count($result), $result->getXpathQuery()); + } + + /** + * Calling a PHP function from XPath without registering it must raise an exception. + * + * Any exception ends the test early as a pass. Reaching the statement after + * the try block means nothing was thrown, which is reported through fail(). + */ + public function testXpathPhpFunctionsShouldBeDisableByDefault() + { + $this->loadHtml(); + try { + $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); + } catch (\Exception $e) { + return ; + } + $this->fail('XPath PHPFunctions should be disable by default'); + } + + public function testXpathPhpFunctionsShouldBeEnableWithoutParameter() + { + $this->loadHtml(); + 
$this->query->registerXpathPhpFunctions(); + $result = $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); + $this->assertEquals('content-type', + strtolower($result->current()->getAttribute('http-equiv')), + $result->getXpathQuery()); + } + + /** + * Registering only 'stripos' must not permit calling 'strtolower' from XPath. + * + * Any exception ends the test early as a pass. Reaching the statement after + * the try block means nothing was thrown, which is reported through fail(). + */ + public function testXpathPhpFunctionsShouldBeNotCalledWhenSpecifiedFunction() + { + $this->loadHtml(); + try { + $this->query->registerXpathPhpFunctions('stripos'); + $this->query->queryXpath('//meta[php:functionString("strtolower", @http-equiv) = "content-type"]'); + } catch (\Exception $e) { + // $e->getMessage() - Not allowed to call handler 'strtolower() + return ; + } + $this->fail('Not allowed to call handler strtolower()'); + } + + /** + * @group ZF-9243 + */ + public function testLoadingDocumentWithErrorsShouldNotRaisePhpErrors() + { + $file = file_get_contents(__DIR__ . '/_files/bad-sample.html'); + $this->query->setDocument($file); + $this->query->execute('p'); + $errors = $this->query->getDocumentErrors(); + $this->assertTrue(is_array($errors)); + $this->assertTrue(0 < count($errors)); + } + + /** + * @group ZF-9765 + */ + public function testCssSelectorShouldFindNodesWhenMatchingMultipleAttributes() + { + $html = << + + +
+ + + +
+ + +EOF; + + $this->query->setDocument($html); + $results = $this->query->execute('input[type="hidden"][value="1"]'); + $this->assertEquals(2, count($results), $results->getXpathQuery()); + $results = $this->query->execute('input[value="1"][type~="hidden"]'); + $this->assertEquals(2, count($results), $results->getXpathQuery()); + $results = $this->query->execute('input[type="hidden"][value="0"]'); + $this->assertEquals(1, count($results)); + } + + /** + * The encoding given as second constructor argument is reported by getEncoding(). + * + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingAtConstruction() + { + $doc = new Query($this->getHtml(), 'iso-8859-1'); + $this->assertEquals('iso-8859-1', $doc->getEncoding()); + } + + /** + * The encoding given as second argument to setDocument() is reported by getEncoding(). + * + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingWhenSettingDocument() + { + $this->query->setDocument($this->getHtml(), 'iso-8859-1'); + $this->assertEquals('iso-8859-1', $this->query->getEncoding()); + } + + /** + * The encoding given to setEncoding() is reported by getEncoding(). + * + * @group ZF-3938 + */ + public function testAllowsSpecifyingEncodingViaSetter() + { + $this->query->setEncoding('iso-8859-1'); + $this->assertEquals('iso-8859-1', $this->query->getEncoding()); + } + + /** + * The encoding set with the document is carried onto the DOMDocument of the result. + * + * Checks the result of execute() is a NodeList, that its getDocument() + * returns a DOMDocument, and that the document's encoding is 'utf-8'. + * + * @group ZF-3938 + */ + public function testSpecifyingEncodingSetsEncodingOnDomDocument() + { + $this->query->setDocument($this->getHtml(), 'utf-8'); + $test = $this->query->execute('.foo'); + $this->assertInstanceof('\\Zend\\Dom\\NodeList', $test); + $doc = $test->getDocument(); + $this->assertInstanceof('\\DOMDocument', $doc); + $this->assertEquals('utf-8', $doc->encoding); + } + + /** + * Querying '//p' on the inline document must yield exactly one node. + * + * @group ZF-11376 + */ + public function testXhtmlDocumentWithXmlDeclaration() + { + $xhtmlWithXmlDecl = << + + </head> + <body><p>Test paragraph.</p></body> +</html> +EOB; + $this->query->setDocument($xhtmlWithXmlDecl, 'utf-8'); + $this->assertEquals(1, $this->query->execute('//p')->count()); + } + + /** + * Same check as above for a document with both an XML declaration and a DOCTYPE. + * + * @group ZF-12106 + */ + public function testXhtmlDocumentWithXmlAndDoctypeDeclaration() + { + $xhtmlWithXmlDecl = <<<EOB +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE html + 
PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" + "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"> +<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en"> + <head> + <title>Virtual Library + + +

Moved to example.org.

+ + +EOB; + $this->query->setDocument($xhtmlWithXmlDecl, 'utf-8'); + $this->assertEquals(1, $this->query->execute('//p')->count()); + } + + /** + * XML carrying a DOCTYPE is loaded via setDocumentXml(); the subsequent + * queryXpath('/') is expected to throw \Zend\Dom\Exception\RuntimeException. + */ + public function testLoadingXmlContainingDoctypeShouldFailToPreventXxeAndXeeAttacks() + { + $xml = << +]> + + This result is &harmless; + +XML; + $this->query->setDocumentXml($xml); + $this->setExpectedException("\Zend\Dom\Exception\RuntimeException"); + $this->query->queryXpath('/'); + } +} diff --git a/test/_files/bad-sample.html b/test/_files/bad-sample.html new file mode 100644 index 0000000..a70bc0f --- /dev/null +++ b/test/_files/bad-sample.html @@ -0,0 +1,11 @@ + + + + + bad HTMl sample + + +

foola la la +

text
+ + diff --git a/test/_files/sample.xhtml b/test/_files/sample.xhtml new file mode 100644 index 0000000..c250c54 --- /dev/null +++ b/test/_files/sample.xhtml @@ -0,0 +1,125 @@ + + + + Sample HTML Content for DOM Queries + + + + + + + +
+
+

Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Sed at dui quis magna + viverra sagittis. Donec sed nibh ut pede pretium ultricies. Mauris viverra + tempus dui. Donec dolor risus, mollis sed, pulvinar id, faucibus ac, nisl. Duis + eu erat at erat tempus euismod. Suspendisse sodales lacus vitae libero. Sed + suscipit commodo enim. Pellentesque erat. Cras orci. Pellentesque non nunc. Cras + diam libero, feugiat vel, accumsan et, laoreet mattis, purus. Pellentesque + volutpat hendrerit justo. Cras vehicula. Vestibulum cursus gravida est. + Curabitur pede. Mauris consectetuer rutrum dolor. Ut a turpis ac metus + sollicitudin dignissim.

+ +

Praesent blandit, nisi in egestas mattis, lectus tortor vehicula massa, commodo + vehicula diam lacus consequat lectus. In vitae pede eget leo faucibus + condimentum. Pellentesque nisi. Ut condimentum elit vitae mi. Morbi neque erat, + mollis nec, volutpat sed, placerat sit amet, ligula. Morbi eleifend. Morbi + auctor condimentum ipsum. In ac purus. Sed vitae magna. Sed volutpat vestibulum + leo. Phasellus id nibh.

+ +

Fusce blandit elementum leo. Proin id erat. In dignissim orci vulputate libero + cursus volutpat. Ut vitae arcu non nulla sagittis laoreet. Suspendisse sed + tortor ac risus placerat convallis. Donec nulla ipsum, tempus et, porttitor + quis, blandit vel, tortor. Donec condimentum. Aliquam felis dui, consectetuer + ultricies, euismod eu, pretium non, sem. Sed ultricies, tortor non vulputate + tempor, urna leo sagittis libero, ut feugiat nulla tellus et ligula. Suspendisse + eget est. Vestibulum interdum mi at felis. Fusce dictum. Fusce a enim at ipsum + consectetuer molestie. Integer rhoncus. Cum sociis natoque penatibus et magnis + dis parturient montes, nascetur ridiculus mus. Donec rhoncus mattis arcu.

+ +

Proin id nisi. Vivamus eu risus. Vivamus et enim et turpis volutpat bibendum. + Nunc nunc ipsum, semper sit amet, pellentesque in, venenatis sit amet, elit. + Suspendisse potenti. Cras lacinia, nisl vel vulputate pellentesque, lacus tellus + lobortis ipsum, sed pellentesque neque felis ac orci. Sed pellentesque. + Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac + turpis egestas. Etiam lacus enim, facilisis eu, suscipit ac, condimentum ut, + ante. Aenean convallis mattis enim. Aliquam pretium. Etiam quam. Donec + tincidunt. Pellentesque id ante ut orci gravida semper. Cum sociis natoque + penatibus et magnis dis parturient montes, nascetur ridiculus mus. Fusce + bibendum elit vel sem cursus ultrices. Nunc in purus. Ut consectetuer, nulla ac + porta hendrerit, ligula eros blandit nulla, at ornare dui odio vel justo. Morbi + tellus nulla, sodales eget, aliquet ac, varius vitae, pede. Pellentesque + habitant morbi tristique senectus et netus et malesuada fames ac turpis + egestas.

+ +

Vestibulum urna. Proin hendrerit neque et tellus. Duis luctus congue sem. + Aliquam ultricies orci nec enim. Nam velit eros, feugiat posuere, ultricies + vitae, consequat nec, urna. In ipsum neque, porta sed, ornare nec, cursus et, + mauris. Nam eget urna. Suspendisse venenatis nulla nec urna lacinia bibendum. + Morbi tempor lobortis nisl. Nulla ut eros. Fusce id tortor ut diam vulputate + consectetuer. Nulla vel augue. Nulla ac tellus sed orci pulvinar dictum. Proin + lobortis. Fusce consequat auctor ante. Donec bibendum fringilla nunc. Donec + viverra, urna ac auctor dapibus, augue ipsum tristique lacus, sed feugiat nibh + nibh quis purus. Donec orci est, pharetra laoreet, laoreet id, mattis ut, odio. + In lacus sem, rutrum tristique, dignissim ac, imperdiet ac, lacus.

+
+ + +
+ + + + diff --git a/test/bootstrap.php b/test/bootstrap.php new file mode 100644 index 0000000..f1bbcf9 --- /dev/null +++ b/test/bootstrap.php @@ -0,0 +1,34 @@ +