Skip to content

Commit

Permalink
Merge pull request #1060 from nextcloud/maintenance/217-url-in-parser
Browse files Browse the repository at this point in the history
Pass the URL to the recipe parsers to allow for domain-specific parsers
  • Loading branch information
christianlupus authored Jul 1, 2022
2 parents c384b23 + 933c3ce commit d837c96
Show file tree
Hide file tree
Showing 9 changed files with 22 additions and 14 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
[#1050](https://github.com/nextcloud/cookbook/pull/1050) @christianlupus
- Migrated node-sass to dart sass
[#1051](https://github.com/nextcloud/cookbook/pull/1051) @christianlupus
- Add the URL as a parameter to the recipe parsers to allow for specialized parsers per website in the backend
[#1060](https://github.com/nextcloud/cookbook/pull/1060) @christianlupus

### Codebase maintenance
- Removed codecov.io upload of intermediate merge commits during pull requests
Expand Down
3 changes: 2 additions & 1 deletion lib/Helper/HTMLParser/AbstractHtmlParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,9 @@ public function __construct(IL10N $l10n) {
* Extract the recipe from the given document.
*
* @param \DOMDocument $document The document to parse
* @param ?string $url The URL of the recipe to import
* @return array The JSON content in the document as a PHP array
* @throws HtmlParsingException If the parsing was not successful
*/
abstract public function parse(\DOMDocument $document): array;
abstract public function parse(\DOMDocument $document, ?string $url): array;
}
2 changes: 1 addition & 1 deletion lib/Helper/HTMLParser/HttpJsonLdParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public function __construct(IL10N $l10n, JsonService $jsonService) {
$this->jsonService = $jsonService;
}

public function parse(\DOMDocument $document): array {
public function parse(\DOMDocument $document, ?string $url): array {
$xpath = new \DOMXPath($document);

$json_ld_elements = $xpath->query("//*[@type='application/ld+json']");
Expand Down
2 changes: 1 addition & 1 deletion lib/Helper/HTMLParser/HttpMicrodataParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ public function __construct(IL10N $l10n) {
parent::__construct($l10n);
}

public function parse(DOMDocument $document): array {
public function parse(DOMDocument $document, ?string $url): array {
$this->xpath = new DOMXPath($document);

$selectorHttp = "//*[@itemtype='http://schema.org/Recipe']";
Expand Down
5 changes: 3 additions & 2 deletions lib/Service/RecipeExtractionService.php
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,15 @@ public function __construct(HttpJsonLdParser $jsonParser, HttpMicrodataParser $m
* Parse a DOM document using all registered parsers
*
* @param \DOMDocument $document The document to parse
* @param ?string $url The URL of the recipe to be parsed
* @throws HtmlParsingException If no parser was able to successfully parse the document
* @return array The data as returned from the parser
*/
public function parse(\DOMDocument $document): array {
public function parse(\DOMDocument $document, ?string $url): array {
/** @var AbstractHtmlParser $parser */
foreach ($this->parsers as $parser) {
try {
return $parser->parse($document);
return $parser->parse($document, $url);
} catch (HtmlParsingException $ex) {
// Silently ignore failure as there might be other parsers better suited
}
Expand Down
2 changes: 1 addition & 1 deletion lib/Service/RecipeService.php
Original file line number Diff line number Diff line change
Expand Up @@ -834,7 +834,7 @@ public function downloadRecipe(string $url): File {
$this->htmlDownloadService->downloadRecipe($url);

try {
$json = $this->recipeExtractionService->parse($this->htmlDownloadService->getDom());
$json = $this->recipeExtractionService->parse($this->htmlDownloadService->getDom(), $url);
} catch (HtmlParsingException $ex) {
throw new ImportException($ex->getMessage(), null, $ex);
}
Expand Down
2 changes: 1 addition & 1 deletion tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ public function testHTMLFile($file, $valid, $jsonFile): void {
$document->loadHTML($content);

try {
$res = $parser->parse($document);
$res = $parser->parse($document, 'http://example.com');

$jsonDest = file_get_contents(__DIR__ . "/res_JsonLd/$jsonFile");
$expected = json_decode($jsonDest, true);
Expand Down
4 changes: 2 additions & 2 deletions tests/Unit/Helper/HTMLParser/HttpMicrodataParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ public function testHTMLFile($filename, $valid, $jsonFile): void {
$document->loadHTML($content);

try {
$res = $parser->parse($document);
$res = $parser->parse($document, 'http://example.com');

$jsonDest = file_get_contents(__DIR__ . "/res_Microdata/$jsonFile");
$expected = json_decode($jsonDest, true);
Expand Down Expand Up @@ -145,7 +145,7 @@ private function finishTest($parser, $content, $jsonFile): void {
$document->loadHTML($content);

try {
$res = $parser->parse($document);
$res = $parser->parse($document, 'http://exmapl.com');

$jsonDest = file_get_contents(__DIR__ . "/res_Microdata/$jsonFile");
$expected = json_decode($jsonDest, true);
Expand Down
14 changes: 9 additions & 5 deletions tests/Unit/Service/RecipeExtractionServiceTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
use OCA\Cookbook\Exception\HtmlParsingException;
use OCP\IL10N;
use OCA\Cookbook\Service\RecipeExtractionService;
use PHPUnit\Framework\MockObject\MockObject;

class RecipeExtractionServiceTest extends TestCase {
/**
Expand All @@ -26,42 +27,45 @@ protected function setUp(): void {
* @param bool $exceptionExpected
*/
public function testParsingDelegation($jsonSuccess, $microdataSuccess, $exceptionExpected): void {
/** @var HttpJsonLdParser|MockObject $jsonParser */
$jsonParser = $this->createMock(HttpJsonLdParser::class);
/** @var HttpMicrodataParser|MockObject $microdataParser */
$microdataParser = $this->createMock(HttpMicrodataParser::class);

$document = $this->createStub(\DOMDocument::class);
$url = 'http://example.com';
$expectedObject = [new \stdClass()];

if ($jsonSuccess) {
$jsonParser->expects($this->once())
->method('parse')
->with($document)
->with($document, $url)
->willReturn($expectedObject);

$microdataParser->expects($this->never())->method('parse');
} else {
$jsonParser->expects($this->once())
->method('parse')
->with($document)
->with($document, $url)
->willThrowException(new HtmlParsingException());

if ($microdataSuccess) {
$microdataParser->expects($this->once())
->method('parse')
->with($document)
->with($document, $url)
->willReturn($expectedObject);
} else {
$microdataParser->expects($this->once())
->method('parse')
->with($document)
->with($document, $url)
->willThrowException(new HtmlParsingException());
}
}

$sut = new RecipeExtractionService($jsonParser, $microdataParser, $this->l);

try {
$ret = $sut->parse($document);
$ret = $sut->parse($document, $url);

$this->assertEquals($expectedObject, $ret);
} catch (HtmlParsingException $ex) {
Expand Down

0 comments on commit d837c96

Please sign in to comment.