Skip to content

Commit

Permalink
Merge pull request #1282 from nextcloud/fix/1279-json-with-array
Browse files Browse the repository at this point in the history
Make parser more generic to handle more websites
  • Loading branch information
christianlupus authored Oct 24, 2022
2 parents 916353c + 95b84c0 commit dcc2544
Show file tree
Hide file tree
Showing 8 changed files with 7,033 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
[1122](https://github.com/nextcloud/cookbook/pull/1122) @MarcelRobitaille
- Add filter to prevent special chars in folder names
[#1268](https://github.com/nextcloud/cookbook/pull/1268) @christianlupus
- Fix bug with websites that provide array of schema entries
[#1282](https://github.com/nextcloud/cookbook/pull/1282) @christianlupus

### Maintenance
- Use the pre-built database images for MySQL and PostgreSQL tests
Expand Down
19 changes: 9 additions & 10 deletions lib/Helper/HTMLParser/HttpJsonLdParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -70,15 +70,18 @@ private function parseJsonLdElement(\DOMNode $node): array {
// Look for an array of recipes
$this->mapArray($json);

// Ensure the type of the object is never an array
$this->checkForArrayType($json);
if ($this->jsonService->isSchemaObject($json, 'Recipe', true, false)) {
// Ensure the type of the object is never an array
$this->checkForArrayType($json);

if ($this->jsonService->isSchemaObject($json, 'Recipe')) {
// We found our recipe
return $json;
} else {
throw new HtmlParsingException($this->l->t('No recipe was found.'));
// Continue with other approaches
}

// None of the approaches above yielded a recipe
throw new HtmlParsingException($this->l->t('No recipe was found.'));
}

/**
Expand Down Expand Up @@ -156,7 +159,7 @@ private function searchForRecipeInArray(array $arr): ?array {
// Iterate through all objects in the array ...
foreach ($arr as $item) {
// ... looking for a recipe
if ($this->jsonService->isSchemaObject($item, 'Recipe')) {
if ($this->jsonService->isSchemaObject($item, 'Recipe', true, false)) {
// We found a recipe in the array, use it
return $item;
}
Expand All @@ -174,12 +177,8 @@ private function searchForRecipeInArray(array $arr): ?array {
* @param array $json The JSON object to parse
*/
private function checkForArrayType(array &$json) {
if (! $this->jsonService->isSchemaObject($json)) {
return;
}

if (is_array($json['@type'])) {
$json['@type'] = $json['@type'][0];
$json['@type'] = 'Recipe';
}
}
}
18 changes: 17 additions & 1 deletion lib/Service/JsonService.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ class JsonService {
* @param mixed $obj The object to check
* @param string $type The type to check for. If null or '' no type check is performed
* @param bool $checkContext If true, check for a present context entry
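* @param bool $uniqueType If true, an array-valued @type must contain exactly one entry and that entry must equal $type. If false, also accept JSON objects that contain multiple types as @type, as long as one of them (after trimming) equals $type.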
* @return bool true, if $obj is an object and optionally satisfies the type check
*/
public function isSchemaObject($obj, string $type = null, bool $checkContext = true): bool {
public function isSchemaObject($obj, string $type = null, bool $checkContext = true, bool $uniqueType = true): bool {
if (! is_array($obj)) {
// Objects must be encoded as arrays in JSON
return false;
Expand All @@ -44,6 +45,21 @@ public function isSchemaObject($obj, string $type = null, bool $checkContext = t
return true;
}

if (is_array($obj['@type'])) {
if ($uniqueType) {
if (count($obj['@type']) === 1 && $obj['@type'][0] === $type) {
return true;
}
return false;
}

$foundTypes = array_filter($obj['@type'], function ($x) use ($type) {
return trim($x) === $type;
});

return count($foundTypes) > 0;
}

// Check if type matches
return (strcmp($obj['@type'], $type) === 0);
}
Expand Down
1 change: 1 addition & 0 deletions tests/Unit/Helper/HTMLParser/HttpJsonLdParserTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public function dataProvider(): array {
'caseG' => ['caseG.html', true, 'caseG.json'],
'caseH' => ['caseH.html', true, 'caseH.json'],
'caseI' => ['caseI.html', true, 'caseI.json'],
'caseJ' => ['caseJ.html', true, 'caseJ.json'],
];
}

Expand Down
5,598 changes: 5,598 additions & 0 deletions tests/Unit/Helper/HTMLParser/res_JsonLd/caseJ.html

Large diffs are not rendered by default.

Loading

0 comments on commit dcc2544

Please sign in to comment.