From 99a39ff2633dfa8e71f3cc2edbd17cd0575a14e0 Mon Sep 17 00:00:00 2001 From: somini Date: Mon, 22 Jul 2019 00:56:50 +0100 Subject: [PATCH 1/3] [FabriceBellard]: New Bridge --- bridges/FabriceBellardBridge.php | 38 ++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 bridges/FabriceBellardBridge.php diff --git a/bridges/FabriceBellardBridge.php b/bridges/FabriceBellardBridge.php new file mode 100644 index 00000000000..c66c63e3f25 --- /dev/null +++ b/bridges/FabriceBellardBridge.php @@ -0,0 +1,38 @@ +find('p') as $obj) { + $item = array(); + + $links = $obj->find('a'); + + $link_uri = self::URI; + if (count($links) > 0) { + /* Fix relative links */ + foreach ($links as $link) { + if (strpos($link, '://') === false) { + $link->href = self::URI . $link->href; + } + } + $link_uri = $links[0]->href; + if ($link_uri[-1] !== '/') { + $link_uri = $link_uri . '/'; + } + } + + $item['title'] = strip_tags($obj->innertext); + $item['uri'] = $link_uri; + $item['content'] = $obj->innertext; + + $this->items[] = $item; + } + } +} From ec386ddddc68f1b608b8a879ed39bb8b42e86c4d Mon Sep 17 00:00:00 2001 From: somini Date: Sat, 27 Jul 2019 00:15:11 +0100 Subject: [PATCH 2/3] Use defaultLinkTo to improve HTML parsing There are still hacks needed, and it still fails sometimes... --- bridges/FabriceBellardBridge.php | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/bridges/FabriceBellardBridge.php b/bridges/FabriceBellardBridge.php index c66c63e3f25..52bb49ed7b9 100644 --- a/bridges/FabriceBellardBridge.php +++ b/bridges/FabriceBellardBridge.php @@ -12,20 +12,19 @@ public function collectData() { foreach ($html->find('p') as $obj) { $item = array(); - $links = $obj->find('a'); + $html = defaultLinkTo($html, $this->getURI()); - $link_uri = self::URI; + $links = $obj->find('a'); if (count($links) > 0) { - /* Fix relative links */ - foreach ($links as $link) { - if (strpos($link, '://') === false) { - $link->href = self::URI . $link->href; - } - } $link_uri = $links[0]->href; - if ($link_uri[-1] !== '/') { - $link_uri = $link_uri . '/'; - } + } + else { + $link_uri = $this->getURI(); + } + + /* try to make sure the link is valid */ + if ($link_uri[-1] !== '/' && strpos($link_uri, '/') === false) { + $link_uri = $link_uri . '/'; } $item['title'] = strip_tags($obj->innertext); From 82956623d82a7e3597882938e57f2d5624fc1c9d Mon Sep 17 00:00:00 2001 From: somini Date: Sat, 27 Jul 2019 01:02:36 +0100 Subject: [PATCH 3/3] Appease Travis --- bridges/FabriceBellardBridge.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/bridges/FabriceBellardBridge.php b/bridges/FabriceBellardBridge.php index 52bb49ed7b9..2c24b5ead77 100644 --- a/bridges/FabriceBellardBridge.php +++ b/bridges/FabriceBellardBridge.php @@ -17,8 +17,7 @@ public function collectData() { $links = $obj->find('a'); if (count($links) > 0) { $link_uri = $links[0]->href; - } - else { + } else { $link_uri = $this->getURI(); }