# Patch summary. These leading "#" lines precede the first diff header and are
# commentary only; they are not part of any hunk.
#
# src/Graby.php, getSinglePage():
#   Before: the single-page URL was always fetched with $siteConfig->http_header.
#   After:  $url and $singlePageUrl are both passed through parse_url(). When both
#           results are arrays, both contain a 'host' key, and the two hosts
#           differ (strict !== comparison), a site config is built for the target
#           host with $this->configBuilder->buildForHost() and its http_header is
#           sent with the fetch. In every other case $siteConfig->http_header is
#           still used.
#   NOTE(review): the host comparison is case-sensitive as written; confirm that
#   hosts differing only in letter case should be treated as different sites.
#
# tests/GrabyTest.php, testSinglePageReloadSiteConfig():
#   Mocks A records for singlepage2.com and singlepage5.com, queues two 200
#   text/html responses, fetches http://singlepage2.com/hello, then asserts that
#   the returned html contains 'my singlepage5' and that the returned url is
#   http://singlepage5.com/hello.
#   NOTE(review): the two response-body string literals hold only plain text in
#   this copy; any markup they carried looks stripped - confirm against the
#   upstream commit before applying.
#
# tests/fixtures/site_config/singlepage5.com.txt (new file):
#   Three directives: a title XPath, a body XPath, and "prune: no".
#
# NOTE(review): the line breaks of this patch appear to have been collapsed;
# the hunk headers still state the original counts (-745,7 +745,19 and
# -497,6 +497,35).
diff --git a/src/Graby.php b/src/Graby.php index 03a153d0..d3a6c528 100644 --- a/src/Graby.php +++ b/src/Graby.php @@ -745,7 +745,19 @@ private function getSinglePage($html, $url) // check it's not what we have already! if (false !== $singlePageUrl && $singlePageUrl !== $url) { // it's not, so let's try to fetch it... - $response = $this->httpClient->fetch($singlePageUrl, false, $siteConfig->http_header); + $headers = $siteConfig->http_header; + + $sourceUrl = parse_url($url); + $targetUrl = parse_url($singlePageUrl); + if (\is_array($sourceUrl) + && \is_array($targetUrl) + && \array_key_exists('host', $sourceUrl) + && \array_key_exists('host', $targetUrl) + && $sourceUrl['host'] !== $targetUrl['host']) { + $targetSiteConfig = $this->configBuilder->buildForHost($targetUrl['host']); + $headers = $targetSiteConfig->http_header; + } + $response = $this->httpClient->fetch($singlePageUrl, false, $headers); if ($response['status'] < 300) { $this->logger->info('Single page content found with url', ['url' => $singlePageUrl]); diff --git a/tests/GrabyTest.php b/tests/GrabyTest.php index 0980e94c..62db02fc 100644 --- a/tests/GrabyTest.php +++ b/tests/GrabyTest.php @@ -497,6 +497,35 @@ public function testSinglePageMimeAction(): void $this->assertFalse($res['native_ad']); } + public function testSinglePageReloadSiteConfig(): void + { + DnsMock::withMockedHosts([ + 'singlepage2.com' => [['type' => 'A', 'ip' => self::AN_IPV4]], + 'singlepage5.com' => [['type' => 'A', 'ip' => self::AN_IPV4]], + ]); + + $httpMockClient = new HttpMockClient(); + $httpMockClient->addResponse(new Response( + 200, + ['Content-Type' => 'text/html'], + '

my title

' + )); + $httpMockClient->addResponse(new Response( + 200, + ['Content-Type' => 'text/html'], + '

my title

my singlepage5
' + )); + + $graby = new Graby(['debug' => true, 'xss_filter' => false, 'extractor' => ['config_builder' => [ + 'site_config' => [__DIR__ . '/fixtures/site_config'], + ]]], $httpMockClient); + + $res = $graby->fetchContent('http://singlepage2.com/hello'); + + $this->assertStringContainsString('my singlepage5', $res['html']); + $this->assertSame('http://singlepage5.com/hello', $res['url']); + } + /** * @group dns-sensitive */ diff --git a/tests/fixtures/site_config/singlepage5.com.txt b/tests/fixtures/site_config/singlepage5.com.txt new file mode 100644 index 00000000..ec0531c9 --- /dev/null +++ b/tests/fixtures/site_config/singlepage5.com.txt @@ -0,0 +1,3 @@ +title: //h1[@class='print-title'] +body: //div[@class='main-article'] +prune: no \ No newline at end of file