Skip to content

Commit

Permalink
Merge pull request #300 from leepeuker/update-imdb-web-scrapper
Browse files: browse the repository at this point in the history
Adjust the IMDb web scraper to the changed IMDb frontend
  • Loading branch information
leepeuker authored Apr 8, 2023
2 parents 3930905 + 9b1f7f0 commit a57795e
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 51 deletions.
6 changes: 3 additions & 3 deletions src/Api/Imdb/ImdbWebScrapper.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class ImdbWebScrapper
{
private const REQUEST_HEADERS = ['headers' => ['User-Agent' => self::USER_AGENT]];

private const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:107.0) Gecko/20100101 Firefox/107.0';
private const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:111.0) Gecko/20100101 Firefox/111.0';

public function __construct(
private readonly Client $httpClient,
Expand Down Expand Up @@ -50,7 +50,7 @@ public function findRating(string $imdbId) : ?ImdbRating

private function extractRatingAverage(string $imdbRatingPage, string $imdbId) : ?float
{
preg_match('/weightedaverage<\/a>voteof(\d.\d)\/10</', $imdbRatingPage, $averageRatingMatches);
preg_match('/jUnWeS">(\d([.,])\d)/', $imdbRatingPage, $averageRatingMatches);
if (empty($averageRatingMatches[1]) === true) {
$this->logger->warning('IMDb: Could not extract rating average.', ['url' => $this->urlGenerator->buildMovieRatingsUrl($imdbId)]);

Expand All @@ -62,7 +62,7 @@ private function extractRatingAverage(string $imdbRatingPage, string $imdbId) :

private function extractRatingVoteCount(string $imdbRatingPage, string $imdbId) : ?int
{
preg_match('/([0-9]{1,3}([.,][0-9]{3})*)IMDbusershavegivena/', $imdbRatingPage, $voteCountMatches);
preg_match('/dWymrF">([0-9]{1,3}([.,]?[0-9]{3})*)/', $imdbRatingPage, $voteCountMatches);
if (empty($voteCountMatches[1]) === true) {
$this->logger->warning('IMDb: Could not extract imdb rating vote count.', ['url' => $this->urlGenerator->buildMovieRatingsUrl($imdbId)]);

Expand Down
9 changes: 9 additions & 0 deletions src/Domain/Movie/MovieRepository.php
Original file line number Diff line number Diff line change
Expand Up @@ -431,6 +431,15 @@ public function fetchMovieIdsHavingImdbIdOrderedByLastImdbUpdatedAt(?int $maxAge
$limitQuery = " LIMIT $limit";
}

if ($this->dbConnection->getDatabasePlatform() instanceof SqlitePlatform) {
return $this->dbConnection->fetchFirstColumn(
'SELECT movie.id
FROM `movie`
WHERE movie.imdb_id IS NOT NULL AND (updated_at_imdb IS NULL OR updated_at_imdb <= datetime("now","-' . $maxAgeInHours . ' hours"))
ORDER BY updated_at_imdb ASC' . $limitQuery,
);
}

return $this->dbConnection->fetchFirstColumn(
'SELECT movie.id
FROM `movie`
Expand Down
63 changes: 15 additions & 48 deletions tests/unit/Api/Imdb/ImdbWebScrapperTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -27,69 +27,36 @@ public function provideFindRatingData() : array
{
return [
[
' <div class="allText">
<div class="allText">
229.240
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of 7.9 / 10
<br /><br />',
'<spanclass="sc-5931bdee-1jUnWeS">7.9</span>
<div class="sc-5931bdee-3 dWymrF">229.240</div>',
ImdbRating::create(7.9, 229240)
],
[
' <div class="allText">
<div class="allText">
229,240
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of 7,9 / 10
<br /><br />',
'<spanclass="sc-5931bdee-1jUnWeS">7,9</span>
<div class="sc-5931bdee-3 dWymrF">229,240</div>',
ImdbRating::create(7.9, 229240)
],
[
' <div class="allText">
<div class="allText">
1.229,240
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of 7,9 / 10
<br /><br />',
'<spanclass="sc-5931bdee-1jUnWeS">7,9</span>
<div class="sc-5931bdee-3 dWymrF">229240</div>',
ImdbRating::create(7.9, 229240)
],
[
'<spanclass="sc-5931bdee-1jUnWeS">7,9</span>
<div class="sc-5931bdee-3 dWymrF">1.229,240</div>',
ImdbRating::create(7.9, 1229240)
],
[
' <div class="allText">
<div class="allText">
40
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of 7,9 / 10
<br /><br />',
'<spanclass="sc-5931bdee-1jUnWeS">7,9</span>
<div class="sc-5931bdee-3 dWymrF">40</div>',
ImdbRating::create(7.9, 40)
],
[
' <div class="allText">
<div class="allText">
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of 7,9 / 10
<br /><br />',
'<spanclass="sc-5931bdee-1jUnWeS">7,9</span>',
null
],
[
' <div class="allText">
<div class="allText">
IMDb users have given a <a href="https://help.imdb.com/article/imdb/track-movies-tv/weighted-average-ratings/GWT2DSBYVT2F25SK?ref_=cons_tt_rt_wtavg">weighted average</a> vote of / 10
<br /><br />',
'',
null
],
];
Expand Down

0 comments on commit a57795e

Please sign in to comment.