Skip to content

Commit

Permalink
Fall back to r18 scraper for ID 00X movies on dmm (#138)
Browse files Browse the repository at this point in the history
  • Loading branch information
jvlflame committed Oct 17, 2020
1 parent e1e01bf commit c2f1e84
Showing 1 changed file with 64 additions and 50 deletions.
114 changes: 64 additions & 50 deletions src/Javinizer/Public/Get-DmmUrl.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -15,65 +15,79 @@ function Get-DmmUrl {

process {
$originalId = $Id
if ($r18Url) {
$r18Id = (($r18Url -split 'id=')[1] -split '\/')[0]
$directUrl = "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=$r18Id"
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Converting R18 Id to Dmm: [$r18Id] -> [$directUrl]"
} else {
# Convert the movie Id (ID-###) to content Id (ID00###) to match dmm naming standards
if (!($Strict)) {
if ($Id -match '([a-zA-Z|tT28|rR18]+-\d+z{0,1}Z{0,1}e{0,1}E{0,1})') {
$splitId = $Id -split '-'
if (($splitId[1])[-1] -match '\D') {
$appendChar = ($splitId[1])[-1]
$splitId[1] = $splitId[1] -replace '\D', ''
}
$Id = $splitId[0] + $splitId[1].PadLeft(5, '0') + $appendChar
$Id = $Id.Trim()
}
}

$searchUrl = "https://www.dmm.co.jp/search/?redirect=1&enc=UTF-8&category=&searchstr=$Id"

# The digital/videoa URL is not found in the search page HTML for movie IDs matching '0001 - 0009', so fall back to the R18 scraper for those IDs
if ($Id -match '00\d') {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Movie ID using 00X format, falling back to R18 scraper"
$url = Get-R18Url -Id $Id -Strict:$Strict
try {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]"
$webRequest = Invoke-WebRequest -Uri $searchUrl -Method Get -Verbose:$false
$cid = ($url.En | Select-String -Pattern 'id=(.*)\/').Matches.Groups[1].Value
} catch {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$searchUrl]: $PSItem" -Action 'Continue'
$cid = $null
}

$retryCount = 3
$searchResults = ($webrequest.links.href | Where-Object { $_ -like '*digital/videoa/*' })
$numResults = $searchResults.count

if ($retryCount -gt $numResults) {
$retryCount = $numResults
if ($null -ne $cid) {
$directUrl = "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=$cid/?i3_ref=search&i3_ord=5"
}
} else {
if ($r18Url) {
$r18Id = (($r18Url -split 'id=')[1] -split '\/')[0]
$directUrl = "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=$r18Id"
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Converting R18 Id to Dmm: [$r18Id] -> [$directUrl]"
} else {
# Convert the movie Id (ID-###) to content Id (ID00###) to match dmm naming standards
if (!($Strict)) {
if ($Id -match '([a-zA-Z|tT28|rR18]+-\d+z{0,1}Z{0,1}e{0,1}E{0,1})') {
$splitId = $Id -split '-'
if (($splitId[1])[-1] -match '\D') {
$appendChar = ($splitId[1])[-1]
$splitId[1] = $splitId[1] -replace '\D', ''
}
$Id = $splitId[0] + $splitId[1].PadLeft(5, '0') + $appendChar
$Id = $Id.Trim()
}
}

if ($numResults -ge 1) {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Searching [$retryCount] of [$numResults] results for [$originalId]"
$searchUrl = "https://www.dmm.co.jp/search/?redirect=1&enc=UTF-8&category=&searchstr=$Id"

$count = 1
foreach ($result in $searchResults) {
try {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$result]"
$webRequest = Invoke-WebRequest -Uri $result -Method Get -Verbose:$false
} catch {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]: $PSItem" -Action 'Continue'
}
try {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$searchUrl]"
$webRequest = Invoke-WebRequest -Uri $searchUrl -Method Get -Verbose:$false
} catch {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$searchUrl]: $PSItem" -Action 'Continue'
}

$resultId = Get-DmmContentId -WebRequest $webRequest
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Result [$count] is [$resultId]"
if ($resultId -match "^(.*_)?\d*$Id") {
$directUrl = $result
break
}
$retryCount = 3
$searchResults = ($webrequest.links.href | Where-Object { $_ -like '*digital/videoa/*' })
$numResults = $searchResults.count

if ($count -eq $retryCount) {
break
}
if ($retryCount -gt $numResults) {
$retryCount = $numResults
}

$count++
if ($numResults -ge 1) {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Searching [$retryCount] of [$numResults] results for [$originalId]"

$count = 1
foreach ($result in $searchResults) {
try {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Performing [GET] on URL [$result]"
$webRequest = Invoke-WebRequest -Uri $result -Method Get -Verbose:$false
} catch {
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Error -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Error occurred on [GET] on URL [$result]: $PSItem" -Action 'Continue'
}

$resultId = Get-DmmContentId -WebRequest $webRequest
Write-JVLog -Write:$script:JVLogWrite -LogPath $script:JVLogPath -WriteLevel $script:JVLogWriteLevel -Level Debug -Message "[$originalId] [$($MyInvocation.MyCommand.Name)] Result [$count] is [$resultId]"
if ($resultId -match "^(.*_)?\d*$Id") {
$directUrl = $result
break
}

if ($count -eq $retryCount) {
break
}

$count++
}
}
}
}
Expand Down

0 comments on commit c2f1e84

Please sign in to comment.