Skip to content

Commit

Permalink
DMM Offline (#198)
Browse files Browse the repository at this point in the history
* Process DMM all locally

Single call to GitHub to download the repo archive.
Remove the need for a PAT.
Update RTN to 0.2.13.
Switch to RTN's batch_parse for title parsing.

* introduce concurrent dictionary, and parallelism
  • Loading branch information
iPromKnight authored Apr 2, 2024
1 parent 6181207 commit 79e0a0f
Show file tree
Hide file tree
Showing 20 changed files with 378 additions and 191 deletions.
14 changes: 7 additions & 7 deletions deployment/docker/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ services:
condition: service_healthy
env_file: stack.env
hostname: knightcrawler-addon
image: gabisonfire/knightcrawler-addon:2.0.19
image: gabisonfire/knightcrawler-addon:2.0.20
labels:
logging: promtail
networks:
Expand All @@ -117,7 +117,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-consumer:2.0.19
image: gabisonfire/knightcrawler-consumer:2.0.20
labels:
logging: promtail
networks:
Expand All @@ -138,7 +138,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-debrid-collector:2.0.19
image: gabisonfire/knightcrawler-debrid-collector:2.0.20
labels:
logging: promtail
networks:
Expand All @@ -152,7 +152,7 @@ services:
migrator:
condition: service_completed_successfully
env_file: stack.env
image: gabisonfire/knightcrawler-metadata:2.0.19
image: gabisonfire/knightcrawler-metadata:2.0.20
networks:
- knightcrawler-network
restart: "no"
Expand All @@ -163,7 +163,7 @@ services:
postgres:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-migrator:2.0.19
image: gabisonfire/knightcrawler-migrator:2.0.20
networks:
- knightcrawler-network
restart: "no"
Expand All @@ -182,7 +182,7 @@ services:
redis:
condition: service_healthy
env_file: stack.env
image: gabisonfire/knightcrawler-producer:2.0.19
image: gabisonfire/knightcrawler-producer:2.0.20
labels:
logging: promtail
networks:
Expand All @@ -207,7 +207,7 @@ services:
deploy:
replicas: ${QBIT_REPLICAS:-0}
env_file: stack.env
image: gabisonfire/knightcrawler-qbit-collector:2.0.19
image: gabisonfire/knightcrawler-qbit-collector:2.0.20
labels:
logging: promtail
networks:
Expand Down
14 changes: 7 additions & 7 deletions deployment/docker/src/components/knightcrawler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ x-depends: &knightcrawler-app-depends

services:
metadata:
image: gabisonfire/knightcrawler-metadata:2.0.18
image: gabisonfire/knightcrawler-metadata:2.0.20
env_file: ../../.env
networks:
- knightcrawler-network
Expand All @@ -30,7 +30,7 @@ services:
condition: service_completed_successfully

migrator:
image: gabisonfire/knightcrawler-migrator:2.0.18
image: gabisonfire/knightcrawler-migrator:2.0.20
env_file: ../../.env
networks:
- knightcrawler-network
Expand All @@ -40,30 +40,30 @@ services:
condition: service_healthy

addon:
image: gabisonfire/knightcrawler-addon:2.0.18
image: gabisonfire/knightcrawler-addon:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
hostname: knightcrawler-addon
ports:
- "7000:7000"

consumer:
image: gabisonfire/knightcrawler-consumer:2.0.18
image: gabisonfire/knightcrawler-consumer:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped

debridcollector:
image: gabisonfire/knightcrawler-debrid-collector:2.0.18
image: gabisonfire/knightcrawler-debrid-collector:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped

producer:
image: gabisonfire/knightcrawler-producer:2.0.18
image: gabisonfire/knightcrawler-producer:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped

qbitcollector:
image: gabisonfire/knightcrawler-qbit-collector:2.0.18
image: gabisonfire/knightcrawler-qbit-collector:2.0.20
<<: [*knightcrawler-app, *knightcrawler-app-depends]
restart: unless-stopped
depends_on:
Expand Down
3 changes: 0 additions & 3 deletions deployment/docker/stack.env
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,3 @@ QBIT_REPLICAS=0

# Addon
DEBUG_MODE=false

# Producer
GITHUB_PAT=
2 changes: 1 addition & 1 deletion src/debrid-collector/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
rank-torrent-name==0.2.11
rank-torrent-name==0.2.13
2 changes: 1 addition & 1 deletion src/producer/src/Configuration/scrapers.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
},
{
"Name": "SyncDmmJob",
"IntervalSeconds": 1800,
"IntervalSeconds": 10800,
"Enabled": true
},
{
Expand Down
70 changes: 70 additions & 0 deletions src/producer/src/Features/Crawlers/Dmm/DMMFileDownloader.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
namespace Producer.Features.Crawlers.Dmm;

/// <summary>
/// Downloads the DMM hashlists repository archive over HTTP and extracts it into a
/// clean temp directory, removing known non-hashlist housekeeping files afterwards.
/// </summary>
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
{
    private const string Filename = "main.zip";

    // Repository housekeeping files that are not hashlists and must not be parsed downstream.
    private readonly IReadOnlyCollection<string> _filesToIgnore = [
        "index.html",
        "404.html",
        "dedupe.sh",
        "CNAME",
    ];

    public const string ClientName = "DmmFileDownloader";

    /// <summary>
    /// Downloads the repository archive and extracts its file entries (flattened — directory
    /// structure is discarded) into a freshly recreated temp directory.
    /// </summary>
    /// <param name="cancellationToken">Cancels the download and extraction.</param>
    /// <returns>The path of the temp directory containing the extracted hashlist files.</returns>
    /// <exception cref="HttpRequestException">The archive download returned a non-success status.</exception>
    public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
    {
        logger.LogInformation("Downloading DMM Hashlists");

        // ResponseHeadersRead streams the archive body instead of buffering it fully in memory.
        var response = await client.GetAsync(Filename, HttpCompletionOption.ResponseHeadersRead, cancellationToken);

        // Validate the response BEFORE wiping the temp directory; otherwise a transient
        // download failure would destroy the previous successful extraction and then throw.
        response.EnsureSuccessStatusCode();

        var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");

        EnsureDirectoryIsClean(tempDirectory);

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var archive = new ZipArchive(stream);

        logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);

        foreach (var entry in archive.Entries)
        {
            // Path.GetFileName flattens the repo folder prefix and also prevents
            // zip-slip path traversal from crafted entry names.
            var entryPath = Path.Combine(tempDirectory, Path.GetFileName(entry.FullName));
            if (!entry.FullName.EndsWith('/')) // Directory entries end with '/'; skip them.
            {
                entry.ExtractToFile(entryPath, true);
            }
        }

        foreach (var file in _filesToIgnore)
        {
            CleanRepoExtras(tempDirectory, file);
        }

        logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);

        return tempDirectory;
    }

    // Deletes one known non-hashlist repo file from the extraction directory, if present.
    private static void CleanRepoExtras(string tempDirectory, string fileName)
    {
        var repoIndex = Path.Combine(tempDirectory, fileName);

        if (File.Exists(repoIndex))
        {
            File.Delete(repoIndex);
        }
    }

    // Recreates the temp directory from scratch so stale files never mix into a new extraction.
    private static void EnsureDirectoryIsClean(string tempDirectory)
    {
        if (Directory.Exists(tempDirectory))
        {
            Directory.Delete(tempDirectory, true);
        }

        Directory.CreateDirectory(tempDirectory);
    }
}
6 changes: 6 additions & 0 deletions src/producer/src/Features/Crawlers/Dmm/DMMHttpClient.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
namespace Producer.Features.Crawlers.Dmm;

// NOTE(review): intentionally empty marker type — presumably serves as the type
// argument for registering/resolving a typed HttpClient for DMM in DI; confirm
// against the service registration code (not visible in this file).
public class DMMHttpClient
{

}
Loading

0 comments on commit 79e0a0f

Please sign in to comment.