-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Process DMM entirely locally, using a single call to GitHub to download the repo archive. Remove the need for a PAT. Update RTN to 0.2.13. Change to batch_parse from RTN for title parsing. * Introduce a concurrent dictionary and parallelism.
- Loading branch information
1 parent
6181207
commit 79e0a0f
Showing
20 changed files
with
378 additions
and
191 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -38,6 +38,3 @@ QBIT_REPLICAS=0 | |
|
||
# Addon | ||
DEBUG_MODE=false | ||
|
||
# Producer | ||
GITHUB_PAT= |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
rank-torrent-name==0.2.11 | ||
rank-torrent-name==0.2.13 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -28,7 +28,7 @@ | |
}, | ||
{ | ||
"Name": "SyncDmmJob", | ||
"IntervalSeconds": 1800, | ||
"IntervalSeconds": 10800, | ||
"Enabled": true | ||
}, | ||
{ | ||
|
70 changes: 70 additions & 0 deletions
70
src/producer/src/Features/Crawlers/Dmm/DMMFileDownloader.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
namespace Producer.Features.Crawlers.Dmm; | ||
|
||
/// <summary>
/// Downloads the DMM hashlist archive over HTTP and extracts its files into a
/// scratch directory beneath the system temp path.
/// </summary>
/// <param name="client">
/// HTTP client used to request the archive. The request URI is the relative
/// <c>main.zip</c>, so the client is presumably configured with a base address
/// elsewhere - confirm against the registration code.
/// </param>
/// <param name="logger">Logger used for progress messages.</param>
public class DMMFileDownloader(HttpClient client, ILogger<DMMFileDownloader> logger) : IDMMFileDownloader
{
    private const string Filename = "main.zip";

    // Files removed from the extraction directory after unpacking.
    // Static: the list is constant, so there is no need to allocate it per instance.
    private static readonly IReadOnlyCollection<string> FilesToIgnore = [
        "index.html",
        "404.html",
        "dedupe.sh",
        "CNAME",
    ];

    // NOTE(review): presumably the name this downloader's HttpClient is registered under - confirm.
    public const string ClientName = "DmmFileDownloader";

    /// <summary>
    /// Downloads the archive, extracts every file entry (flattened, without its
    /// directory structure) into a clean temp directory, removes the ignored
    /// files and returns the directory path.
    /// </summary>
    /// <param name="cancellationToken">Token used to cancel the download and the extraction loop.</param>
    /// <returns>The path of the directory containing the extracted files.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    /// <exception cref="OperationCanceledException">The token was cancelled.</exception>
    public async Task<string> DownloadFileToTempPath(CancellationToken cancellationToken)
    {
        logger.LogInformation("Downloading DMM Hashlists");

        // Dispose the response so its buffered content and connection are released.
        using var response = await client.GetAsync(Filename, cancellationToken);

        // Validate the status before touching the disk: a failed download must not
        // wipe the result of an earlier, successful extraction.
        response.EnsureSuccessStatusCode();

        var tempDirectory = Path.Combine(Path.GetTempPath(), "DMMHashlists");

        EnsureDirectoryIsClean(tempDirectory);

        await using var stream = await response.Content.ReadAsStreamAsync(cancellationToken);
        using var archive = new ZipArchive(stream);

        logger.LogInformation("Extracting DMM Hashlists to {TempDirectory}", tempDirectory);

        foreach (var entry in archive.Entries)
        {
            cancellationToken.ThrowIfCancellationRequested();

            // Only the file-name portion is used, so the archive's directory layout is
            // flattened and an entry cannot escape the target directory.
            var fileName = Path.GetFileName(entry.FullName);

            // An empty file name means the entry is a directory (its name ends with a
            // separator); extracting it would target the directory itself and throw.
            if (fileName.Length == 0)
            {
                continue;
            }

            entry.ExtractToFile(Path.Combine(tempDirectory, fileName), true);
        }

        foreach (var file in FilesToIgnore)
        {
            CleanRepoExtras(tempDirectory, file);
        }

        logger.LogInformation("Downloaded and extracted Repository to {TempDirectory}", tempDirectory);

        return tempDirectory;
    }

    /// <summary>Deletes <paramref name="fileName"/> from <paramref name="tempDirectory"/> when it exists.</summary>
    private static void CleanRepoExtras(string tempDirectory, string fileName)
    {
        var repoIndex = Path.Combine(tempDirectory, fileName);

        if (File.Exists(repoIndex))
        {
            File.Delete(repoIndex);
        }
    }

    /// <summary>Recreates <paramref name="tempDirectory"/> as an empty directory, deleting any previous content.</summary>
    private static void EnsureDirectoryIsClean(string tempDirectory)
    {
        if (Directory.Exists(tempDirectory))
        {
            Directory.Delete(tempDirectory, true);
        }

        Directory.CreateDirectory(tempDirectory);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
namespace Producer.Features.Crawlers.Dmm; | ||
|
||
/// <summary>
/// Empty type: it currently declares no members.
/// </summary>
public class DMMHttpClient
{
}
Oops, something went wrong.