Skip to content

Commit

Permalink
proxy is a no go
Browse files Browse the repository at this point in the history
  • Loading branch information
Yucked committed Oct 15, 2023
1 parent ab34185 commit 3bb4b8a
Show file tree
Hide file tree
Showing 8 changed files with 58 additions and 81 deletions.
4 changes: 4 additions & 0 deletions Grimoire.Commons/Grimoire.Commons.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,8 @@
<PackageReference Include="Microsoft.Extensions.Logging.Abstractions" Version="7.0.1"/>
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\..\Torch\Torch.csproj" />
</ItemGroup>

</Project>
52 changes: 18 additions & 34 deletions Grimoire.Commons/HtmlParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,31 +6,18 @@

namespace Grimoire.Commons;

public class HtmlParser {
private readonly ILogger<HtmlParser> _logger;
private readonly IBrowsingContext _context;
private readonly IConfiguration _configuration;
private readonly HttpClient _proxyClient, _httpClient;
public class HtmlParser(ILogger<HtmlParser> logger,
HttpClient httpClient,
IConfiguration configuration) {
private readonly IBrowsingContext _context = BrowsingContext.New(
Configuration.Default.WithDefaultLoader()
);

public HtmlParser(ILogger<HtmlParser> logger,
HttpClient httpClient,
IConfiguration configuration) {
_logger = logger;
_context = BrowsingContext.New(
Configuration.Default.WithDefaultLoader()
);
_proxyClient = new HttpClient(new HttpClientHandler {
UseCookies = false
});
_httpClient = httpClient;
_configuration = configuration;
}

public async Task<IDocument> ParseAsync(string url, bool useProxy = false) {
public async Task<IDocument> ParseAsync(string url) {
var retries = 0;
IDocument document;
do {
var content = await GetContentAsync(url, useProxy);
var content = await GetContentAsync(url);
await using var stream = await content.ReadAsStreamAsync();
document = await _context.OpenAsync(x => x.Content(stream));
await document.WaitForReadyAsync();
Expand All @@ -40,41 +27,38 @@ public async Task<IDocument> ParseAsync(string url, bool useProxy = false) {
}

break;
} while (retries <= _configuration.GetValue<int>("Proxy:MaxRetries"));
} while (retries <= configuration.GetValue<int>("Http:Retries"));

return document;
}

// Diff hunk for GetContentAsync: the removed and added lines of this commit appear back to back
// without +/- markers. Lines that use `useProxy`, `_proxyClient` or the `_`-prefixed fields belong
// to the removed version; lines that use `httpClient`, `configuration` and `logger` are the added one.
//
// Added version: sends a GET request to `url` with a User-Agent chosen by RandomItem() from the
// "Http:UserAgents" configuration section, after waiting a random number of milliseconds below
// "Http:Delay". On a success status code the response content is returned; any other status is
// logged and raised as an Exception carrying the reason phrase. Every caught exception is logged
// and then rethrown.
public async Task<HttpContent> GetContentAsync(string url, bool useProxy) {
public async Task<HttpContent> GetContentAsync(string url) {
try {
// NOTE(review): the added line below drops `using`, so the request message is no longer
// disposed in this method — confirm that this is intended.
using var requestMessage = new HttpRequestMessage {
var requestMessage = new HttpRequestMessage {
Method = HttpMethod.Get,
RequestUri = new Uri(url),
Headers = {
{
"User-Agent", _configuration.GetSection("Proxy:UserAgents").Get<string[]>().RandomItem()
"User-Agent", configuration.GetSection("Http:UserAgents").Get<string[]>().RandomItem()
}
}
};

// Random pause before the request; the upper bound is read from configuration on every call.
await Task.Delay(Random.Shared.Next(_configuration.GetValue<int>("Proxy:MaxDelay")));
using var responseMessage = useProxy
? await _proxyClient.SendAsync(requestMessage)
: await _httpClient.SendAsync(requestMessage);
await Task.Delay(Random.Shared.Next(configuration.GetValue<int>("Http:Delay")));
// NOTE(review): `using` disposes the response when this method returns, yet its Content is
// handed back to the caller on the success path — confirm the content is still readable there.
using var responseMessage = await httpClient.SendAsync(requestMessage);
if (responseMessage.IsSuccessStatusCode) {
return responseMessage.Content;
}

// Non-success status: log status code and reason phrase, then fail the call.
_logger.LogError("{}\n{}", responseMessage.StatusCode, responseMessage.ReasonPhrase);
logger.LogError("{}\n{}", responseMessage.StatusCode, responseMessage.ReasonPhrase);
throw new Exception(responseMessage.ReasonPhrase);
}
catch (Exception exception) {
_logger.LogError("Failed to get {}\n{}", url, exception);
logger.LogError("Failed to get {}\n{}", url, exception);
// NOTE(review): in the lines shown, both paths below rethrow, so the HttpRequestException
// check currently has no effect on the outcome.
if (exception is not HttpRequestException) {
throw;
}

// TODO: Get new tor identity
throw;
}
}
Expand All @@ -85,15 +69,15 @@ public Task<IDocument> ParseHtmlAsync(string html) {

// Diff hunk for DownloadAsync: removed and added lines appear back to back without +/- markers
// (the `GetContentAsync(url, true)` and `_logger` lines are the removed ones).
//
// Fetches `url` through GetContentAsync and copies the response content into a new file under
// the `output` directory. Failures are not propagated to the caller: they are only logged.
public async Task DownloadAsync(string url, string output) {
try {
var content = await GetContentAsync(url, true);
var content = await GetContentAsync(url);
// File name: the Content-Disposition FileNameStar value when present, otherwise the last
// '/'-separated segment of the URL; the result is passed through the Clean() helper
// (defined elsewhere — its exact behavior is not visible here).
var fileName =
(content.Headers.ContentDisposition?.FileNameStar
?? url.Split('/')[^1]).Clean();
// FileMode.CreateNew throws if the target file already exists; that exception ends up in
// the catch below like any other failure.
await using var fs = new FileStream($"{output}/{fileName}", FileMode.CreateNew);
await content.CopyToAsync(fs);
}
catch {
// NOTE(review): the exception itself is discarded; only the URL is logged.
_logger.LogError("Failed to download {}", url);
logger.LogError("Failed to download {}", url);
}
}
}
25 changes: 12 additions & 13 deletions Grimoire.Sources/Sources/ArenaScansSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,19 +112,6 @@ public async Task<Chapter> FetchChapterAsync(Chapter chapter) {
.Href
.Split('/')[^1];

// Pre-change copy of this local function (removed by this commit); it still passes `true` as
// the second argument of GetContentAsync.
// Requests {Url}/wp-json/wp/v2/posts/{chapterId}, parses the response body as JSON, reads the
// string at content.rendered and parses that string as HTML.
async Task<IDocument> GetChapterDocumentAsync() {
var content =
await _htmlParser.GetContentAsync(
$"{Url}/wp-json/wp/v2/posts/{chapterId}", true);
await using var stream = await content.ReadAsStreamAsync();
using var jsonDocument = await JsonDocument.ParseAsync(stream);
var html = jsonDocument.RootElement
.GetProperty("content")
.GetProperty("rendered")
.GetString();
return await _htmlParser.ParseHtmlAsync(html);
}

var parsedChapters = document
.GetElementById("readerarea")!
.Descendents<IHtmlImageElement>()
Expand All @@ -141,6 +128,18 @@ await _htmlParser.GetContentAsync(
: parsedChapters;

return chapter;

// Local function, placed after the `return chapter;` statement of the enclosing method.
// Requests {Url}/wp-json/wp/v2/posts/{chapterId}, parses the response body as JSON, reads the
// string at content.rendered and parses that string as HTML via ParseHtmlAsync.
async Task<IDocument> GetChapterDocumentAsync() {
var content =
await _htmlParser.GetContentAsync($"{Url}/wp-json/wp/v2/posts/{chapterId}");
await using var stream = await content.ReadAsStreamAsync();
using var jsonDocument = await JsonDocument.ParseAsync(stream);
// GetProperty throws when a property is missing, so a response without content.rendered
// fails here rather than producing an empty document.
// NOTE(review): GetString() yields null for a JSON null value; how ParseHtmlAsync treats a
// null argument is not visible here — confirm.
var html = jsonDocument.RootElement
.GetProperty("content")
.GetProperty("rendered")
.GetString();
return await _htmlParser.ParseHtmlAsync(html);
}
}
catch (Exception exception) {
_logger.LogError("{}: {}\n{}\n{}",
Expand Down
18 changes: 6 additions & 12 deletions Grimoire.Sources/Sources/TCBScansSource.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@

namespace Grimoire.Sources.Sources;

public sealed class TCBScansSource : IGrimoireSource {
public sealed class TCBScansSource(
ILogger<TCBScansSource> logger,
HtmlParser htmlParser) : IGrimoireSource {
public string Name
=> "TCB Scans";

Expand All @@ -16,16 +18,8 @@ public string Url
public string Icon
=> $"{Url}/files/apple-touch-icon.png";

private readonly ILogger<TCBScansSource> _logger;
private readonly HtmlParser _htmlParser;

public TCBScansSource(ILogger<TCBScansSource> logger, HtmlParser htmlParser) {
_logger = logger;
_htmlParser = htmlParser;
}

public async Task<IReadOnlyList<Manga>> GetMangasAsync() {
using var document = await _htmlParser.ParseAsync($"{Url}/projects", true);
using var document = await htmlParser.ParseAsync($"{Url}/projects");
var tasks = document
.QuerySelectorAll("a.mb-3.text-white")
.AsParallel()
Expand All @@ -34,7 +28,7 @@ public async Task<IReadOnlyList<Manga>> GetMangasAsync() {
}

public async Task<Manga> GetMangaAsync(string url) {
using var document = await _htmlParser.ParseAsync(url, true);
using var document = await htmlParser.ParseAsync(url);
return new Manga {
Name = document.QuerySelector("div.px-4 > h1").TextContent.Clean(),
Url = url,
Expand All @@ -54,7 +48,7 @@ public async Task<Manga> GetMangaAsync(string url) {
}

public async Task<Chapter> FetchChapterAsync(Chapter chapter) {
using var document = await _htmlParser.ParseAsync(chapter.Url);
using var document = await htmlParser.ParseAsync(chapter.Url);
chapter.Pages = document
.QuerySelectorAll("img.fixed-ratio-content")
.Select(x => (x as IHtmlImageElement).Source)
Expand Down
1 change: 0 additions & 1 deletion Grimoire.Web/Grimoire.Web.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
</PropertyGroup>

<ItemGroup>
<ProjectReference Include="..\..\Torch\Torch.csproj" />
<ProjectReference Include="..\Grimoire.Sources\Grimoire.Sources.csproj"/>
</ItemGroup>

Expand Down
9 changes: 1 addition & 8 deletions Grimoire.Web/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@
using Grimoire.Web;
using Grimoire.Web.Handlers;
using MongoDB.Driver;
using Torch;

var builder = WebApplication.CreateBuilder(args);

builder.Configuration.Sources.Clear();
builder.Configuration.AddJsonFile("config.json", false, true);

Expand All @@ -30,8 +28,7 @@
.GetDatabase(nameof(Grimoire)))
.AddSingleton<CacheHandler>()
.AddSingleton<DbHandler>()
.AddSingleton<TorchClient>()
.AddSingleton(x => x.GetRequiredService<TorchClient>().HttpClient);
.AddHttpClient();

var app = builder.Build();
app.UseHttpsRedirection()
Expand All @@ -46,8 +43,4 @@
app.MapBlazorHub();
app.MapFallbackToPage("/_Host");

var torchClient = app.Services.GetRequiredService<TorchClient>();
app.Lifetime.ApplicationStopping.Register(() => torchClient.TerminateAsync().GetAwaiter().GetResult());
await torchClient.InitializeAsync();

await app.RunAsync();
24 changes: 17 additions & 7 deletions Grimoire.Web/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"LogLevel": {
"Default": "Information",
"Microsoft.AspNetCore": "Warning",
"System.Net.Http": "Information"
"System.Net.Http.HttpClient.*": "None"
}
},
"Kestrel": {
Expand All @@ -13,20 +13,30 @@
"Url": "http://0.0.0.0:9000"
},
"Https": {
"Url": "http://0.0.0.0:9001"
"Url": "https://0.0.0.0:9001"
}
}
},
"Theme": "dark",
"RefreshEvery": "00:30:00",
"Mongo": "mongodb://grimoire:grimoire@localhost:27017",
"Save": {
"To": "Grimoire_Data",
"SourceIcon": true,
"MangaCover": true,
"MangaChapter": true
"MangaChapter": false
},
"Cache": {
"Manga": true
},
"Mongo": "mongodb://grimoire:grimoire@localhost:27017"
"Http": {
"Retries": 5,
"Delay": 5000,
"UserAgents": [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/114.0",
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36 RuxitSynthetic/1.0 v3283331297382284845 t6205049005192687891",
"Mozilla/5.0 (iPhone; CPU iPhone OS 10_1_4; like Mac OS X) AppleWebKit/601.14 (KHTML, like Gecko) Chrome/51.0.1615.309 Mobile Safari/535.3",
"Mozilla / 5.0 (compatible; MSIE 10.0; Windows; U; Windows NT 10.4; WOW64 Trident / 6.0)",
"Mozilla/5.0 (Linux; U; Linux x86_64; en-US) AppleWebKit/534.29 (KHTML, like Gecko) Chrome/49.0.3483.101 Safari/534",
"Mozilla/5.0 (Windows; U; Windows NT 10.1;; en-US) AppleWebKit/535.14 (KHTML, like Gecko) Chrome/51.0.2258.396 Safari/600",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_5; en-US) AppleWebKit/600.21 (KHTML, like Gecko) Chrome/48.0.1544.246 Safari/536"
]
}
}
6 changes: 0 additions & 6 deletions Grimoire.sln
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Grimoire.Sources", "Grimoir
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Grimoire.Web", "Grimoire.Web\Grimoire.Web.csproj", "{28A34992-5C75-40D5-B342-1F5EDEC209DB}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Torch", "..\Torch\Torch.csproj", "{3EE90E14-37F5-4E77-8C45-5F592D6332F7}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Expand All @@ -32,9 +30,5 @@ Global
{28A34992-5C75-40D5-B342-1F5EDEC209DB}.Debug|Any CPU.Build.0 = Debug|Any CPU
{28A34992-5C75-40D5-B342-1F5EDEC209DB}.Release|Any CPU.ActiveCfg = Release|Any CPU
{28A34992-5C75-40D5-B342-1F5EDEC209DB}.Release|Any CPU.Build.0 = Release|Any CPU
{3EE90E14-37F5-4E77-8C45-5F592D6332F7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{3EE90E14-37F5-4E77-8C45-5F592D6332F7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{3EE90E14-37F5-4E77-8C45-5F592D6332F7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{3EE90E14-37F5-4E77-8C45-5F592D6332F7}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

0 comments on commit 3bb4b8a

Please sign in to comment.