From 7a3046786f59a3c4d584e217a650919f0304c76d Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Fri, 1 Mar 2024 23:00:07 +0900
Subject: [PATCH 1/3] =?UTF-8?q?#1-2=20ScraperSelector=E3=82=92=E8=BF=BD?=
=?UTF-8?q?=E5=8A=A0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../Services/IFileExtensionService.cs | 6 ++
.../Services/IScraperSelectorService.cs | 11 +++
.../Services/IScrapingAozoraService.cs | 8 ++
.../Services/IScrapingNaroService.cs | 9 ++
.../Contracts/Services/IScrapingService.cs | 8 --
Epub/KoeBook.Epub/ScrapingHelper.cs | 90 -------------------
.../Services/FileExtensionService.cs | 19 ++++
.../Services/ScraperSelectorService.cs | 19 ++++
...pingAozora.cs => ScrapingAozoraService.cs} | 51 ++++++-----
...crapingNarou.cs => ScrapingNaroService.cs} | 13 +--
Epub/KoeBook.Epub/Utility/ScrapingHelper.cs | 77 ++++++++++++++++
KoeBook.Core/Services/AnalyzerService.cs | 4 +-
KoeBook/App.xaml.cs | 2 +-
13 files changed, 181 insertions(+), 136 deletions(-)
create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs
create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs
create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
delete mode 100644 Epub/KoeBook.Epub/ScrapingHelper.cs
create mode 100644 Epub/KoeBook.Epub/Services/FileExtensionService.cs
create mode 100644 Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
rename Epub/KoeBook.Epub/Services/{ScrapingAozora.cs => ScrapingAozoraService.cs} (94%)
rename Epub/KoeBook.Epub/Services/{ScrapingNarou.cs => ScrapingNaroService.cs} (95%)
create mode 100644 Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs b/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs
new file mode 100644
index 0000000..a498d94
--- /dev/null
+++ b/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs
@@ -0,0 +1,6 @@
+namespace KoeBook.Epub.Contracts.Services;
+
+public interface IFileExtensionService
+{
+ public string GetImagesMediaType(string fileName);
+}
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs
new file mode 100644
index 0000000..4dca9a9
--- /dev/null
+++ b/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs
@@ -0,0 +1,11 @@
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Contracts.Services;
+
+/// <summary>
+/// Performs scraping and creates an EpubDocument.
+/// </summary>
+public interface IScraperSelectorService
+{
+ public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct);
+}
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
new file mode 100644
index 0000000..86756e5
--- /dev/null
+++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
@@ -0,0 +1,8 @@
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Contracts.Services;
+
+public interface IScrapingAozoraService
+{
+ public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct);
+}
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
new file mode 100644
index 0000000..f9259aa
--- /dev/null
+++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
@@ -0,0 +1,9 @@
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Contracts.Services;
+
+public interface IScrapingNaroService
+{
+ public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct);
+}
+
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
deleted file mode 100644
index 700be5a..0000000
--- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-using KoeBook.Epub.Models;
-
-namespace KoeBook.Epub.Contracts.Services;
-
-public interface IScrapingService
-{
- public Task ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct);
-}
diff --git a/Epub/KoeBook.Epub/ScrapingHelper.cs b/Epub/KoeBook.Epub/ScrapingHelper.cs
deleted file mode 100644
index a34c780..0000000
--- a/Epub/KoeBook.Epub/ScrapingHelper.cs
+++ /dev/null
@@ -1,90 +0,0 @@
-using System.Net;
-using System.Reflection.Metadata;
-using KoeBook.Epub.Models;
-
-namespace KoeBook.Epub;
-
-public static class ScrapingHelper
-{
- internal static void checkChapter(EpubDocument document)
- {
- if (document.Chapters.Count == 0)
- {
- document.Chapters.Add(new Chapter() { Title = null });
- }
- return;
- }
-
- internal static void checkSection(EpubDocument document, int ChapterNum)
- {
-
- checkChapter(document);
-
- if (document.Chapters[ChapterNum].Sections.Count == 0)
- {
- if (document.Chapters[ChapterNum].Title != null)
- {
- document.Chapters[ChapterNum].Sections.Add(new Section(document.Chapters[ChapterNum].Title!));
- }
- else
- {
- document.Chapters[ChapterNum].Sections.Add(new Section(document.Title));
- }
-
- }
- return;
- }
-
- internal static void checkParagraph(EpubDocument document, int chapterNum, int sectionNum)
- {
- checkSection(document, chapterNum);
- if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count == 0)
- {
- document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
- }
- return;
- }
-
- public static List SplitBrace(string text)
- {
- if (text.Length == 1 && text != "「" && text != "」")
- {
- return new List() { text };
- }
- var result = new List();
- int bracket = 0;
- var brackets = new List();
- foreach (char c in text)
- {
- if (c == '「') bracket++;
- if (c == '」') bracket--;
- brackets.Add(bracket);
- }
- var mn = Math.Min(0, brackets.Min());
- int startIdx = 0;
- for (int i = 0; i < brackets.Count; i++)
- {
- brackets[i] -= mn;
- if (text[i] == '「' && brackets[i] == 1 && i != 0)
- {
- result.Add(text[startIdx..i]);
- startIdx = i;
- }
- if (text[i] == '」' && brackets[i] == 0 && i != 0)
- {
- result.Add(text[startIdx..(i + 1)]);
- startIdx = i + 1;
- }
- }
- if (startIdx != text.Length - 1)
- {
- result.Add(text[startIdx..]);
- }
- if (result[^1] == "")
- {
- result.RemoveAt(result.Count - 1);
- }
-
- return result;
- }
-}
diff --git a/Epub/KoeBook.Epub/Services/FileExtensionService.cs b/Epub/KoeBook.Epub/Services/FileExtensionService.cs
new file mode 100644
index 0000000..c5037a6
--- /dev/null
+++ b/Epub/KoeBook.Epub/Services/FileExtensionService.cs
@@ -0,0 +1,19 @@
+using KoeBook.Epub.Contracts.Services;
+
+namespace KoeBook.Epub.Services;
+
+/// <summary>Maps image file names to media (MIME) types based on the file extension.</summary>
+public class FileExtensionService : IFileExtensionService
+{
+    /// <summary>Returns the media type for the extension of <paramref name="fileName"/>, matched case-insensitively
+    /// (so "COVER.JPG" resolves like "cover.jpg"); unrecognised or missing extensions yield an empty string.</summary>
+    public string GetImagesMediaType(string fileName) => Path.GetExtension(fileName)?.ToLowerInvariant() switch
+    {
+        ".gif" => "image/gif",
+        ".jpg" or ".jpeg" => "image/jpeg",
+        ".png" => "image/png",
+        ".svg" => "image/svg+xml",
+        ".webp" => "image/webp",
+        _ => string.Empty,
+    };
+}
diff --git a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
new file mode 100644
index 0000000..4cfb2ee
--- /dev/null
+++ b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
@@ -0,0 +1,19 @@
+using KoeBook.Epub.Contracts.Services;
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Services;
+
+public class ScraperSelectorService(IScrapingAozoraService scrapingAozoraService, IScrapingNaroService scrapingNaroService) : IScraperSelectorService
+{
+ public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
+ {
+ var uri = new Uri(url);
+
+ return uri.Host switch
+ {
+ "www.aozora.gr.jp" => scrapingAozoraService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct),
+ "ncode.syosetu.com" => scrapingNaroService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct),
+ _ => throw new ArgumentException("有効なドメインではありません。"),
+ };
+ }
+}
diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozora.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
similarity index 94%
rename from Epub/KoeBook.Epub/Services/ScrapingAozora.cs
rename to Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index a4d6f9e..16f9446 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozora.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -4,21 +4,20 @@
using AngleSharp.Io;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
-using static KoeBook.Epub.ScrapingHelper;
+using static KoeBook.Epub.Utility.ScrapingHelper;
namespace KoeBook.Epub.Services
{
- public partial class ScrapingAozora : IScrapingService
+ public partial class ScrapingAozoraService : IScrapingAozoraService
{
- private int chapterNum;
- private int sectionNum;
- private bool chapterExist = false;
- private bool sectionExist = false;
-
-
- public async Task ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
+ public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
{
+ var chapterNum = 0;
+ var sectionNum = 0;
+ var chapterExist = false;
+ var sectionExist = false;
+
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
var doc = await context.OpenAsync(url, ct).ConfigureAwait(false);
@@ -61,7 +60,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
}
if ((MidashiId - previousMidashiId) == 10)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Add(new Section(TextProcess(midashi)));
sectionExist = true;
}
@@ -97,7 +96,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (previous == true)
{
- checkSection(document, chapterNum);
+ EnsureSection(document, chapterNum);
document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
}
}
@@ -156,12 +155,12 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextProcess(midashi);
@@ -183,7 +182,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
if (element.ClassName == "caption")
{
// https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextProcess(element));
@@ -216,12 +215,12 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
foreach (var splitText in SplitBrace(TextProcess(element)))
@@ -253,7 +252,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
@@ -274,7 +273,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
await response.Content.CopyToAsync(ms, ct).ConfigureAwait(false);
var filePass = System.IO.Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
File.WriteAllBytes(filePass, ms.ToArray());
- checkSection(document, chapterNum);
+ EnsureSection(document, chapterNum);
if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
{
document.Chapters[chapterNum].Sections[sectionNum].Elements.Insert(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1, new Picture(filePass));
@@ -283,7 +282,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
}
if (img.AlternativeText != null)
{
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextReplace(img.AlternativeText);
@@ -332,7 +331,7 @@ public async Task ScrapingAsync(string url, string coverFilePath,
case "[#ページの左右中央]":
break;
default:
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
foreach (var splitText in SplitBrace(TextProcess(element)))
@@ -362,12 +361,12 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextProcess(element));
@@ -402,12 +401,12 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextProcess(element);
@@ -450,12 +449,12 @@ public async Task ScrapingAsync(string url, string coverFilePath,
{
if (sectionExist)
{
- checkChapter(document);
+ EnsureChapter(document);
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- checkParagraph(document, chapterNum, sectionNum);
+ EnsureParagraph(document, chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextReplace(nextNode.Text()));
diff --git a/Epub/KoeBook.Epub/Services/ScrapingNarou.cs b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
similarity index 95%
rename from Epub/KoeBook.Epub/Services/ScrapingNarou.cs
rename to Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
index a003f50..7d6ce7a 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingNarou.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
@@ -5,20 +5,15 @@
using AngleSharp.Io;
using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
-using static KoeBook.Epub.ScrapingHelper;
+using static KoeBook.Epub.Utility.ScrapingHelper;
namespace KoeBook.Epub.Services
{
- public partial class ScrapingNarouService : IScrapingService
+ public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingNaroService
{
- public ScrapingNarouService(IHttpClientFactory httpClientFactory)
- {
- _httpCliantFactory = httpClientFactory;
- }
-
- private readonly IHttpClientFactory _httpCliantFactory;
+ private readonly IHttpClientFactory _httpCliantFactory = httpClientFactory;
- public async Task ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
+ public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
using var context = BrowsingContext.New(config);
diff --git a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
new file mode 100644
index 0000000..3af91cd
--- /dev/null
+++ b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
@@ -0,0 +1,77 @@
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Utility;
+
+public static class ScrapingHelper
+{
+ internal static void EnsureChapter(EpubDocument document)
+ {
+ if (document.Chapters.Count == 0)
+ document.Chapters.Add(new Chapter() { Title = null });
+ }
+
+ internal static void EnsureSection(EpubDocument document, int chapterIndex)
+ {
+ EnsureChapter(document);
+
+ if (document.Chapters[chapterIndex].Sections.Count == 0)
+ {
+ if (document.Chapters[chapterIndex].Title != null)
+ document.Chapters[chapterIndex].Sections.Add(new Section(document.Chapters[chapterIndex].Title!));
+ else
+ document.Chapters[chapterIndex].Sections.Add(new Section(document.Title));
+ }
+ }
+
+ internal static void EnsureParagraph(EpubDocument document, int chapterIndex, int sectionIndex)
+ {
+ EnsureSection(document, chapterIndex);
+
+ if (document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Count == 0)
+ document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Add(new Paragraph());
+ }
+
+ public static List SplitBrace(string text)
+ {
+     if (text.Length == 1 && text != "「" && text != "」")
+         return [text];
+
+     // Splits text so that every outermost 「…」 quotation becomes its own segment and the
+     // text between quotations forms the other segments. Nested quotations stay inside their
+     // outer segment. Segments are never empty and, joined in order, reproduce text exactly.
+     if (text.Length == 0)
+         return []; // brackets.Min() below throws on an empty array
+
+     // brackets[i] is the nesting depth after text[i]: +1 for each 「, -1 for each 」.
+     var bracket = 0;
+     var brackets = new int[text.Length];
+     for (var i = 0; i < text.Length; i++)
+     {
+         if (text[i] == '「') bracket++;
+         else if (text[i] == '」') bracket--;
+         brackets[i] = bracket;
+     }
+
+     // Unmatched 」 push the depth below zero; subtracting the minimum makes the lowest
+     // depth reached count as level 0.
+     var mn = Math.Min(0, brackets.Min());
+     var result = new List<string>();
+     var startIdx = 0;
+     for (var i = 1; i < text.Length; i++) // a bracket at index 0 never ends a segment
+     {
+         var depth = brackets[i] - mn;
+         // An outermost 「 ends the segment before it; an outermost 」 ends the segment it belongs to.
+         var end = text[i] == '「' && depth == 1 ? i
+             : text[i] == '」' && depth == 0 ? i + 1
+             : startIdx;
+         if (end == startIdx) continue; // no boundary here, or nothing between adjacent brackets
+         result.Add(text[startIdx..end]);
+         startIdx = end;
+     }
+
+     // Keep any remaining tail, including a one-character one (e.g. the "b" of "「a」b").
+     if (startIdx < text.Length)
+         result.Add(text[startIdx..]);
+     return result;
+ }
+}
diff --git a/KoeBook.Core/Services/AnalyzerService.cs b/KoeBook.Core/Services/AnalyzerService.cs
index 91b098b..59b72a6 100644
--- a/KoeBook.Core/Services/AnalyzerService.cs
+++ b/KoeBook.Core/Services/AnalyzerService.cs
@@ -7,9 +7,9 @@
namespace KoeBook.Core.Services;
-public partial class AnalyzerService(IScrapingService scrapingService, IEpubDocumentStoreService epubDocumentStoreService, ILlmAnalyzerService llmAnalyzerService) : IAnalyzerService
+public partial class AnalyzerService(IScraperSelectorService scrapingService, IEpubDocumentStoreService epubDocumentStoreService, ILlmAnalyzerService llmAnalyzerService) : IAnalyzerService
{
- private readonly IScrapingService _scrapingService = scrapingService;
+ private readonly IScraperSelectorService _scrapingService = scrapingService;
private readonly IEpubDocumentStoreService _epubDocumentStoreService = epubDocumentStoreService;
private readonly ILlmAnalyzerService _llmAnalyzerService = llmAnalyzerService;
private Dictionary _rubyReplacements = new Dictionary();
diff --git a/KoeBook/App.xaml.cs b/KoeBook/App.xaml.cs
index ecc14a2..84011fe 100644
--- a/KoeBook/App.xaml.cs
+++ b/KoeBook/App.xaml.cs
@@ -99,7 +99,7 @@ public App()
services.AddSingleton();
services.AddSingleton();
// TODO: 切り替えサービスを作成
- services.AddSingleton();
+ services.AddSingleton();
// Views and ViewModels
services.AddTransient();
From 7a0fb10ce2ef022d9f731b0c09d9a325f9f0496a Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Sat, 2 Mar 2024 00:00:17 +0900
Subject: [PATCH 2/3] =?UTF-8?q?#1-2=20Ensure=E2=97=AF=E2=97=AF=E3=82=92?=
=?UTF-8?q?=E7=A7=BB=E5=8B=95?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
Epub/KoeBook.Epub/Models/EpubDocument.cs | 27 +++++++++++++++
.../Services/ScrapingAozoraService.cs | 34 +++++++++----------
Epub/KoeBook.Epub/Utility/ScrapingHelper.cs | 31 +----------------
3 files changed, 45 insertions(+), 47 deletions(-)
diff --git a/Epub/KoeBook.Epub/Models/EpubDocument.cs b/Epub/KoeBook.Epub/Models/EpubDocument.cs
index 0f723ed..583d0ce 100644
--- a/Epub/KoeBook.Epub/Models/EpubDocument.cs
+++ b/Epub/KoeBook.Epub/Models/EpubDocument.cs
@@ -37,6 +37,33 @@ public class EpubDocument(string title, string author, string coverFilePath, Gui
];
public List Chapters { get; set; } = [];
+ /// <summary>Adds a chapter whose title is null when the document has no chapters; otherwise does nothing.</summary>
+ internal void EnsureChapter()
+ {
+     if (Chapters.Count < 1) Chapters.Add(new Chapter { Title = null });
+ }
+
+ /// <summary>
+ /// Makes sure the chapter at <paramref name="chapterIndex"/> has at least one section; when it has none, one is
+ /// created from the chapter's Title, or from the document's Title if the chapter's is null.
+ /// </summary>
+ internal void EnsureSection(int chapterIndex)
+ {
+     EnsureChapter();
+
+     var chapter = Chapters[chapterIndex];
+     if (chapter.Sections.Count == 0)
+         chapter.Sections.Add(new Section(chapter.Title ?? Title));
+ }
+
+ /// <summary>Runs <see cref="EnsureSection"/>, then adds an empty <see cref="Paragraph"/> when the addressed section has no elements.</summary>
+ internal void EnsureParagraph(int chapterIndex, int sectionIndex)
+ {
+     EnsureSection(chapterIndex);
+     var elements = Chapters[chapterIndex].Sections[sectionIndex].Elements;
+     if (elements.Count == 0) elements.Add(new Paragraph());
+ }
+
public string CreateNavXhtml()
{
var builder = new StringBuilder($"""
diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index 16f9446..0915687 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -60,7 +60,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
}
if ((MidashiId - previousMidashiId) == 10)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Add(new Section(TextProcess(midashi)));
sectionExist = true;
}
@@ -96,7 +96,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (previous == true)
{
- EnsureSection(document, chapterNum);
+ document.EnsureSection(chapterNum);
document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph());
}
}
@@ -155,12 +155,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextProcess(midashi);
@@ -182,7 +182,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
if (element.ClassName == "caption")
{
// https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextProcess(element));
@@ -215,12 +215,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
foreach (var splitText in SplitBrace(TextProcess(element)))
@@ -252,7 +252,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
@@ -273,7 +273,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
await response.Content.CopyToAsync(ms, ct).ConfigureAwait(false);
var filePass = System.IO.Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1"));
File.WriteAllBytes(filePass, ms.ToArray());
- EnsureSection(document, chapterNum);
+ document.EnsureSection(chapterNum);
if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1)
{
document.Chapters[chapterNum].Sections[sectionNum].Elements.Insert(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1, new Picture(filePass));
@@ -282,7 +282,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
}
if (img.AlternativeText != null)
{
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextReplace(img.AlternativeText);
@@ -331,7 +331,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
case "[#ページの左右中央]":
break;
default:
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
foreach (var splitText in SplitBrace(TextProcess(element)))
@@ -361,12 +361,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextProcess(element));
@@ -401,12 +401,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
paragraph.Text += TextProcess(element);
@@ -449,12 +449,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP
{
if (sectionExist)
{
- EnsureChapter(document);
+ document.EnsureChapter();
document.Chapters[^1].Sections.Insert(0, new Section("___"));
}
sectionNum++;
}
- EnsureParagraph(document, chapterNum, sectionNum);
+ document.EnsureParagraph(chapterNum, sectionNum);
if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph))
{
var split = SplitBrace(TextReplace(nextNode.Text()));
diff --git a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
index 3af91cd..ba8ff41 100644
--- a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
+++ b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs
@@ -1,36 +1,7 @@
-using KoeBook.Epub.Models;
-
-namespace KoeBook.Epub.Utility;
+namespace KoeBook.Epub.Utility;
public static class ScrapingHelper
{
- internal static void EnsureChapter(EpubDocument document)
- {
- if (document.Chapters.Count == 0)
- document.Chapters.Add(new Chapter() { Title = null });
- }
-
- internal static void EnsureSection(EpubDocument document, int chapterIndex)
- {
- EnsureChapter(document);
-
- if (document.Chapters[chapterIndex].Sections.Count == 0)
- {
- if (document.Chapters[chapterIndex].Title != null)
- document.Chapters[chapterIndex].Sections.Add(new Section(document.Chapters[chapterIndex].Title!));
- else
- document.Chapters[chapterIndex].Sections.Add(new Section(document.Title));
- }
- }
-
- internal static void EnsureParagraph(EpubDocument document, int chapterIndex, int sectionIndex)
- {
- EnsureSection(document, chapterIndex);
-
- if (document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Count == 0)
- document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Add(new Paragraph());
- }
-
public static List SplitBrace(string text)
{
if (text.Length == 1 && text != "「" && text != "」")
From 73200703870769657c1a750291954a3cb9728f95 Mon Sep 17 00:00:00 2001
From: miyaji255 <84168445+miyaji255@users.noreply.github.com>
Date: Sat, 2 Mar 2024 16:15:41 +0900
Subject: [PATCH 3/3] =?UTF-8?q?#1=20=E3=82=BB=E3=83=AC=E3=82=AF=E3=82=BF?=
=?UTF-8?q?=E3=83=BC=E3=82=92=E5=A4=89=E6=9B=B4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
.../Services/IScrapingAozoraService.cs | 8 --------
.../Services/IScrapingNaroService.cs | 9 ---------
.../Contracts/Services/IScrapingService.cs | 8 ++++++++
.../Services/ScraperSelectorService.cs | 20 +++++++++++--------
.../Services/ScrapingAozoraService.cs | 7 ++++++-
.../Services/ScrapingNaroService.cs | 7 ++++++-
KoeBook/App.xaml.cs | 6 ++++--
7 files changed, 36 insertions(+), 29 deletions(-)
delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
deleted file mode 100644
index 86756e5..0000000
--- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs
+++ /dev/null
@@ -1,8 +0,0 @@
-using KoeBook.Epub.Models;
-
-namespace KoeBook.Epub.Contracts.Services;
-
-public interface IScrapingAozoraService
-{
- public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct);
-}
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
deleted file mode 100644
index f9259aa..0000000
--- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs
+++ /dev/null
@@ -1,9 +0,0 @@
-using KoeBook.Epub.Models;
-
-namespace KoeBook.Epub.Contracts.Services;
-
-public interface IScrapingNaroService
-{
- public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct);
-}
-
diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
new file mode 100644
index 0000000..e0818f3
--- /dev/null
+++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs
@@ -0,0 +1,8 @@
+using KoeBook.Epub.Models;
+
+namespace KoeBook.Epub.Contracts.Services;
+
+public interface IScrapingService : IScraperSelectorService
+{
+ public bool IsMatchSite(Uri url);
+}
diff --git a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
index 4cfb2ee..51a68f3 100644
--- a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
+++ b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs
@@ -1,19 +1,23 @@
-using KoeBook.Epub.Contracts.Services;
+using System.Collections.Immutable;
+using KoeBook.Epub.Contracts.Services;
using KoeBook.Epub.Models;
namespace KoeBook.Epub.Services;
-public class ScraperSelectorService(IScrapingAozoraService scrapingAozoraService, IScrapingNaroService scrapingNaroService) : IScraperSelectorService
+public class ScraperSelectorService(IEnumerable scrapingServices) : IScraperSelectorService
{
- public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
+ private readonly ImmutableArray _scrapingServices = scrapingServices.ToImmutableArray();
+
+ public async ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct)
{
var uri = new Uri(url);
- return uri.Host switch
+ foreach (var service in _scrapingServices)
{
- "www.aozora.gr.jp" => scrapingAozoraService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct),
- "ncode.syosetu.com" => scrapingNaroService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct),
- _ => throw new ArgumentException("有効なドメインではありません。"),
- };
+ if (service.IsMatchSite(uri))
+ return await service.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct);
+ }
+
+ throw new ArgumentException("対応するURLではありません");
}
}
diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
index 0915687..203bcaa 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs
@@ -9,8 +9,13 @@
namespace KoeBook.Epub.Services
{
- public partial class ScrapingAozoraService : IScrapingAozoraService
+ public partial class ScrapingAozoraService : IScrapingService
{
+ /// <summary>
+ /// Returns true only when the host of <paramref name="uri"/> is exactly "www.aozora.gr.jp".
+ /// </summary>
+ public bool IsMatchSite(Uri uri) => uri.Host == "www.aozora.gr.jp";
+
public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
{
var chapterNum = 0;
diff --git a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
index 7d6ce7a..a1e00ec 100644
--- a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
+++ b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs
@@ -9,10 +9,15 @@
namespace KoeBook.Epub.Services
{
- public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingNaroService
+ public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingService
{
private readonly IHttpClientFactory _httpCliantFactory = httpClientFactory;
+ /// <summary>
+ /// Returns true only when the host of <paramref name="uri"/> is exactly "ncode.syosetu.com".
+ /// </summary>
+ public bool IsMatchSite(Uri uri) => uri.Host == "ncode.syosetu.com";
+
public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct)
{
var config = Configuration.Default.WithDefaultLoader();
diff --git a/KoeBook/App.xaml.cs b/KoeBook/App.xaml.cs
index 84011fe..ddde655 100644
--- a/KoeBook/App.xaml.cs
+++ b/KoeBook/App.xaml.cs
@@ -98,8 +98,10 @@ public App()
services.AddSingleton();
services.AddSingleton();
services.AddSingleton();
- // TODO: 切り替えサービスを作成
- services.AddSingleton();
+
+ services.AddSingleton()
+ .AddSingleton()
+ .AddSingleton();
// Views and ViewModels
services.AddTransient();