From 7a3046786f59a3c4d584e217a650919f0304c76d Mon Sep 17 00:00:00 2001 From: miyaji255 <84168445+miyaji255@users.noreply.github.com> Date: Fri, 1 Mar 2024 23:00:07 +0900 Subject: [PATCH 1/3] =?UTF-8?q?#1-2=20ScraperSelector=E3=82=92=E8=BF=BD?= =?UTF-8?q?=E5=8A=A0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Services/IFileExtensionService.cs | 6 ++ .../Services/IScraperSelectorService.cs | 11 +++ .../Services/IScrapingAozoraService.cs | 8 ++ .../Services/IScrapingNaroService.cs | 9 ++ .../Contracts/Services/IScrapingService.cs | 8 -- Epub/KoeBook.Epub/ScrapingHelper.cs | 90 ------------------- .../Services/FileExtensionService.cs | 19 ++++ .../Services/ScraperSelectorService.cs | 19 ++++ ...pingAozora.cs => ScrapingAozoraService.cs} | 51 ++++++----- ...crapingNarou.cs => ScrapingNaroService.cs} | 13 +-- Epub/KoeBook.Epub/Utility/ScrapingHelper.cs | 77 ++++++++++++++++ KoeBook.Core/Services/AnalyzerService.cs | 4 +- KoeBook/App.xaml.cs | 2 +- 13 files changed, 181 insertions(+), 136 deletions(-) create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs delete mode 100644 Epub/KoeBook.Epub/ScrapingHelper.cs create mode 100644 Epub/KoeBook.Epub/Services/FileExtensionService.cs create mode 100644 Epub/KoeBook.Epub/Services/ScraperSelectorService.cs rename Epub/KoeBook.Epub/Services/{ScrapingAozora.cs => ScrapingAozoraService.cs} (94%) rename Epub/KoeBook.Epub/Services/{ScrapingNarou.cs => ScrapingNaroService.cs} (95%) create mode 100644 Epub/KoeBook.Epub/Utility/ScrapingHelper.cs diff --git a/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs b/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs new file mode 100644 index 0000000..a498d94 --- /dev/null +++ b/Epub/KoeBook.Epub/Contracts/Services/IFileExtensionService.cs @@ -0,0 +1,6 @@ +namespace KoeBook.Epub.Contracts.Services; + +public interface IFileExtensionService +{ + public string GetImagesMediaType(string fileName); +} diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs new file mode 100644 index 0000000..4dca9a9 --- /dev/null +++ b/Epub/KoeBook.Epub/Contracts/Services/IScraperSelectorService.cs @@ -0,0 +1,11 @@ +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Contracts.Services; + +/// +/// スクレイピングを行い、EpubDocumentを作成します。 +/// +public interface IScraperSelectorService +{ + public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct); +} diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs new file mode 100644 index 0000000..86756e5 --- /dev/null +++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs @@ -0,0 +1,8 @@ +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Contracts.Services; + +public interface IScrapingAozoraService +{ + public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct); +} diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs new file mode 100644 index 0000000..f9259aa --- /dev/null +++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs @@ -0,0 +1,9 @@ +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Contracts.Services; + +public interface IScrapingNaroService +{ + public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct); +} + diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs deleted file mode 100644 index 700be5a..0000000 --- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs +++ /dev/null @@ -1,8 +0,0 @@ -using KoeBook.Epub.Models; - -namespace KoeBook.Epub.Contracts.Services; - -public interface IScrapingService -{ - public Task ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct); -} diff --git a/Epub/KoeBook.Epub/ScrapingHelper.cs b/Epub/KoeBook.Epub/ScrapingHelper.cs deleted file mode 100644 index a34c780..0000000 --- a/Epub/KoeBook.Epub/ScrapingHelper.cs +++ /dev/null @@ -1,90 +0,0 @@ -using System.Net; -using System.Reflection.Metadata; -using KoeBook.Epub.Models; - -namespace KoeBook.Epub; - -public static class ScrapingHelper -{ - internal static void checkChapter(EpubDocument document) - { - if (document.Chapters.Count == 0) - { - document.Chapters.Add(new Chapter() { Title = null }); - } - return; - } - - internal static void checkSection(EpubDocument document, int ChapterNum) - { - - checkChapter(document); - - if (document.Chapters[ChapterNum].Sections.Count == 0) - { - if (document.Chapters[ChapterNum].Title != null) - { - document.Chapters[ChapterNum].Sections.Add(new Section(document.Chapters[ChapterNum].Title!)); - } - else - { - document.Chapters[ChapterNum].Sections.Add(new Section(document.Title)); - } - - } - return; - } - - internal static void checkParagraph(EpubDocument document, int chapterNum, int sectionNum) - { - checkSection(document, chapterNum); - if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count == 0) - { - document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph()); - } - return; - } - - public static List SplitBrace(string text) - { - if (text.Length == 1 && text != "「" && text != "」") - { - return new List() { text }; - } - var result = new List(); - int bracket = 0; - var brackets = new List(); - foreach (char c in text) - { - if (c == '「') bracket++; - if (c == '」') bracket--; - brackets.Add(bracket); - } - var mn = Math.Min(0, brackets.Min()); - int startIdx = 0; - for (int i = 0; i < brackets.Count; i++) - { - brackets[i] -= mn; - if (text[i] == '「' && brackets[i] == 1 && i != 0) - { - result.Add(text[startIdx..i]); - startIdx = i; - } - if (text[i] == '」' && brackets[i] == 0 && i != 0) - { - result.Add(text[startIdx..(i + 1)]); - startIdx = i + 1; - } - } - if (startIdx != text.Length - 1) - { - result.Add(text[startIdx..]); - } - if (result[^1] == "") - { - result.RemoveAt(result.Count - 1); - } - - return result; - } -} diff --git a/Epub/KoeBook.Epub/Services/FileExtensionService.cs b/Epub/KoeBook.Epub/Services/FileExtensionService.cs new file mode 100644 index 0000000..c5037a6 --- /dev/null +++ b/Epub/KoeBook.Epub/Services/FileExtensionService.cs @@ -0,0 +1,19 @@ +using KoeBook.Epub.Contracts.Services; + +namespace KoeBook.Epub.Services; + +public class FileExtensionService : IFileExtensionService +{ + public string GetImagesMediaType(string fileName) + { + return Path.GetExtension(fileName) switch + { + ".gif" => "image/gif", + ".jpg" or ".jpeg" => "image/jpeg", + ".png" => "image/png", + ".svg" => "image/svg+xml", + ".webp" => "image/webp", + _ => string.Empty, + }; + } +} diff --git a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs new file mode 100644 index 0000000..4cfb2ee --- /dev/null +++ b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs @@ -0,0 +1,19 @@ +using KoeBook.Epub.Contracts.Services; +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Services; + +public class ScraperSelectorService(IScrapingAozoraService scrapingAozoraService, IScrapingNaroService scrapingNaroService) : IScraperSelectorService +{ + public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct) + { + var uri = new Uri(url); + + return uri.Host switch + { + "www.aozora.gr.jp" => scrapingAozoraService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct), + "ncode.syosetu.com" => scrapingNaroService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct), + _ => throw new ArgumentException("有効なドメインではありません。"), + }; + } +} diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozora.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs similarity index 94% rename from Epub/KoeBook.Epub/Services/ScrapingAozora.cs rename to Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs index a4d6f9e..16f9446 100644 --- a/Epub/KoeBook.Epub/Services/ScrapingAozora.cs +++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs @@ -4,21 +4,20 @@ using AngleSharp.Io; using KoeBook.Epub.Contracts.Services; using KoeBook.Epub.Models; -using static KoeBook.Epub.ScrapingHelper; +using static KoeBook.Epub.Utility.ScrapingHelper; namespace KoeBook.Epub.Services { - public partial class ScrapingAozora : IScrapingService + public partial class ScrapingAozoraService : IScrapingAozoraService { - private int chapterNum; - private int sectionNum; - private bool chapterExist = false; - private bool sectionExist = false; - - - public async Task ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) + public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) { + var chapterNum = 0; + var sectionNum = 0; + var chapterExist = false; + var sectionExist = false; + var config = Configuration.Default.WithDefaultLoader(); using var context = BrowsingContext.New(config); var doc = await context.OpenAsync(url, ct).ConfigureAwait(false); @@ -61,7 +60,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, } if ((MidashiId - previousMidashiId) == 10) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Add(new Section(TextProcess(midashi))); sectionExist = true; } @@ -97,7 +96,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (previous == true) { - checkSection(document, chapterNum); + EnsureSection(document, chapterNum); document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph()); } } @@ -156,12 +155,12 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextProcess(midashi); @@ -183,7 +182,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, if (element.ClassName == "caption") { // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分 - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextProcess(element)); @@ -216,12 +215,12 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { foreach (var splitText in SplitBrace(TextProcess(element))) @@ -253,7 +252,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; @@ -274,7 +273,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, await response.Content.CopyToAsync(ms, ct).ConfigureAwait(false); var filePass = System.IO.Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1")); File.WriteAllBytes(filePass, ms.ToArray()); - checkSection(document, chapterNum); + EnsureSection(document, chapterNum); if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1) { document.Chapters[chapterNum].Sections[sectionNum].Elements.Insert(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1, new Picture(filePass)); @@ -283,7 +282,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, } if (img.AlternativeText != null) { - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextReplace(img.AlternativeText); @@ -332,7 +331,7 @@ public async Task ScrapingAsync(string url, string coverFilePath, case "[#ページの左右中央]": break; default: - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { foreach (var splitText in SplitBrace(TextProcess(element))) @@ -362,12 +361,12 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextProcess(element)); @@ -402,12 +401,12 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextProcess(element); @@ -450,12 +449,12 @@ public async Task ScrapingAsync(string url, string coverFilePath, { if (sectionExist) { - checkChapter(document); + EnsureChapter(document); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - checkParagraph(document, chapterNum, sectionNum); + EnsureParagraph(document, chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextReplace(nextNode.Text())); diff --git a/Epub/KoeBook.Epub/Services/ScrapingNarou.cs b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs similarity index 95% rename from Epub/KoeBook.Epub/Services/ScrapingNarou.cs rename to Epub/KoeBook.Epub/Services/ScrapingNaroService.cs index a003f50..7d6ce7a 100644 --- a/Epub/KoeBook.Epub/Services/ScrapingNarou.cs +++ b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs @@ -5,20 +5,15 @@ using AngleSharp.Io; using KoeBook.Epub.Contracts.Services; using KoeBook.Epub.Models; -using static KoeBook.Epub.ScrapingHelper; +using static KoeBook.Epub.Utility.ScrapingHelper; namespace KoeBook.Epub.Services { - public partial class ScrapingNarouService : IScrapingService + public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingNaroService { - public ScrapingNarouService(IHttpClientFactory httpClientFactory) - { - _httpCliantFactory = httpClientFactory; - } - - private readonly IHttpClientFactory _httpCliantFactory; + private readonly IHttpClientFactory _httpCliantFactory = httpClientFactory; - public async Task ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) + public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) { var config = Configuration.Default.WithDefaultLoader(); using var context = BrowsingContext.New(config); diff --git a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs new file mode 100644 index 0000000..3af91cd --- /dev/null +++ b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs @@ -0,0 +1,77 @@ +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Utility; + +public static class ScrapingHelper +{ + internal static void EnsureChapter(EpubDocument document) + { + if (document.Chapters.Count == 0) + document.Chapters.Add(new Chapter() { Title = null }); + } + + internal static void EnsureSection(EpubDocument document, int chapterIndex) + { + EnsureChapter(document); + + if (document.Chapters[chapterIndex].Sections.Count == 0) + { + if (document.Chapters[chapterIndex].Title != null) + document.Chapters[chapterIndex].Sections.Add(new Section(document.Chapters[chapterIndex].Title!)); + else + document.Chapters[chapterIndex].Sections.Add(new Section(document.Title)); + } + } + + internal static void EnsureParagraph(EpubDocument document, int chapterIndex, int sectionIndex) + { + EnsureSection(document, chapterIndex); + + if (document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Count == 0) + document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Add(new Paragraph()); + } + + public static List SplitBrace(string text) + { + if (text.Length == 1 && text != "「" && text != "」") + return [text]; + + var bracket = 0; + var brackets = new int[text.Length]; + for (var i = 0; i < text.Length; i++) + { + var c = text[i]; + if (c == '「') bracket++; + else if (c == '」') bracket--; + brackets[i] = bracket; + } + + var result = new List(); + var mn = Math.Min(0, brackets.Min()); + var startIdx = 0; + for (var i = 0; i < brackets.Length; i++) + { + brackets[i] -= mn; + if (text[i] == '「' && brackets[i] == 1 && i != 0) + { + result.Add(text[startIdx..i]); + startIdx = i; + } + if (text[i] == '」' && brackets[i] == 0 && i != 0) + { + result.Add(text[startIdx..(i + 1)]); + startIdx = i + 1; + } + } + if (startIdx != text.Length - 1) + { + result.Add(text[startIdx..]); + } + if (result[^1] == "") + { + result.RemoveAt(result.Count - 1); + } + + return result; + } +} diff --git a/KoeBook.Core/Services/AnalyzerService.cs b/KoeBook.Core/Services/AnalyzerService.cs index 91b098b..59b72a6 100644 --- a/KoeBook.Core/Services/AnalyzerService.cs +++ b/KoeBook.Core/Services/AnalyzerService.cs @@ -7,9 +7,9 @@ namespace KoeBook.Core.Services; -public partial class AnalyzerService(IScrapingService scrapingService, IEpubDocumentStoreService epubDocumentStoreService, ILlmAnalyzerService llmAnalyzerService) : IAnalyzerService +public partial class AnalyzerService(IScraperSelectorService scrapingService, IEpubDocumentStoreService epubDocumentStoreService, ILlmAnalyzerService llmAnalyzerService) : IAnalyzerService { - private readonly IScrapingService _scrapingService = scrapingService; + private readonly IScraperSelectorService _scrapingService = scrapingService; private readonly IEpubDocumentStoreService _epubDocumentStoreService = epubDocumentStoreService; private readonly ILlmAnalyzerService _llmAnalyzerService = llmAnalyzerService; private Dictionary _rubyReplacements = new Dictionary(); diff --git a/KoeBook/App.xaml.cs b/KoeBook/App.xaml.cs index ecc14a2..84011fe 100644 --- a/KoeBook/App.xaml.cs +++ b/KoeBook/App.xaml.cs @@ -99,7 +99,7 @@ public App() services.AddSingleton(); services.AddSingleton(); // TODO: 切り替えサービスを作成 - services.AddSingleton(); + services.AddSingleton(); // Views and ViewModels services.AddTransient(); From 7a0fb10ce2ef022d9f731b0c09d9a325f9f0496a Mon Sep 17 00:00:00 2001 From: miyaji255 <84168445+miyaji255@users.noreply.github.com> Date: Sat, 2 Mar 2024 00:00:17 +0900 Subject: [PATCH 2/3] =?UTF-8?q?#1-2=20Ensure=E2=97=AF=E2=97=AF=E3=82=92?= =?UTF-8?q?=E7=A7=BB=E5=8B=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Epub/KoeBook.Epub/Models/EpubDocument.cs | 27 +++++++++++++++ .../Services/ScrapingAozoraService.cs | 34 +++++++++---------- Epub/KoeBook.Epub/Utility/ScrapingHelper.cs | 31 +---------------- 3 files changed, 45 insertions(+), 47 deletions(-) diff --git a/Epub/KoeBook.Epub/Models/EpubDocument.cs b/Epub/KoeBook.Epub/Models/EpubDocument.cs index 0f723ed..583d0ce 100644 --- a/Epub/KoeBook.Epub/Models/EpubDocument.cs +++ b/Epub/KoeBook.Epub/Models/EpubDocument.cs @@ -37,6 +37,33 @@ public class EpubDocument(string title, string author, string coverFilePath, Gui ]; public List Chapters { get; set; } = []; + internal void EnsureChapter() + { + if (Chapters.Count == 0) + Chapters.Add(new Chapter() { Title = null }); + } + + internal void EnsureSection(int chapterIndex) + { + EnsureChapter(); + + if (Chapters[chapterIndex].Sections.Count == 0) + { + if (Chapters[chapterIndex].Title != null) + Chapters[chapterIndex].Sections.Add(new Section(Chapters[chapterIndex].Title!)); + else + Chapters[chapterIndex].Sections.Add(new Section(Title)); + } + } + + internal void EnsureParagraph(int chapterIndex, int sectionIndex) + { + EnsureSection(chapterIndex); + + if (Chapters[chapterIndex].Sections[sectionIndex].Elements.Count == 0) + Chapters[chapterIndex].Sections[sectionIndex].Elements.Add(new Paragraph()); + } + public string CreateNavXhtml() { var builder = new StringBuilder($""" diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs index 16f9446..0915687 100644 --- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs +++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs @@ -60,7 +60,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP } if ((MidashiId - previousMidashiId) == 10) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Add(new Section(TextProcess(midashi))); sectionExist = true; } @@ -96,7 +96,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (previous == true) { - EnsureSection(document, chapterNum); + document.EnsureSection(chapterNum); document.Chapters[chapterNum].Sections[sectionNum].Elements.Add(new Paragraph()); } } @@ -155,12 +155,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextProcess(midashi); @@ -182,7 +182,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP if (element.ClassName == "caption") { // https://www.aozora.gr.jp/annotation/graphics.html#:~:text=%3Cdiv%20class%3D%22caption%22%3E を処理するための部分 - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextProcess(element)); @@ -215,12 +215,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { foreach (var splitText in SplitBrace(TextProcess(element))) @@ -252,7 +252,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; @@ -273,7 +273,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP await response.Content.CopyToAsync(ms, ct).ConfigureAwait(false); var filePass = System.IO.Path.Combine(imageDirectory, FileUrlToFileName().Replace(img.Source, "$1")); File.WriteAllBytes(filePass, ms.ToArray()); - EnsureSection(document, chapterNum); + document.EnsureSection(chapterNum); if (document.Chapters[chapterNum].Sections[sectionNum].Elements.Count > 1) { document.Chapters[chapterNum].Sections[sectionNum].Elements.Insert(document.Chapters[chapterNum].Sections[sectionNum].Elements.Count - 1, new Picture(filePass)); @@ -282,7 +282,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP } if (img.AlternativeText != null) { - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextReplace(img.AlternativeText); @@ -331,7 +331,7 @@ public async ValueTask ScrapingAsync(string url, string coverFileP case "[#ページの左右中央]": break; default: - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { foreach (var splitText in SplitBrace(TextProcess(element))) @@ -361,12 +361,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextProcess(element)); @@ -401,12 +401,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { paragraph.Text += TextProcess(element); @@ -449,12 +449,12 @@ public async ValueTask ScrapingAsync(string url, string coverFileP { if (sectionExist) { - EnsureChapter(document); + document.EnsureChapter(); document.Chapters[^1].Sections.Insert(0, new Section("___")); } sectionNum++; } - EnsureParagraph(document, chapterNum, sectionNum); + document.EnsureParagraph(chapterNum, sectionNum); if ((document.Chapters[chapterNum].Sections[sectionNum].Elements[^1] is Paragraph paragraph)) { var split = SplitBrace(TextReplace(nextNode.Text())); diff --git a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs index 3af91cd..ba8ff41 100644 --- a/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs +++ b/Epub/KoeBook.Epub/Utility/ScrapingHelper.cs @@ -1,36 +1,7 @@ -using KoeBook.Epub.Models; - -namespace KoeBook.Epub.Utility; +namespace KoeBook.Epub.Utility; public static class ScrapingHelper { - internal static void EnsureChapter(EpubDocument document) - { - if (document.Chapters.Count == 0) - document.Chapters.Add(new Chapter() { Title = null }); - } - - internal static void EnsureSection(EpubDocument document, int chapterIndex) - { - EnsureChapter(document); - - if (document.Chapters[chapterIndex].Sections.Count == 0) - { - if (document.Chapters[chapterIndex].Title != null) - document.Chapters[chapterIndex].Sections.Add(new Section(document.Chapters[chapterIndex].Title!)); - else - document.Chapters[chapterIndex].Sections.Add(new Section(document.Title)); - } - } - - internal static void EnsureParagraph(EpubDocument document, int chapterIndex, int sectionIndex) - { - EnsureSection(document, chapterIndex); - - if (document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Count == 0) - document.Chapters[chapterIndex].Sections[sectionIndex].Elements.Add(new Paragraph()); - } - public static List SplitBrace(string text) { if (text.Length == 1 && text != "「" && text != "」") From 73200703870769657c1a750291954a3cb9728f95 Mon Sep 17 00:00:00 2001 From: miyaji255 <84168445+miyaji255@users.noreply.github.com> Date: Sat, 2 Mar 2024 16:15:41 +0900 Subject: [PATCH 3/3] =?UTF-8?q?#1=20=E3=82=BB=E3=83=AC=E3=82=AF=E3=82=BF?= =?UTF-8?q?=E3=83=BC=E3=82=92=E5=A4=89=E6=9B=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Services/IScrapingAozoraService.cs | 8 -------- .../Services/IScrapingNaroService.cs | 9 --------- .../Contracts/Services/IScrapingService.cs | 8 ++++++++ .../Services/ScraperSelectorService.cs | 20 +++++++++++-------- .../Services/ScrapingAozoraService.cs | 7 ++++++- .../Services/ScrapingNaroService.cs | 7 ++++++- KoeBook/App.xaml.cs | 6 ++++-- 7 files changed, 36 insertions(+), 29 deletions(-) delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs delete mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs create mode 100644 Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs deleted file mode 100644 index 86756e5..0000000 --- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingAozoraService.cs +++ /dev/null @@ -1,8 +0,0 @@ -using KoeBook.Epub.Models; - -namespace KoeBook.Epub.Contracts.Services; - -public interface IScrapingAozoraService -{ - public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct); -} diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs deleted file mode 100644 index f9259aa..0000000 --- a/Epub/KoeBook.Epub/Contracts/Services/IScrapingNaroService.cs +++ /dev/null @@ -1,9 +0,0 @@ -using KoeBook.Epub.Models; - -namespace KoeBook.Epub.Contracts.Services; - -public interface IScrapingNaroService -{ - public ValueTask ScrapingAsync(string url, string coverFillePath, string imageDirectory, Guid id, CancellationToken ct); -} - diff --git a/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs new file mode 100644 index 0000000..e0818f3 --- /dev/null +++ b/Epub/KoeBook.Epub/Contracts/Services/IScrapingService.cs @@ -0,0 +1,8 @@ +using KoeBook.Epub.Models; + +namespace KoeBook.Epub.Contracts.Services; + +public interface IScrapingService : IScraperSelectorService +{ + public bool IsMatchSite(Uri url); +} diff --git a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs index 4cfb2ee..51a68f3 100644 --- a/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs +++ b/Epub/KoeBook.Epub/Services/ScraperSelectorService.cs @@ -1,19 +1,23 @@ -using KoeBook.Epub.Contracts.Services; +using System.Collections.Immutable; +using KoeBook.Epub.Contracts.Services; using KoeBook.Epub.Models; namespace KoeBook.Epub.Services; -public class ScraperSelectorService(IScrapingAozoraService scrapingAozoraService, IScrapingNaroService scrapingNaroService) : IScraperSelectorService +public class ScraperSelectorService(IEnumerable scrapingServices) : IScraperSelectorService { - public ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct) + private readonly ImmutableArray _scrapingServices = scrapingServices.ToImmutableArray(); + + public async ValueTask ScrapingAsync(string url, string coverFillePath, string tempDirectory, Guid id, CancellationToken ct) { var uri = new Uri(url); - return uri.Host switch + foreach (var service in _scrapingServices) { - "www.aozora.gr.jp" => scrapingAozoraService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct), - "ncode.syosetu.com" => scrapingNaroService.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct), - _ => throw new ArgumentException("有効なドメインではありません。"), - }; + if (service.IsMatchSite(uri)) + return await service.ScrapingAsync(url, coverFillePath, tempDirectory, id, ct); + } + + throw new ArgumentException("対応するURLではありません"); } } diff --git a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs index 0915687..203bcaa 100644 --- a/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs +++ b/Epub/KoeBook.Epub/Services/ScrapingAozoraService.cs @@ -9,8 +9,13 @@ namespace KoeBook.Epub.Services { - public partial class ScrapingAozoraService : IScrapingAozoraService + public partial class ScrapingAozoraService : IScrapingService { + public bool IsMatchSite(Uri uri) + { + return uri.Host == "www.aozora.gr.jp"; + } + public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) { var chapterNum = 0; diff --git a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs index 7d6ce7a..a1e00ec 100644 --- a/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs +++ b/Epub/KoeBook.Epub/Services/ScrapingNaroService.cs @@ -9,10 +9,15 @@ namespace KoeBook.Epub.Services { - public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingNaroService + public partial class ScrapingNaroService(IHttpClientFactory httpClientFactory) : IScrapingService { private readonly IHttpClientFactory _httpCliantFactory = httpClientFactory; + public bool IsMatchSite(Uri uri) + { + return uri.Host == "ncode.syosetu.com"; + } + public async ValueTask ScrapingAsync(string url, string coverFilePath, string imageDirectory, Guid id, CancellationToken ct) { var config = Configuration.Default.WithDefaultLoader(); diff --git a/KoeBook/App.xaml.cs b/KoeBook/App.xaml.cs index 84011fe..ddde655 100644 --- a/KoeBook/App.xaml.cs +++ b/KoeBook/App.xaml.cs @@ -98,8 +98,10 @@ public App() services.AddSingleton(); services.AddSingleton(); services.AddSingleton(); - // TODO: 切り替えサービスを作成 - services.AddSingleton(); + + services.AddSingleton() + .AddSingleton() + .AddSingleton(); // Views and ViewModels services.AddTransient();