diff --git a/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs new file mode 100644 index 000000000..3d336e3f2 --- /dev/null +++ b/src/SIL.Machine/Corpora/FileParatextProjectFileHandler.cs @@ -0,0 +1,39 @@ +using System.IO; +using System.Linq; + +namespace SIL.Machine.Corpora +{ + public class FileParatextProjectFileHandler : IParatextProjectFileHandler + { + private readonly string _projectDir; + + public FileParatextProjectFileHandler(string projectDir) + { + _projectDir = projectDir; + } + + public bool Exists(string fileName) + { + return File.Exists(Path.Combine(_projectDir, fileName)); + } + + public Stream Open(string fileName) + { + return File.OpenRead(Path.Combine(_projectDir, fileName)); + } + + public UsfmStylesheet CreateStylesheet(string fileName) + { + string customStylesheetFileName = Path.Combine(_projectDir, "custom.sty"); + return new UsfmStylesheet( + fileName, + File.Exists(customStylesheetFileName) ? 
customStylesheetFileName : null + ); + } + + public string Find(string extension) + { + return Directory.EnumerateFiles(_projectDir, "*" + extension).FirstOrDefault(); + } + } +} diff --git a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs index 996400a4c..27a51b420 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectSettingsParser.cs @@ -1,39 +1,13 @@ -using System.IO; -using System.Linq; - -namespace SIL.Machine.Corpora +namespace SIL.Machine.Corpora { public class FileParatextProjectSettingsParser : ParatextProjectSettingsParserBase { - private readonly string _projectDir; - public FileParatextProjectSettingsParser(string projectDir) - { - _projectDir = projectDir; - } - - protected override UsfmStylesheet CreateStylesheet(string fileName) - { - string customStylesheetFileName = Path.Combine(_projectDir, "custom.sty"); - return new UsfmStylesheet( - fileName, - File.Exists(customStylesheetFileName) ? 
customStylesheetFileName : null - ); - } - - protected override bool Exists(string fileName) - { - return File.Exists(Path.Combine(_projectDir, fileName)); - } - - protected override string Find(string extension) - { - return Directory.EnumerateFiles(_projectDir, "*" + extension).FirstOrDefault(); - } + : base(new FileParatextProjectFileHandler(projectDir)) { } - protected override Stream Open(string fileName) + public static ParatextProjectSettings Parse(string projectDir) { - return File.OpenRead(Path.Combine(_projectDir, fileName)); + return new FileParatextProjectSettingsParser(projectDir).Parse(); } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs index c9c9dd958..6389b76ad 100644 --- a/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/FileParatextProjectTextUpdater.cs @@ -1,25 +1,9 @@ -using System.IO; - -namespace SIL.Machine.Corpora +namespace SIL.Machine.Corpora { public class FileParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { - private readonly string _projectDir; - public FileParatextProjectTextUpdater(string projectDir) - : base(new FileParatextProjectSettingsParser(projectDir)) - { - _projectDir = projectDir; - } - - protected override bool Exists(string fileName) - { - return File.Exists(Path.Combine(_projectDir, fileName)); - } - - protected override Stream Open(string fileName) - { - return File.OpenRead(Path.Combine(_projectDir, fileName)); - } + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) + { } } } diff --git a/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs new file mode 100644 index 000000000..17e1e8aa2 --- /dev/null +++ b/src/SIL.Machine/Corpora/FileParatextProjectVersificationErrorDetector.cs @@ -0,0 +1,9 @@ +namespace SIL.Machine.Corpora +{ + 
public class FileParatextProjectVersificationErrorDetector : ParatextProjectVersificationErrorDetectorBase + { + public FileParatextProjectVersificationErrorDetector(string projectDir) + : base(new FileParatextProjectFileHandler(projectDir), FileParatextProjectSettingsParser.Parse(projectDir)) + { } + } +} diff --git a/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs new file mode 100644 index 000000000..ac4b1a9cc --- /dev/null +++ b/src/SIL.Machine/Corpora/IParatextProjectFileHandler.cs @@ -0,0 +1,12 @@ +using System.IO; + +namespace SIL.Machine.Corpora +{ + public interface IParatextProjectFileHandler + { + bool Exists(string fileName); + Stream Open(string fileName); + string Find(string extension); + UsfmStylesheet CreateStylesheet(string fileName); + } +} diff --git a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs index 111cbc5ef..60ce88002 100644 --- a/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs +++ b/src/SIL.Machine/Corpora/ParatextBackupTermsCorpus.cs @@ -15,14 +15,12 @@ public ParatextBackupTermsCorpus( { using (var archive = ZipFile.OpenRead(fileName)) { - ParatextProjectSettings settings = new ZipParatextProjectSettingsParser(archive).Parse(); - IEnumerable<(string, IReadOnlyList)> glosses = new ZipParatextProjectTermsParser( - archive, - settings - ) + IEnumerable<(string, IReadOnlyList)> glosses = new ZipParatextProjectTermsParser(archive) .Parse(termCategories, useTermGlosses, chapters) .OrderBy(g => g.TermId); + ParatextProjectSettings settings = ZipParatextProjectSettingsParser.Parse(archive); + string textId = $"{settings.BiblicalTermsListType}:{settings.BiblicalTermsProjectName}:{settings.BiblicalTermsFileName}"; diff --git a/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs index b3431843c..52b54cbad 100644 --- 
a/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectSettingsParserBase.cs @@ -8,16 +8,23 @@ namespace SIL.Machine.Corpora { public abstract class ParatextProjectSettingsParserBase { + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; + + public ParatextProjectSettingsParserBase(IParatextProjectFileHandler paratextProjectFileHandler) + { + _paratextProjectFileHandler = paratextProjectFileHandler; + } + public ParatextProjectSettings Parse() { Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); string settingsFileName = "Settings.xml"; - if (!Exists(settingsFileName)) - settingsFileName = Find(".ssf"); + if (!_paratextProjectFileHandler.Exists(settingsFileName)) + settingsFileName = _paratextProjectFileHandler.Find(".ssf"); if (string.IsNullOrEmpty(settingsFileName)) throw new InvalidOperationException("The project does not contain a settings file."); XDocument settingsDoc; - using (Stream stream = Open(settingsFileName)) + using (Stream stream = _paratextProjectFileHandler.Open(settingsFileName)) { settingsDoc = XDocument.Load(stream); } @@ -36,7 +43,7 @@ public ParatextProjectSettings Parse() var scrVersType = (int?)settingsDoc.Root.Element("Versification") ?? (int)ScrVersType.English; var versification = new ScrVers((ScrVersType)scrVersType); - if (Exists("custom.vrs")) + if (_paratextProjectFileHandler.Exists("custom.vrs")) { var guid = (string)settingsDoc.Root.Element("Guid"); string versName = ((ScrVersType)scrVersType).ToString() + "-" + guid; @@ -46,7 +53,7 @@ public ParatextProjectSettings Parse() } else { - using (var reader = new StreamReader(Open("custom.vrs"))) + using (var reader = new StreamReader(_paratextProjectFileHandler.Open("custom.vrs"))) { versification = Versification.Table.Implementation.Load( reader, @@ -60,9 +67,9 @@ public ParatextProjectSettings Parse() } var stylesheetFileName = (string)settingsDoc.Root.Element("StyleSheet") ?? 
"usfm.sty"; - if (!Exists(stylesheetFileName) && stylesheetFileName != "usfm_sb.sty") + if (!_paratextProjectFileHandler.Exists(stylesheetFileName) && stylesheetFileName != "usfm_sb.sty") stylesheetFileName = "usfm.sty"; - UsfmStylesheet stylesheet = CreateStylesheet(stylesheetFileName); + UsfmStylesheet stylesheet = _paratextProjectFileHandler.CreateStylesheet(stylesheetFileName); string prefix = ""; string form = "41MAT"; @@ -122,10 +129,5 @@ public ParatextProjectSettings Parse() languageCode ); } - - protected abstract bool Exists(string fileName); - protected abstract string Find(string extension); - protected abstract Stream Open(string fileName); - protected abstract UsfmStylesheet CreateStylesheet(string fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs index 38e3904e1..15e761756 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTermsParserBase.cs @@ -37,15 +37,15 @@ public abstract class ParatextProjectTermsParserBase private static readonly Regex NumericalInformationRegex = new Regex(@"\s+\d+(\.\d+)*$", RegexOptions.Compiled); private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTermsParserBase(ParatextProjectSettings settings) + protected ParatextProjectTermsParserBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { _settings = settings; - } - - protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public IEnumerable<(string TermId, IReadOnlyList Glosses)> Parse( @@ -59,9 +59,9 @@ protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase setti IDictionary> termIdToReferences; if 
(_settings.BiblicalTermsListType == "Project") { - if (Exists(_settings.BiblicalTermsFileName)) + if (_paratextProjectFileHandler.Exists(_settings.BiblicalTermsFileName)) { - using (Stream keyTermsFile = Open(_settings.BiblicalTermsFileName)) + using (Stream keyTermsFile = _paratextProjectFileHandler.Open(_settings.BiblicalTermsFileName)) { biblicalTermsDoc = XDocument.Load(keyTermsFile); termIdToCategoryDictionary = GetCategoryPerId(biblicalTermsDoc); @@ -115,9 +115,9 @@ protected ParatextProjectTermsParserBase(ParatextProjectSettingsParserBase setti } XDocument termRenderingsDoc = null; - if (Exists("TermRenderings.xml")) + if (_paratextProjectFileHandler.Exists("TermRenderings.xml")) { - using (Stream keyTermsFile = Open("TermRenderings.xml")) + using (Stream keyTermsFile = _paratextProjectFileHandler.Open("TermRenderings.xml")) { termRenderingsDoc = XDocument.Load(keyTermsFile); } @@ -298,9 +298,5 @@ private static IDictionary> GetReferences(XDo .ToImmutableHashSet() ); } - - protected abstract Stream Open(string fileName); - - protected abstract bool Exists(string fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs index 85dc470a0..5b0731c4d 100644 --- a/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs +++ b/src/SIL.Machine/Corpora/ParatextProjectTextUpdaterBase.cs @@ -8,15 +8,15 @@ namespace SIL.Machine.Corpora public abstract class ParatextProjectTextUpdaterBase { private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectTextUpdaterBase(ParatextProjectSettings settings) + protected ParatextProjectTextUpdaterBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { _settings = settings; - } - - protected ParatextProjectTextUpdaterBase(ParatextProjectSettingsParserBase settingsParser) - { - _settings = 
settingsParser.Parse(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public string UpdateUsfm( @@ -73,7 +73,8 @@ public string UpdateUsfm( } } - protected abstract bool Exists(string fileName); - protected abstract Stream Open(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); + + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git a/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs new file mode 100644 index 000000000..4faa7861b --- /dev/null +++ b/src/SIL.Machine/Corpora/ParatextProjectVersificationErrorDetectorBase.cs @@ -0,0 +1,55 @@ +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; + +namespace SIL.Machine.Corpora +{ + public abstract class ParatextProjectVersificationErrorDetectorBase + { + private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; + + protected ParatextProjectVersificationErrorDetectorBase( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) + { + _settings = settings; + _paratextProjectFileHandler = paratextProjectFileHandler; + } + + public IReadOnlyList<UsfmVersificationError> GetUsfmVersificationErrors( + UsfmVersificationErrorDetector handler = null + ) + { + handler = handler ?? 
new UsfmVersificationErrorDetector(_settings.Versification); + foreach (string fileName in _settings.GetAllScriptureBookFileNames()) + { + if (!_paratextProjectFileHandler.Exists(fileName)) + continue; + + string usfm; + using (var reader = new StreamReader(_paratextProjectFileHandler.Open(fileName))) + { + usfm = reader.ReadToEnd(); + } + + try + { + UsfmParser.Parse(usfm, handler, _settings.Stylesheet, _settings.Versification); + } + catch (Exception ex) + { + var sb = new StringBuilder(); + sb.Append($"An error occurred while parsing the usfm for '{fileName}'"); + if (!string.IsNullOrEmpty(_settings.Name)) + sb.Append($" in project '{_settings.Name}'"); + sb.Append($". Error: '{ex.Message}'"); + throw new InvalidOperationException(sb.ToString(), ex); + } + } + return handler.Errors; + } + } +} diff --git a/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs new file mode 100644 index 000000000..4349b91f1 --- /dev/null +++ b/src/SIL.Machine/Corpora/UsfmVersificationErrorDetector.cs @@ -0,0 +1,244 @@ +using System.Collections.Generic; +using System.ComponentModel; +using System.Linq; +using SIL.Scripture; + +namespace SIL.Machine.Corpora +{ + public enum UsfmVersificationErrorType + { + MissingChapter, + MissingVerse, + ExtraVerse, + InvalidVerseRange, + MissingVerseSegment, + ExtraVerseSegment + } + + public class UsfmVersificationError + { + private readonly int _bookNum; + private readonly int _expectedChapter; + private readonly int _expectedVerse; + private readonly int _actualChapter; + private readonly int _actualVerse; + private VerseRef? _verseRef = null; + + public UsfmVersificationError( + int bookNum, + int expectedChapter, + int expectedVerse, + int actualChapter, + int actualVerse, + VerseRef? 
verseRef = null + ) + { + _bookNum = bookNum; + _expectedChapter = expectedChapter; + _expectedVerse = expectedVerse; + _actualChapter = actualChapter; + _actualVerse = actualVerse; + _verseRef = verseRef; + } + + public UsfmVersificationErrorType Type { get; private set; } + + // Returns true if there is an error + public bool CheckError() + { + //A non-empty chapter is expected + if (_expectedChapter > _actualChapter && _expectedVerse != 0) + { + Type = UsfmVersificationErrorType.MissingChapter; + return true; + } + if (_expectedVerse > _actualVerse && _expectedChapter == _actualChapter) + { + Type = UsfmVersificationErrorType.MissingVerse; + return true; + } + if (_verseRef != null) + { + if (string.IsNullOrEmpty(_verseRef.Value.Segment()) && _verseRef.Value.HasSegmentsDefined) + { + Type = UsfmVersificationErrorType.MissingVerseSegment; + return true; + } + if (!string.IsNullOrEmpty(_verseRef.Value.Segment()) && !_verseRef.Value.HasSegmentsDefined) + { + Type = UsfmVersificationErrorType.ExtraVerseSegment; + return true; + } + if (!_verseRef.Value.Valid) + { + Type = Map(_verseRef.Value.ValidStatus); + return true; + } + } + return false; + } + + private static UsfmVersificationErrorType Map(VerseRef.ValidStatusType validStatus) + { + switch (validStatus) + { + case VerseRef.ValidStatusType.OutOfRange: + return UsfmVersificationErrorType.ExtraVerse; + case VerseRef.ValidStatusType.VerseRepeated: + case VerseRef.ValidStatusType.VerseOutOfOrder: + return UsfmVersificationErrorType.InvalidVerseRange; + default: + throw new InvalidEnumArgumentException( + nameof(validStatus), + (int)validStatus, + typeof(VerseRef.ValidStatusType) + ); + } + } + + public string ExpectedVerseRef + { + get + { + // We do not want to throw an exception here, and the VerseRef constructor can throw + // an exception with certain invalid verse data; use TryParse instead. 
+ if (!VerseRef.TryParse($"{_bookNum} {_expectedChapter}:{_expectedVerse}", out VerseRef defaultVerseRef)) + { + return ""; + } + if (Type == UsfmVersificationErrorType.ExtraVerse) + return ""; + if ( + Type == UsfmVersificationErrorType.MissingVerseSegment + && VerseRef.TryParse( + $"{defaultVerseRef.Book} {defaultVerseRef.Chapter}:{defaultVerseRef.Verse}a", + out VerseRef verseWithSegment + ) + ) + { + return verseWithSegment.ToString(); + } + if (Type == UsfmVersificationErrorType.InvalidVerseRange) + { + List<VerseRef> sortedAllUniqueVerses = _verseRef + .Value.AllVerses() + .Distinct() + .OrderBy(v => v) + .ToList(); + VerseRef firstVerse = sortedAllUniqueVerses[0]; + VerseRef lastVerse = sortedAllUniqueVerses[sortedAllUniqueVerses.Count - 1]; + if (firstVerse.Equals(lastVerse)) + { + return firstVerse.ToString(); + } + else if ( + VerseRef.TryParse( + $"{firstVerse.Book} {firstVerse.Chapter}:{firstVerse.Verse}-{lastVerse.Verse}", + out VerseRef correctedVerseRangeRef + ) + ) + { + return correctedVerseRangeRef.ToString(); + } + } + return defaultVerseRef.ToString(); + } + } + public string ActualVerseRef => + _verseRef != null + ? 
_verseRef.Value.ToString() + : new VerseRef(_bookNum, _actualChapter, _actualVerse).ToString(); + } + + public class UsfmVersificationErrorDetector : UsfmParserHandlerBase + { + private readonly ScrVers _versification; + private int _currentBook; + private int _currentChapter; + private VerseRef _currentVerse; + private readonly List<UsfmVersificationError> _errors; + + public UsfmVersificationErrorDetector(ScrVers versification) + { + _versification = versification; + _currentBook = 0; + _currentChapter = 0; + _currentVerse = new VerseRef(); + _errors = new List<UsfmVersificationError>(); + } + + public IReadOnlyList<UsfmVersificationError> Errors => _errors; + + public override void EndUsfm(UsfmParserState state) + { + if (_currentBook > 0 && Canon.IsCanonical(_currentBook)) + { + var versificationError = new UsfmVersificationError( + _currentBook, + _versification.GetLastChapter(_currentBook), + _versification.GetLastVerse(_currentBook, _versification.GetLastChapter(_currentBook)), + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum + ); + if (versificationError.CheckError()) + _errors.Add(versificationError); + } + } + + public override void StartBook(UsfmParserState state, string marker, string code) + { + _currentBook = state.VerseRef.BookNum; + _currentChapter = 0; + _currentVerse = new VerseRef(); + } + + public override void Chapter( + UsfmParserState state, + string number, + string marker, + string altNumber, + string pubNumber + ) + { + if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) + { + var versificationError = new UsfmVersificationError( + _currentBook, + _currentChapter, + _versification.GetLastVerse(_currentBook, _currentChapter), + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum + ); + if (versificationError.CheckError()) + _errors.Add(versificationError); + } + + _currentChapter = state.VerseRef.ChapterNum; + _currentVerse = new VerseRef(); + } + + public override void Verse( + UsfmParserState state, + string number, + string marker, + string altNumber, + 
string pubNumber + ) + { + _currentVerse = state.VerseRef; + if (_currentBook > 0 && Canon.IsCanonical(_currentBook) && _currentChapter > 0) + { + var versificationError = new UsfmVersificationError( + _currentBook, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentChapter, + _currentVerse.AllVerses().Last().VerseNum, + _currentVerse + ); + if (versificationError.CheckError()) + _errors.Add(versificationError); + } + } + } +} diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs similarity index 60% rename from src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs rename to src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs index bb0c593f4..00338ddaa 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParserBase.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectFileHandler.cs @@ -1,11 +1,41 @@ -using System.IO; +using System.IO; +using System.IO.Compression; +using System.Linq; using SIL.IO; namespace SIL.Machine.Corpora { - public abstract class ZipParatextProjectSettingsParserBase : ParatextProjectSettingsParserBase + public class ZipParatextProjectFileHandler : IParatextProjectFileHandler { - protected override UsfmStylesheet CreateStylesheet(string fileName) + private readonly ZipArchive _archive; + + public ZipParatextProjectFileHandler(ZipArchive archive) + { + _archive = archive; + } + + public bool Exists(string fileName) + { + return _archive.GetEntry(fileName) != null; + } + + public Stream Open(string fileName) + { + ZipArchiveEntry entry = _archive.GetEntry(fileName); + if (entry == null) + return null; + return entry.Open(); + } + + public string Find(string extension) + { + ZipArchiveEntry entry = _archive.Entries.FirstOrDefault(e => e.FullName.EndsWith(extension)); + if (entry == null) + return null; + return entry.FullName; + } + + public UsfmStylesheet CreateStylesheet(string fileName) { TempFile stylesheetTempFile = 
null; TempFile customStylesheetTempFile = null; diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs index bb2d6c1a5..aed8cfb38 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectSettingsParser.cs @@ -1,37 +1,15 @@ -using System.IO; -using System.IO.Compression; -using System.Linq; +using System.IO.Compression; namespace SIL.Machine.Corpora { - public class ZipParatextProjectSettingsParser : ZipParatextProjectSettingsParserBase + public class ZipParatextProjectSettingsParser : ParatextProjectSettingsParserBase { - private readonly ZipArchive _archive; - public ZipParatextProjectSettingsParser(ZipArchive archive) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override string Find(string extension) - { - ZipArchiveEntry entry = _archive.Entries.FirstOrDefault(e => e.FullName.EndsWith(extension)); - if (entry == null) - return null; - return entry.FullName; - } + : base(new ZipParatextProjectFileHandler(archive)) { } - protected override Stream Open(string fileName) + public static ParatextProjectSettings Parse(ZipArchive archive) { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); + return new ZipParatextProjectSettingsParser(archive).Parse(); } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs index 863cb563f..55a9e6f29 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTermsParser.cs @@ -1,29 +1,10 @@ -using System.IO; using System.IO.Compression; namespace SIL.Machine.Corpora { public class ZipParatextProjectTermsParser : ParatextProjectTermsParserBase { - private readonly ZipArchive _archive; - - public 
ZipParatextProjectTermsParser(ZipArchive archive, ParatextProjectSettings settings = null) - : base(settings ?? new ZipParatextProjectSettingsParser(archive).Parse()) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + public ZipParatextProjectTermsParser(ZipArchive archive) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs index 0eb30f567..4903a4db9 100644 --- a/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs +++ b/src/SIL.Machine/Corpora/ZipParatextProjectTextUpdater.cs @@ -1,29 +1,10 @@ -using System.IO; using System.IO.Compression; namespace SIL.Machine.Corpora { public class ZipParatextProjectTextUpdater : ParatextProjectTextUpdaterBase { - private readonly ZipArchive _archive; - public ZipParatextProjectTextUpdater(ZipArchive archive) - : base(new ZipParatextProjectSettingsParser(archive)) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs new file mode 100644 index 000000000..711398362 --- /dev/null +++ b/src/SIL.Machine/Corpora/ZipParatextProjectVersificationErrorDetector.cs @@ -0,0 +1,10 
@@ +using System.IO.Compression; + +namespace SIL.Machine.Corpora +{ + public class ZipParatextProjectVersificationErrorDetector : ParatextProjectVersificationErrorDetectorBase + { + public ZipParatextProjectVersificationErrorDetector(ZipArchive archive) + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } + } +} diff --git a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs index 1812fad40..a78210496 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ParatextProjectQuoteConventionDetector.cs @@ -11,15 +11,15 @@ namespace SIL.Machine.PunctuationAnalysis public abstract class ParatextProjectQuoteConventionDetector { private readonly ParatextProjectSettings _settings; + private readonly IParatextProjectFileHandler _paratextProjectFileHandler; - protected ParatextProjectQuoteConventionDetector(ParatextProjectSettings settings) + protected ParatextProjectQuoteConventionDetector( + IParatextProjectFileHandler paratextProjectFileHandler, + ParatextProjectSettings settings + ) { _settings = settings; - } - - protected ParatextProjectQuoteConventionDetector(ParatextProjectSettingsParserBase settingsParser) - { - _settings = settingsParser.Parse(); + _paratextProjectFileHandler = paratextProjectFileHandler; } public QuoteConventionAnalysis GetQuoteConventionAnalysis(QuoteConventionDetector handler = null) @@ -81,7 +81,8 @@ string bookId in Canon return handler.DetectQuoteConvention(includeChapters); } - protected abstract bool Exists(string fileName); - protected abstract Stream Open(string fileName); + private bool Exists(string fileName) => _paratextProjectFileHandler.Exists(fileName); + + private Stream Open(string fileName) => _paratextProjectFileHandler.Open(fileName); } } diff --git 
a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs index fa8af932b..1df1db842 100644 --- a/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs +++ b/src/SIL.Machine/PunctuationAnalysis/ZipParatextProjectQuoteConventionDetector.cs @@ -1,4 +1,3 @@ -using System.IO; using System.IO.Compression; using SIL.Machine.Corpora; @@ -6,25 +5,7 @@ namespace SIL.Machine.PunctuationAnalysis { public class ZipParatextProjectQuoteConventionDetector : ParatextProjectQuoteConventionDetector { - private readonly ZipArchive _archive; - public ZipParatextProjectQuoteConventionDetector(ZipArchive archive) - : base(new ZipParatextProjectSettingsParser(archive)) - { - _archive = archive; - } - - protected override bool Exists(string fileName) - { - return _archive.GetEntry(fileName) != null; - } - - protected override Stream Open(string fileName) - { - ZipArchiveEntry entry = _archive.GetEntry(fileName); - if (entry == null) - return null; - return entry.Open(); - } + : base(new ZipParatextProjectFileHandler(archive), ZipParatextProjectSettingsParser.Parse(archive)) { } } } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs new file mode 100644 index 000000000..df869d1e8 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectFileHandler.cs @@ -0,0 +1,60 @@ +using System.Text; +using SIL.Scripture; + +namespace SIL.Machine.Corpora; + +public class MemoryParatextProjectFileHandler(IDictionary<string, string>? files = null) : IParatextProjectFileHandler +{ + public IDictionary<string, string> Files { get; } = files ?? 
new Dictionary<string, string>(); + + public UsfmStylesheet CreateStylesheet(string fileName) + { + throw new NotImplementedException(); + } + + public bool Exists(string fileName) + { + return Files.ContainsKey(fileName); + } + + public string Find(string extension) + { + throw new NotImplementedException(); + } + + public Stream? Open(string fileName) + { + if (!Files.TryGetValue(fileName, out string? contents)) + return null; + return new MemoryStream(Encoding.UTF8.GetBytes(contents)); + } + + public class DefaultParatextProjectSettings( + string name = "Test", + string fullName = "TestProject", + Encoding? encoding = null, + ScrVers? versification = null, + UsfmStylesheet? stylesheet = null, + string fileNamePrefix = "", + string fileNameForm = "41MAT", + string fileNameSuffix = "Test.SFM", + string biblicalTermsListType = "Project", + string biblicalTermsProjectName = "Test", + string biblicalTermsFileName = "ProjectBiblicalTerms.xml", + string languageCode = "en" + ) + : ParatextProjectSettings( + name, + fullName, + encoding ?? Encoding.UTF8, + versification ?? ScrVers.English, + stylesheet ?? 
new UsfmStylesheet("usfm.sty"), + fileNamePrefix, + fileNameForm, + fileNameSuffix, + biblicalTermsListType, + biblicalTermsProjectName, + biblicalTermsFileName, + languageCode + ) { } +} diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs index 7fb937981..a8c4c7c86 100644 --- a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectTermsParser.cs @@ -1,21 +1,7 @@ -using System.Text; - namespace SIL.Machine.Corpora; -public class MemoryParatextProjectTermsParser(ParatextProjectSettings settings, IDictionary<string, string> files) - : ParatextProjectTermsParserBase(settings) -{ - public IDictionary<string, string> Files { get; } = files; - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} +public class MemoryParatextProjectTermsParser(IDictionary<string, string>? files, ParatextProjectSettings? settings) + : ParatextProjectTermsParserBase( + new MemoryParatextProjectFileHandler(files), + settings ?? new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs new file mode 100644 index 000000000..d8f00008e --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/MemoryParatextProjectVersificationErrorDetector.cs @@ -0,0 +1,10 @@ +namespace SIL.Machine.Corpora; + +public class MemoryParatextProjectVersificationErrorDetector( + IDictionary<string, string>? files = null, + ParatextProjectSettings? settings = null +) + : ParatextProjectVersificationErrorDetectorBase( + new MemoryParatextProjectFileHandler(files), + settings ?? 
new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs index 1e3fb7365..00329912a 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectTermsParserTests.cs @@ -1,6 +1,4 @@ -using System.Text; using NUnit.Framework; -using SIL.Scripture; namespace SIL.Machine.Corpora; @@ -47,7 +45,7 @@ public void TestGetKeyTermsFromTermsRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -62,7 +60,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings() public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermGlosses() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -76,7 +74,7 @@ public void TestGetKeyTermsFromTermsLocalizations_NoTermRenderings_DoNotUseTermG public void TestGetKeyTermsFromTermsLocalizations() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml", languageCode: "fr" @@ -92,7 +90,7 @@ public void TestGetKeyTermsFromTermsLocalizations() public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: 
"BiblicalTerms.xml", languageCode: "fr" @@ -115,7 +113,7 @@ public void TestGetKeyTermsFromTermsLocalizations_FilterByChapters() public void TestGetKeyTermsFromTermsLocalizations_TermRenderingsExists_PreferLocalization() { var env = new TestEnvironment( - new DefaultParatextProjectSettings( + new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( biblicalTermsListType: "Major", biblicalTermsFileName: "BiblicalTerms.xml" ), @@ -187,41 +185,11 @@ private class TestEnvironment( private readonly bool _useTermGlosses = useTermGlosses; private readonly IDictionary>? _chapters = chapters; - public ParatextProjectTermsParserBase Parser { get; } = - new MemoryParatextProjectTermsParser(settings ?? new DefaultParatextProjectSettings(), files ?? new()); + public ParatextProjectTermsParserBase Parser { get; } = new MemoryParatextProjectTermsParser(files, settings); public IEnumerable<(string TermId, IReadOnlyList Glosses)> GetGlosses() { return Parser.Parse(new string[] { "PN" }, _useTermGlosses, _chapters); } } - - private class DefaultParatextProjectSettings( - string name = "Test", - string fullName = "TestProject", - Encoding? encoding = null, - ScrVers? versification = null, - UsfmStylesheet? stylesheet = null, - string fileNamePrefix = "", - string fileNameForm = "41MAT", - string fileNameSuffix = "Test.SFM", - string biblicalTermsListType = "Project", - string biblicalTermsProjectName = "Test", - string biblicalTermsFileName = "ProjectBiblicalTerms.xml", - string languageCode = "en" - ) - : ParatextProjectSettings( - name, - fullName, - encoding ?? Encoding.UTF8, - versification ?? ScrVers.English, - stylesheet ?? 
new UsfmStylesheet("usfm.sty"), - fileNamePrefix, - fileNameForm, - fileNameSuffix, - biblicalTermsListType, - biblicalTermsProjectName, - biblicalTermsFileName, - languageCode - ) { } } diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs new file mode 100644 index 000000000..5b4d581f4 --- /dev/null +++ b/tests/SIL.Machine.Tests/Corpora/ParatextProjectVersificationErrorTests.cs @@ -0,0 +1,407 @@ +using System.Text; +using System.Text.Json; +using NUnit.Framework; +using SIL.Scripture; + +namespace SIL.Machine.Corpora; + +[TestFixture] +public class ParatextProjectQuoteConventionDetectorTests +{ + [Test] + public void GetUsfmVersificationErrors_Noerrors() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + Assert.That( + env.GetUsfmVersificationErrors(), + Has.Count.EqualTo(0), + JsonSerializer.Serialize(env.GetUsfmVersificationErrors()) + ); + } + + [Test] + public void GetUsfmVersificationErrors_MissingVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + } + + [Test] + public void GetUsfmVersificationErrors_MissingChapter() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), 
JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingChapter)); + } + + [Test] + public void GetUsfmVersificationErrors_ExtraVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + \v 16 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + } + + [Test] + public void GetUsfmVersificationErrors_InvalidVerse() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 13-12 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.InvalidVerseRange)); + } + + [Test] + public void GetUsfmVersificationErrors_ExtraVerseSegment() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14a + \v 14b + \v 15 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerseSegment)); + } + + [Test] + public void GetUsfmVersificationErrors_MissingVerseSegment() + { + var env = new TestEnvironment( + settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + versification: GetCustomVersification(@"*3JN 1:13,a,b") + ), + 
files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerseSegment)); + } + + [Test] + public void GetUsfmVersificationErrors_IgnoreNonCanonicals() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "98XXETest.SFM", + @"\id XXE + \c 1 + \v 3-2 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(0), JsonSerializer.Serialize(errors)); + } + + [Test] + public void GetUsfmVersificationErrors_ExtraVerse_ExcludedInCustomVrs() + { + var env = new TestEnvironment( + settings: new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings( + versification: GetCustomVersification(@"-3JN 1:13") + ), + files: new Dictionary() + { + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + } + + [Test] + public void GetUsfmVersificationErrors_MultipleBooks() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "642JNTest.SFM", + @"\id 2JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + " + }, + { + "653JNTest.SFM", + @"\id 3JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \v 13 + \v 14 + \v 15 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, 
Has.Count.EqualTo(1), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + } + + [Test] + public void GetUsfmVersificationErrors_MultipleChapters() + { + var env = new TestEnvironment( + files: new Dictionary() + { + { + "642JNTest.SFM", + @"\id 2JN + \c 1 + \v 1 + \v 2 + \v 3 + \v 4 + \v 5 + \v 6 + \v 7 + \v 8 + \v 9 + \v 10 + \v 11 + \v 12 + \c 2 + \v 1 + " + } + } + ); + IReadOnlyList errors = env.GetUsfmVersificationErrors(); + Assert.That(errors, Has.Count.EqualTo(2), JsonSerializer.Serialize(errors)); + Assert.That(errors[0].Type, Is.EqualTo(UsfmVersificationErrorType.MissingVerse)); + Assert.That(errors[1].Type, Is.EqualTo(UsfmVersificationErrorType.ExtraVerse)); + } + + private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) + { + public ParatextProjectVersificationErrorDetectorBase Detector { get; } = + new MemoryParatextProjectVersificationErrorDetector(files, settings); + + public IReadOnlyList GetUsfmVersificationErrors() + { + return Detector.GetUsfmVersificationErrors(); + } + } + + private static ScrVers GetCustomVersification(string customVrsContents, ScrVers? 
baseVersification = null) + { + baseVersification ??= ScrVers.English; + ScrVers customVersification = baseVersification; + using (var reader = new StreamReader(new MemoryStream(Encoding.UTF8.GetBytes(customVrsContents)))) + { + customVersification = Versification.Table.Implementation.Load( + reader, + "custom.vrs", + baseVersification, + baseVersification.ToString() + "-" + customVrsContents.GetHashCode() + ); + } + Versification.Table.Implementation.RemoveAllUnknownVersifications(); + return customVersification; + } +} diff --git a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs index e2eefd4f2..96966bfd5 100644 --- a/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UsfmManualTests.cs @@ -1,4 +1,5 @@ using System.IO.Compression; +using System.Text.Json; using NUnit.Framework; using SIL.Machine.PunctuationAnalysis; @@ -78,4 +79,19 @@ public void AnalyzeCorporaQuoteConventions() Assert.NotNull(targetAnalysis); }); } + + [Test] + [Ignore("This is for manual testing only. 
Remove this tag to run the test.")] + public void ValidateUsfmVersification() + { + using ZipArchive zipArchive = ZipFile.OpenRead(CorporaTestHelpers.UsfmSourceProjectZipPath); + var versificationErrorDetector = new ZipParatextProjectVersificationErrorDetector(zipArchive); + IReadOnlyList errors = versificationErrorDetector.GetUsfmVersificationErrors(); + + Assert.That( + errors, + Has.Count.EqualTo(0), + JsonSerializer.Serialize(errors, new JsonSerializerOptions { WriteIndented = true }) + ); + } } diff --git a/tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs index 09462018f..5a6ea8aee 100644 --- a/tests/SIL.Machine.Tests/Corpora/FallbackQuotationMarkResolverTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/FallbackQuotationMarkResolverTests.cs @@ -1,7 +1,6 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class FallbackQuotationMarkResolverTests diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs new file mode 100644 index 000000000..6116b8f86 --- /dev/null +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConventionDetector.cs @@ -0,0 +1,12 @@ +using SIL.Machine.Corpora; + +namespace SIL.Machine.PunctuationAnalysis; + +public class MemoryParatextProjectQuoteConventionDetector( + IDictionary? files, + ParatextProjectSettings? settings +) + : ParatextProjectQuoteConventionDetector( + new MemoryParatextProjectFileHandler(files), + settings ?? 
new MemoryParatextProjectFileHandler.DefaultParatextProjectSettings() + ) { } diff --git a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs deleted file mode 100644 index d74f74836..000000000 --- a/tests/SIL.Machine.Tests/PunctuationAnalysis/MemoryParatextProjectQuoteConvetionDetector.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System.Text; -using SIL.Machine.Corpora; - -namespace SIL.Machine.PunctuationAnalysis; - -public class MemoryParatextProjectQuoteConventionDetector( - ParatextProjectSettings settings, - IDictionary files -) : ParatextProjectQuoteConventionDetector(settings) -{ - public IDictionary Files { get; } = files; - - protected override bool Exists(string fileName) - { - return Files.ContainsKey(fileName); - } - - protected override Stream? Open(string fileName) - { - if (!Files.TryGetValue(fileName, out string? contents)) - return null; - return new MemoryStream(Encoding.UTF8.GetBytes(contents)); - } -} diff --git a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConventionDetectorTests.cs similarity index 80% rename from tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConventionDetectorTests.cs index 7338d611b..e083961b5 100644 --- a/tests/SIL.Machine.Tests/Corpora/ParatextProjectQuoteConvetionDetectorTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/ParatextProjectQuoteConventionDetectorTests.cs @@ -1,9 +1,8 @@ -using System.Text; using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; using SIL.Scripture; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class ParatextProjectQuoteConventionDetectorTests @@ -130,10 +129,7 @@ public 
void TestGetQuotationConventionInvalidBookCode() private class TestEnvironment(ParatextProjectSettings? settings = null, Dictionary? files = null) { public ParatextProjectQuoteConventionDetector Detector { get; } = - new MemoryParatextProjectQuoteConventionDetector( - settings ?? new DefaultParatextProjectSettings(), - files ?? new() - ); + new MemoryParatextProjectQuoteConventionDetector(files, settings); public QuoteConventionAnalysis GetQuoteConvention(string? scriptureRange = null) { @@ -165,33 +161,4 @@ private static string GetTestChapter(int number, QuoteConvention? quoteConventio \v 5 Then someone said, {leftQuote}More things someone said.{rightQuote} "; } - - private class DefaultParatextProjectSettings( - string name = "Test", - string fullName = "TestProject", - Encoding? encoding = null, - ScrVers? versification = null, - UsfmStylesheet? stylesheet = null, - string fileNamePrefix = "", - string fileNameForm = "41MAT", - string fileNameSuffix = "Test.SFM", - string biblicalTermsListType = "Project", - string biblicalTermsProjectName = "Test", - string biblicalTermsFileName = "ProjectBiblicalTerms.xml", - string languageCode = "en" - ) - : ParatextProjectSettings( - name, - fullName, - encoding ?? Encoding.UTF8, - versification ?? ScrVers.English, - stylesheet ?? 
new UsfmStylesheet("usfm.sty"), - fileNamePrefix, - fileNameForm, - fileNameSuffix, - biblicalTermsListType, - biblicalTermsProjectName, - biblicalTermsFileName, - languageCode - ) { } } diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs similarity index 97% rename from tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs index 81750a7a8..117e2ac7e 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationDenormalizationTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs index fc709face..02cd3b803 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationDenormalizationUsfmBlockUpdateHandlerTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationMarkDenormalizationUsfmUpdateBlockHandlerTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs 
b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs index 2f4ba1896..df37f803f 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuotationMarkUpdateFirstPassTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuotationMarkUpdateFirstPassTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuotationMarkUpdateFirstPassTests diff --git a/tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs similarity index 99% rename from tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs rename to tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs index af5a264e1..083b9f505 100644 --- a/tests/SIL.Machine.Tests/Corpora/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs +++ b/tests/SIL.Machine.Tests/PunctuationAnalysis/QuoteConventionChangingUsfmBlockUpdateHandlerTests.cs @@ -1,7 +1,7 @@ using NUnit.Framework; -using SIL.Machine.PunctuationAnalysis; +using SIL.Machine.Corpora; -namespace SIL.Machine.Corpora; +namespace SIL.Machine.PunctuationAnalysis; [TestFixture] public class QuoteConventionChangingUsfmUpdateBlockHandlerTests