diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj index 88a132c3..782e0bdb 100644 --- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj +++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj @@ -36,9 +36,9 @@ - - - + + + diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs index da2c01b4..7bda757c 100644 --- a/src/Serval/src/Serval.Client/Client.g.cs +++ b/src/Serval/src/Serval.Client/Client.g.cs @@ -2471,6 +2471,10 @@ public partial interface ITranslationEnginesClient ///
* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**. ///
* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**. ///
+ ///
Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options: + ///
* `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data. + ///
* `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized. + ///
///
Only pretranslations for the most recent successful build of the engine are returned. ///
The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// @@ -2482,9 +2486,10 @@ public partial interface ITranslationEnginesClient /// The behavior of paragraph markers. /// The behavior of embed markers. /// The behavior of style markers. + /// The normalization behavior of quotes. /// The book in USFM format /// A server side error occurred. - System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); + System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, PretranslationNormalizationBehavior? quoteNormalizationBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)); /// A cancellation token that can be used by other objects or threads to receive notice of cancellation. /// @@ -5168,6 +5173,10 @@ public string BaseUrl ///
* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**. ///
* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**. ///
+ ///
Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options: + ///
* `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data. + ///
* `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized. + ///
///
Only pretranslations for the most recent successful build of the engine are returned. ///
The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// @@ -5179,9 +5188,10 @@ public string BaseUrl /// The behavior of paragraph markers. /// The behavior of embed markers. /// The behavior of style markers. + /// The normalization behavior of quotes. /// The book in USFM format /// A server side error occurred. - public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) + public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, PretranslationNormalizationBehavior? quoteNormalizationBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken)) { if (id == null) throw new System.ArgumentNullException("id"); @@ -5232,6 +5242,10 @@ public string BaseUrl { urlBuilder_.Append(System.Uri.EscapeDataString("style-marker-behavior")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(styleMarkerBehavior, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); } + if (quoteNormalizationBehavior != null) + { + urlBuilder_.Append(System.Uri.EscapeDataString("quotation-mark-behavior")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(quoteNormalizationBehavior, System.Globalization.CultureInfo.InvariantCulture))).Append('&'); + } urlBuilder_.Length--; PrepareRequest(client_, request_, urlBuilder_); @@ -10341,6 +10355,18 @@ public enum PretranslationUsfmMarkerBehavior } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] + public enum PretranslationNormalizationBehavior + { + + [System.Runtime.Serialization.EnumMember(Value = @"Normalized")] + Normalized = 0, + + [System.Runtime.Serialization.EnumMember(Value = @"Denormalized")] + Denormalized = 1, + + } + [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")] public partial class TranslationBuild { diff --git a/src/Serval/src/Serval.Shared/Serval.Shared.csproj b/src/Serval/src/Serval.Shared/Serval.Shared.csproj index 74672ca8..9f6ab13d 100644 --- a/src/Serval/src/Serval.Shared/Serval.Shared.csproj +++ b/src/Serval/src/Serval.Shared/Serval.Shared.csproj @@ -19,7 +19,7 @@ - + diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs new file mode 100644 index 00000000..fefedb6a --- /dev/null +++ b/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs @@ -0,0 +1,7 @@ +namespace Serval.Translation.Contracts; + +public enum PretranslationNormalizationBehavior +{ + Normalized, + Denormalized +} diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs index 3ca47f0e..ab1027f1 100644 --- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs +++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs @@ -901,6 +901,10 @@ CancellationToken cancellationToken /// * `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**. /// * `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**. /// + /// Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options: + /// * `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data. + /// * `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized. + /// /// Only pretranslations for the most recent successful build of the engine are returned. /// The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation). /// @@ -912,6 +916,7 @@ CancellationToken cancellationToken /// The behavior of paragraph markers. /// The behavior of embed markers. /// The behavior of style markers. + /// The normalization behavior of quotes. /// /// The book in USFM format /// The specified book does not exist in the source or target corpus. @@ -941,6 +946,7 @@ public async Task GetPretranslatedUsfmAsync( [FromQuery(Name = "paragraph-marker-behavior")] PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior, [FromQuery(Name = "embed-behavior")] PretranslationUsfmMarkerBehavior? embedBehavior, [FromQuery(Name = "style-marker-behavior")] PretranslationUsfmMarkerBehavior? styleMarkerBehavior, + [FromQuery(Name = "quotation-mark-behavior")] PretranslationNormalizationBehavior? quoteNormalizationBehavior, CancellationToken cancellationToken ) { @@ -961,6 +967,7 @@ CancellationToken cancellationToken paragraphMarkerBehavior ?? PretranslationUsfmMarkerBehavior.Preserve, embedBehavior ?? PretranslationUsfmMarkerBehavior.Preserve, styleMarkerBehavior ?? PretranslationUsfmMarkerBehavior.Strip, + quoteNormalizationBehavior ?? PretranslationNormalizationBehavior.Normalized, cancellationToken ); if (usfm == "") diff --git a/src/Serval/src/Serval.Translation/Models/Build.cs b/src/Serval/src/Serval.Translation/Models/Build.cs index e3ee1c63..ecfefe02 100644 --- a/src/Serval/src/Serval.Translation/Models/Build.cs +++ b/src/Serval/src/Serval.Translation/Models/Build.cs @@ -20,5 +20,5 @@ public record Build : IInitializableEntity public bool? IsInitialized { get; set; } public DateTime? DateCreated { get; set; } public IReadOnlyList? Phases { get; init; } - public IReadOnlyCollection? Analysis { get; init; } + public IReadOnlyList? Analysis { get; init; } } diff --git a/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs b/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs index af5c876d..ede5c9a5 100644 --- a/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs +++ b/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs @@ -20,6 +20,7 @@ Task GetUsfmAsync( PretranslationUsfmMarkerBehavior paragraphMarkerBehavior, PretranslationUsfmMarkerBehavior embedBehavior, PretranslationUsfmMarkerBehavior styleMarkerBehavior, + PretranslationNormalizationBehavior quoteNormalizationBehavior, CancellationToken cancellationToken = default ); } diff --git a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs index 2952c0b8..32f51dbb 100644 --- a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs +++ b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs @@ -1,4 +1,5 @@ using SIL.Machine.Corpora; +using SIL.Machine.PunctuationAnalysis; using SIL.Machine.Translation; namespace Serval.Translation.Services; @@ -44,6 +45,7 @@ public async Task GetUsfmAsync( PretranslationUsfmMarkerBehavior paragraphMarkerBehavior, PretranslationUsfmMarkerBehavior embedBehavior, PretranslationUsfmMarkerBehavior styleMarkerBehavior, + PretranslationNormalizationBehavior quoteNormalizationBehavior, CancellationToken cancellationToken = default ) { @@ -68,6 +70,8 @@ public async Task GetUsfmAsync( styleMarkerBehavior ); + List remarks = [disclaimerRemark, markerPlacementRemark]; + CorpusFile sourceFile; CorpusFile targetFile; if (corpus is not null) @@ -147,6 +151,10 @@ PretranslationUsfmMarkerBehavior StyleBehavior if (paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition) updateBlockHandlers.Add(new PlaceMarkersUsfmUpdateBlockHandler()); + if (paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition) + updateBlockHandlers.Add(new PlaceMarkersUsfmUpdateBlockHandler()); + + string usfm = ""; // Update the target book if it exists if (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Target) { @@ -162,7 +170,6 @@ PretranslationUsfmMarkerBehavior StyleBehavior ); using Shared.Services.ZipParatextProjectTextUpdater updater = _scriptureDataFileService.GetZipParatextProjectTextUpdater(targetFile.Filename); - string usfm = ""; switch (textOrigin) { case PretranslationUsfmTextOrigin.PreferExisting: @@ -176,7 +183,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; break; case PretranslationUsfmTextOrigin.PreferPretranslated: @@ -190,7 +197,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; break; case PretranslationUsfmTextOrigin.OnlyExisting: @@ -204,7 +211,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; break; case PretranslationUsfmTextOrigin.OnlyPretranslated: @@ -218,16 +225,16 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; break; } - // In order to support PretranslationUsfmTemplate.Auto - if (!string.IsNullOrEmpty(usfm)) - return usfm; } - if (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Source) + if ( + string.IsNullOrEmpty(usfm) + && (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Source) + ) { using Shared.Services.ZipParatextProjectTextUpdater updater = _scriptureDataFileService.GetZipParatextProjectTextUpdater(sourceFile.Filename); @@ -238,7 +245,8 @@ PretranslationUsfmMarkerBehavior StyleBehavior case PretranslationUsfmTextOrigin.PreferExisting: case PretranslationUsfmTextOrigin.PreferPretranslated: case PretranslationUsfmTextOrigin.OnlyPretranslated: - return updater.UpdateUsfm( + usfm = + updater.UpdateUsfm( textId, pretranslationRows.Select(Map).ToList(), fullName: targetSettings.FullName, @@ -247,10 +255,12 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; + break; case PretranslationUsfmTextOrigin.OnlyExisting: - return updater.UpdateUsfm( + usfm = + updater.UpdateUsfm( textId, [], // don't pass the pretranslations, we only want the existing text. fullName: targetSettings.FullName, @@ -259,12 +269,86 @@ PretranslationUsfmMarkerBehavior StyleBehavior embedBehavior: Map(embedBehavior), styleBehavior: Map(styleMarkerBehavior), updateBlockHandlers: updateBlockHandlers, - remarks: [disclaimerRemark, markerPlacementRemark] + remarks: remarks ) ?? ""; + break; + } + } + if (quoteNormalizationBehavior == PretranslationNormalizationBehavior.Denormalized) + { + if (build.Analysis is null) + { + throw new InvalidOperationException( + $"Unable to denormalize quotation marks: No quote convention analysis exists for build {build.Id}" + ); + } + if (!build.Analysis.Any(a => a.ParallelCorpusRef == corpusId)) + { + throw new InvalidOperationException( + $"Unable to denormalize quotation marks: No quote convention analysis exists for corpus {corpusId}" + ); } + ParallelCorpusAnalysis analysis = build.Analysis.Single(c => c.ParallelCorpusRef == corpusId); + usfm = DenormalizeQuotationMarks(usfm, analysis); } - return ""; + return usfm; + } + + private static string DenormalizeQuotationMarks(string usfm, ParallelCorpusAnalysis analysis) + { + QuoteConvention sourceQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName( + analysis.SourceQuoteConvention + ); + if (sourceQuoteConvention is null) + { + throw new InvalidOperationException( + $"Unable to denormalize quotation marks: No such convention {analysis.SourceQuoteConvention}" + ); + } + QuoteConvention targetQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName( + analysis.TargetQuoteConvention + ); + if (targetQuoteConvention is null) + { + throw new InvalidOperationException( + $"Unable to denormalize quotation marks: No such convention {analysis.TargetQuoteConvention}" + ); + } + QuotationMarkDenormalizationFirstPass quotationMarkDenormalizationFirstPass = + new(sourceQuoteConvention, targetQuoteConvention); + + UsfmParser.Parse(usfm, quotationMarkDenormalizationFirstPass); + List bestChapterStrategies = + quotationMarkDenormalizationFirstPass.FindBestChapterStrategies(); + + QuotationMarkDenormalizationUsfmUpdateBlockHandler quotationMarkDenormalizer = + new( + sourceQuoteConvention, + targetQuoteConvention, + new QuotationMarkUpdateSettings(chapterStrategies: bestChapterStrategies) + ); + List remarks = []; + if (bestChapterStrategies.Any(s => s != QuotationMarkUpdateStrategy.Skip)) + { + string quotationDenormalizationRemark = + "Quotation marks in the following chapters have been automatically denormalized after translation: " + + string.Join( + ", ", + bestChapterStrategies + .Select((strategy, index) => (strategy, index)) + .Where(tuple => tuple.strategy != QuotationMarkUpdateStrategy.Skip) + .Select(tuple => tuple.index + 1) + ) + + "."; + remarks.Add(quotationDenormalizationRemark); + } + + var updater = new UpdateUsfmParserHandler(updateBlockHandlers: [quotationMarkDenormalizer], remarks: remarks); + UsfmParser.Parse(usfm, updater); + + usfm = updater.GetUsfm(); + return usfm; } /// diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs index 1f714542..a923fc7e 100644 --- a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs +++ b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs @@ -37,7 +37,7 @@ public async Task GetUsfmAsync_Source_PreferExisting() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \p \v 2 Chapter 1, verse 2. \v 3 @@ -64,7 +64,7 @@ public async Task GetUsfmAsync_Source_PreferPretranslated() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \p \v 2 Chapter 1, verse 2. \v 3 @@ -118,7 +118,7 @@ public async Task GetUsfmAsync_Source_OnlyPretranslated() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \p \v 2 Chapter 1, verse 2. \v 3 @@ -147,7 +147,7 @@ public async Task GetUsfmAsync_Source_PlaceMarkers() \rem Embed markers were moved to the end of the verse. Paragraph markers have positions preserved. Style markers were removed. \c 1 \v 1 Chapter 1, verse 1. -\p Translated new paragraph +\p ""Translated new paragraph"" \v 2 Chapter 1, verse 2. \v 3 " @@ -201,7 +201,7 @@ public async Task GetUsfmAsync_Target_PreferPretranslated() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \v 2 Chapter 1, verse 2. \v 3 TRG - Chapter one, verse three. " @@ -240,7 +240,7 @@ public async Task GetUsfmAsync_Auto_TargetBookDoesNotExist() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \p \v 2 Chapter 1, verse 2. \v 3 @@ -268,7 +268,7 @@ public async Task GetUsfmAsync_Auto_TargetBookExists() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \v 2 Chapter 1, verse 2. \v 3 TRG - Chapter one, verse three. " @@ -319,7 +319,7 @@ public async Task GetUsfmAsync_Target_OnlyPretranslated() \rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully. \rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed. \c 1 -\v 1 Chapter 1, verse 1. Translated new paragraph +\v 1 Chapter 1, verse 1. ""Translated new paragraph"" \v 2 Chapter 1, verse 2. \v 3 " @@ -340,11 +340,33 @@ public async Task GetUsfmAsync_Disclaimer_Remark_Shown() Assert.That(usfm, Does.Contain("rem This draft")); } + [Test] + public async Task GetUsfmAsync_DenormalizeQuotationMarks() + { + using TestEnvironment env = new(); + + string usfm = await env.GetUsfmAsync( + PretranslationUsfmTextOrigin.PreferExisting, + PretranslationUsfmTemplate.Source, + quotationMarkBehavior: PretranslationNormalizationBehavior.Denormalized + ); + Assert.That(usfm, Does.Contain("“Translated new paragraph”")); + Assert.That(Regex.Matches(usfm, @"\\rem"), Has.Count.EqualTo(3)); + + usfm = await env.GetUsfmAsync( + PretranslationUsfmTextOrigin.PreferExisting, + PretranslationUsfmTemplate.Source, + quotationMarkBehavior: PretranslationNormalizationBehavior.Normalized + ); + Assert.That(usfm, Does.Contain("\"Translated new paragraph\"")); + Assert.That(Regex.Matches(usfm, @"\\rem"), Has.Count.EqualTo(2)); + } + private class TestEnvironment : IDisposable { public TestEnvironment() { - Shared.Models.CorpusFile file1 = + CorpusFile file1 = new() { Id = "file1", @@ -352,7 +374,7 @@ public TestEnvironment() Format = Shared.Contracts.FileFormat.Paratext, TextId = "project1" }; - Shared.Models.CorpusFile file2 = + CorpusFile file2 = new() { Id = "file2", @@ -395,7 +417,7 @@ public TestEnvironment() new() { Id = "parallel_corpus1", - SourceCorpora = new List() + SourceCorpora = new List() { new() { @@ -404,7 +426,7 @@ public TestEnvironment() Files = [file1], } }, - TargetCorpora = new List() + TargetCorpora = new List() { new() { @@ -425,13 +447,31 @@ public TestEnvironment() { Id = "build1", EngineRef = "engine1", - DateFinished = DateTime.UnixEpoch + DateFinished = DateTime.UnixEpoch, + Analysis = + [ + new ParallelCorpusAnalysis() + { + ParallelCorpusRef = "corpus1", + SourceQuoteConvention = "standard_english", + TargetQuoteConvention = "standard_english" + } + ] }, new() { Id = "build2", EngineRef = "parallel_engine1", - DateFinished = DateTime.UnixEpoch + DateFinished = DateTime.UnixEpoch, + Analysis = + [ + new ParallelCorpusAnalysis() + { + ParallelCorpusRef = "parallel_corpus1", + SourceQuoteConvention = "standard_english", + TargetQuoteConvention = "standard_english" + } + ] } ] ); @@ -445,9 +485,22 @@ public TestEnvironment() CorpusRef = "corpus1", TextId = "MAT", Refs = ["MAT 1:1"], - Translation = "Chapter 1, verse 1. Translated new paragraph", + Translation = "Chapter 1, verse 1. \"Translated new paragraph\"", SourceTokens = ["SRC", "-", "Chapter", "one", ",", "verse", "one", ".", "new", "paragraph"], - TranslationTokens = ["Chapter", "1", ",", "verse", "1", ".", "Translated", "new", "paragraph"], + TranslationTokens = + [ + "Chapter", + "1", + ",", + "verse", + "1", + ".", + "\"", + "Translated", + "new", + "paragraph", + "\"" + ], Alignment = [ new() { SourceIndex = 2, TargetIndex = 0 }, @@ -456,9 +509,9 @@ public TestEnvironment() new() { SourceIndex = 5, TargetIndex = 3 }, new() { SourceIndex = 6, TargetIndex = 4 }, new() { SourceIndex = 7, TargetIndex = 5 }, - new() { SourceIndex = 8, TargetIndex = 6 }, new() { SourceIndex = 8, TargetIndex = 7 }, - new() { SourceIndex = 9, TargetIndex = 8 }, + new() { SourceIndex = 8, TargetIndex = 8 }, + new() { SourceIndex = 9, TargetIndex = 9 }, ] }, new() @@ -479,9 +532,22 @@ public TestEnvironment() CorpusRef = "parallel_corpus1", TextId = "MAT", Refs = ["MAT 1:1"], - Translation = "Chapter 1, verse 1. Translated new paragraph", + Translation = "Chapter 1, verse 1. \"Translated new paragraph\"", SourceTokens = ["SRC", "-", "Chapter", "one", ",", "verse", "one", ".", "new", "paragraph"], - TranslationTokens = ["Chapter", "1", ",", "verse", "1", ".", "Translated", "new", "paragraph"], + TranslationTokens = + [ + "Chapter", + "1", + ",", + "verse", + "1", + ".", + "\"", + "Translated", + "new", + "paragraph", + "\"" + ], Alignment = [ new() { SourceIndex = 2, TargetIndex = 0 }, @@ -490,9 +556,9 @@ public TestEnvironment() new() { SourceIndex = 5, TargetIndex = 3 }, new() { SourceIndex = 6, TargetIndex = 4 }, new() { SourceIndex = 7, TargetIndex = 5 }, - new() { SourceIndex = 8, TargetIndex = 6 }, new() { SourceIndex = 8, TargetIndex = 7 }, - new() { SourceIndex = 9, TargetIndex = 8 }, + new() { SourceIndex = 8, TargetIndex = 8 }, + new() { SourceIndex = 9, TargetIndex = 9 }, ] }, new() @@ -557,7 +623,8 @@ Shared.Services.ZipParatextProjectTextUpdater GetTextUpdater(string type) public async Task GetUsfmAsync( PretranslationUsfmTextOrigin textOrigin, PretranslationUsfmTemplate template, - PretranslationUsfmMarkerBehavior paragraphMarkerBehavior = PretranslationUsfmMarkerBehavior.Preserve + PretranslationUsfmMarkerBehavior paragraphMarkerBehavior = PretranslationUsfmMarkerBehavior.Preserve, + PretranslationNormalizationBehavior quotationMarkBehavior = PretranslationNormalizationBehavior.Normalized ) { string usfm = await Service.GetUsfmAsync( @@ -569,7 +636,8 @@ public async Task GetUsfmAsync( template: template, paragraphMarkerBehavior: paragraphMarkerBehavior, embedBehavior: PretranslationUsfmMarkerBehavior.Preserve, - styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip + styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip, + quoteNormalizationBehavior: quotationMarkBehavior ); usfm = usfm.Replace("\r\n", "\n"); string parallel_usfm = await Service.GetUsfmAsync( @@ -581,7 +649,8 @@ public async Task GetUsfmAsync( template: template, paragraphMarkerBehavior: paragraphMarkerBehavior, embedBehavior: PretranslationUsfmMarkerBehavior.Preserve, - styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip + styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip, + quoteNormalizationBehavior: quotationMarkBehavior ); parallel_usfm = parallel_usfm.Replace("\r\n", "\n"); Assert.That(parallel_usfm, Is.EqualTo(usfm)); diff --git a/src/Serval/test/Serval.Translation.Tests/Usings.cs b/src/Serval/test/Serval.Translation.Tests/Usings.cs index 8b984c89..1ef6c9b3 100644 --- a/src/Serval/test/Serval.Translation.Tests/Usings.cs +++ b/src/Serval/test/Serval.Translation.Tests/Usings.cs @@ -1,4 +1,5 @@ global using System.Text; +global using System.Text.RegularExpressions; global using Grpc.Core; global using Grpc.Net.ClientFactory; global using MassTransit; diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj index bfef3d79..c1f24615 100644 --- a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj +++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj @@ -19,7 +19,7 @@ - +