diff --git a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj
index 88a132c3..782e0bdb 100644
--- a/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj
+++ b/src/Machine/src/Serval.Machine.Shared/Serval.Machine.Shared.csproj
@@ -36,9 +36,9 @@
-
-
-
+
+
+
diff --git a/src/Serval/src/Serval.Client/Client.g.cs b/src/Serval/src/Serval.Client/Client.g.cs
index da2c01b4..7bda757c 100644
--- a/src/Serval/src/Serval.Client/Client.g.cs
+++ b/src/Serval/src/Serval.Client/Client.g.cs
@@ -2471,6 +2471,10 @@ public partial interface ITranslationEnginesClient
///
* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
///
* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
///
+ ///
Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options:
+ ///
* `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data.
+ ///
* `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized.
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned.
///
The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation).
///
@@ -2482,9 +2486,10 @@ public partial interface ITranslationEnginesClient
/// The behavior of paragraph markers.
/// The behavior of embed markers.
/// The behavior of style markers.
+ /// The normalization behavior of quotes.
/// The book in USFM format
/// A server side error occurred.
- System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken));
+ System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, PretranslationNormalizationBehavior? quoteNormalizationBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken));
/// A cancellation token that can be used by other objects or threads to receive notice of cancellation.
///
@@ -5168,6 +5173,10 @@ public string BaseUrl
///
* `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
///
* `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
///
+ ///
Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options:
+ ///
* `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data.
+ ///
* `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized.
+ ///
///
Only pretranslations for the most recent successful build of the engine are returned.
///
The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation).
///
@@ -5179,9 +5188,10 @@ public string BaseUrl
/// The behavior of paragraph markers.
/// The behavior of embed markers.
/// The behavior of style markers.
+ /// The normalization behavior of quotes.
/// The book in USFM format
/// A server side error occurred.
- public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken))
+ public virtual async System.Threading.Tasks.Task GetPretranslatedUsfmAsync(string id, string corpusId, string textId, PretranslationUsfmTextOrigin? textOrigin = null, PretranslationUsfmTemplate? template = null, PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior = null, PretranslationUsfmMarkerBehavior? embedBehavior = null, PretranslationUsfmMarkerBehavior? styleMarkerBehavior = null, PretranslationNormalizationBehavior? quoteNormalizationBehavior = null, System.Threading.CancellationToken cancellationToken = default(System.Threading.CancellationToken))
{
if (id == null)
throw new System.ArgumentNullException("id");
@@ -5232,6 +5242,10 @@ public string BaseUrl
{
urlBuilder_.Append(System.Uri.EscapeDataString("style-marker-behavior")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(styleMarkerBehavior, System.Globalization.CultureInfo.InvariantCulture))).Append('&');
}
+ if (quoteNormalizationBehavior != null)
+ {
+ urlBuilder_.Append(System.Uri.EscapeDataString("quotation-mark-behavior")).Append('=').Append(System.Uri.EscapeDataString(ConvertToString(quoteNormalizationBehavior, System.Globalization.CultureInfo.InvariantCulture))).Append('&');
+ }
urlBuilder_.Length--;
PrepareRequest(client_, request_, urlBuilder_);
@@ -10341,6 +10355,18 @@ public enum PretranslationUsfmMarkerBehavior
}
+ [System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
+ public enum PretranslationNormalizationBehavior // Values accepted by the quoteNormalizationBehavior argument of GetPretranslatedUsfmAsync.
+ {
+ /// <summary>The quotes in the pretranslated USFM are normalized quotes (typically straight quotes) in the style of the source data.</summary>
+ [System.Runtime.Serialization.EnumMember(Value = @"Normalized")]
+ Normalized = 0,
+ /// <summary>The quotes in the pretranslated USFM are denormalized into the style of the target data; this may not succeed in all contexts.</summary>
+ [System.Runtime.Serialization.EnumMember(Value = @"Denormalized")]
+ Denormalized = 1,
+
+ }
+
[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class TranslationBuild
{
diff --git a/src/Serval/src/Serval.Shared/Serval.Shared.csproj b/src/Serval/src/Serval.Shared/Serval.Shared.csproj
index 74672ca8..9f6ab13d 100644
--- a/src/Serval/src/Serval.Shared/Serval.Shared.csproj
+++ b/src/Serval/src/Serval.Shared/Serval.Shared.csproj
@@ -19,7 +19,7 @@
-
+
diff --git a/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs b/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs
new file mode 100644
index 00000000..fefedb6a
--- /dev/null
+++ b/src/Serval/src/Serval.Translation/Contracts/PretranslationQuotationMarkBehavior.cs
@@ -0,0 +1,7 @@
+namespace Serval.Translation.Contracts;
+
+/// <summary>
+/// Selects how quotation marks are rendered in pretranslated USFM.
+/// </summary>
+public enum PretranslationNormalizationBehavior
+{
+    /// <summary>
+    /// The quotes in the pretranslated USFM are normalized quotes (typically straight quotes) in the style of the
+    /// source data.
+    /// </summary>
+    Normalized = 0,
+
+    /// <summary>
+    /// The quotes in the pretranslated USFM are denormalized into the style of the target data.
+    /// </summary>
+    Denormalized = 1,
+}
diff --git a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs
index 3ca47f0e..ab1027f1 100644
--- a/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs
+++ b/src/Serval/src/Serval.Translation/Controllers/TranslationEnginesController.cs
@@ -901,6 +901,10 @@ CancellationToken cancellationToken
/// * `TryToPlace`: The USFM markers (or the entire embed) are placed in approximately the right location within the verse. **This option is only available for paragraph markers. Quality of placement may differ from language to language.**.
/// * `Strip`: The USFM markers (or the entire embed) are removed. **This is the default for style markers**.
///
+ /// Quote normalization behavior is controlled by the `quoteNormalizationBehavior` parameter options:
+ /// * `Normalized`: The quotes in the pretranslated USFM are normalized quotes (typically straight quotes: ', ") in the style of the source data.
+ /// * `Denormalized`: The quotes in the pretranslated USFM are denormalized into the style of the target data. Quote denormalization may not be successful in all contexts. A remark will be added to the USFM listing the chapters that were successfully denormalized.
+ ///
/// Only pretranslations for the most recent successful build of the engine are returned.
/// The USFM parsing and marker types used are defined here: [this wiki](https://github.com/sillsdev/serval/wiki/USFM-Parsing-and-Translation).
///
@@ -912,6 +916,7 @@ CancellationToken cancellationToken
/// The behavior of paragraph markers.
/// The behavior of embed markers.
/// The behavior of style markers.
+ /// The normalization behavior of quotes.
///
/// The book in USFM format
/// The specified book does not exist in the source or target corpus.
@@ -941,6 +946,7 @@ public async Task GetPretranslatedUsfmAsync(
[FromQuery(Name = "paragraph-marker-behavior")] PretranslationUsfmMarkerBehavior? paragraphMarkerBehavior,
[FromQuery(Name = "embed-behavior")] PretranslationUsfmMarkerBehavior? embedBehavior,
[FromQuery(Name = "style-marker-behavior")] PretranslationUsfmMarkerBehavior? styleMarkerBehavior,
+ [FromQuery(Name = "quotation-mark-behavior")] PretranslationNormalizationBehavior? quoteNormalizationBehavior,
CancellationToken cancellationToken
)
{
@@ -961,6 +967,7 @@ CancellationToken cancellationToken
paragraphMarkerBehavior ?? PretranslationUsfmMarkerBehavior.Preserve,
embedBehavior ?? PretranslationUsfmMarkerBehavior.Preserve,
styleMarkerBehavior ?? PretranslationUsfmMarkerBehavior.Strip,
+ quoteNormalizationBehavior ?? PretranslationNormalizationBehavior.Normalized,
cancellationToken
);
if (usfm == "")
diff --git a/src/Serval/src/Serval.Translation/Models/Build.cs b/src/Serval/src/Serval.Translation/Models/Build.cs
index e3ee1c63..ecfefe02 100644
--- a/src/Serval/src/Serval.Translation/Models/Build.cs
+++ b/src/Serval/src/Serval.Translation/Models/Build.cs
@@ -20,5 +20,5 @@ public record Build : IInitializableEntity
public bool? IsInitialized { get; set; }
public DateTime? DateCreated { get; set; }
public IReadOnlyList? Phases { get; init; }
- public IReadOnlyCollection? Analysis { get; init; }
+ public IReadOnlyList? Analysis { get; init; }
}
diff --git a/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs b/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs
index af5c876d..ede5c9a5 100644
--- a/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs
+++ b/src/Serval/src/Serval.Translation/Services/IPretranslationService.cs
@@ -20,6 +20,7 @@ Task GetUsfmAsync(
PretranslationUsfmMarkerBehavior paragraphMarkerBehavior,
PretranslationUsfmMarkerBehavior embedBehavior,
PretranslationUsfmMarkerBehavior styleMarkerBehavior,
+ PretranslationNormalizationBehavior quoteNormalizationBehavior,
CancellationToken cancellationToken = default
);
}
diff --git a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs
index 2952c0b8..32f51dbb 100644
--- a/src/Serval/src/Serval.Translation/Services/PretranslationService.cs
+++ b/src/Serval/src/Serval.Translation/Services/PretranslationService.cs
@@ -1,4 +1,5 @@
using SIL.Machine.Corpora;
+using SIL.Machine.PunctuationAnalysis;
using SIL.Machine.Translation;
namespace Serval.Translation.Services;
@@ -44,6 +45,7 @@ public async Task GetUsfmAsync(
PretranslationUsfmMarkerBehavior paragraphMarkerBehavior,
PretranslationUsfmMarkerBehavior embedBehavior,
PretranslationUsfmMarkerBehavior styleMarkerBehavior,
+ PretranslationNormalizationBehavior quoteNormalizationBehavior,
CancellationToken cancellationToken = default
)
{
@@ -68,6 +70,8 @@ public async Task GetUsfmAsync(
styleMarkerBehavior
);
+ List remarks = [disclaimerRemark, markerPlacementRemark];
+
CorpusFile sourceFile;
CorpusFile targetFile;
if (corpus is not null)
@@ -147,6 +151,10 @@ PretranslationUsfmMarkerBehavior StyleBehavior
if (paragraphMarkerBehavior == PretranslationUsfmMarkerBehavior.PreservePosition)
updateBlockHandlers.Add(new PlaceMarkersUsfmUpdateBlockHandler());
+
+        // Declared ahead of the template branches so the USFM produced by either branch can be
+        // post-processed before it is returned.
+        string usfm = "";
// Update the target book if it exists
if (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Target)
{
@@ -162,7 +170,6 @@ PretranslationUsfmMarkerBehavior StyleBehavior
);
using Shared.Services.ZipParatextProjectTextUpdater updater =
_scriptureDataFileService.GetZipParatextProjectTextUpdater(targetFile.Filename);
- string usfm = "";
switch (textOrigin)
{
case PretranslationUsfmTextOrigin.PreferExisting:
@@ -176,7 +183,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
break;
case PretranslationUsfmTextOrigin.PreferPretranslated:
@@ -190,7 +197,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
break;
case PretranslationUsfmTextOrigin.OnlyExisting:
@@ -204,7 +211,7 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
break;
case PretranslationUsfmTextOrigin.OnlyPretranslated:
@@ -218,16 +225,16 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
break;
}
- // In order to support PretranslationUsfmTemplate.Auto
- if (!string.IsNullOrEmpty(usfm))
- return usfm;
}
- if (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Source)
+ if (
+ string.IsNullOrEmpty(usfm)
+ && (template is PretranslationUsfmTemplate.Auto or PretranslationUsfmTemplate.Source)
+ )
{
using Shared.Services.ZipParatextProjectTextUpdater updater =
_scriptureDataFileService.GetZipParatextProjectTextUpdater(sourceFile.Filename);
@@ -238,7 +245,8 @@ PretranslationUsfmMarkerBehavior StyleBehavior
case PretranslationUsfmTextOrigin.PreferExisting:
case PretranslationUsfmTextOrigin.PreferPretranslated:
case PretranslationUsfmTextOrigin.OnlyPretranslated:
- return updater.UpdateUsfm(
+ usfm =
+ updater.UpdateUsfm(
textId,
pretranslationRows.Select(Map).ToList(),
fullName: targetSettings.FullName,
@@ -247,10 +255,12 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
+ break;
case PretranslationUsfmTextOrigin.OnlyExisting:
- return updater.UpdateUsfm(
+ usfm =
+ updater.UpdateUsfm(
textId,
[], // don't pass the pretranslations, we only want the existing text.
fullName: targetSettings.FullName,
@@ -259,12 +269,86 @@ PretranslationUsfmMarkerBehavior StyleBehavior
embedBehavior: Map(embedBehavior),
styleBehavior: Map(styleMarkerBehavior),
updateBlockHandlers: updateBlockHandlers,
- remarks: [disclaimerRemark, markerPlacementRemark]
+ remarks: remarks
) ?? "";
+ break;
+ }
+ }
+ if (quoteNormalizationBehavior == PretranslationNormalizationBehavior.Denormalized)
+ {
+ if (build.Analysis is null)
+ {
+ throw new InvalidOperationException(
+ $"Unable to denormalize quotation marks: No quote convention analysis exists for build {build.Id}"
+ );
+ }
+ if (!build.Analysis.Any(a => a.ParallelCorpusRef == corpusId))
+ {
+ throw new InvalidOperationException(
+ $"Unable to denormalize quotation marks: No quote convention analysis exists for corpus {corpusId}"
+ );
}
+ ParallelCorpusAnalysis analysis = build.Analysis.Single(c => c.ParallelCorpusRef == corpusId);
+ usfm = DenormalizeQuotationMarks(usfm, analysis);
}
- return "";
+ return usfm;
+ }
+
+    /// <summary>
+    /// Rewrites the quotation marks in <paramref name="usfm"/> from the source quote convention to the target
+    /// quote convention named in <paramref name="analysis"/>.
+    /// </summary>
+    /// <param name="usfm">The USFM text to post-process.</param>
+    /// <param name="analysis">Supplies the names of the source and target quote conventions.</param>
+    /// <returns>
+    /// The updated USFM. When at least one chapter strategy is not <c>Skip</c>, a remark listing those chapters is
+    /// handed to the USFM updater.
+    /// </returns>
+    /// <exception cref="InvalidOperationException">
+    /// Either convention name cannot be resolved through <c>QuoteConventions.Standard</c>.
+    /// </exception>
+    private static string DenormalizeQuotationMarks(string usfm, ParallelCorpusAnalysis analysis)
+    {
+        QuoteConvention sourceQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName(
+            analysis.SourceQuoteConvention
+        );
+        if (sourceQuoteConvention is null)
+        {
+            throw new InvalidOperationException(
+                $"Unable to denormalize quotation marks: No such convention {analysis.SourceQuoteConvention}"
+            );
+        }
+        QuoteConvention targetQuoteConvention = QuoteConventions.Standard.GetQuoteConventionByName(
+            analysis.TargetQuoteConvention
+        );
+        if (targetQuoteConvention is null)
+        {
+            throw new InvalidOperationException(
+                $"Unable to denormalize quotation marks: No such convention {analysis.TargetQuoteConvention}"
+            );
+        }
+
+        // Nothing to rewrite: hand the empty text back untouched so callers that test for "" keep working.
+        // The guard sits after validation so an unknown convention is still reported.
+        if (string.IsNullOrEmpty(usfm))
+            return usfm;
+
+        // First pass: inspect the text and pick an update strategy for each chapter.
+        QuotationMarkDenormalizationFirstPass firstPass = new(sourceQuoteConvention, targetQuoteConvention);
+        UsfmParser.Parse(usfm, firstPass);
+        List<QuotationMarkUpdateStrategy> bestChapterStrategies = firstPass.FindBestChapterStrategies();
+
+        QuotationMarkDenormalizationUsfmUpdateBlockHandler quotationMarkDenormalizer =
+            new(
+                sourceQuoteConvention,
+                targetQuoteConvention,
+                new QuotationMarkUpdateSettings(chapterStrategies: bestChapterStrategies)
+            );
+
+        // Chapter numbers are derived from list position (index 0 is reported as chapter 1).
+        List<int> denormalizedChapters = bestChapterStrategies
+            .Select((strategy, index) => (strategy, chapter: index + 1))
+            .Where(entry => entry.strategy != QuotationMarkUpdateStrategy.Skip)
+            .Select(entry => entry.chapter)
+            .ToList();
+
+        List<string> remarks = [];
+        if (denormalizedChapters.Count > 0)
+        {
+            remarks.Add(
+                "Quotation marks in the following chapters have been automatically denormalized after translation: "
+                    + string.Join(", ", denormalizedChapters)
+                    + "."
+            );
+        }
+
+        // Second pass: apply the chosen strategies and emit the rewritten USFM with any remark.
+        var updater = new UpdateUsfmParserHandler(updateBlockHandlers: [quotationMarkDenormalizer], remarks: remarks);
+        UsfmParser.Parse(usfm, updater);
+
+        return updater.GetUsfm();
+    }
///
diff --git a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs
index 1f714542..a923fc7e 100644
--- a/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs
+++ b/src/Serval/test/Serval.Translation.Tests/Services/PretranslationServiceTests.cs
@@ -37,7 +37,7 @@ public async Task GetUsfmAsync_Source_PreferExisting()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\p
\v 2 Chapter 1, verse 2.
\v 3
@@ -64,7 +64,7 @@ public async Task GetUsfmAsync_Source_PreferPretranslated()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\p
\v 2 Chapter 1, verse 2.
\v 3
@@ -118,7 +118,7 @@ public async Task GetUsfmAsync_Source_OnlyPretranslated()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\p
\v 2 Chapter 1, verse 2.
\v 3
@@ -147,7 +147,7 @@ public async Task GetUsfmAsync_Source_PlaceMarkers()
\rem Embed markers were moved to the end of the verse. Paragraph markers have positions preserved. Style markers were removed.
\c 1
\v 1 Chapter 1, verse 1.
-\p Translated new paragraph
+\p ""Translated new paragraph""
\v 2 Chapter 1, verse 2.
\v 3
"
@@ -201,7 +201,7 @@ public async Task GetUsfmAsync_Target_PreferPretranslated()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\v 2 Chapter 1, verse 2.
\v 3 TRG - Chapter one, verse three.
"
@@ -240,7 +240,7 @@ public async Task GetUsfmAsync_Auto_TargetBookDoesNotExist()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\p
\v 2 Chapter 1, verse 2.
\v 3
@@ -268,7 +268,7 @@ public async Task GetUsfmAsync_Auto_TargetBookExists()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\v 2 Chapter 1, verse 2.
\v 3 TRG - Chapter one, verse three.
"
@@ -319,7 +319,7 @@ public async Task GetUsfmAsync_Target_OnlyPretranslated()
\rem This draft of MAT was generated using AI on 1970-01-01 00:00:00Z. It should be reviewed and edited carefully.
\rem Paragraph and embed markers were moved to the end of the verse. Style markers were removed.
\c 1
-\v 1 Chapter 1, verse 1. Translated new paragraph
+\v 1 Chapter 1, verse 1. ""Translated new paragraph""
\v 2 Chapter 1, verse 2.
\v 3
"
@@ -340,11 +340,33 @@ public async Task GetUsfmAsync_Disclaimer_Remark_Shown()
Assert.That(usfm, Does.Contain("rem This draft"));
}
+    // Checks that requesting denormalized quotes yields curly quotes plus one extra \rem line, while the
+    // normalized request keeps straight quotes and only the two standard remarks.
+    [Test]
+    public async Task GetUsfmAsync_DenormalizeQuotationMarks()
+    {
+        const string RemarkMarkerPattern = @"\\rem";
+        using TestEnvironment env = new();
+
+        string denormalizedUsfm = await env.GetUsfmAsync(
+            textOrigin: PretranslationUsfmTextOrigin.PreferExisting,
+            template: PretranslationUsfmTemplate.Source,
+            quotationMarkBehavior: PretranslationNormalizationBehavior.Denormalized
+        );
+        Assert.That(denormalizedUsfm, Does.Contain("“Translated new paragraph”"));
+        MatchCollection denormalizedRemarks = Regex.Matches(denormalizedUsfm, RemarkMarkerPattern);
+        Assert.That(denormalizedRemarks, Has.Count.EqualTo(3));
+
+        string normalizedUsfm = await env.GetUsfmAsync(
+            textOrigin: PretranslationUsfmTextOrigin.PreferExisting,
+            template: PretranslationUsfmTemplate.Source,
+            quotationMarkBehavior: PretranslationNormalizationBehavior.Normalized
+        );
+        Assert.That(normalizedUsfm, Does.Contain("\"Translated new paragraph\""));
+        MatchCollection normalizedRemarks = Regex.Matches(normalizedUsfm, RemarkMarkerPattern);
+        Assert.That(normalizedRemarks, Has.Count.EqualTo(2));
+    }
+
private class TestEnvironment : IDisposable
{
public TestEnvironment()
{
- Shared.Models.CorpusFile file1 =
+ CorpusFile file1 =
new()
{
Id = "file1",
@@ -352,7 +374,7 @@ public TestEnvironment()
Format = Shared.Contracts.FileFormat.Paratext,
TextId = "project1"
};
- Shared.Models.CorpusFile file2 =
+ CorpusFile file2 =
new()
{
Id = "file2",
@@ -395,7 +417,7 @@ public TestEnvironment()
new()
{
Id = "parallel_corpus1",
- SourceCorpora = new List()
+ SourceCorpora = new List()
{
new()
{
@@ -404,7 +426,7 @@ public TestEnvironment()
Files = [file1],
}
},
- TargetCorpora = new List()
+ TargetCorpora = new List()
{
new()
{
@@ -425,13 +447,31 @@ public TestEnvironment()
{
Id = "build1",
EngineRef = "engine1",
- DateFinished = DateTime.UnixEpoch
+ DateFinished = DateTime.UnixEpoch,
+ Analysis =
+ [
+ new ParallelCorpusAnalysis()
+ {
+ ParallelCorpusRef = "corpus1",
+ SourceQuoteConvention = "standard_english",
+ TargetQuoteConvention = "standard_english"
+ }
+ ]
},
new()
{
Id = "build2",
EngineRef = "parallel_engine1",
- DateFinished = DateTime.UnixEpoch
+ DateFinished = DateTime.UnixEpoch,
+ Analysis =
+ [
+ new ParallelCorpusAnalysis()
+ {
+ ParallelCorpusRef = "parallel_corpus1",
+ SourceQuoteConvention = "standard_english",
+ TargetQuoteConvention = "standard_english"
+ }
+ ]
}
]
);
@@ -445,9 +485,22 @@ public TestEnvironment()
CorpusRef = "corpus1",
TextId = "MAT",
Refs = ["MAT 1:1"],
- Translation = "Chapter 1, verse 1. Translated new paragraph",
+ Translation = "Chapter 1, verse 1. \"Translated new paragraph\"",
SourceTokens = ["SRC", "-", "Chapter", "one", ",", "verse", "one", ".", "new", "paragraph"],
- TranslationTokens = ["Chapter", "1", ",", "verse", "1", ".", "Translated", "new", "paragraph"],
+ TranslationTokens =
+ [
+ "Chapter",
+ "1",
+ ",",
+ "verse",
+ "1",
+ ".",
+ "\"",
+ "Translated",
+ "new",
+ "paragraph",
+ "\""
+ ],
Alignment =
[
new() { SourceIndex = 2, TargetIndex = 0 },
@@ -456,9 +509,9 @@ public TestEnvironment()
new() { SourceIndex = 5, TargetIndex = 3 },
new() { SourceIndex = 6, TargetIndex = 4 },
new() { SourceIndex = 7, TargetIndex = 5 },
- new() { SourceIndex = 8, TargetIndex = 6 },
new() { SourceIndex = 8, TargetIndex = 7 },
- new() { SourceIndex = 9, TargetIndex = 8 },
+ new() { SourceIndex = 8, TargetIndex = 8 },
+ new() { SourceIndex = 9, TargetIndex = 9 },
]
},
new()
@@ -479,9 +532,22 @@ public TestEnvironment()
CorpusRef = "parallel_corpus1",
TextId = "MAT",
Refs = ["MAT 1:1"],
- Translation = "Chapter 1, verse 1. Translated new paragraph",
+ Translation = "Chapter 1, verse 1. \"Translated new paragraph\"",
SourceTokens = ["SRC", "-", "Chapter", "one", ",", "verse", "one", ".", "new", "paragraph"],
- TranslationTokens = ["Chapter", "1", ",", "verse", "1", ".", "Translated", "new", "paragraph"],
+ TranslationTokens =
+ [
+ "Chapter",
+ "1",
+ ",",
+ "verse",
+ "1",
+ ".",
+ "\"",
+ "Translated",
+ "new",
+ "paragraph",
+ "\""
+ ],
Alignment =
[
new() { SourceIndex = 2, TargetIndex = 0 },
@@ -490,9 +556,9 @@ public TestEnvironment()
new() { SourceIndex = 5, TargetIndex = 3 },
new() { SourceIndex = 6, TargetIndex = 4 },
new() { SourceIndex = 7, TargetIndex = 5 },
- new() { SourceIndex = 8, TargetIndex = 6 },
new() { SourceIndex = 8, TargetIndex = 7 },
- new() { SourceIndex = 9, TargetIndex = 8 },
+ new() { SourceIndex = 8, TargetIndex = 8 },
+ new() { SourceIndex = 9, TargetIndex = 9 },
]
},
new()
@@ -557,7 +623,8 @@ Shared.Services.ZipParatextProjectTextUpdater GetTextUpdater(string type)
public async Task GetUsfmAsync(
PretranslationUsfmTextOrigin textOrigin,
PretranslationUsfmTemplate template,
- PretranslationUsfmMarkerBehavior paragraphMarkerBehavior = PretranslationUsfmMarkerBehavior.Preserve
+ PretranslationUsfmMarkerBehavior paragraphMarkerBehavior = PretranslationUsfmMarkerBehavior.Preserve,
+ PretranslationNormalizationBehavior quotationMarkBehavior = PretranslationNormalizationBehavior.Normalized
)
{
string usfm = await Service.GetUsfmAsync(
@@ -569,7 +636,8 @@ public async Task GetUsfmAsync(
template: template,
paragraphMarkerBehavior: paragraphMarkerBehavior,
embedBehavior: PretranslationUsfmMarkerBehavior.Preserve,
- styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip
+ styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip,
+ quoteNormalizationBehavior: quotationMarkBehavior
);
usfm = usfm.Replace("\r\n", "\n");
string parallel_usfm = await Service.GetUsfmAsync(
@@ -581,7 +649,8 @@ public async Task GetUsfmAsync(
template: template,
paragraphMarkerBehavior: paragraphMarkerBehavior,
embedBehavior: PretranslationUsfmMarkerBehavior.Preserve,
- styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip
+ styleMarkerBehavior: PretranslationUsfmMarkerBehavior.Strip,
+ quoteNormalizationBehavior: quotationMarkBehavior
);
parallel_usfm = parallel_usfm.Replace("\r\n", "\n");
Assert.That(parallel_usfm, Is.EqualTo(usfm));
diff --git a/src/Serval/test/Serval.Translation.Tests/Usings.cs b/src/Serval/test/Serval.Translation.Tests/Usings.cs
index 8b984c89..1ef6c9b3 100644
--- a/src/Serval/test/Serval.Translation.Tests/Usings.cs
+++ b/src/Serval/test/Serval.Translation.Tests/Usings.cs
@@ -1,4 +1,5 @@
global using System.Text;
+global using System.Text.RegularExpressions;
global using Grpc.Core;
global using Grpc.Net.ClientFactory;
global using MassTransit;
diff --git a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj
index bfef3d79..c1f24615 100644
--- a/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj
+++ b/src/ServiceToolkit/src/SIL.ServiceToolkit/SIL.ServiceToolkit.csproj
@@ -19,7 +19,7 @@
-
+