Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Echo/src/EchoEngine/Usings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
global using Google.Protobuf.WellKnownTypes;
global using Grpc.Core;
global using Microsoft.Extensions.Diagnostics.HealthChecks;
global using SIL.ServiceToolkit.Utils;
global using SIL.ServiceToolkit.Services;
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ public static IMachineBuilder AddServalTranslationPlatformService(this IMachineB
x.AddConsumer<TranslationIncrementEngineCorpusSizeConsumer>();
x.AddConsumer<TranslationInsertPretranslationsConsumer>();
x.AddConsumer<TranslationUpdateBuildExecutionDataConsumer>();
x.AddConsumer<TranslationUpdateParallelCorpusAnalysisConsumer>();
});

builder
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using Serval.Translation.V1;

namespace Serval.Machine.Shared.Consumers;

/// <summary>
/// Consumer for <see cref="UpdateParallelCorpusAnalysisRequest"/> messages. It passes the Serval translation
/// platform outbox id, the <c>UpdateParallelCorpusAnalysis</c> method name, and the client's
/// <c>UpdateParallelCorpusAnalysisAsync</c> call to <see cref="ServalPlatformConsumerBase{T}"/>.
/// </summary>
public class TranslationUpdateParallelCorpusAnalysisConsumer
    : ServalPlatformConsumerBase<UpdateParallelCorpusAnalysisRequest>
{
    /// <summary>Creates the consumer around the given translation platform API client.</summary>
    /// <param name="client">gRPC client whose <c>UpdateParallelCorpusAnalysisAsync</c> method is handed to the base class.</param>
    public TranslationUpdateParallelCorpusAnalysisConsumer(TranslationPlatformApi.TranslationPlatformApiClient client)
        : base(
            ServalTranslationPlatformOutboxConstants.OutboxId,
            ServalTranslationPlatformOutboxConstants.UpdateParallelCorpusAnalysis,
            client.UpdateParallelCorpusAnalysisAsync
        ) { }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Serval.Machine.Shared.Models;

/// <summary>
/// Result of analyzing one parallel corpus: the corpus it refers to plus the names of the quote conventions
/// reported for its source and target sides.
/// </summary>
public record ParallelCorpusAnalysis
{
/// <summary>Identifier of the parallel corpus this analysis belongs to.</summary>
public required string ParallelCorpusRef { get; init; }
/// <summary>Name of the quote convention reported for the source side.</summary>
public required string SourceQuoteConvention { get; init; }
/// <summary>Name of the quote convention reported for the target side.</summary>
public required string TargetQuoteConvention { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,11 @@ Task UpdateBuildExecutionDataAsync(
IReadOnlyDictionary<string, string> executionData,
CancellationToken cancellationToken = default
);

/// <summary>
/// Reports the parallel corpus analysis results for a build to the platform.
/// </summary>
/// <param name="engineId">Identifier of the engine that owns the build.</param>
/// <param name="buildId">Identifier of the build the analysis was produced for.</param>
/// <param name="parallelCorpusAnalysis">One analysis entry per analyzed parallel corpus.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
Task UpdateParallelCorpusAnalysisAsync(
string engineId,
string buildId,
IReadOnlyCollection<ParallelCorpusAnalysis> parallelCorpusAnalysis,
CancellationToken cancellationToken = default
);
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,4 +26,42 @@ protected override bool ResolveLanguageCodeForBaseModel(string languageCode, out
{
return _languageTagService.ConvertToFlores200Code(languageCode, out resolvedCode);
}

/// <summary>
/// Analyzes the quote conventions of every parallel corpus and forwards the results to the platform service.
/// </summary>
/// <remarks>
/// A corpus is reported when a quote convention name was found for its source side, its target side, or both;
/// the side without a detected convention is reported as an empty string. Corpora with no detected convention
/// on either side are left out.
/// </remarks>
/// <param name="engineId">Identifier of the engine that owns the build.</param>
/// <param name="buildId">Identifier of the build being preprocessed.</param>
/// <param name="corpora">Parallel corpora to analyze.</param>
/// <param name="cancellationToken">Token passed through to the platform service call.</param>
protected override async Task UpdateParallelCorpusAnalysisAsync(
    string engineId,
    string buildId,
    IReadOnlyList<ParallelCorpus> corpora,
    CancellationToken cancellationToken
)
{
    List<ParallelCorpusAnalysis> parallelCorpusAnalysis = [];
    foreach (ParallelCorpus parallelCorpus in corpora)
    {
        (QuoteConventionAnalysis? sourceQuotationConvention, QuoteConventionAnalysis? targetQuotationConvention) =
            ParallelCorpusPreprocessingService.AnalyzeParallelCorpus(parallelCorpus);
        string sourceQuotationConventionName = sourceQuotationConvention?.BestQuoteConvention.Name ?? string.Empty;
        string targetQuotationConventionName = targetQuotationConvention?.BestQuoteConvention.Name ?? string.Empty;

        // Check BOTH sides: previously the source name was tested twice, so a corpus where only the target
        // convention was detected was silently dropped.
        if (
            !string.IsNullOrWhiteSpace(sourceQuotationConventionName)
            || !string.IsNullOrWhiteSpace(targetQuotationConventionName)
        )
        {
            parallelCorpusAnalysis.Add(
                new ParallelCorpusAnalysis
                {
                    ParallelCorpusRef = parallelCorpus.Id,
                    SourceQuoteConvention = sourceQuotationConventionName,
                    TargetQuoteConvention = targetQuotationConventionName,
                }
            );
        }
    }

    await PlatformService.UpdateParallelCorpusAnalysisAsync(
        engineId,
        buildId,
        parallelCorpusAnalysis,
        cancellationToken
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ await UpdateBuildExecutionData(
cancellationToken
);

await UpdateParallelCorpusAnalysisAsync(engineId, buildId, data, cancellationToken);

if (trainCount == 0 && (!sourceTagInBaseModel || !targetTagInBaseModel))
{
throw new InvalidOperationException(
Expand Down Expand Up @@ -90,6 +92,13 @@ protected abstract Task UpdateBuildExecutionData(
CancellationToken cancellationToken
);

/// <summary>
/// Hook for reporting parallel corpus analysis during preprocessing. The default implementation does nothing
/// and returns an already-completed task; derived jobs override it to perform an analysis.
/// </summary>
/// <param name="engineId">Identifier of the engine that owns the build.</param>
/// <param name="buildId">Identifier of the build being preprocessed.</param>
/// <param name="corpora">Parallel corpora available to the build.</param>
/// <param name="cancellationToken">Token used to cancel the operation.</param>
protected virtual Task UpdateParallelCorpusAnalysisAsync(
    string engineId,
    string buildId,
    IReadOnlyList<ParallelCorpus> corpora,
    CancellationToken cancellationToken
)
{
    // No analysis by default.
    return Task.CompletedTask;
}

protected abstract Task<(int TrainCount, int InferenceCount)> WriteDataFilesAsync(
string buildId,
IReadOnlyList<ParallelCorpus> corpora,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ public static class ServalTranslationPlatformOutboxConstants
public const string InsertPretranslations = "InsertPretranslations";
public const string IncrementEngineCorpusSize = "IncrementTrainEngineCorpusSize";
public const string UpdateBuildExecutionData = "UpdateBuildExecutionData";
public const string UpdateParallelCorpusAnalysis = "UpdateParallelCorpusAnalysis";
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,33 @@ await _outboxService.EnqueueMessageAsync(
cancellationToken: cancellationToken
);
}

/// <summary>
/// Builds an <see cref="UpdateParallelCorpusAnalysisRequest"/> from the given analysis entries and enqueues it
/// on the Serval translation platform outbox, grouped by engine id.
/// </summary>
/// <param name="engineId">Identifier of the engine; also used as the outbox group id.</param>
/// <param name="buildId">Identifier of the build the analysis belongs to.</param>
/// <param name="parallelCorpusAnalysis">Analysis entries to copy into the request.</param>
/// <param name="cancellationToken">Token passed to the outbox service.</param>
public async Task UpdateParallelCorpusAnalysisAsync(
    string engineId,
    string buildId,
    IReadOnlyCollection<ParallelCorpusAnalysis> parallelCorpusAnalysis,
    CancellationToken cancellationToken = default
)
{
    UpdateParallelCorpusAnalysisRequest request = new() { EngineId = engineId, BuildId = buildId };

    // Copy each model entry into its protobuf counterpart, preserving the incoming order.
    foreach (ParallelCorpusAnalysis entry in parallelCorpusAnalysis)
    {
        ParallelCorpusAnalysisResult result = new()
        {
            ParallelCorpusId = entry.ParallelCorpusRef,
            SourceQuoteConvention = entry.SourceQuoteConvention,
            TargetQuoteConvention = entry.TargetQuoteConvention,
        };
        request.ParallelCorpusAnalysis.Add(result);
    }

    await _outboxService.EnqueueMessageAsync(
        outboxId: ServalTranslationPlatformOutboxConstants.OutboxId,
        method: ServalTranslationPlatformOutboxConstants.UpdateParallelCorpusAnalysis,
        groupId: engineId,
        request,
        cancellationToken: cancellationToken
    );
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,15 @@ await _outboxService.EnqueueMessageAsync(
cancellationToken: cancellationToken
);
}

/// <summary>
/// No-op: returns an already-completed task without using any of its arguments.
/// </summary>
/// <param name="engineId">Ignored.</param>
/// <param name="buildId">Ignored.</param>
/// <param name="parallelCorpusAnalysis">Ignored.</param>
/// <param name="cancellationToken">Ignored.</param>
public Task UpdateParallelCorpusAnalysisAsync(
    string engineId,
    string buildId,
    IReadOnlyCollection<ParallelCorpusAnalysis> parallelCorpusAnalysis,
    CancellationToken cancellationToken = default
) =>
    // Word alignment does not support parallel corpus analysis
    Task.CompletedTask;
}
Original file line number Diff line number Diff line change
Expand Up @@ -112,4 +112,15 @@ CancellationToken cancellationToken
};
await PlatformService.UpdateBuildExecutionDataAsync(engineId, buildId, executionData, cancellationToken);
}

/// <summary>
/// No-op override: returns an already-completed task without inspecting the corpora.
/// </summary>
/// <param name="engineId">Ignored.</param>
/// <param name="buildId">Ignored.</param>
/// <param name="corpora">Ignored.</param>
/// <param name="cancellationToken">Ignored.</param>
protected override Task UpdateParallelCorpusAnalysisAsync(
    string engineId,
    string buildId,
    IReadOnlyList<ParallelCorpus> corpora,
    CancellationToken cancellationToken
) =>
    // Word alignment does not support parallel corpus analysis
    Task.CompletedTask;
}
1 change: 1 addition & 0 deletions src/Machine/src/Serval.Machine.Shared/Usings.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
global using SIL.DataAccess;
global using SIL.Machine.Corpora;
global using SIL.Machine.Morphology.HermitCrab;
global using SIL.Machine.PunctuationAnalysis;
global using SIL.Machine.Tokenization;
global using SIL.Machine.Translation;
global using SIL.Machine.Translation.Thot;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -297,13 +297,17 @@ public async Task RunAsync_RemoveFreestandingEllipses()
string sourceExtract = await env.GetSourceExtractAsync();
Assert.That(
sourceExtract,
Is.EqualTo("Source one, chapter two, verse one.\nSource one, chapter two, verse two.\n\n"),
Is.EqualTo(
"Source one, chapter two, verse one.\nSource one, chapter two, verse two. \u201ca quotation\u201d\n\n"
),
sourceExtract
);
string targetExtract = await env.GetTargetExtractAsync();
Assert.That(
targetExtract,
Is.EqualTo("Target one, chapter two, verse one.\n\nTarget one, chapter two, verse three.\n"),
Is.EqualTo(
"Target one, chapter two, verse one.\n\nTarget one, chapter two, verse three. \"a quotation\"\n"
),
targetExtract
);
JsonArray? pretranslations = await env.GetPretranslationsAsync();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,6 @@
\c 2
\p
\v 1 Source one, chapter two, verse one.
\v 2 Source one, chapter two, verse two.
\v 2 Source one, chapter two, verse two. “a quotation”
\v 3 ...
\v 4 ...
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@
\p
\v 1 Target one, chapter two, verse one.
\v 2 ...
\v 3 Target one, chapter two, verse three.
\v 3 Target one, chapter two, verse three. "a quotation"
20 changes: 20 additions & 0 deletions src/Serval/src/Serval.Client/Client.g.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10407,6 +10407,9 @@ public partial class TranslationBuild
[Newtonsoft.Json.JsonProperty("phases", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<Phase>? Phases { get; set; } = default!;

[Newtonsoft.Json.JsonProperty("analysis", Required = Newtonsoft.Json.Required.Default, NullValueHandling = Newtonsoft.Json.NullValueHandling.Ignore)]
public System.Collections.Generic.IList<ParallelCorpusAnalysis>? Analysis { get; set; } = default!;

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
Expand Down Expand Up @@ -10522,6 +10525,23 @@ public enum PhaseStage

}

/// <summary>
/// Client-side model of a parallel corpus analysis entry. All three properties are marked as always required in
/// the JSON payload, while the data-annotation validation allows them to be empty strings.
/// </summary>
// NOTE(review): this class carries the NJsonSchema GeneratedCode attribute, so hand-written comments are
// presumably lost on regeneration - confirm before relying on them.
[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class ParallelCorpusAnalysis
{
/// <summary>Serialized as <c>parallelCorpusRef</c>.</summary>
[Newtonsoft.Json.JsonProperty("parallelCorpusRef", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string ParallelCorpusRef { get; set; } = default!;

/// <summary>Serialized as <c>sourceQuoteConvention</c>.</summary>
[Newtonsoft.Json.JsonProperty("sourceQuoteConvention", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string SourceQuoteConvention { get; set; } = default!;

/// <summary>Serialized as <c>targetQuoteConvention</c>.</summary>
[Newtonsoft.Json.JsonProperty("targetQuoteConvention", Required = Newtonsoft.Json.Required.Always)]
[System.ComponentModel.DataAnnotations.Required(AllowEmptyStrings = true)]
public string TargetQuoteConvention { get; set; } = default!;

}

[System.CodeDom.Compiler.GeneratedCode("NJsonSchema", "14.1.0.0 (NJsonSchema v11.0.2.0 (Newtonsoft.Json v13.0.0.0))")]
public partial class TranslationBuildConfig
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ service TranslationPlatformApi {
rpc IncrementEngineCorpusSize(IncrementEngineCorpusSizeRequest) returns (google.protobuf.Empty);
rpc InsertPretranslations(stream InsertPretranslationsRequest) returns (google.protobuf.Empty);
rpc UpdateBuildExecutionData(UpdateBuildExecutionDataRequest) returns (google.protobuf.Empty);
rpc UpdateParallelCorpusAnalysis(UpdateParallelCorpusAnalysisRequest) returns (google.protobuf.Empty);
}

message UpdateBuildStatusRequest {
Expand Down Expand Up @@ -73,6 +74,18 @@ message UpdateBuildExecutionDataRequest {
map<string, string> execution_data = 3;
}

// Request for the UpdateParallelCorpusAnalysis RPC: identifies an engine and one of its builds, and carries
// zero or more per-corpus analysis results.
message UpdateParallelCorpusAnalysisRequest {
// Identifier of the engine.
string engine_id = 1;
// Identifier of the build.
string build_id = 2;
// Analysis results, one entry per parallel corpus.
repeated ParallelCorpusAnalysisResult parallel_corpus_analysis = 3;
}

// Analysis result for a single parallel corpus.
message ParallelCorpusAnalysisResult {
// Identifier of the parallel corpus the result refers to.
string parallel_corpus_id = 1;
// Quote convention reported for the source side.
string source_quote_convention = 2;
// Quote convention reported for the target side.
string target_quote_convention = 3;
}

message Phase {
PhaseStage stage = 1;
optional int32 step = 2;
Expand Down
8 changes: 8 additions & 0 deletions src/Serval/src/Serval.Shared/Models/ParallelCorpusAnalysis.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Serval.Shared.Models;

/// <summary>
/// Analysis entry for one parallel corpus: the corpus reference plus the quote conventions reported for its
/// source and target sides.
/// </summary>
public record ParallelCorpusAnalysis
{
/// <summary>Identifier of the parallel corpus this entry refers to.</summary>
public required string ParallelCorpusRef { get; init; }
/// <summary>Quote convention reported for the source side.</summary>
public required string SourceQuoteConvention { get; init; }
/// <summary>Quote convention reported for the target side.</summary>
public required string TargetQuoteConvention { get; init; }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
namespace Serval.Translation.Contracts;

/// <summary>
/// Contract (DTO) form of a parallel corpus analysis entry: the corpus reference plus the quote conventions
/// reported for its source and target sides.
/// </summary>
public record ParallelCorpusAnalysisDto
{
/// <summary>Identifier of the parallel corpus this entry refers to.</summary>
public required string ParallelCorpusRef { get; init; }
/// <summary>Quote convention reported for the source side.</summary>
public required string SourceQuoteConvention { get; init; }
/// <summary>Quote convention reported for the target side.</summary>
public required string TargetQuoteConvention { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,5 @@ public record TranslationBuildDto
public string? DeploymentVersion { get; init; }
public IReadOnlyDictionary<string, string>? ExecutionData { get; init; }
public IReadOnlyList<PhaseDto>? Phases { get; init; }
public IReadOnlyList<ParallelCorpusAnalysisDto>? Analysis { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -1662,7 +1662,8 @@ private TranslationBuildDto Map(Build source)
Options = source.Options,
DeploymentVersion = source.DeploymentVersion,
ExecutionData = source.ExecutionData,
Phases = source.Phases?.Select(Map).ToList()
Phases = source.Phases?.Select(Map).ToList(),
Analysis = source.Analysis?.Select(Map).ToList(),
};
}

Expand Down Expand Up @@ -1890,6 +1891,16 @@ private static PhaseDto Map(BuildPhase source)
StepCount = source.StepCount
};
}

/// <summary>
/// Converts a <see cref="ParallelCorpusAnalysis"/> model into its DTO, copying the corpus reference and both
/// quote convention values unchanged.
/// </summary>
/// <param name="source">Model entry to convert.</param>
/// <returns>A new DTO carrying the same three values.</returns>
private static ParallelCorpusAnalysisDto Map(ParallelCorpusAnalysis source) =>
    new()
    {
        ParallelCorpusRef = source.ParallelCorpusRef,
        SourceQuoteConvention = source.SourceQuoteConvention,
        TargetQuoteConvention = source.TargetQuoteConvention,
    };
}

#pragma warning restore CS0612 // Type or member is obsolete
1 change: 1 addition & 0 deletions src/Serval/src/Serval.Translation/Models/Build.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,5 @@ public record Build : IInitializableEntity
public bool? IsInitialized { get; set; }
public DateTime? DateCreated { get; set; }
public IReadOnlyList<BuildPhase>? Phases { get; init; }
public IReadOnlyCollection<ParallelCorpusAnalysis>? Analysis { get; init; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,36 @@ await _builds.UpdateAsync(
return new Empty();
}

/// <summary>
/// Stores the parallel corpus analysis reported for a build.
/// </summary>
/// <remarks>
/// Entries whose parallel corpus id does not belong to the engine are discarded. Nothing is written when the
/// engine cannot be found or when no entry survives the filter; in both cases the call still returns normally.
/// </remarks>
/// <param name="request">Engine id, build id and the reported analysis entries.</param>
/// <param name="context">gRPC call context; its cancellation token is passed to the data access calls.</param>
/// <returns>An empty response.</returns>
public override async Task<Empty> UpdateParallelCorpusAnalysis(
    UpdateParallelCorpusAnalysisRequest request,
    ServerCallContext context
)
{
    Engine? engine = await _engines.GetAsync(request.EngineId, context.CancellationToken);
    if (engine == null)
        return Empty;

    // Ensure only parallel corpus IDs are present. The id set is built once up front instead of
    // re-projecting and linearly scanning the engine's corpora for every entry in the request.
    var parallelCorpusIds = engine.ParallelCorpora.Select(pc => pc.Id).ToHashSet();
    var analysis = request
        .ParallelCorpusAnalysis.Where(p => parallelCorpusIds.Contains(p.ParallelCorpusId))
        .Select(a => new ParallelCorpusAnalysis
        {
            ParallelCorpusRef = a.ParallelCorpusId,
            SourceQuoteConvention = a.SourceQuoteConvention,
            TargetQuoteConvention = a.TargetQuoteConvention,
        })
        .ToList();

    if (analysis.Count > 0)
    {
        // The filter matches on both build id and engine id, so a build of another engine is never touched.
        await _builds.UpdateAsync(
            b => b.Id == request.BuildId && b.EngineRef == request.EngineId,
            u => u.Set(b => b.Analysis, analysis),
            cancellationToken: context.CancellationToken
        );
    }

    return Empty;
}

public override async Task<Empty> IncrementEngineCorpusSize(
IncrementEngineCorpusSizeRequest request,
ServerCallContext context
Expand Down
Loading
Loading