Skip to content

Commit

Permalink
Sanitizing node-crossed candidates. (#20)
Browse files Browse the repository at this point in the history
  • Loading branch information
ShikiSuen authored Mar 16, 2023
1 parent bf5c71c commit 4b9b3cb
Show file tree
Hide file tree
Showing 7 changed files with 62 additions and 13 deletions.
1 change: 1 addition & 0 deletions Megrez.Tests/LMDataForTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ nian2zhong1 年中 -11.373044
gao1ke1ji4 高科技 -9.842421
zhe4yang4 這樣 -6.000000 // Non-LibTaBE
ni3zhe4 你這 -9.000000 // Non-LibTaBE
ke1ke1 顆顆 -8.000000 // Non-LibTaBE
jiao4 教 -3.676169
jiao4 較 -3.24869962
jiao4yu4 教育 -3.32220565
Expand Down
2 changes: 1 addition & 1 deletion Megrez.Tests/Megrez.Tests.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
<TargetFramework>net6.0</TargetFramework>
<Nullable>enable</Nullable>
<IsPackable>false</IsPackable>
<ReleaseVersion>2.6.0</ReleaseVersion>
<ReleaseVersion>2.6.2</ReleaseVersion>
</PropertyGroup>

<ItemGroup>
Expand Down
38 changes: 38 additions & 0 deletions Megrez.Tests/MegrezTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -538,4 +538,42 @@ public void Test21_Compositor_hardCopy() {
List<Node> resultB = compositorB.Walk().WalkedNodes;
Assert.True(resultA.SequenceEqual(resultB));
}

/// <summary>
/// Inserts the readings "ke1" and "ke1" into a compositor and checks the longest
/// key-array length among the candidates fetched with the BeginAt filter at
/// locations 1 and 0 and with the EndAt filter at locations 1 and 2. The same
/// four values are then checked again after "jin1" has been appended at the end
/// of the compositor; they are expected to stay unchanged.
/// </summary>
[Test]
public void Test22_Compositor_SanitizingNodeCrossing() {
  const string expectedSpans = "1 1 2 2";
  SimpleLM theLM = new(input: StrSampleData);
  Compositor compositor = new(langModel: theLM, separator: "");

  // Longest key array among the candidates returned for one location/filter pair.
  // Max() throws on an empty candidate list, which fails the test.
  int LongestKeyCount(int position, Compositor.CandidateFetchFilter mode) {
    List<KeyValuePaired> fetched = compositor.FetchCandidatesAt(givenLocation: position, filter: mode);
    return fetched.Max(candidate => candidate.KeyArray.Count);
  }

  // Runs the four probes in a fixed order and renders them as one comparable string.
  string ProbeSpans() {
    int beginAtOne = LongestKeyCount(1, Compositor.CandidateFetchFilter.BeginAt);
    int endAtOne = LongestKeyCount(1, Compositor.CandidateFetchFilter.EndAt);
    int beginAtZero = LongestKeyCount(0, Compositor.CandidateFetchFilter.BeginAt);
    int endAtTwo = LongestKeyCount(2, Compositor.CandidateFetchFilter.EndAt);
    return $"{beginAtOne} {endAtOne} {beginAtZero} {endAtTwo}";
  }

  string rawReadings = "ke1 ke1";
  string[] readings = rawReadings.Split(separator: " ");
  for (int i = 0; i < readings.Length; i++) {
    compositor.InsertKey(readings[i]);
  }
  Assert.AreEqual(expected: expectedSpans, actual: ProbeSpans());

  // Move the cursor to the end, append one more reading, and probe again.
  compositor.Cursor = compositor.Length;
  compositor.InsertKey("jin1");
  Assert.AreEqual(expected: expectedSpans, actual: ProbeSpans());
}
}
2 changes: 1 addition & 1 deletion Megrez.sln
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,6 @@ Global
$0.DotNetNamingPolicy = $4
$4.DirectoryNamespaceAssociation = PrefixedHierarchical
$0.StandardHeader = $5
version = 2.6.0
version = 2.6.2
EndGlobalSection
EndGlobal
8 changes: 4 additions & 4 deletions Megrez/Megrez.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
<PropertyGroup>
<TargetFramework>net6.0</TargetFramework>
<Nullable>enable</Nullable>
<ReleaseVersion>2.6.0</ReleaseVersion>
<ReleaseVersion>2.6.2</ReleaseVersion>
<PackageId>vChewing.Megrez</PackageId>
<Authors>Shiki Suen</Authors>
<Company>Atelier Inmu</Company>
<Copyright>(c) 2022 and onwards The vChewing Project for Megrez-specific changes; (c) 2022 and onwards Lukhnos Liu for upstream contents.</Copyright>
<RepositoryUrl>https://github.com/ShikiSuen/MegrezNT</RepositoryUrl>
<NeutralLanguage>zh-TW</NeutralLanguage>
<AssemblyVersion>2.6.0</AssemblyVersion>
<FileVersion>2.6.0</FileVersion>
<Version>2.6.0</Version>
<AssemblyVersion>2.6.2</AssemblyVersion>
<FileVersion>2.6.2</FileVersion>
<Version>2.6.2</Version>
<Product>Megrez</Product>
<GeneratePackageOnBuild>True</GeneratePackageOnBuild>
<PackageReadmeFile>README.md</PackageReadmeFile>
Expand Down
19 changes: 13 additions & 6 deletions Megrez/src/3_KeyValuePaired.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,32 +165,39 @@ public enum CandidateFetchFilter {
/// <summary>
/// 返回在當前位置的所有候選字詞(以詞音配對的形式)。<para/>如果組字器內有幅位、且游標
/// 位於組字器的(文字輸入順序的)最前方(也就是游標位置的數值是最大合規數值)的
/// 話,那麼這裡會用到 location - 1、以免去在呼叫該函式後再處理的麻煩。
/// 話,那麼這裡會對 location 的位置自動減去 1、以免去在呼叫該函式後再處理的麻煩。
/// </summary>
/// <param name="location">游標位置。</param>
/// <param name="givenLocation">游標位置,必須是顯示的游標位置、不得做任何事先糾偏處理。</param>
/// <param name="filter">候選字音配對陣列。</param>
/// <returns></returns>
public List<KeyValuePaired> FetchCandidatesAt(int location, CandidateFetchFilter filter = CandidateFetchFilter.All) {
public List<KeyValuePaired> FetchCandidatesAt(int? givenLocation = null,
CandidateFetchFilter filter = CandidateFetchFilter.All) {
List<KeyValuePaired> result = new();
if (Keys.IsEmpty()) return result;
int location = Math.Max(0, Math.Min(givenLocation ?? Cursor, Keys.Count));
if (filter == CandidateFetchFilter.EndAt) {
if (location == Keys.Count) filter = CandidateFetchFilter.All;
location -= 1;
}
location = Math.Max(0, Math.Min(location, Keys.Count - 1));

// 按照讀音的長度(幅位長度)來給節點排序。
List<NodeAnchor> anchors =
FetchOverlappingNodesAt(location).StableSorted((x, y) => x.SpanLength.CompareTo(y.SpanLength));
string keyAtCursor = Keys[location];
foreach (Node theNode in anchors.Select(x => x.Node).Where(x => !x.KeyArray.IsEmpty())) {
foreach (NodeAnchor theAnchor in anchors) {
Node theNode = theAnchor.Node;
foreach (Unigram gram in theNode.Unigrams) {
switch (filter) {
case CandidateFetchFilter.All:
// 得加上這道篩選,所以會出現很多無效結果。
if (!theNode.KeyArray.Contains(keyAtCursor)) continue;
break;
case CandidateFetchFilter.BeginAt:
if (theNode.KeyArray.First() != keyAtCursor) continue;
if (theAnchor.SpanIndex != location) continue;
break;
case CandidateFetchFilter.EndAt:
if (theNode.KeyArray.Last() != keyAtCursor) continue;
if (theNode.SpanLength >= 2 && theAnchor.SpanIndex + theAnchor.SpanLength - 1 != location) continue;
break;
}
result.Add(new(theNode.KeyArray, gram.Value));
Expand Down
5 changes: 4 additions & 1 deletion Megrez/src/4_SpanUnit.cs
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,13 @@ public bool DropNodesOfOrBeyond(int length) {
/// <returns>一個包含所有與該位置重疊的節點的陣列。</returns>
internal List<NodeAnchor> FetchOverlappingNodesAt(int givenLocation) {
List<NodeAnchor> results = new();
if (Spans.IsEmpty() || givenLocation >= Spans.Count) return results;
givenLocation = Math.Max(0, Math.Min(givenLocation, Keys.Count - 1));
if (Spans.IsEmpty()) return results;

// 先獲取該位置的所有單字節點。
foreach (int spanLength in new BRange(1, Spans[givenLocation].MaxLength + 1)) {
if (Spans[givenLocation].NodeOf(spanLength) is not {} node) continue;
if (string.IsNullOrEmpty(node.KeyArray.Joined())) continue;
results.Add(new(node, givenLocation));
}

Expand All @@ -127,6 +129,7 @@ internal List<NodeAnchor> FetchOverlappingNodesAt(int givenLocation) {
if (alpha > bravo) continue;
foreach (int theLength in new BRange(alpha, bravo + 1)) {
if (Spans[theLocation].NodeOf(theLength) is not {} node) continue;
if (string.IsNullOrEmpty(node.KeyArray.Joined())) continue;
results.Add(new(node, theLocation));
}
}
Expand Down

0 comments on commit 4b9b3cb

Please sign in to comment.