-
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
💾 Feat: LCS algorithm, now you can get the longest sub-sequences
- Loading branch information
1 parent
9a7fa72
commit 3c5224b
Showing
10 changed files
with
256 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,6 @@ | ||
## Platform ignores | ||
.DS_Store | ||
|
||
## Ignore for Rider files | ||
.idea/ | ||
|
||
|
18 changes: 18 additions & 0 deletions
18
Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
<!-- Test project for Common.Algorithm.Core: targets net8.0 and is flagged as a non-packable test project. -->
<Project Sdk="Microsoft.NET.Sdk"> | ||
<PropertyGroup> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
<IsPackable>false</IsPackable> | ||
<IsTestProject>true</IsTestProject> | ||
</PropertyGroup> | ||
<!-- Test tooling: the .NET test SDK, the MSTest adapter/framework pair, and the coverlet collector. -->
<ItemGroup> | ||
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.0" /> | ||
<PackageReference Include="MSTest.TestAdapter" Version="3.0.4" /> | ||
<PackageReference Include="MSTest.TestFramework" Version="3.0.4" /> | ||
<PackageReference Include="coverlet.collector" Version="6.0.0" /> | ||
</ItemGroup> | ||
<!-- The library project whose code these tests reference. -->
<ItemGroup> | ||
<ProjectReference Include="..\Common.Algorithm.Core\Common.Algorithm.Core.csproj" /> | ||
</ItemGroup> | ||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
global using Microsoft.VisualStudio.TestTools.UnitTesting; |
45 changes: 45 additions & 0 deletions
45
Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
using System.Diagnostics; | ||
using System.Text; | ||
using Common.Algorithm.Core.Text.Distance.Calculators; | ||
|
||
namespace Common.Algorithm.Core.Test.Text.Distance.Calculators; | ||
|
||
/// <summary>
/// Unit tests for the <see cref="LCS"/> distance calculator.
/// </summary>
[TestClass]
public class Test_LCS
{
    /// <summary>
    /// Runs the calculator on the pair ("ABCBDAB", "BDCABA") and verifies both the
    /// reported distance (4) and the reported sub-sequences ("BCBA" and "BDAB").
    /// </summary>
    [TestMethod]
    public void TestGetDistanceInfo()
    {
        var info = new LCS().GetDistanceInfo(inputs: ["ABCBDAB", "BDCABA"]);

        Assert.AreEqual(4, info.Distance);
        Assert.IsNotNull(info.LcsInfo);
        Assert.IsNotNull(info.LcsInfo.LcsMatchedSubSequences);

        // Dump every group of matched sub-sequences to the debug output for inspection.
        var dump = new StringBuilder();
        foreach (var (length, sequences) in info.LcsInfo.LcsMatchedSubSequences)
        {
            dump.AppendLine($"+ {length}: ");
            foreach (var sequence in sequences)
            {
                dump.AppendLine($" - {sequence}");
            }
        }
        Debug.WriteLine(dump.ToString());

        // Compare order-insensitive string renderings of the expected and actual results.
        var expected = new Dictionary<int, List<string>>
        {
            { 4, new List<string> { "BCBA", "BDAB" } },
        }.Assertable();
        var actual = info.LcsInfo.LcsMatchedSubSequences.Assertable();
        Debug.WriteLine(expected);
        Debug.WriteLine(actual);
        Assert.AreEqual(expected, actual);
    }
}
|
||
/// <summary>
/// Helpers that make LCS results easy to compare in assertions.
/// </summary>
public static class LcsTestUtils
{
    /// <summary>
    /// Renders the dictionary as a deterministic string: one line per key in ascending
    /// key order, formatted as <c>key: v1,v2,...;</c> with the values sorted, and the
    /// lines joined by <c>'\n'</c>.
    /// </summary>
    /// <param name="dict">Sub-sequences grouped by an integer key.</param>
    /// <returns>The rendered text; an empty string for an empty dictionary.</returns>
    public static string Assertable(this Dictionary<int, List<string>> dict)
    {
        var lines = new List<string>(dict.Count);

        foreach (var (key, values) in dict.OrderBy(entry => entry.Key))
        {
            var sortedValues = values.OrderBy(value => value);
            lines.Add($"{key}: {string.Join(',', sortedValues)};");
        }

        return string.Join('\n', lines);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
<!-- SDK-style project targeting net8.0 with implicit usings and nullable reference types enabled. -->
<Project Sdk="Microsoft.NET.Sdk"> | ||
|
||
<PropertyGroup> | ||
<TargetFramework>net8.0</TargetFramework> | ||
<ImplicitUsings>enable</ImplicitUsings> | ||
<Nullable>enable</Nullable> | ||
</PropertyGroup> | ||
|
||
</Project> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
namespace Common.Algorithm.Core.Text.Distance; | ||
|
||
/// <summary>
/// Options bag passed to distance calculators.
/// </summary>
public class CalculationOptions
{
    /// <summary>
    /// Options specific to the LCS calculation; <c>null</c> when none were supplied.
    /// </summary>
    public LcsOptions? LcsOptions { get; set; }
}
|
||
/// <summary>
/// Options for the LCS (longest common sub-sequence) calculation.
/// </summary>
public class LcsOptions
{
    /// <summary>
    /// Whether only the longest sub-sequences should be contained in the result.
    /// Defaults to <c>true</c>.
    /// </summary>
    public bool ContainsOnlyLongestSubSequences { get; set; } = true;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
namespace Common.Algorithm.Core.Text.Distance.Calculators; | ||
|
||
/// <summary>
/// Longest-common-sub-sequence (LCS) calculator for a pair of strings.
/// </summary>
/// <remarks>
/// The reported <see cref="DistanceInfo.Distance"/> is the length of the longest common
/// sub-sequence. The calculator keeps no per-call state on the instance.
/// </remarks>
public class LCS : IDistanceCalculator
{
    /// <summary>Where the value of a dynamic-programming cell came from.</summary>
    private enum Direction
    {
        None = 0,
        Diagonal = 1, // characters matched: extend the sub-sequence
        Up = 2,       // value inherited from the row above
        Left = 3,     // value inherited from the column to the left
    }

    /// <summary>
    /// Computes the LCS length of exactly two strings together with the longest
    /// common sub-sequences that were found.
    /// </summary>
    /// <param name="inputs">Exactly two non-null strings to compare.</param>
    /// <param name="options">
    /// Calculation options. Currently not consulted: only the longest sub-sequences
    /// are ever reported.
    /// </param>
    /// <returns>
    /// A <see cref="DistanceInfo"/> whose <c>Distance</c> is the LCS length and whose
    /// <c>LcsInfo.LcsMatchedSubSequences</c> holds a single entry keyed by that length.
    /// Two empty strings yield length 0 with a single empty sub-sequence.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputs"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="inputs"/> does not contain exactly two items.
    /// </exception>
    /// <exception cref="ArgumentException">One of the two inputs is null.</exception>
    public DistanceInfo GetDistanceInfo(List<string> inputs, CalculationOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(inputs);

        if (inputs.Count != 2)
        {
            throw new ArgumentOutOfRangeException(
                nameof(inputs),
                "There should be only two inputs"
            );
        }

        if (inputs[0] is null || inputs[1] is null)
        {
            throw new ArgumentException("Inputs must not contain null strings", nameof(inputs));
        }

        var (length, sequences) = FindLongestSubSequences(inputs[0], inputs[1]);

        return new DistanceInfo
        {
            OriginalInputs = inputs,
            Distance = length,
            LcsInfo = new LcsInfo
            {
                LcsMatchedSubSequences = new Dictionary<int, List<string>>
                {
                    { length, sequences },
                },
            },
        };
    }

    /// <summary>
    /// Fills the LCS dynamic-programming table and collects the distinct longest
    /// sub-sequences reachable by tracing back from the last row at every column.
    /// </summary>
    /// <remarks>
    /// This does not enumerate every LCS: only one path is followed per starting
    /// column. For ("ABCBDAB", "BDCABA") it yields "BDAB" and "BCBA" but not "BCAB".
    /// </remarks>
    private static (int Length, List<string> Sequences) FindLongestSubSequences(string a, string b)
    {
        // The shorter input indexes the rows, the longer one the columns
        // (on equal lengths, a is the row text).
        var (rowText, columnText) = a.Length > b.Length ? (b, a) : (a, b);
        var height = rowText.Length;
        var width = columnText.Length;

        var lengths = new int[height + 1, width + 1];
        var directions = new Direction[height + 1, width + 1];

        for (var row = 1; row <= height; ++row)
        {
            for (var column = 1; column <= width; ++column)
            {
                if (rowText[row - 1] == columnText[column - 1])
                {
                    lengths[row, column] = lengths[row - 1, column - 1] + 1;
                    directions[row, column] = Direction.Diagonal;
                }
                else
                {
                    var above = lengths[row - 1, column];
                    var left = lengths[row, column - 1];
                    lengths[row, column] = Math.Max(left, above);
                    // Ties go upwards.
                    directions[row, column] = above >= left ? Direction.Up : Direction.Left;
                }
            }
        }

        // One candidate per starting column, from the last column to the first, so the
        // first candidate (full table) always has the maximal length.
        var candidates = new List<string>(Math.Max(width, 1));
        for (var column = width; column >= 1; --column)
        {
            candidates.Add(TraceBack(directions, rowText, height, column));
        }

        // Both inputs empty: the only common sub-sequence is the empty string.
        if (candidates.Count == 0)
        {
            candidates.Add(string.Empty);
        }

        var longest = candidates[0].Length;

        // Keep the distinct candidates of maximal length, in discovery order.
        var seen = new HashSet<string>();
        var sequences = new List<string>();
        foreach (var candidate in candidates)
        {
            if (candidate.Length == longest && seen.Add(candidate))
            {
                sequences.Add(candidate);
            }
        }

        return (longest, sequences);
    }

    /// <summary>
    /// Follows the recorded directions from cell (<paramref name="row"/>,
    /// <paramref name="column"/>) back to the table border and returns the matched
    /// characters in their original left-to-right order.
    /// </summary>
    private static string TraceBack(Direction[,] directions, string rowText, int row, int column)
    {
        // Every match consumes one row and one column, so this bounds the result length.
        var buffer = new char[Math.Min(row, column)];
        var start = buffer.Length;

        // Iterative walk: avoids recursion as deep as row + column on long inputs.
        while (row > 0 && column > 0)
        {
            switch (directions[row, column])
            {
                case Direction.Diagonal:
                    // Walking backwards, so fill the buffer from its end.
                    buffer[--start] = rowText[row - 1];
                    --row;
                    --column;
                    break;
                case Direction.Up:
                    --row;
                    break;
                case Direction.Left:
                    --column;
                    break;
                default:
                    // Unfilled cell: nothing further to follow.
                    row = 0;
                    break;
            }
        }

        return new string(buffer, start, buffer.Length - start);
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
namespace Common.Algorithm.Core.Text.Distance; | ||
|
||
/// <summary>
/// Result of a distance calculation.
/// </summary>
public class DistanceInfo
{
    /// <summary>
    /// The inputs the calculation was run on, if recorded.
    /// </summary>
    public List<string>? OriginalInputs { get; set; }

    /// <summary>
    /// The calculated distance value.
    /// </summary>
    public double Distance { get; set; }

    /// <summary>
    /// LCS-specific details, if any were produced.
    /// </summary>
    public LcsInfo? LcsInfo { get; set; }
}
|
||
/// <summary>
/// Details produced by the LCS (longest common sub-sequence) algorithm.
/// </summary>
public class LcsInfo
{
    /// <summary>
    /// The matched sub-sequences from the LCS algorithm, grouped by an integer key
    /// (the sub-sequence length in the example below).
    /// </summary>
    /// <example>
    /// When calculating the distance of <c>(abbabbc, abbac)</c>, the result will be:
    /// <code>
    /// 5, ['abbac']
    /// 4, ['abba', 'abbc', 'bbac']
    /// ...
    /// </code>
    /// Only the longest sub-sequences are added when that is what was requested.
    /// </example>
    public Dictionary<int, List<string>>? LcsMatchedSubSequences { get; set; }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
namespace Common.Algorithm.Core.Text.Distance; | ||
|
||
/// <summary>
/// Contract for calculators that produce a <see cref="DistanceInfo"/> from a list of
/// input strings.
/// </summary>
public interface IDistanceCalculator
{
    /// <summary>
    /// Calculates distance information for the given inputs.
    /// </summary>
    /// <param name="inputs">The strings to compare.</param>
    /// <param name="options">
    /// Optional calculation options. May be omitted or <c>null</c>, matching the
    /// <c>LCS</c> implementation, which declares the same optional parameter.
    /// </param>
    /// <returns>The calculated distance information.</returns>
    DistanceInfo GetDistanceInfo(
        List<string> inputs,
        CalculationOptions? options = null
    );
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters