Skip to content

Commit

Permalink
💾 Feat: LCS algorithm, now you can get the longest sub-sequences
Browse files Browse the repository at this point in the history
  • Loading branch information
Dynesshely committed Sep 29, 2024
1 parent 9a7fa72 commit 3c5224b
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
## Platform ignores
.DS_Store

## Ignore for Rider files
.idea/

Expand Down
18 changes: 18 additions & 0 deletions Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Unit-test project for Common.Algorithm.Core. -->
<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Test assemblies are not meant to be packed as NuGet packages. -->
<IsPackable>false</IsPackable>
<IsTestProject>true</IsTestProject>
</PropertyGroup>
<!-- Test SDK, MSTest adapter/framework, and the coverlet coverage collector. -->
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.6.0" />
<PackageReference Include="MSTest.TestAdapter" Version="3.0.4" />
<PackageReference Include="MSTest.TestFramework" Version="3.0.4" />
<PackageReference Include="coverlet.collector" Version="6.0.0" />
</ItemGroup>
<!-- The library under test. -->
<ItemGroup>
<ProjectReference Include="..\Common.Algorithm.Core\Common.Algorithm.Core.csproj" />
</ItemGroup>
</Project>
1 change: 1 addition & 0 deletions Common.Algorithm.Core.Test/GlobalUsings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
global using Microsoft.VisualStudio.TestTools.UnitTesting;
45 changes: 45 additions & 0 deletions Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
using System.Diagnostics;
using System.Text;
using Common.Algorithm.Core.Text.Distance.Calculators;

namespace Common.Algorithm.Core.Test.Text.Distance.Calculators;

/// <summary>
/// Unit tests for the <see cref="LCS"/> calculator.
/// </summary>
[TestClass]
public class Test_LCS
{
    /// <summary>
    /// Runs the calculator on a classic LCS example pair and checks both the reported
    /// length and the set of longest sub-sequences that were found.
    /// </summary>
    [TestMethod]
    public void TestGetDistanceInfo()
    {
        var info = new LCS().GetDistanceInfo(inputs: ["ABCBDAB", "BDCABA"]);

        Assert.AreEqual(4, info.Distance);
        Assert.IsNotNull(info.LcsInfo);
        Assert.IsNotNull(info.LcsInfo.LcsMatchedSubSequences);

        var matched = info.LcsInfo.LcsMatchedSubSequences;

        // Dump the raw result to the debug output to ease diagnosing a failure.
        var dump = new StringBuilder();
        foreach (var (length, sequences) in matched)
        {
            dump.AppendLine($"+ {length}: ");
            foreach (var item in sequences)
                dump.AppendLine($" - {item}");
        }
        Debug.WriteLine(dump.ToString());

        // Compare order-insensitively by reducing both sides to a canonical string.
        var expectedMap = new Dictionary<int, List<string>> { [4] = ["BCBA", "BDAB"] };
        var expected = expectedMap.Assertable();
        var actual = matched.Assertable();

        Debug.WriteLine(expected);
        Debug.WriteLine(actual);

        Assert.AreEqual(expected, actual);
    }
}

/// <summary>
/// Helpers that make LCS results easy to compare in assertions.
/// </summary>
public static class LcsTestUtils
{
    /// <summary>
    /// Renders a length-to-sequences map as a canonical string: entries are ordered by
    /// key, the sequences of each entry are sorted, and every entry is written as
    /// <c>key: s1,s2;</c> with entries separated by a line feed.
    /// </summary>
    /// <param name="dict">The map to render.</param>
    /// <returns>The canonical text; an empty string for an empty map.</returns>
    public static string Assertable(this Dictionary<int, List<string>> dict)
    {
        var lines = new List<string>();

        foreach (var entry in dict.OrderBy(e => e.Key))
        {
            var sortedSequences = entry.Value.OrderBy(text => text);
            var joined = string.Join(',', sortedSequences);
            lines.Add($"{entry.Key}: {joined};");
        }

        return string.Join('\n', lines);
    }
}
9 changes: 9 additions & 0 deletions Common.Algorithm.Core/Common.Algorithm.Core.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Core algorithm library; referenced by the Common.Algorithm.Core.Test project. -->

<PropertyGroup>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

</Project>
11 changes: 11 additions & 0 deletions Common.Algorithm.Core/Text/Distance/CalculationOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
namespace Common.Algorithm.Core.Text.Distance;

/// <summary>
/// Options bag passed to a distance calculator; groups the per-algorithm option sets.
/// </summary>
public class CalculationOptions
{
/// <summary>
/// Options specific to the LCS algorithm, or <c>null</c> when none were supplied.
/// </summary>
public LcsOptions? LcsOptions { get; set; }
}

/// <summary>
/// Options for the LCS (longest common subsequence) algorithm.
/// </summary>
public class LcsOptions
{
/// <summary>
/// Defaults to <c>true</c>. Presumably requests that only the longest matched
/// sub-sequences be reported.
/// NOTE(review): the LCS calculator in this commit does not appear to read this
/// flag - confirm the intended behavior.
/// </summary>
public bool ContainsOnlyLongestSubSequences { get; set; } = true;
}
96 changes: 96 additions & 0 deletions Common.Algorithm.Core/Text/Distance/Calculators/LCS.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
namespace Common.Algorithm.Core.Text.Distance.Calculators;

/// <summary>
/// Longest-common-subsequence (LCS) calculator for a pair of strings.
/// </summary>
/// <remarks>
/// No instance state is kept between calls; every calculation works on locals only.
/// </remarks>
public class LCS : IDistanceCalculator
{
    /// <summary>
    /// Computes the LCS of exactly two input strings.
    /// </summary>
    /// <param name="inputs">Exactly two strings to compare.</param>
    /// <param name="options">
    /// Optional calculation options. They are currently not consulted: only the
    /// longest sub-sequences are ever reported.
    /// </param>
    /// <returns>
    /// A <see cref="DistanceInfo"/> whose <c>Distance</c> is the LCS length and whose
    /// <c>LcsInfo</c> maps that length to the distinct longest sub-sequences found.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="inputs"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="inputs"/> does not contain exactly two items.
    /// </exception>
    public DistanceInfo GetDistanceInfo(List<string> inputs, CalculationOptions? options = null)
    {
        ArgumentNullException.ThrowIfNull(inputs);

        if (inputs.Count != 2)
            throw new ArgumentOutOfRangeException(
                nameof(inputs),
                "There should be only two inputs"
            );

        var result = GetLcsInfo(inputs[0], inputs[1]);

        return new DistanceInfo()
        {
            OriginalInputs = inputs,
            // The result map holds a single entry keyed by the LCS length.
            Distance = result.LcsMatchedSubSequences!.Keys.First(),
            LcsInfo = result,
        };
    }

    /// <summary>
    /// Builds the LCS dynamic-programming table for the two strings and collects the
    /// distinct longest sub-sequences reachable by one traceback per end column.
    /// </summary>
    /// <remarks>
    /// This does not enumerate every possible LCS: each traceback follows a single
    /// deterministic path (ties prefer moving up a row).
    /// </remarks>
    private static LcsInfo GetLcsInfo(string a, string b)
    {
        const int Diagonal = 1; // characters matched, step to (row - 1, column - 1)
        const int Up = 2;       // best value comes from the row above
        const int Left = 3;     // best value comes from the column to the left

        // The shorter string indexes the rows, the longer one the columns.
        var shorter = a.Length > b.Length ? b : a;
        var longer = a.Length > b.Length ? a : b;
        var height = shorter.Length;
        var width = longer.Length;

        var lengths = new int[height + 1, width + 1];
        var directions = new int[height + 1, width + 1];

        for (var row = 1; row <= height; ++row)
            for (var column = 1; column <= width; ++column)
            {
                if (shorter[row - 1] == longer[column - 1])
                {
                    lengths[row, column] = lengths[row - 1, column - 1] + 1;
                    directions[row, column] = Diagonal;
                }
                else
                {
                    var fromUp = lengths[row - 1, column];
                    var fromLeft = lengths[row, column - 1];
                    lengths[row, column] = Math.Max(fromLeft, fromUp);
                    directions[row, column] = fromUp >= fromLeft ? Up : Left;
                }
            }

        var lcsLength = lengths[height, width];

        var sequences = new List<string>();
        var seen = new HashSet<string>();

        // Trace back from the bottom row of every column, starting at the last one.
        // Values in the bottom row never grow when moving left, so once a column
        // falls below the LCS length no further column can yield a longest match.
        for (var column = width; column >= 1; --column)
        {
            if (lengths[height, column] < lcsLength)
                break;

            var candidate = TraceBack(column);
            if (seen.Add(candidate))
                sequences.Add(candidate);
        }

        // Both inputs empty: there is no column to trace, the LCS is the empty string.
        if (sequences.Count == 0)
            sequences.Add(string.Empty);

        return new LcsInfo
        {
            LcsMatchedSubSequences = new Dictionary<int, List<string>>
            {
                { lcsLength, sequences },
            },
        };

        // Iteratively walks the direction table from (height, startColumn) towards the
        // origin, filling the result back to front; avoids deep recursion on long inputs.
        string TraceBack(int startColumn)
        {
            var buffer = new char[lengths[height, startColumn]];
            var next = buffer.Length;
            var row = height;
            var column = startColumn;

            while (row > 0 && column > 0)
            {
                switch (directions[row, column])
                {
                    case Diagonal:
                        buffer[--next] = shorter[row - 1];
                        --row;
                        --column;
                        break;
                    case Up:
                        --row;
                        break;
                    default:
                        --column;
                        break;
                }
            }

            return new string(buffer);
        }
    }
}
26 changes: 26 additions & 0 deletions Common.Algorithm.Core/Text/Distance/DistanceInfo.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
namespace Common.Algorithm.Core.Text.Distance;

/// <summary>
/// Result of a distance calculation.
/// </summary>
public class DistanceInfo
{
/// <summary>
/// The inputs the calculation was run on.
/// </summary>
public List<string>? OriginalInputs { get; set; }

/// <summary>
/// The numeric result. The LCS calculator stores the length of the longest
/// common sub-sequence here.
/// </summary>
public double Distance { get; set; }

/// <summary>
/// LCS-specific details; set by the LCS calculator, otherwise <c>null</c> by default.
/// </summary>
public LcsInfo? LcsInfo { get; set; }
}

/// <summary>
/// Details produced by an LCS (longest common subsequence) calculation.
/// </summary>
public class LcsInfo
{
/// <summary>
/// The matched sub-sequences found by the LCS algorithm, keyed by sub-sequence
/// length; each value lists the matched sub-sequences of that length.
/// </summary>
/// <example>
/// When calculating the distance of <c>(abbabbc, abbac)</c>, the result will be:
/// <code>
/// 5, ['abbac']
/// 4, ['abba', 'abbc', 'bbac']
/// ...
/// </code>
/// Only the longest sub-sequences are added when that is requested
/// (presumably via <c>LcsOptions.ContainsOnlyLongestSubSequences</c> - to be confirmed).
/// </example>
public Dictionary<int, List<string>>? LcsMatchedSubSequences { get; set; }
}
9 changes: 9 additions & 0 deletions Common.Algorithm.Core/Text/Distance/IDistanceCalculator.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
namespace Common.Algorithm.Core.Text.Distance;

/// <summary>
/// Contract for calculators that derive a <see cref="DistanceInfo"/> from a list of
/// input strings.
/// </summary>
public interface IDistanceCalculator
{
    /// <summary>
    /// Runs the calculation on the given inputs.
    /// </summary>
    /// <param name="inputs">The strings to compare.</param>
    /// <param name="options">
    /// Optional calculation options; may be omitted or <c>null</c>, matching the
    /// signature of the LCS implementation.
    /// </param>
    /// <returns>The calculation result.</returns>
    public DistanceInfo GetDistanceInfo(
        List<string> inputs,
        CalculationOptions? options = null
    );
}
38 changes: 38 additions & 0 deletions Common.Algorithm.sln
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,49 @@ Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 17
VisualStudioVersion = 17.1.32228.430
MinimumVisualStudioVersion = 10.0.40219.1
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core", "Common.Algorithm.Core\Common.Algorithm.Core.csproj", "{7515FDE9-F1E0-4F53-8956-1F451034B7E7}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core.Test", "Common.Algorithm.Core.Test\Common.Algorithm.Core.Test.csproj", "{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}"
EndProject
Global
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {25933558-031B-4AE0-ACDA-96635BF768C3}
EndGlobalSection
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
Debug|x64 = Debug|x64
Debug|x86 = Debug|x86
Release|Any CPU = Release|Any CPU
Release|x64 = Release|x64
Release|x86 = Release|x86
EndGlobalSection
GlobalSection(ProjectConfigurationPlatforms) = postSolution
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.Build.0 = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.ActiveCfg = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.Build.0 = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.ActiveCfg = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.Build.0 = Debug|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.ActiveCfg = Release|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.Build.0 = Release|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.ActiveCfg = Release|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.Build.0 = Release|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.ActiveCfg = Release|Any CPU
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.Build.0 = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.Build.0 = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.ActiveCfg = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.Build.0 = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.ActiveCfg = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.Build.0 = Debug|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.ActiveCfg = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.Build.0 = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.ActiveCfg = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.Build.0 = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.ActiveCfg = Release|Any CPU
{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.Build.0 = Release|Any CPU
EndGlobalSection
EndGlobal

0 comments on commit 3c5224b

Please sign in to comment.