From 3c5224b4f24a9702cbfd2b60c16bb6fba11e9a31 Mon Sep 17 00:00:00 2001 From: Dynesshely Date: Sun, 29 Sep 2024 08:54:25 +0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=92=BE=20Feat:=20LCS=20algorithm,=20now?= =?UTF-8?q?=20you=20can=20get=20the=20longest=20sub-sequences?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 3 + .../Common.Algorithm.Core.Test.csproj | 18 ++++ Common.Algorithm.Core.Test/GlobalUsings.cs | 1 + .../Text/Distance/Calculators/Test_LCS.cs | 45 +++++++++ .../Common.Algorithm.Core.csproj | 9 ++ .../Text/Distance/CalculationOptions.cs | 11 +++ .../Text/Distance/Calculators/LCS.cs | 96 +++++++++++++++++++ .../Text/Distance/DistanceInfo.cs | 26 +++++ .../Text/Distance/IDistanceCalculator.cs | 9 ++ Common.Algorithm.sln | 38 ++++++++ 10 files changed, 256 insertions(+) create mode 100644 Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj create mode 100644 Common.Algorithm.Core.Test/GlobalUsings.cs create mode 100644 Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs create mode 100644 Common.Algorithm.Core/Common.Algorithm.Core.csproj create mode 100644 Common.Algorithm.Core/Text/Distance/CalculationOptions.cs create mode 100644 Common.Algorithm.Core/Text/Distance/Calculators/LCS.cs create mode 100644 Common.Algorithm.Core/Text/Distance/DistanceInfo.cs create mode 100644 Common.Algorithm.Core/Text/Distance/IDistanceCalculator.cs diff --git a/.gitignore b/.gitignore index a59a25c..33cb9ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +## Platform ignores +.DS_Store + ## Ignore for Rider files .idea/ diff --git a/Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj b/Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj new file mode 100644 index 0000000..0a119e5 --- /dev/null +++ b/Common.Algorithm.Core.Test/Common.Algorithm.Core.Test.csproj @@ -0,0 +1,18 @@ + + + net8.0 + enable + enable + false + true + + + + + + + + + + + \ No newline at end of file 
diff --git a/Common.Algorithm.Core.Test/GlobalUsings.cs b/Common.Algorithm.Core.Test/GlobalUsings.cs
new file mode 100644
index 0000000..ab67c7e
--- /dev/null
+++ b/Common.Algorithm.Core.Test/GlobalUsings.cs
@@ -0,0 +1 @@
+global using Microsoft.VisualStudio.TestTools.UnitTesting;
\ No newline at end of file
diff --git a/Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs b/Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs
new file mode 100644
index 0000000..ac19e7e
--- /dev/null
+++ b/Common.Algorithm.Core.Test/Text/Distance/Calculators/Test_LCS.cs
@@ -0,0 +1,62 @@
+using System.Diagnostics;
+using System.Text;
+using Common.Algorithm.Core.Text.Distance.Calculators;
+
+namespace Common.Algorithm.Core.Test.Text.Distance.Calculators;
+
+/// <summary>
+/// Tests for the <see cref="LCS"/> distance calculator.
+/// </summary>
+[TestClass]
+public class Test_LCS
+{
+    /// <summary>
+    /// Checks the LCS length and the reported sub-sequences for "ABCBDAB" / "BDCABA".
+    /// </summary>
+    [TestMethod]
+    public void TestGetDistanceInfo()
+    {
+        var distanceInfo = new LCS().GetDistanceInfo(inputs: ["ABCBDAB", "BDCABA"]);
+        Assert.AreEqual(4, distanceInfo.Distance);
+        Assert.IsNotNull(distanceInfo.LcsInfo);
+        Assert.IsNotNull(distanceInfo.LcsInfo.LcsMatchedSubSequences);
+
+        // Dump the result to the debug output to ease diagnosing a failure.
+        var content = new StringBuilder();
+        foreach (var pair in distanceInfo.LcsInfo.LcsMatchedSubSequences)
+        {
+            content.AppendLine($"+ {pair.Key}: ");
+            foreach (var sequence in pair.Value)
+            {
+                content.AppendLine($" - {sequence}");
+            }
+        }
+        Debug.WriteLine(content.ToString());
+
+        // Compare order-independent string renderings of the expected and actual results.
+        var d1 = new Dictionary<int, List<string>>() { { 4, ["BCBA", "BDAB"] } }.Assertable();
+        var d2 = distanceInfo.LcsInfo.LcsMatchedSubSequences.Assertable();
+        Debug.WriteLine(d1);
+        Debug.WriteLine(d2);
+        Assert.AreEqual(d1, d2);
+    }
+}
+
+/// <summary>
+/// Helpers for asserting on LCS results.
+/// </summary>
+public static class LcsTestUtils
+{
+    /// <summary>
+    /// Renders the dictionary as one line per key (keys ascending), each listing its
+    /// values sorted, so that two dictionaries can be compared as plain strings.
+    /// </summary>
+    public static string Assertable(this Dictionary<int, List<string>> dict)
+    {
+        return string.Join(
+            '\n',
+            dict.OrderBy(p => p.Key)
+                .Select(p => $"{p.Key}: {string.Join(',', p.Value.OrderBy(s => s))};")
+        );
+    }
+}
diff --git a/Common.Algorithm.Core/Common.Algorithm.Core.csproj b/Common.Algorithm.Core/Common.Algorithm.Core.csproj new file mode 100644 index
0000000..fa71b7a --- /dev/null +++ b/Common.Algorithm.Core/Common.Algorithm.Core.csproj @@ -0,0 +1,9 @@ + + + + net8.0 + enable + enable + + +
diff --git a/Common.Algorithm.Core/Text/Distance/CalculationOptions.cs b/Common.Algorithm.Core/Text/Distance/CalculationOptions.cs
new file mode 100644
index 0000000..c46a094
--- /dev/null
+++ b/Common.Algorithm.Core/Text/Distance/CalculationOptions.cs
@@ -0,0 +1,23 @@
+namespace Common.Algorithm.Core.Text.Distance;
+
+/// <summary>
+/// Options passed to an <see cref="IDistanceCalculator"/>.
+/// </summary>
+public class CalculationOptions
+{
+    /// <summary>
+    /// Options specific to the LCS calculator; may be left <c>null</c>.
+    /// </summary>
+    public LcsOptions? LcsOptions { get; set; }
+}
+
+/// <summary>
+/// Options for the LCS calculator.
+/// </summary>
+public class LcsOptions
+{
+    /// <summary>
+    /// Whether only the longest sub-sequences should be reported. Defaults to <c>true</c>.
+    /// </summary>
+    public bool ContainsOnlyLongestSubSequences { get; set; } = true;
+}
diff --git a/Common.Algorithm.Core/Text/Distance/Calculators/LCS.cs b/Common.Algorithm.Core/Text/Distance/Calculators/LCS.cs
new file mode 100644
index 0000000..be774f9
--- /dev/null
+++ b/Common.Algorithm.Core/Text/Distance/Calculators/LCS.cs
@@ -0,0 +1,138 @@
+namespace Common.Algorithm.Core.Text.Distance.Calculators;
+
+/// <summary>
+/// Calculates the longest common sub-sequence (LCS) of two strings.
+/// </summary>
+public class LCS : IDistanceCalculator
+{
+    // Back-tracking directions stored in the direction matrix.
+    private const int Diagonal = 1;
+    private const int Up = 2;
+    private const int Left = 3;
+
+    /// <summary>
+    /// Calculates the LCS of exactly two input strings.
+    /// </summary>
+    /// <param name="inputs">The two strings to compare.</param>
+    /// <param name="options">Calculation options; not consulted by this calculator yet.</param>
+    /// <returns>
+    /// A <see cref="DistanceInfo"/> whose distance is the LCS length and whose
+    /// <see cref="LcsInfo"/> maps that length to the distinct sub-sequences found.
+    /// </returns>
+    /// <exception cref="ArgumentNullException">The list or one of its items is null.</exception>
+    /// <exception cref="ArgumentOutOfRangeException">The list does not hold exactly two items.</exception>
+    public DistanceInfo GetDistanceInfo(List<string> inputs, CalculationOptions? options = null)
+    {
+        ArgumentNullException.ThrowIfNull(inputs);
+
+        if (inputs.Count != 2)
+            throw new ArgumentOutOfRangeException(
+                nameof(inputs),
+                "There should be only two inputs"
+            );
+
+        ArgumentNullException.ThrowIfNull(inputs[0], nameof(inputs));
+        ArgumentNullException.ThrowIfNull(inputs[1], nameof(inputs));
+
+        var sequences = FindLongestSubSequences(inputs[0], inputs[1]);
+        var length = sequences[0].Length;
+
+        return new DistanceInfo()
+        {
+            OriginalInputs = inputs,
+            Distance = length,
+            LcsInfo = new LcsInfo()
+            {
+                LcsMatchedSubSequences = new Dictionary<int, List<string>>
+                {
+                    { length, sequences },
+                },
+            },
+        };
+    }
+
+    /// <summary>
+    /// Fills the LCS tables, then back-tracks from every cell of the last row and keeps
+    /// the distinct results that are as long as the LCS itself.
+    /// </summary>
+    /// <returns>A non-empty list; it holds one empty string when nothing matches.</returns>
+    private static List<string> FindLongestSubSequences(string a, string b)
+    {
+        // Rows follow the shorter string, columns the longer one.
+        var shorter = a.Length > b.Length ? b : a;
+        var longer = a.Length > b.Length ? a : b;
+        var height = shorter.Length;
+        var width = longer.Length;
+
+        var lengths = new int[height + 1, width + 1];
+        var directions = new int[height + 1, width + 1];
+
+        for (var i = 1; i <= height; ++i)
+        {
+            for (var j = 1; j <= width; ++j)
+            {
+                if (shorter[i - 1] == longer[j - 1])
+                {
+                    lengths[i, j] = lengths[i - 1, j - 1] + 1;
+                    directions[i, j] = Diagonal;
+                }
+                else if (lengths[i - 1, j] >= lengths[i, j - 1])
+                {
+                    lengths[i, j] = lengths[i - 1, j];
+                    directions[i, j] = Up;
+                }
+                else
+                {
+                    lengths[i, j] = lengths[i, j - 1];
+                    directions[i, j] = Left;
+                }
+            }
+        }
+
+        // Starting at the bottom-right cell always yields a sequence of full LCS length;
+        // this also covers two empty inputs, where the only result is the empty string.
+        var longest = Backtrack(directions, shorter, height, width);
+        var sequences = new List<string> { longest };
+        var seen = new HashSet<string> { longest };
+
+        // Starting further left may reveal other sequences of the same length.
+        for (var column = width - 1; column >= 1; --column)
+        {
+            var candidate = Backtrack(directions, shorter, height, column);
+            if (candidate.Length == longest.Length && seen.Add(candidate))
+                sequences.Add(candidate);
+        }
+
+        return sequences;
+    }
+
+    /// <summary>
+    /// Follows the recorded directions from the given cell back to the table border and
+    /// returns the matched characters in their original order.
+    /// </summary>
+    private static string Backtrack(int[,] directions, string shorter, int row, int column)
+    {
+        // Matches are met last-to-first; a stack hands them back in forward order.
+        var matched = new Stack<char>();
+
+        while (row > 0 && column > 0)
+        {
+            switch (directions[row, column])
+            {
+                case Diagonal:
+                    matched.Push(shorter[row - 1]);
+                    --row;
+                    --column;
+                    break;
+                case Up:
+                    --row;
+                    break;
+                default:
+                    --column;
+                    break;
+            }
+        }
+
+        return new string(matched.ToArray());
+    }
+}
diff --git a/Common.Algorithm.Core/Text/Distance/DistanceInfo.cs b/Common.Algorithm.Core/Text/Distance/DistanceInfo.cs
new file mode 100644
index 0000000..0313cc1
--- /dev/null
+++ b/Common.Algorithm.Core/Text/Distance/DistanceInfo.cs
@@ -0,0 +1,43 @@
+namespace Common.Algorithm.Core.Text.Distance;
+
+/// <summary>
+/// The result produced by an <see cref="IDistanceCalculator"/>.
+/// </summary>
+public class DistanceInfo
+{
+    /// <summary>
+    /// The inputs the calculation was run on.
+    /// </summary>
+    public List<string>? OriginalInputs { get; set; }
+
+    /// <summary>
+    /// The calculated distance; the LCS calculator stores the LCS length here.
+    /// </summary>
+    public double Distance { get; set; }
+
+    /// <summary>
+    /// Details filled in by the LCS calculator.
+    /// </summary>
+    public LcsInfo? LcsInfo { get; set; }
+}
+
+/// <summary>
+/// Details of an LCS calculation.
+/// </summary>
+public class LcsInfo
+{
+    /// <summary>
+    /// The matched sub-sequences from the LCS algorithm, keyed by their length.
+    /// </summary>
+    /// <remarks>
+    /// When calculating the distance of `(abbabbc, abbac)`, the result will be:
+    /// 5, ['abbac']
+    /// 4, ['abba', 'abbc', 'bbac']
+    /// ...
+    /// <br/>
+    /// Only the longest sub-sequences are added when
+    /// <see cref="LcsOptions.ContainsOnlyLongestSubSequences"/> is set; the LCS calculator
+    /// currently always reports only the longest ones.
+    /// </remarks>
+    public Dictionary<int, List<string>>? LcsMatchedSubSequences { get; set; }
+}
diff --git a/Common.Algorithm.Core/Text/Distance/IDistanceCalculator.cs b/Common.Algorithm.Core/Text/Distance/IDistanceCalculator.cs
new file mode 100644
index 0000000..4f6f712
--- /dev/null
+++ b/Common.Algorithm.Core/Text/Distance/IDistanceCalculator.cs
@@ -0,0 +1,18 @@
+namespace Common.Algorithm.Core.Text.Distance;
+
+/// <summary>
+/// Calculates distance information for a list of input strings.
+/// </summary>
+public interface IDistanceCalculator
+{
+    /// <summary>
+    /// Calculates the distance information for <paramref name="inputs"/>.
+    /// </summary>
+    /// <param name="inputs">The strings to compare.</param>
+    /// <param name="options">Optional calculation options.</param>
+    /// <returns>The calculated <see cref="DistanceInfo"/>.</returns>
+    public DistanceInfo GetDistanceInfo(
+        List<string> inputs,
+        CalculationOptions? options = null
+    );
+}
\ No newline at end of file
diff --git a/Common.Algorithm.sln b/Common.Algorithm.sln index fbfe609..5e55060 100644 --- a/Common.Algorithm.sln +++ b/Common.Algorithm.sln @@ -3,6 +3,10 @@ Microsoft Visual Studio Solution File, Format Version 12.00 # Visual Studio Version 17 VisualStudioVersion = 17.1.32228.430 MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core", "Common.Algorithm.Core\Common.Algorithm.Core.csproj", "{7515FDE9-F1E0-4F53-8956-1F451034B7E7}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Common.Algorithm.Core.Test", "Common.Algorithm.Core.Test\Common.Algorithm.Core.Test.csproj", "{59F3A4ED-E411-4ADD-899C-F7FD1B723F27}" +EndProject Global GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -10,4 +14,38 @@ Global GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {25933558-031B-4AE0-ACDA-96635BF768C3} EndGlobalSection + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Debug|x64 = Debug|x64 + Debug|x86 = Debug|x86 + Release|Any CPU = Release|Any CPU + Release|x64 = Release|x64 + Release|x86 = Release|x86 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.ActiveCfg = Debug|Any CPU +
{7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|Any CPU.Build.0 = Debug|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.ActiveCfg = Debug|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x64.Build.0 = Debug|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.ActiveCfg = Debug|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Debug|x86.Build.0 = Debug|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.ActiveCfg = Release|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|Any CPU.Build.0 = Release|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.ActiveCfg = Release|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x64.Build.0 = Release|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.ActiveCfg = Release|Any CPU + {7515FDE9-F1E0-4F53-8956-1F451034B7E7}.Release|x86.Build.0 = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|Any CPU.Build.0 = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.ActiveCfg = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x64.Build.0 = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.ActiveCfg = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Debug|x86.Build.0 = Debug|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.ActiveCfg = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|Any CPU.Build.0 = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.ActiveCfg = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x64.Build.0 = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.ActiveCfg = Release|Any CPU + {59F3A4ED-E411-4ADD-899C-F7FD1B723F27}.Release|x86.Build.0 = Release|Any CPU + EndGlobalSection EndGlobal