From ed295a5654d3bc3e49382fa5d07e484155632cd9 Mon Sep 17 00:00:00 2001
From: Bela VanderVoort
Date: Fri, 3 May 2024 10:47:54 -0500
Subject: [PATCH] Consider wide unicode characters to be size of 2 (#1237)

closes #260
---
Reviewer notes (free-form text below the three-dash line; not part of the
commit message and not part of any hunk):

* Adds CharacterSizeCalculator.CalculateWidth(char), which returns 2 when the
  UTF-16 code unit is >= 0x1100 and falls in one of the listed ranges, and 1
  otherwise.
* StringExtensions.GetPrintedWidth now returns the sum of CalculateWidth over
  the chars of the string instead of value.Length.
* Each half of a surrogate pair lies outside every listed range, so it is
  measured as 1; code points above U+FFFF are not classified as a whole.
* The first StringExtensions hunk also removes the CalculateHash helper.
* The two new tests format at Width = 20: the ASCII literal stays on one line,
  the Hangul literal is moved to a continuation line after "=".
* NOTE(review): Sum is a LINQ extension method; presumably System.Linq is in
  scope for StringExtensions.cs (not visible in these hunks) - confirm.

 Src/CSharpier.Tests/CodeFormatterTests.cs   | 24 ++++++++++++++
 .../Utilities/CharacterSizeCalculator.cs    | 33 +++++++++++++++++++
 Src/CSharpier/Utilities/StringExtensions.cs | 11 ++-----
 3 files changed, 59 insertions(+), 9 deletions(-)
 create mode 100644 Src/CSharpier/Utilities/CharacterSizeCalculator.cs

diff --git a/Src/CSharpier.Tests/CodeFormatterTests.cs b/Src/CSharpier.Tests/CodeFormatterTests.cs
index 47b18c657..da686609c 100644
--- a/Src/CSharpier.Tests/CodeFormatterTests.cs
+++ b/Src/CSharpier.Tests/CodeFormatterTests.cs
@@ -4,6 +4,8 @@
 
 namespace CSharpier.Tests;
 
+using CSharpier.Utilities;
+
 // TODO xml move these around
 [TestFixture]
 [Parallelizable(ParallelScope.All)]
@@ -62,6 +64,28 @@ public void Format_Should_Use_Width()
         result.Code.Should().Be("var someVariable =\n someValue;\n");
     }
 
+    [Test]
+    public void Format_Should_Measure_Regular_Characters()
+    {
+        var code = """
+            var x = "123456";
+            """;
+        var result = CodeFormatter.Format(code, new CodeFormatterOptions { Width = 20 });
+
+        result.Code.Should().Be("var x = \"123456\";\n");
+    }
+
+    [Test]
+    public void Format_Should_Measure_Wide_Characters()
+    {
+        var code = """
+            var x = "가가가가가가";
+            """;
+        var result = CodeFormatter.Format(code, new CodeFormatterOptions { Width = 20 });
+
+        result.Code.Should().Be("var x =\n \"가가가가가가\";\n");
+    }
+
     [Test]
     public void Format_Should_Use_IndentStyle()
     {
diff --git a/Src/CSharpier/Utilities/CharacterSizeCalculator.cs b/Src/CSharpier/Utilities/CharacterSizeCalculator.cs
new file mode 100644
index 000000000..b1f247d6c
--- /dev/null
+++ b/Src/CSharpier/Utilities/CharacterSizeCalculator.cs
@@ -0,0 +1,33 @@
+/* Copyright (c) Microsoft Corporation.
+ * Under MIT License
+ * From https://github.com/PowerShell/PowerShell/tree/master
+ */
+
+namespace CSharpier.Utilities;
+
+internal static class CharacterSizeCalculator
+{
+    // csharpier-ignore
+    public static int CalculateWidth(char c)
+    {
+        // The following is based on http://www.cl.cam.ac.uk/~mgk25/c/wcwidth.c
+        // which is derived from https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
+        var isWide = c >= 0x1100 &&
+            (c <= 0x115f || /* Hangul Jamo init. consonants */
+             c == 0x2329 || c == 0x232a ||
+             ((uint)(c - 0x2e80) <= (0xa4cf - 0x2e80) &&
+              c != 0x303f) || /* CJK ... Yi */
+             ((uint)(c - 0xac00) <= (0xd7a3 - 0xac00)) || /* Hangul Syllables */
+             ((uint)(c - 0xf900) <= (0xfaff - 0xf900)) || /* CJK Compatibility Ideographs */
+             ((uint)(c - 0xfe10) <= (0xfe19 - 0xfe10)) || /* Vertical forms */
+             ((uint)(c - 0xfe30) <= (0xfe6f - 0xfe30)) || /* CJK Compatibility Forms */
+             ((uint)(c - 0xff00) <= (0xff60 - 0xff00)) || /* Fullwidth Forms */
+             ((uint)(c - 0xffe0) <= (0xffe6 - 0xffe0)));
+
+        // We can ignore these ranges because .Net strings use surrogate pairs
+        // for this range and we do not handle surrogate pairs.
+        // (c >= 0x20000 && c <= 0x2fffd) ||
+        // (c >= 0x30000 && c <= 0x3fffd)
+        return isWide ? 2 : 1;
+    }
+}
diff --git a/Src/CSharpier/Utilities/StringExtensions.cs b/Src/CSharpier/Utilities/StringExtensions.cs
index c4aaaf86f..8763958d9 100644
--- a/Src/CSharpier/Utilities/StringExtensions.cs
+++ b/Src/CSharpier/Utilities/StringExtensions.cs
@@ -5,13 +5,6 @@ namespace CSharpier.Utilities;
 
 internal static class StringExtensions
 {
-    public static string CalculateHash(this string value)
-    {
-        using var hasher = MD5.Create();
-        var hashedBytes = hasher.ComputeHash(Encoding.UTF8.GetBytes(value));
-        return BitConverter.ToString(hashedBytes).Replace("-", string.Empty).ToLower();
-    }
-
     public static bool EqualsIgnoreCase(this string value, string otherValue)
     {
         return string.Compare(value, otherValue, StringComparison.OrdinalIgnoreCase) == 0;
@@ -37,10 +30,10 @@ public static bool IsBlank(this string? value)
         return value == null || string.IsNullOrEmpty(value.Trim());
     }
 
-    // this will eventually deal with the visual width not being the same as the code width https://github.com/belav/csharpier/issues/260
+    // some unicode characters should be considered size of 2 when calculating how big this string will be when printed
     public static int GetPrintedWidth(this string value)
     {
-        return value.Length;
+        return value.Sum(CharacterSizeCalculator.CalculateWidth);
     }
 
     public static int CalculateCurrentLeadingIndentation(this string line, int indentSize)