From 773f46507d28ae895be906641825d1de50a6ef4f Mon Sep 17 00:00:00 2001 From: BDisp Date: Mon, 26 Oct 2020 22:31:18 +0000 Subject: [PATCH 1/3] Fixes #47. ColumnWidth needs to differentiate between non-printable and null characters. --- NStack/unicode/Rune.ColumnWidth.cs | 10 ++++------ NStackTests/RuneTest.cs | 27 ++++++++++++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/NStack/unicode/Rune.ColumnWidth.cs b/NStack/unicode/Rune.ColumnWidth.cs index 7e8839f..7c81482 100644 --- a/NStack/unicode/Rune.ColumnWidth.cs +++ b/NStack/unicode/Rune.ColumnWidth.cs @@ -82,17 +82,15 @@ static int bisearch (uint rune, uint [,] table, int max) /// /// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals. /// - /// The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columsn that the rune occupies. + /// The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies. /// The red component. 
- public static int ColumnWidth (Rune rune) + public static int ColumnWidth (Rune rune) { uint irune = (uint)rune; - if (irune < 32) - return 0; + if (irune < 32 || (irune >= 0x7f && irune <= 0xa0)) + return -1; if (irune < 127) return 1; - if (irune >= 0x7f && irune <= 0xa0) - return 0; /* binary search in table of non-spacing characters */ if (bisearch (irune, combining, combining.GetLength (0)-1) != 0) return 0; diff --git a/NStackTests/RuneTest.cs b/NStackTests/RuneTest.cs index b7ef0ac..ec17df3 100644 --- a/NStackTests/RuneTest.cs +++ b/NStackTests/RuneTest.cs @@ -1,12 +1,29 @@ -using System; +using NUnit.Framework; +using System; namespace NStackTests { public class RuneTest { - public RuneTest () + Rune a = 'a'; + Rune b = 'b'; + Rune c = 123; + Rune d = '\u1150'; // 0x1150 ᅐ Unicode Technical Report #11 + Rune e = '\u1161'; // 0x1161 ᅡ non-spacing (zero-width) character, column width 0 + Rune f = 31; // non printable character + Rune g = 127; // non printable character + + [Test] + public void TestColumnWidth() { - Rune a = 'a'; - Rune b = 'b'; + var rt = new RuneTest(); + + Assert.AreEqual(1, Rune.ColumnWidth(rt.a)); + Assert.AreEqual(1, Rune.ColumnWidth(rt.b)); var l = a < b; - Rune c = 123; + Assert.IsTrue(l); + Assert.AreEqual(1, Rune.ColumnWidth(rt.c)); + Assert.AreEqual(2, Rune.ColumnWidth(rt.d)); + Assert.AreEqual(0, Rune.ColumnWidth(rt.e)); + Assert.AreEqual(-1, Rune.ColumnWidth(rt.f)); + Assert.AreEqual(-1, Rune.ColumnWidth(rt.g)); } } } From 37ff0d8fca59e7dba9b2ccfb47b327fb4e7bf37f Mon Sep 17 00:00:00 2001 From: BDisp Date: Thu, 29 Oct 2020 23:49:32 +0000 Subject: [PATCH 2/3] Fixes Rune.ToString return value and added one more Rune constructor to control the high and low surrogate code points. 
--- NStack/unicode/Rune.ColumnWidth.cs | 12 +++++----- NStack/unicode/Rune.cs | 37 +++++++++++++++++++++++++++--- NStackTests/RuneTest.cs | 19 +++++++++++++++ 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/NStack/unicode/Rune.ColumnWidth.cs b/NStack/unicode/Rune.ColumnWidth.cs index 7c81482..66489e8 100644 --- a/NStack/unicode/Rune.ColumnWidth.cs +++ b/NStack/unicode/Rune.ColumnWidth.cs @@ -79,12 +79,12 @@ static int bisearch (uint rune, uint [,] table, int max) return 0; } - /// - /// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals. - /// - /// The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies. - /// The red component. - public static int ColumnWidth (Rune rune) + /// + /// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals. + /// + /// The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies. + /// The rune. + public static int ColumnWidth (Rune rune) { uint irune = (uint)rune; if (irune < 32 || (irune >= 0x7f && irune <= 0xa0)) diff --git a/NStack/unicode/Rune.cs b/NStack/unicode/Rune.cs index d0f8478..dbc1980 100644 --- a/NStack/unicode/Rune.cs +++ b/NStack/unicode/Rune.cs @@ -58,9 +58,37 @@ public Rune (uint rune) /// C# characters. public Rune (char ch) { + if (ch >= surrogateMin && ch <= surrogateMax) + { + throw new ArgumentException("Value in the surrogate range and isn't part of a surrogate pair!"); + } this.value = (uint)ch; } + /// + /// Initializes a new instance of the from a surrogate pair value. 
+ /// + /// + /// + public Rune (uint sgateMin, uint sgateMax) + { + if (sgateMin < surrogateMin || sgateMax > surrogateMax) + { + throw new ArgumentOutOfRangeException($"Must be between {surrogateMin:x} and {surrogateMax:x} inclusive!"); + } + this.value = DecodeSurrogatePair(sgateMin, sgateMax); + } + + /// + /// Decodes a UTF-16 surrogate pair into the Unicode code point it represents. + /// + /// The high surrogate of the pair. + /// The low surrogate of the pair. + public static uint DecodeSurrogatePair(uint sgateMin, uint sgateMax) + { + return 0x10000 + ((sgateMin - surrogateMin) * 0x0400) + (sgateMax - lowSurrogateMin); + } + /// /// Gets a value indicating whether this can be encoded as UTF-8 /// @@ -79,6 +107,9 @@ public bool IsValid { const uint surrogateMin = 0xd800; const uint surrogateMax = 0xdfff; + const uint highSurrogateMax = 0xdbff; + const uint lowSurrogateMin = 0xdc00; + const byte t1 = 0x00; // 0000 0000 const byte tx = 0x80; // 1000 0000 const byte t2 = 0xC0; // 1100 0000 @@ -305,7 +336,7 @@ public static (Rune rune, int size) DecodeLastRune (byte [] buffer, int end = -1 /// number of bytes required to encode the rune. /// /// The length, or -1 if the rune is not a valid value to encode in UTF-8. - /// Rune to probe. + /// Rune to probe. 
public static int RuneLen (Rune rune) { var rvalue = rune.value; @@ -771,8 +802,8 @@ public override int GetHashCode () public override string ToString () { var buff = new byte [4]; - EncodeRune (this, buff, 0); - return System.Text.Encoding.UTF8.GetString (buff); + var size = EncodeRune (this, buff, 0); + return System.Text.Encoding.UTF8.GetString(buff, 0, size); } /// diff --git a/NStackTests/RuneTest.cs b/NStackTests/RuneTest.cs index ec17df3..6327695 100644 --- a/NStackTests/RuneTest.cs +++ b/NStackTests/RuneTest.cs @@ -25,5 +25,24 @@ public void TestColumnWidth() Assert.AreEqual(-1, Rune.ColumnWidth(rt.f)); Assert.AreEqual(-1, Rune.ColumnWidth(rt.g)); } + + [Test] + public void TestRune() + { + Rune a = new Rune('a'); + Assert.AreEqual("a", a.ToString()); + Rune b = new Rune(0x0061); + Assert.AreEqual("a", b.ToString()); + Rune c = new Rune('\u0061'); + Assert.AreEqual("a", c.ToString()); + Rune d = new Rune(0x10421); + Assert.AreEqual("𐐡", d.ToString()); + Assert.Throws(() => new Rune('\ud799', '\udc21')); + Rune e = new Rune('\ud801', '\udc21'); + Assert.AreEqual("𐐡", e.ToString()); + Assert.Throws(() => new Rune('\ud801')); + Rune f = new Rune('\ud83c', '\udf39'); + Assert.AreEqual("🌹", f.ToString()); + } } } From 858a8db7c9f5e65310eb57e374272159c1f115f4 Mon Sep 17 00:00:00 2001 From: BDisp Date: Thu, 29 Oct 2020 23:54:24 +0000 Subject: [PATCH 3/3] Added parameters comments to Rune. --- NStack/unicode/Rune.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/NStack/unicode/Rune.cs b/NStack/unicode/Rune.cs index dbc1980..0e34e14 100644 --- a/NStack/unicode/Rune.cs +++ b/NStack/unicode/Rune.cs @@ -68,8 +68,8 @@ public Rune (char ch) /// /// Initializes a new instance of the from a surrogate pair value. /// - /// - /// + /// The high surrogate of the pair. + /// The low surrogate of the pair. public Rune (uint sgateMin, uint sgateMax) { if (sgateMin < surrogateMin || sgateMax > surrogateMax)