Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes #47. ColumnWidth needs to differentiate between non-printable and null characters. #48

Merged
merged 3 commits into from
Oct 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 8 additions & 10 deletions NStack/unicode/Rune.ColumnWidth.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,20 +79,18 @@ static int bisearch (uint rune, uint [,] table, int max)
return 0;
}

/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columsn that the rune occupies.</returns>
/// <param name="r">The red component.</param>
public static int ColumnWidth (Rune rune)
/// <summary>
/// Number of column positions of a wide-character code. This is used to measure runes as displayed by text-based terminals.
/// </summary>
/// <returns>The width in columns, 0 if the argument is the null character, -1 if the value is not printable, otherwise the number of columns that the rune occupies.</returns>
/// <param name="rune">The rune.</param>
public static int ColumnWidth (Rune rune)
{
uint irune = (uint)rune;
if (irune < 32)
return 0;
if (irune < 32 || (irune >= 0x7f && irune <= 0xa0))
return -1;
if (irune < 127)
return 1;
if (irune >= 0x7f && irune <= 0xa0)
return 0;
/* binary search in table of non-spacing characters */
if (bisearch (irune, combining, combining.GetLength (0)-1) != 0)
return 0;
Expand Down
37 changes: 34 additions & 3 deletions NStack/unicode/Rune.cs
Original file line number Diff line number Diff line change
Expand Up @@ -58,9 +58,37 @@ public Rune (uint rune)
/// <param name="ch">C# characters.</param>
public Rune (char ch)
{
	// A lone UTF-16 surrogate code unit is only half of a pair, so it is rejected here.
	bool isSurrogate = surrogateMin <= ch && ch <= surrogateMax;
	if (isSurrogate)
		throw new ArgumentException ("Value in the surrogate range and isn't part of a surrogate pair!");

	value = ch;
}

/// <summary>
/// Initializes a new instance of the <see cref="T:System.Rune"/> from a UTF-16 surrogate pair.
/// </summary>
/// <param name="sgateMin">The high (leading) surrogate; must lie between <c>surrogateMin</c> and <c>highSurrogateMax</c> inclusive.</param>
/// <param name="sgateMax">The low (trailing) surrogate; must lie between <c>lowSurrogateMin</c> and <c>surrogateMax</c> inclusive.</param>
/// <exception cref="ArgumentOutOfRangeException">Either argument is outside the range allowed for its half of the pair.</exception>
public Rune (uint sgateMin, uint sgateMax)
{
	// Each half is validated against its own sub-range. Checking only the outer
	// surrogate bounds would let swapped or mismatched halves through, and the
	// subtraction in DecodeSurrogatePair would then produce a meaningless value.
	if (sgateMin < surrogateMin || sgateMin > highSurrogateMax) {
		throw new ArgumentOutOfRangeException (nameof (sgateMin), $"Must be between {surrogateMin:x} and {highSurrogateMax:x} inclusive!");
	}
	if (sgateMax < lowSurrogateMin || sgateMax > surrogateMax) {
		throw new ArgumentOutOfRangeException (nameof (sgateMax), $"Must be between {lowSurrogateMin:x} and {surrogateMax:x} inclusive!");
	}
	this.value = DecodeSurrogatePair (sgateMin, sgateMax);
}

/// <summary>
/// Combines the two halves of a surrogate pair into a single value, computed as
/// <c>0x10000 + (sgateMin - surrogateMin) * 0x0400 + (sgateMax - lowSurrogateMin)</c>.
/// </summary>
/// <remarks>The arguments are not validated by this method.</remarks>
/// <param name="sgateMin">The high surrogate of the pair.</param>
/// <param name="sgateMax">The low surrogate of the pair.</param>
/// <returns>The value obtained by combining both surrogates.</returns>
public static uint DecodeSurrogatePair (uint sgateMin, uint sgateMax)
{
	uint highOffset = sgateMin - surrogateMin;
	uint lowOffset = sgateMax - lowSurrogateMin;
	return 0x10000 + (highOffset * 0x0400) + lowOffset;
}

/// <summary>
/// Gets a value indicating whether this <see cref="T:System.Rune"/> can be encoded as UTF-8
/// </summary>
Expand All @@ -79,6 +107,9 @@ public bool IsValid {
const uint surrogateMin = 0xd800;
const uint surrogateMax = 0xdfff;

const uint highSurrogateMax = 0xdbff;
const uint lowSurrogateMin = 0xdc00;

const byte t1 = 0x00; // 0000 0000
const byte tx = 0x80; // 1000 0000
const byte t2 = 0xC0; // 1100 0000
Expand Down Expand Up @@ -305,7 +336,7 @@ public static (Rune rune, int size) DecodeLastRune (byte [] buffer, int end = -1
/// number of bytes required to encode the rune.
/// </summary>
/// <returns>The length, or -1 if the rune is not a valid value to encode in UTF-8.</returns>
/// <param name="rvalue">Rune to probe.</param>
/// <param name="rune">Rune to probe.</param>
public static int RuneLen (Rune rune)
{
var rvalue = rune.value;
Expand Down Expand Up @@ -771,8 +802,8 @@ public override int GetHashCode ()
public override string ToString ()
{
	// Scratch buffer for the UTF-8 form of this rune; EncodeRune's return value
	// is used as the number of bytes it wrote into the buffer.
	var buff = new byte [4];
	var size = EncodeRune (this, buff, 0);

	// Decode only the bytes that were written. Decoding the whole buffer would
	// append a '\0' character for every unused trailing byte.
	return System.Text.Encoding.UTF8.GetString (buff, 0, size);
}

/// <summary>
Expand Down
46 changes: 41 additions & 5 deletions NStackTests/RuneTest.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,48 @@
using System;
using NUnit.Framework;
using System;
namespace NStackTests {
	/// <summary>
	/// Unit tests covering <see cref="Rune"/> construction, ordering, string conversion and column width.
	/// </summary>
	public class RuneTest {
		// Sample runes shared by the column-width test.
		Rune a = 'a';
		Rune b = 'b';
		Rune c = 123;
		Rune d = '\u1150'; // 0x1150 ᅐ Unicode Technical Report #11 (expected width 2)
		Rune e = '\u1161'; // 0x1161 ᅡ expected column width 0
		Rune f = 31; // non printable character
		Rune g = 127; // non printable character

		/// <summary>
		/// Checks rune ordering and the column width reported for printable,
		/// wide, zero-width and non-printable runes.
		/// </summary>
		[Test]
		public void TestColumnWidth ()
		{
			Assert.IsTrue (a < b);

			Assert.AreEqual (1, Rune.ColumnWidth (a));
			Assert.AreEqual (1, Rune.ColumnWidth (b));
			Assert.AreEqual (1, Rune.ColumnWidth (c));
			Assert.AreEqual (2, Rune.ColumnWidth (d));
			Assert.AreEqual (0, Rune.ColumnWidth (e));
			Assert.AreEqual (-1, Rune.ColumnWidth (f));
			Assert.AreEqual (-1, Rune.ColumnWidth (g));
		}

		/// <summary>
		/// Checks construction from chars, code points and surrogate pairs,
		/// including the exceptions thrown for invalid surrogate input.
		/// </summary>
		[Test]
		public void TestRune ()
		{
			Rune a = new Rune ('a');
			Assert.AreEqual ("a", a.ToString ());
			Rune b = new Rune (0x0061);
			Assert.AreEqual ("a", b.ToString ());
			Rune c = new Rune ('\u0061');
			Assert.AreEqual ("a", c.ToString ());
			Rune d = new Rune (0x10421);
			Assert.AreEqual ("𐐡", d.ToString ());
			Assert.Throws<ArgumentOutOfRangeException> (() => new Rune ('\ud799', '\udc21'));
			Rune e = new Rune ('\ud801', '\udc21');
			Assert.AreEqual ("𐐡", e.ToString ());
			Assert.Throws<ArgumentException> (() => new Rune ('\ud801'));
			Rune f = new Rune ('\ud83c', '\udf39');
			Assert.AreEqual ("🌹", f.ToString ());
		}
	}
}