Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

readline: fix character width calculation #13918

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions lib/readline.js
Original file line number Diff line number Diff line change
Expand Up @@ -682,13 +682,14 @@ Interface.prototype._getDisplayPos = function(str) {
row += 1;
continue;
}
if (isFullWidthCodePoint(code)) {
const width = getStringWidth(code);
if (width === 0 || width === 1) {
offset += width;
} else { // width === 2
if ((offset + 1) % col === 0) {
offset++;
}
offset += 2;
} else {
offset++;
}
}
var cols = offset % col;
Expand Down
27 changes: 23 additions & 4 deletions src/node_i18n.cc
Original file line number Diff line number Diff line change
Expand Up @@ -601,14 +601,33 @@ static void ToASCII(const FunctionCallbackInfo<Value>& args) {
// newer wide characters. wcwidth, on the other hand, uses a fixed
// algorithm that does not take things like emoji into proper
// consideration.
//
// TODO(TimothyGu): Investigate Cc (C0/C1 control codes). Both VTE (used by
// GNOME Terminal) and Konsole don't consider them to be zero-width (see refs
// below), and when printed in VTE it is Narrow. However GNOME Terminal doesn't
// allow it to be input. Linux's PTY terminal prints control characters as
// Narrow rhombi.
//
// TODO(TimothyGu): Investigate Hangul jamo characters. Medial vowels and final
// consonants are 0-width when combined with initial consonants; otherwise they
// are technically Wide. But many terminals (including Konsole and
// VTE/GLib-based) implement all medials and finals as 0-width.
//
// Refs: https://eev.ee/blog/2015/09/12/dark-corners-of-unicode/#combining-characters-and-character-width
// Refs: https://github.com/GNOME/glib/blob/79e4d4c6be/glib/guniprop.c#L388-L420
// Refs: https://github.com/KDE/konsole/blob/8c6a5d13c0/src/konsole_wcwidth.cpp#L101-L223
static int GetColumnWidth(UChar32 codepoint,
bool ambiguous_as_full_width = false) {
if (!u_isdefined(codepoint) ||
u_iscntrl(codepoint) ||
u_getCombiningClass(codepoint) > 0 ||
u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER)) {
const auto zero_width_mask = U_GC_CC_MASK | // C0/C1 control code
U_GC_CF_MASK | // Format control character
U_GC_ME_MASK | // Enclosing mark
U_GC_MN_MASK; // Nonspacing mark
if (codepoint != 0x00AD && // SOFT HYPHEN is Cf but not zero-width
((U_MASK(u_charType(codepoint)) & zero_width_mask) ||
u_hasBinaryProperty(codepoint, UCHAR_EMOJI_MODIFIER))) {
return 0;
}

// UCHAR_EAST_ASIAN_WIDTH is the Unicode property that identifies a
// codepoint as being full width, wide, ambiguous, neutral, narrow,
// or halfwidth.
Expand Down
32 changes: 31 additions & 1 deletion test/parallel/test-icu-stringwidth.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,43 @@ const assert = require('assert');
const readline = require('internal/readline');

// Test column width

// Ll (Lowercase Letter): LATIN SMALL LETTER A
assert.strictEqual(readline.getStringWidth('a'), 1);
assert.strictEqual(readline.getStringWidth(0x0061), 1);
// Lo (Other Letter)
assert.strictEqual(readline.getStringWidth('丁'), 2);
assert.strictEqual(readline.getStringWidth(0x4E01), 2);
// Surrogate pairs
assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2);
assert.strictEqual(readline.getStringWidth('👅'), 2);
// Cs (Surrogate): High Surrogate
assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
// Cs (Surrogate): Low Surrogate
assert.strictEqual(readline.getStringWidth('\udc78'), 1);
// Cc (Control): NULL
assert.strictEqual(readline.getStringWidth(0), 0);
// Cc (Control): BELL
assert.strictEqual(readline.getStringWidth(0x0007), 0);
// Cc (Control): LINE FEED
assert.strictEqual(readline.getStringWidth('\n'), 0);
// Cf (Format): SOFT HYPHEN
assert.strictEqual(readline.getStringWidth(0x00AD), 1);
// Cf (Format): LEFT-TO-RIGHT MARK
// Cf (Format): RIGHT-TO-LEFT MARK
assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
assert.strictEqual(readline.getStringWidth(97), 1);
// Cn (Unassigned): Not a character
assert.strictEqual(readline.getStringWidth(0x10FFEF), 1);
// Cn (Unassigned): Not a character (but in a CJK range)
assert.strictEqual(readline.getStringWidth(0x3FFEF), 2);
// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
assert.strictEqual(readline.getStringWidth(0x0301), 0);
// Mc (Spacing Mark): BALINESE ADEG ADEG
// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
// character.
assert.strictEqual(readline.getStringWidth(0x1B44), 1);
// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
assert.strictEqual(readline.getStringWidth(0x20DD), 0);

// The following is an emoji sequence. In some implementations, it is
// represented as a single glyph, in other implementations as a sequence
Expand Down
31 changes: 31 additions & 0 deletions test/parallel/test-readline-position.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
'use strict';
require('../common');
const { PassThrough } = require('stream');
const readline = require('readline');
const assert = require('assert');

const ctrlU = { ctrl: true, name: 'u' };

{
const input = new PassThrough();
const rl = readline.createInterface({
terminal: true,
input: input,
prompt: ''
});

for (const [cursor, string] of [
[1, 'a'],
[2, 'ab'],
[2, '丁'],
[0, '\u0301'], // COMBINING ACUTE ACCENT
[1, 'a\u0301'], // á
[0, '\u20DD'], // COMBINING ENCLOSING CIRCLE
[2, 'a\u20DDb'], // a⃝b
[0, '\u200E'] // LEFT-TO-RIGHT MARK
]) {
rl.write(string);
assert.strictEqual(rl._getCursorPos().cols, cursor);
rl.write(null, ctrlU);
}
}