From 5120911a5332f6ff3d0946d425a3c54046d918e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Fri, 15 May 2015 21:43:03 +0200 Subject: [PATCH 1/3] Properly handle UTF-8 multi-byte sequences as a single character --- src/Readline.php | 65 ++++++++++--------- tests/ReadlineTest.php | 140 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 176 insertions(+), 29 deletions(-) diff --git a/src/Readline.php b/src/Readline.php index 3a5f05d..e9c8e1e 100644 --- a/src/Readline.php +++ b/src/Readline.php @@ -48,7 +48,35 @@ public function __construct($output) $this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_DEL, array($this, 'onKeyDelete')); $this->sequencer->addSequence(self::ESC_SEQUENCE . self::ESC_INS, array($this, 'onKeyInsert')); - $this->sequencer->addFallback('', array($this, 'onFallback')); + $expect = 0; + $char = ''; + $that = $this; + $this->sequencer->addFallback('', function ($byte) use (&$expect, &$char, $that) { + if ($expect === 0) { + $code = ord($byte); + // count number of bytes expected for this UTF-8 multi-byte character + $expect = 1; + if ($code & 128 && $code & 64) { + ++$expect; + if ($code & 32) { + ++$expect; + if ($code & 16) { + ++$expect; + } + } + } + } + $char .= $byte; + --$expect; + + // forward buffered bytes as a single multi byte character once last byte has been read + if ($expect === 0) { + $save = $char; + $char = ''; + $that->onFallback($save); + } + }); + $this->sequencer->addFallback(self::ESC_SEQUENCE, function ($bytes) { echo 'unknown sequence: ' . ord($bytes) . PHP_EOL; }); @@ -401,16 +429,12 @@ public function onKeyDown() */ public function onFallback($chars) { - $pre = $this->substr($this->linebuffer, 0, $this->linepos); // read everything up until before backspace + // read everything up until before current position + $pre = $this->substr($this->linebuffer, 0, $this->linepos); $post = $this->substr($this->linebuffer, $this->linepos); $this->linebuffer = $pre . $chars . 
$post; - - // TODO: fix lineposition for partial multibyte characters ++$this->linepos; - if ($this->linepos >= $this->strlen($this->linebuffer)) { - $this->linepos = $this->strlen($this->linebuffer); - } $this->redraw(); } @@ -429,16 +453,17 @@ public function onFallback($chars) public function deleteChar($n) { $len = $this->strlen($this->linebuffer); - if ($n < 0 || $n > $len) { + if ($n < 0 || $n >= $len) { return; } - // TODO: multibyte-characters - - $pre = $this->substr($this->linebuffer, 0, $n); // read everything up until before current position + // read everything up until before current position + $pre = $this->substr($this->linebuffer, 0, $n); $post = $this->substr($this->linebuffer, $n + 1); + $this->linebuffer = $pre . $post; + // move cursor one cell to the left if we're deleting in front of the cursor if ($n < $this->linepos) { --$this->linepos; } @@ -465,24 +490,6 @@ protected function processLine() $this->redraw(); } - protected function readEscape($char) - { - $this->inEscape = false; - - if($char === self::ESC_LEFT && $this->move) { - $this->moveCursorBy(-1); - } else if($char === self::ESC_RIGHT && $this->move) { - $this->moveCursorBy(1); - } else if ($char === self::ESC_UP && $this->history !== null) { - $this->history->moveUp(); - } else if ($char === self::ESC_DOWN && $this->history !== null) { - $this->history->moveDown(); - } else { - $this->write('invalid char'); - // ignore unknown escape code - } - } - protected function strlen($str) { return mb_strlen($str, $this->encoding); diff --git a/tests/ReadlineTest.php b/tests/ReadlineTest.php index 4159863..cdf7910 100644 --- a/tests/ReadlineTest.php +++ b/tests/ReadlineTest.php @@ -153,4 +153,144 @@ public function testMovingCursorWithoutEchoDoesNotNeedToRedraw() $this->assertSame($this->readline, $this->readline->moveCursorTo(0)); $this->assertSame($this->readline, $this->readline->moveCursorBy(2)); } + + public function testWriteSimpleCharWritesOnce() + { + 
$this->output->expects($this->once())->method('write')->with($this->equalTo("\r\033[K" . "k")); + + $this->pushInputBytes($this->readline, 'k'); + } + + public function testWriteMultiByteCharWritesOnce() + { + $this->output->expects($this->once())->method('write')->with($this->equalTo("\r\033[K" . "\xF0\x9D\x84\x9E")); + + // "𝄞" – U+1D11E MUSICAL SYMBOL G CLEF + $this->pushInputBytes($this->readline, "\xF0\x9D\x84\x9E"); + } + + public function testKeysSimpleChars() + { + $this->pushInputBytes($this->readline, 'hi!'); + + $this->assertEquals('hi!', $this->readline->getInput()); + $this->assertEquals(3, $this->readline->getCursorPosition()); + + return $this->readline; + } + + /** + * @depends testKeysSimpleChars + * @param Readline $readline + */ + public function testKeysBackspaceDeletesLastCharacter(Readline $readline) + { + $readline->onKeyBackspace(); + + $this->assertEquals('hi', $readline->getInput()); + $this->assertEquals(2, $readline->getCursorPosition()); + } + + public function testKeysMultiByteInput() + { + $this->pushInputBytes($this->readline, 'hä'); + + $this->assertEquals('hä', $this->readline->getInput()); + $this->assertEquals(2, $this->readline->getCursorPosition()); + + return $this->readline; + } + + /** + * @depends testKeysMultiByteInput + * @param Readline $readline + */ + public function testKeysBackspaceDeletesWholeMultibyteCharacter(Readline $readline) + { + $readline->onKeyBackspace(); + + $this->assertEquals('h', $readline->getInput()); + } + + public function testKeysBackspaceMiddle() + { + $this->readline->setInput('test'); + $this->readline->moveCursorTo(2); + + $this->readline->onKeyBackspace(); + + $this->assertEquals('tst', $this->readline->getInput()); + $this->assertEquals(1, $this->readline->getCursorPosition()); + } + + public function testKeysBackspaceFrontDoesNothing() + { + $this->readline->setInput('test'); + $this->readline->moveCursorTo(0); + + $this->readline->onKeyBackspace(); + + $this->assertEquals('test', 
$this->readline->getInput()); + $this->assertEquals(0, $this->readline->getCursorPosition()); + } + + public function testKeysDeleteMiddle() + { + $this->readline->setInput('test'); + $this->readline->moveCursorTo(2); + + $this->readline->onKeyDelete(); + + $this->assertEquals('tet', $this->readline->getInput()); + $this->assertEquals(2, $this->readline->getCursorPosition()); + } + + public function testKeysDeleteEndDoesNothing() + { + $this->readline->setInput('test'); + + $this->readline->onKeyDelete(); + + $this->assertEquals('test', $this->readline->getInput()); + $this->assertEquals(4, $this->readline->getCursorPosition()); + } + + public function testKeysPrependCharacterInFrontOfMultiByte() + { + $this->readline->setInput('ü'); + $this->readline->moveCursorTo(0); + + $this->pushInputBytes($this->readline, 'h'); + + $this->assertEquals('hü', $this->readline->getInput()); + $this->assertEquals(1, $this->readline->getCursorPosition()); + } + + public function testKeysWriteMultiByteAfterMultiByte() + { + $this->readline->setInput('ü'); + + $this->pushInputBytes($this->readline, 'ä'); + + $this->assertEquals('üä', $this->readline->getInput()); + $this->assertEquals(2, $this->readline->getCursorPosition()); + } + + public function testKeysPrependMultiByteInFrontOfMultiByte() + { + $this->readline->setInput('ü'); + $this->readline->moveCursorTo(0); + + $this->pushInputBytes($this->readline, 'ä'); + + $this->assertEquals('äü', $this->readline->getInput()); + $this->assertEquals(1, $this->readline->getCursorPosition()); + } + + private function pushInputBytes(Readline $readline, $bytes) + { + foreach (str_split($bytes, 1) as $byte) { + $readline->onChar($byte); + } + } } From db4af71a5893f834e010d5aa737f4d0a676000c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Sat, 16 May 2015 12:40:22 +0200 Subject: [PATCH 2/3] Add getCursorCell() to account for variable-width characters --- src/Readline.php | 85 +++++++++++++++++++++++++++++-------- 
tests/ReadlineTest.php | 96 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 18 deletions(-) diff --git a/src/Readline.php b/src/Readline.php index e9c8e1e..18cd172 100644 --- a/src/Readline.php +++ b/src/Readline.php @@ -162,12 +162,15 @@ public function setMove($move) } /** - * get current cursor position + * Gets current cursor position measured in number of text characters. * - * cursor position is measured in number of text characters + * Note that the number of text characters doesn't necessarily reflect the + * number of monospace cells occupied by the text characters. If you want + * to know the latter, use `self::getCursorCell()` instead. * * @return int - * @see self::moveCursorTo() to move the cursor to a given position + * @see self::getCursorCell() to get the position measured in monospace cells + * @see self::moveCursorTo() to move the cursor to a given character position * @see self::moveCursorBy() to move the cursor by given number of characters * @see self::setMove() to toggle whether the user can move the cursor position */ @@ -177,13 +180,55 @@ public function getCursorPosition() } /** - * move cursor to right by $n chars (or left if $n is negative) + * Gets current cursor position measured in monospace cells. * - * zero or out of range moves are simply ignored + * Note that the cell position doesn't necessarily reflect the number of + * text characters. If you want to know the latter, use + * `self::getCursorPosition()` instead. + * + * Most "normal" characters occupy a single monospace cell, i.e. the ASCII + * sequence for "A" requires a single cell, as do most UTF-8 sequences + * like "Ä". + * + * However, there are a number of code points that do not require a cell + * (i.e. invisible surrogates) or require two cells (e.g. some asian glyphs). + * + * Also note that this takes the echo mode into account, i.e. the cursor is + * always at position zero if echo is off. 
If using a custom echo character + * (like asterisk), it will take its width into account instead of the actual + * input characters. + * + * @return int + * @see self::getCursorPosition() to get current cursor position measured in characters + * @see self::moveCursorTo() to move the cursor to a given character position + * @see self::moveCursorBy() to move the cursor by given number of characters + * @see self::setMove() to toggle whether the user can move the cursor position + * @see self::setEcho() + */ + public function getCursorCell() + { + if ($this->echo === false) { + return 0; + } + if ($this->echo !== true) { + return $this->strwidth($this->echo) * $this->linepos; + } + return $this->strwidth($this->substr($this->linebuffer, 0, $this->linepos)); + } + + /** + * Moves cursor to right by $n chars (or left if $n is negative). + * + * Zero value or values out of range (exceeding current input buffer) are + * simply ignored. + * + * Will redraw() the readline only if the visible cell position changes, + * see `self::getCursorCell()` for more details. * * @param int $n * @return self * @uses self::moveCursorTo() + * @uses self::redraw() */ public function moveCursorBy($n) { @@ -191,9 +236,12 @@ public function moveCursorBy($n) } /** - * move cursor to given position in current line buffer + * Moves cursor to given position in current line buffer. + * + * Values out of range (exceeding current input buffer) are simply ignored. * - * out of range (exceeding current input buffer) are simply ignored + * Will redraw() the readline only if the visible cell position changes, + * see `self::getCursorCell()` for more details. 
* * @param int $n * @return self @@ -205,10 +253,11 @@ public function moveCursorTo($n) return $this; } + $old = $this->getCursorCell(); $this->linepos = $n; - // only redraw if cursor is actually visible - if ($this->echo) { + // only redraw if visible cell position change (implies cursor is actually visible) + if ($this->getCursorCell() !== $old) { $this->redraw(); } @@ -308,18 +357,13 @@ public function redraw() $output = "\r\033[K" . $this->prompt; if ($this->echo !== false) { if ($this->echo === true) { - $output .= $this->linebuffer; + $buffer = $this->linebuffer; } else { - $output .= str_repeat($this->echo, $this->strlen($this->linebuffer)); + $buffer = str_repeat($this->echo, $this->strlen($this->linebuffer)); } - $len = $this->strlen($this->linebuffer); - if ($this->linepos !== $len) { - $reverse = $len - $this->linepos; - - // move back $reverse chars (by sending backspace) - $output .= str_repeat("\x08", $reverse); - } + // write output, then move back $reverse chars (by sending backspace) + $output .= $buffer . 
str_repeat("\x08", $this->strwidth($buffer) - $this->getCursorCell()); } $this->write($output); @@ -503,6 +547,11 @@ protected function substr($str, $start = 0, $len = null) return (string)mb_substr($str, $start, $len, $this->encoding); } + private function strwidth($str) + { + return mb_strwidth($str, $this->encoding); + } + protected function write($data) { $this->output->write($data); diff --git a/tests/ReadlineTest.php b/tests/ReadlineTest.php index cdf7910..f62212e 100644 --- a/tests/ReadlineTest.php +++ b/tests/ReadlineTest.php @@ -21,6 +21,7 @@ public function testInputStartsEmpty() { $this->assertEquals('', $this->readline->getInput()); $this->assertEquals(0, $this->readline->getCursorPosition()); + $this->assertEquals(0, $this->readline->getCursorCell()); } public function testGetInputAfterSetting() @@ -28,6 +29,7 @@ public function testGetInputAfterSetting() $this->assertSame($this->readline, $this->readline->setInput('hello')); $this->assertEquals('hello', $this->readline->getInput()); $this->assertEquals(5, $this->readline->getCursorPosition()); + $this->assertEquals(5, $this->readline->getCursorCell()); } public function testSettingInputMovesCursorToEnd() @@ -37,6 +39,7 @@ public function testSettingInputMovesCursorToEnd() $this->readline->setInput('testing'); $this->assertEquals(7, $this->readline->getCursorPosition()); + $this->assertEquals(7, $this->readline->getCursorCell()); } public function testMultiByteInput() @@ -44,6 +47,7 @@ public function testMultiByteInput() $this->readline->setInput('täst'); $this->assertEquals('täst', $this->readline->getInput()); $this->assertEquals(4, $this->readline->getCursorPosition()); + $this->assertEquals(4, $this->readline->getCursorCell()); } public function testRedrawingReadlineWritesToOutputOnce() @@ -175,6 +179,7 @@ public function testKeysSimpleChars() $this->assertEquals('hi!', $this->readline->getInput()); $this->assertEquals(3, $this->readline->getCursorPosition()); + $this->assertEquals(3, 
$this->readline->getCursorCell()); return $this->readline; } @@ -189,6 +194,7 @@ public function testKeysBackspaceDeletesLastCharacter(Readline $readline) $this->assertEquals('hi', $readline->getInput()); $this->assertEquals(2, $readline->getCursorPosition()); + $this->assertEquals(2, $readline->getCursorCell()); } public function testKeysMultiByteInput() @@ -197,6 +203,7 @@ public function testKeysMultiByteInput() $this->assertEquals('hä', $this->readline->getInput()); $this->assertEquals(2, $this->readline->getCursorPosition()); + $this->assertEquals(2, $this->readline->getCursorCell()); return $this->readline; } @@ -221,6 +228,7 @@ public function testKeysBackspaceMiddle() $this->assertEquals('tst', $this->readline->getInput()); $this->assertEquals(1, $this->readline->getCursorPosition()); + $this->assertEquals(1, $this->readline->getCursorCell()); } public function testKeysBackspaceFrontDoesNothing() @@ -232,6 +240,7 @@ public function testKeysBackspaceFrontDoesNothing() $this->assertEquals('test', $this->readline->getInput()); $this->assertEquals(0, $this->readline->getCursorPosition()); + $this->assertEquals(0, $this->readline->getCursorCell()); } public function testKeysDeleteMiddle() @@ -243,6 +252,7 @@ public function testKeysDeleteMiddle() $this->assertEquals('tet', $this->readline->getInput()); $this->assertEquals(2, $this->readline->getCursorPosition()); + $this->assertEquals(2, $this->readline->getCursorCell()); } public function testKeysDeleteEndDoesNothing() @@ -253,6 +263,7 @@ public function testKeysDeleteEndDoesNothing() $this->assertEquals('test', $this->readline->getInput()); $this->assertEquals(4, $this->readline->getCursorPosition()); + $this->assertEquals(4, $this->readline->getCursorCell()); } public function testKeysPrependCharacterInFrontOfMultiByte() @@ -264,6 +275,7 @@ public function testKeysPrependCharacterInFrontOfMultiByte() $this->assertEquals('hü', $this->readline->getInput()); $this->assertEquals(1, 
$this->readline->getCursorPosition()); + $this->assertEquals(1, $this->readline->getCursorCell()); } public function testKeysWriteMultiByteAfterMultiByte() @@ -274,6 +286,7 @@ public function testKeysWriteMultiByteAfterMultiByte() $this->assertEquals('üä', $this->readline->getInput()); $this->assertEquals(2, $this->readline->getCursorPosition()); + $this->assertEquals(2, $this->readline->getCursorCell()); } public function testKeysPrependMultiByteInFrontOfMultiByte() @@ -285,6 +298,89 @@ public function testKeysPrependMultiByteInFrontOfMultiByte() $this->assertEquals('äü', $this->readline->getInput()); $this->assertEquals(1, $this->readline->getCursorPosition()); + $this->assertEquals(1, $this->readline->getCursorCell()); + } + + public function testDoubleWidthCharsOccupyTwoCells() + { + $this->readline->setInput('現'); + + $this->assertEquals(1, $this->readline->getCursorPosition()); + $this->assertEquals(2, $this->readline->getCursorCell()); + + return $this->readline; + } + + /** + * @depends testDoubleWidthCharsOccupyTwoCells + * @param Readline $readline + */ + public function testDoubleWidthCharMoveToStart(Readline $readline) + { + $readline->moveCursorTo(0); + + $this->assertEquals(0, $readline->getCursorPosition()); + $this->assertEquals(0, $readline->getCursorCell()); + + return $readline; + } + + /** + * @depends testDoubleWidthCharMoveToStart + * @param Readline $readline + */ + public function testDoubleWidthCharMovesTwoCellsForward(Readline $readline) + { + $readline->moveCursorBy(1); + + $this->assertEquals(1, $readline->getCursorPosition()); + $this->assertEquals(2, $readline->getCursorCell()); + + return $readline; + } + + /** + * @depends testDoubleWidthCharMovesTwoCellsForward + * @param Readline $readline + */ + public function testDoubleWidthCharMovesTwoCellsBackward(Readline $readline) + { + $readline->moveCursorBy(-1); + + $this->assertEquals(0, $readline->getCursorPosition()); + $this->assertEquals(0, $readline->getCursorCell()); + } + + 
public function testCursorCellIsAlwaysZeroIfEchoIsOff() + { + $this->readline->setInput('test'); + $this->readline->setEcho(false); + + $this->assertEquals(4, $this->readline->getCursorPosition()); + $this->assertEquals(0, $this->readline->getCursorCell()); + } + + public function testCursorCellAccountsForDoubleWidthCharacters() + { + $this->readline->setInput('現現現現'); + $this->readline->moveCursorTo(3); + + $this->assertEquals(3, $this->readline->getCursorPosition()); + $this->assertEquals(6, $this->readline->getCursorCell()); + + return $this->readline; + } + + /** + * @depends testCursorCellAccountsForDoubleWidthCharacters + * @param Readline $readline + */ + public function testCursorCellObeysCustomEchoAsterisk(Readline $readline) + { + $readline->setEcho('*'); + + $this->assertEquals(3, $readline->getCursorPosition()); + $this->assertEquals(3, $readline->getCursorCell()); } private function pushInputBytes(Readline $readline, $bytes) From 975dd7824584bad48bbedf12e8ccc89636368f42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=BCck?= Date: Sun, 17 May 2015 16:41:24 +0200 Subject: [PATCH 3/3] Update documentation WRT UTF-8 and multi-byte / multi-cell characters --- README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 54f4a69..b50acfc 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # clue/stdio-react [![Build Status](https://travis-ci.org/clue/php-stdio-react.svg?branch=master)](https://travis-ci.org/clue/php-stdio-react) -Async standard console input & output (STDIN, STDOUT) for React PHP +Async, event-driven and UTF-8 aware standard console input & output (STDIN, STDOUT) for React PHP > Note: This project is in early beta stage! Feel free to report any issues you encounter. @@ -181,7 +181,8 @@ simply pass a boolean `true` like this: $readline->setMove(true); ``` -The `getCursorPosition()` method can be used to access the current cursor position. 
+The `getCursorPosition()` method can be used to access the current cursor position, +measured in number of characters. This can be useful if you want to get a substring of the current *user input buffer*. Simply invoke it like this: @@ -189,6 +190,19 @@ Simply invoke it like this: $position = $readline->getCursorPosition(); ``` +The `getCursorCell()` method can be used to get the current cursor position, +measured in number of monospace cells. +Most *normal* characters (plain ASCII and most multi-byte UTF-8 sequences) take a single monospace cell. +However, there are a number of characters that have no visual representation +(and do not take a cell at all) or characters that do not fit within a single +cell (like some Asian glyphs). +This method is mostly useful for calculating the visual cursor position on screen, +but you may also invoke it like this: + +```php +$cell = $readline->getCursorCell(); +``` + The `moveCursorTo($position)` method can be used to set the current cursor position to the given absolute character position. For example, to move the cursor to the beginning of the *user input buffer*, simply call: