diff --git a/composer.json b/composer.json
index 2877811..f859879 100644
--- a/composer.json
+++ b/composer.json
@@ -20,6 +20,7 @@
     },
     "require": {
         "php": ">=8.1",
+        "ext-mbstring": "*",
         "league/uri": "^7.6",
         "league/uri-interfaces": "^7.6"
     },
diff --git a/src/SimpleSourceLocation.php b/src/SimpleSourceLocation.php
index 591f6e4..2d7976c 100644
--- a/src/SimpleSourceLocation.php
+++ b/src/SimpleSourceLocation.php
@@ -12,7 +12,7 @@ final class SimpleSourceLocation extends SourceLocationMixin
     /**
      * Creates a new location indicating $offset within $sourceUrl.
      *
-     * $line and $column default to assuming the source is a single line. This
+     * $line and $column default to assuming the source is a single ASCII line. This
      * means that $line defaults to 0 and $column defaults to $offset.
      */
     public function __construct(
diff --git a/src/SourceFile.php b/src/SourceFile.php
index 33bc550..52627fb 100644
--- a/src/SourceFile.php
+++ b/src/SourceFile.php
@@ -212,12 +212,15 @@ private function binarySearch(int $offset): int
 
     /**
      * The 0-based column of that offset.
+     *
+     * Unlike offsets (which are byte-offsets), columns are computed based on Unicode
+     * codepoints to provide a better experience.
      */
     public function getColumn(int $offset): int
     {
         $line = $this->getLine($offset);
 
-        return $offset - $this->lineStarts[$line];
+        return mb_strlen(substr($this->string, $this->lineStarts[$line], $offset - $this->lineStarts[$line]), 'UTF-8');
     }
 
     /**
@@ -237,7 +240,20 @@ public function getOffset(int $line, int $column = 0): int
             throw new \OutOfRangeException('Column may not be negative.');
         }
 
-        $result = $this->lineStarts[$line] + $column;
+        if ($column === 0) {
+            $result = $this->lineStarts[$line];
+        } else {
+            $lineStart = $this->lineStarts[$line];
+            // substr() takes a length, not an end offset; null reads to the end of the last line.
+            $lineLength = isset($this->lineStarts[$line + 1]) ? $this->lineStarts[$line + 1] - $lineStart : null;
+            $lineContent = substr($this->string, $lineStart, $lineLength);
+
+            if ($column > mb_strlen($lineContent, 'UTF-8')) {
+                throw new \OutOfRangeException("Line $line doesn't have $column columns.");
+            }
+
+            $result = $lineStart + \strlen(mb_substr($lineContent, 0, $column, 'UTF-8'));
+        }
 
         if ($result > \strlen($this->string) || ($line + 1 < \count($this->lineStarts) && $result >= $this->lineStarts[$line + 1])) {
             throw new \OutOfRangeException("Line $line doesn't have $column columns.");
diff --git a/tests/SourceFileTest.php b/tests/SourceFileTest.php
index 04ff595..a2559f8 100644
--- a/tests/SourceFileTest.php
+++ b/tests/SourceFileTest.php
@@ -254,4 +254,24 @@ public function testGetTextEndDefaultsToTheEndOfTheFile(): void
     {
         self::assertEquals("g boom\nzip zap zop", $this->file->getText(20));
     }
+
+    public function testGetColumnCountsUnicodeCharacters(): void
+    {
+        $file = SourceFile::fromString("foo\nbar éà\nbaz");
+
+        self::assertEquals(4, $file->getColumn(8));
+        self::assertEquals(5, $file->getColumn(10));
+        self::assertEquals(6, $file->getColumn(12));
+        self::assertEquals(0, $file->getColumn(13));
+    }
+
+    public function testGetOffsetCountsUnicodeCharactersForColumns(): void
+    {
+        $file = SourceFile::fromString("foo\nbar éà\nbaz");
+
+        self::assertEquals(8, $file->getOffset(1, 4));
+        self::assertEquals(10, $file->getOffset(1, 5));
+        self::assertEquals(12, $file->getOffset(1, 6));
+        self::assertEquals(13, $file->getOffset(2, 0));
+    }
 }