Skip to content

Commit e7f97cf

Browse files
authored
Merge pull request ethereum#9012 from ethereum/wrong-offset-for-utf8
Fix caret position for errors with utf source
2 parents 7e1f262 + 7f3d437 commit e7f97cf

File tree

7 files changed

+31
-4
lines changed

7 files changed

+31
-4
lines changed

liblangutil/SourceReferenceFormatterHuman.cpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include <liblangutil/SourceReferenceFormatterHuman.h>
2222
#include <liblangutil/Scanner.h>
2323
#include <liblangutil/Exceptions.h>
24+
#include <libsolutil/UTF8.h>
2425
#include <iomanip>
2526

2627
using namespace std;
@@ -103,12 +104,13 @@ void SourceReferenceFormatterHuman::printSourceLocation(SourceReference const& _
103104
m_stream << leftpad << ' ';
104105
frameColored() << '|';
105106
m_stream << ' ';
107+
106108
for_each(
107109
_ref.text.cbegin(),
108-
_ref.text.cbegin() + _ref.startColumn,
110+
_ref.text.cbegin() + numCodepoints(_ref.text.substr(0, _ref.startColumn)),
109111
[this](char ch) { m_stream << (ch == '\t' ? '\t' : ' '); }
110112
);
111-
diagColored() << string(locationLength, '^');
113+
diagColored() << string(numCodepoints(_ref.text.substr(_ref.startColumn, locationLength)), '^');
112114
m_stream << '\n';
113115
}
114116
else

libsolutil/UTF8.cpp

+9
Original file line numberDiff line numberDiff line change
@@ -138,4 +138,13 @@ bool validateUTF8(std::string const& _input, size_t& _invalidPosition)
138138
return validateUTF8(reinterpret_cast<unsigned char const*>(_input.c_str()), _input.length(), _invalidPosition);
139139
}
140140

141+
/// Counts the code points of a UTF-8 encoded string by counting every byte that is not a
/// continuation byte (a byte whose two most significant bits are 10).
/// The input is not validated by this function.
/// @param _utf8EncodedInput the bytes to scan.
/// @returns the number of bytes in the input that are not continuation bytes.
size_t numCodepoints(std::string const& _utf8EncodedInput)
142+
{
143+
size_t codepoint = 0; // running count, despite the singular name
144+
for (char c: _utf8EncodedInput)
145+
codepoint += (c & 0xc0) != 0x80; // the comparison adds 1 for each non-continuation byte and 0 otherwise
146+
147+
return codepoint;
148+
}
149+
141150
}

libsolutil/UTF8.h

+2
Original file line numberDiff line numberDiff line change
@@ -38,4 +38,6 @@ inline bool validateUTF8(std::string const& _input)
3838
return validateUTF8(_input, invalidPos);
3939
}
4040

41+
/// @returns the number of bytes in @a _utf8EncodedInput that are not UTF-8 continuation bytes
/// (bytes whose two most significant bits are 10); for well-formed UTF-8 this is the number
/// of code points. The input is not validated.
size_t numCodepoints(std::string const& _utf8EncodedInput);
42+
4143
}

scripts/isolate_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from os.path import join, isfile
1414

1515
def extract_test_cases(path):
16-
lines = open(path, mode='r', encoding='utf8').read().splitlines()
16+
lines = open(path, encoding="utf8", errors='ignore', mode='r').read().splitlines()
1717

1818
inside = False
1919
delimiter = ''

scripts/wasm-rebuild/docker-scripts/isolate_tests.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88

99

1010
def extract_test_cases(path):
11-
lines = open(path, 'rb').read().splitlines()
11+
lines = open(path, encoding="utf8", errors='ignore', mode='rb').read().splitlines()
1212

1313
inside = False
1414
delimiter = ''
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
Warning: SPDX license identifier not provided in source file. Before publishing, consider adding a comment containing "SPDX-License-Identifier: <SPDX-License>" to each source file. Use "SPDX-License-Identifier: UNLICENSED" for non-open-source code. Please see https://spdx.org for more information.
2+
--> message_format_utf16/input.sol
3+
4+
Warning: Source file does not specify required compiler version!
5+
--> message_format_utf16/input.sol
6+
7+
Warning: Statement has no effect.
8+
--> message_format_utf16/input.sol:2:58:
9+
|
10+
2 | /* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; }
11+
| ^^^^^^^^^^^^
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
contract Foo {
2+
/* ©©©©ᄅ©©©©© 2017 */ constructor () public { "©©©©ᄅ©©©©©" ; }
3+
}

0 commit comments

Comments
 (0)