Skip to content

Commit 78ee7b8

Browse files
authored
fix(linter/plugins): handle utf16 characters within comment spans (#14768)
Part of #14564. Corrects the start and end offsets of comment spans when the source contains multi-byte characters (e.g. accented letters, CJK text, emoji). The conversion of UTF-8 byte offsets to UTF-16 offsets takes place on the Rust side for performance.
1 parent 86ecae1 commit 78ee7b8

File tree

6 files changed

+163
-0
lines changed

6 files changed

+163
-0
lines changed

apps/oxlint/test/e2e.test.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -220,4 +220,8 @@ describe('oxlint CLI', () => {
220220
it('should support comments-related APIs in `context.sourceCode`', async () => {
221221
await testFixture('comments');
222222
});
223+
224+
it('should support UTF16 characters in source code and comments with correct spans', async () => {
225+
await testFixture('unicode-comments');
226+
});
223227
});
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
{
2+
"jsPlugins": ["./plugin.ts"],
3+
"categories": { "correctness": "off" },
4+
"rules": {
5+
"unicode-comments/unicode-comments": "error"
6+
}
7+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
// Unicode test with emojis and multi-byte characters
2+
const greeting = 'Hello 🌍'; // Line comment with emoji
3+
4+
/**
5+
* Function with emoji in JSDoc
6+
* @param {string} name - User's name 👤
7+
* @returns {string} Greeting message
8+
*/
9+
function greetUser(name) {
10+
// Comment with multiple emojis 🚀⭐💫
11+
const message = `Hello ${name}! 🌟`;
12+
/* Block comment with unicode: ñáéíóú */
13+
return message;
14+
}
15+
16+
/* Multi-byte comment: 你好世界 */
17+
const 你好世界 = 'Testing üöä'; // Line comment: ñáéíóú
18+
19+
/**
20+
* JSDoc with emojis and unicode: 你好 👋
21+
* @param {number} count - Number of items 🔢
22+
*/
23+
function processItems(count) {
24+
// Comment with mixed unicode: αβγδε русский עברית
25+
const result = count * 2; /* Block: ñáéíóú 🚀 */
26+
return result;
27+
}
28+
29+
// Final comment with emoji: 🎉✨🎊
30+
const finalVar = 'Done ✅';
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Exit code
2+
1
3+
4+
# stdout
5+
```
6+
x unicode-comments(unicode-comments): getAllComments: [
7+
| {
8+
| "type": "Line",
9+
| "value": " Unicode test with emojis and multi-byte characters"
10+
| },
11+
| {
12+
| "type": "Line",
13+
| "value": " Line comment with emoji"
14+
| },
15+
| {
16+
| "type": "Block",
17+
| "value": "*\n * Function with emoji in JSDoc\n * @param {string} name - User's name 👤\n * @returns {string} Greeting message\n "
18+
| },
19+
| {
20+
| "type": "Line",
21+
| "value": " Comment with multiple emojis 🚀⭐💫"
22+
| },
23+
| {
24+
| "type": "Block",
25+
| "value": " Block comment with unicode: ñáéíóú "
26+
| },
27+
| {
28+
| "type": "Block",
29+
| "value": " Multi-byte comment: 你好世界 "
30+
| },
31+
| {
32+
| "type": "Line",
33+
| "value": " Line comment: ñáéíóú"
34+
| },
35+
| {
36+
| "type": "Block",
37+
| "value": "*\n * JSDoc with emojis and unicode: 你好 👋\n * @param {number} count - Number of items 🔢\n "
38+
| },
39+
| {
40+
| "type": "Line",
41+
| "value": " Comment with mixed unicode: αβγδε русский עברית"
42+
| },
43+
| {
44+
| "type": "Block",
45+
| "value": " Block: ñáéíóú 🚀 "
46+
| },
47+
| {
48+
| "type": "Line",
49+
| "value": " Final comment with emoji: 🎉✨🎊"
50+
| }
51+
| ]
52+
,-[files/unicode-comments.js:2:1]
53+
1 | // Unicode test with emojis and multi-byte characters
54+
2 | ,-> const greeting = 'Hello 🌍'; // Line comment with emoji
55+
3 | |
56+
4 | | /**
57+
5 | | * Function with emoji in JSDoc
58+
6 | | * @param {string} name - User's name 👤
59+
7 | | * @returns {string} Greeting message
60+
8 | | */
61+
9 | | function greetUser(name) {
62+
10 | | // Comment with multiple emojis 🚀⭐💫
63+
11 | | const message = `Hello ${name}! 🌟`;
64+
12 | | /* Block comment with unicode: ñáéíóú */
65+
13 | | return message;
66+
14 | | }
67+
15 | |
68+
16 | | /* Multi-byte comment: 你好世界 */
69+
17 | | const 你好世界 = 'Testing üöä'; // Line comment: ñáéíóú
70+
18 | |
71+
19 | | /**
72+
20 | | * JSDoc with emojis and unicode: 你好 👋
73+
21 | | * @param {number} count - Number of items 🔢
74+
22 | | */
75+
23 | | function processItems(count) {
76+
24 | | // Comment with mixed unicode: αβγδε русский עברית
77+
25 | | const result = count * 2; /* Block: ñáéíóú 🚀 */
78+
26 | | return result;
79+
27 | | }
80+
28 | |
81+
29 | | // Final comment with emoji: 🎉✨🎊
82+
30 | `-> const finalVar = 'Done ✅';
83+
`----
84+
85+
Found 0 warnings and 1 error.
86+
Finished in Xms on 1 file using X threads.
87+
```
88+
89+
# stderr
90+
```
91+
WARNING: JS plugins are experimental and not subject to semver.
92+
Breaking changes are possible while JS plugins support is under development.
93+
```
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import type { Plugin, Rule } from '../../../dist/index.js';
2+
3+
const unicodeCommentsRule: Rule = {
4+
create(context) {
5+
const { sourceCode } = context;
6+
const { ast } = sourceCode;
7+
8+
context.report({
9+
message: `getAllComments: ${
10+
JSON.stringify(sourceCode.getAllComments().map(c => ({ type: c.type, value: c.value })), null, 4)
11+
}`,
12+
node: ast,
13+
});
14+
15+
return {};
16+
},
17+
};
18+
19+
const plugin: Plugin = {
20+
meta: {
21+
name: 'unicode-comments',
22+
},
23+
rules: {
24+
'unicode-comments': unicodeCommentsRule,
25+
},
26+
};
27+
28+
export default plugin;

crates/oxc_linter/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,6 +433,7 @@ impl Linter {
433433
// Convert spans to UTF-16
434434
let span_converter = Utf8ToUtf16::new(program.source_text);
435435
span_converter.convert_program(program);
436+
span_converter.convert_comments(&mut program.comments);
436437

437438
// Get offset of `Program` within buffer (bottom 32 bits of pointer)
438439
let program_offset = ptr::from_ref(program) as u32;

0 commit comments

Comments
 (0)