Skip to content

Commit 405fecc

Browse files
committed
NonASCIICharacterChecker should inspect Token.rawText, not Token.text
It is reasonable to enforce a rule that prevents non-ASCII text from appearing directly in source code. However, the current implementation also flags Unicode escape sequences, which consist only of ASCII characters (e.g. \ud83c\udf4e). NonASCIICharacterChecker should inspect Token.rawText, which represents the literal source text before Unicode escapes are applied. Token.text, which is currently used, already has Unicode escapes applied and thus does not represent the actual content of the source code.
1 parent 36bb802 commit 405fecc

File tree

2 files changed

+25
-5
lines changed

2 files changed

+25
-5
lines changed

src/main/scala/org/scalastyle/scalariform/NonASCIICharacterChecker.scala

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616

1717
package org.scalastyle.scalariform
1818

19-
2019
import java.util.regex.Pattern
2120

2221
import org.scalastyle.PositionError
@@ -28,13 +27,12 @@ import _root_.scalariform.parser.CompilationUnit
2827

2928
class NonASCIICharacterChecker extends ScalariformChecker {
3029
val errorKey: String = "non.ascii.character.disallowed"
30+
private val asciiPattern = Pattern.compile("""\p{ASCII}+""", Pattern.DOTALL)
3131

3232
override def verify(ast: CompilationUnit): List[ScalastyleError] = {
3333
ast.tokens.filter(hasNonAsciiChars).map(x => PositionError(x.offset))
3434
}
3535

3636
private def hasNonAsciiChars(x: Token) =
37-
x.text.trim.nonEmpty && !Pattern.compile( """\p{ASCII}+""", Pattern.DOTALL)
38-
.matcher(x.text.trim).matches()
39-
37+
x.rawText.trim.nonEmpty && !asciiPattern.matcher(x.rawText.trim).matches()
4038
}

src/test/scala/org/scalastyle/scalariform/NonASCIICharacterCheckerTest.scala

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,18 @@ class NonASCIICharacterCheckerTest extends AssertionsForJUnit with CheckerTest {
3939
assertErrors(List(), source)
4040
}
4141

42+
@Test def testStringOK(): Unit = {
43+
val source = """
44+
|package foobar
45+
|// non-ascii in string via unicode escape - ok
46+
|class OK {
47+
| val s = "%s"
48+
|}""".stripMargin.format("\\ud83c\\udf4e")
4249

43-
@Test def testClassNotOk(): Unit = {
50+
assertErrors(List(), source)
51+
}
52+
53+
@Test def testClassNotOK(): Unit = {
4454
val source = """
4555
|package foobar
4656
|// \u2190
@@ -49,6 +59,18 @@ class NonASCIICharacterCheckerTest extends AssertionsForJUnit with CheckerTest {
4959
| def `\u21d2` = "test"
5060
|}
5161
| """.stripMargin
62+
5263
assertErrors(List(columnError(2, 14), columnError(5, 6), columnError(6, 6)), source)
5364
}
65+
66+
@Test def testStringNotOK(): Unit = {
67+
val source = """
68+
|package foobar
69+
|// non-ascii literal in string - not ok
70+
|class NotOK {
71+
| val s = "\ud83c\udf4e"
72+
|}""".stripMargin
73+
74+
assertErrors(List(columnError(5, 10)), source)
75+
}
5476
}

0 commit comments

Comments
 (0)