diff --git a/python-frontend/src/main/java/org/sonar/python/EscapeCharPositionInfo.java b/python-frontend/src/main/java/org/sonar/python/EscapeCharPositionInfo.java new file mode 100644 index 0000000000..5dbb0278dc --- /dev/null +++ b/python-frontend/src/main/java/org/sonar/python/EscapeCharPositionInfo.java @@ -0,0 +1,23 @@ +/* + * SonarQube Python Plugin + * Copyright (C) 2011-2024 SonarSource SA + * mailto:info AT sonarsource DOT com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 3 of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */ +package org.sonar.python; + +public record EscapeCharPositionInfo(int columnInIpynbFile, int numberOfExtraChars) { +} diff --git a/python-frontend/src/main/java/org/sonar/python/IPythonLocation.java b/python-frontend/src/main/java/org/sonar/python/IPythonLocation.java index 3b98f4462a..1b4a209a2d 100644 --- a/python-frontend/src/main/java/org/sonar/python/IPythonLocation.java +++ b/python-frontend/src/main/java/org/sonar/python/IPythonLocation.java @@ -19,10 +19,14 @@ */ package org.sonar.python; -import java.util.Map; +import java.util.List; -public record IPythonLocation(int line, int column, Map colOffset, boolean isCompresssed) { - public IPythonLocation(int line, int column, Map colOffset) { - this(line, column, colOffset, false); +public record IPythonLocation(int line, int column, List colOffsets, boolean isCompresssed) { + public IPythonLocation(int line, int column, List colOffsets) { + this(line, column, colOffsets, false); + } + + public IPythonLocation(int line, int column) { + this(line, column, List.of(), false); } } diff --git a/python-frontend/src/main/java/org/sonar/python/tree/TokenEnricher.java b/python-frontend/src/main/java/org/sonar/python/tree/TokenEnricher.java index 9598f00ef4..36a5966b2b 100644 --- a/python-frontend/src/main/java/org/sonar/python/tree/TokenEnricher.java +++ b/python-frontend/src/main/java/org/sonar/python/tree/TokenEnricher.java @@ -24,6 +24,7 @@ import java.util.Map; import java.util.Set; import org.sonar.plugins.python.api.tree.Trivia; +import org.sonar.python.EscapeCharPositionInfo; import org.sonar.python.IPythonLocation; public class TokenEnricher { @@ -43,47 +44,55 @@ public static TokenImpl enrichToken(Token token, Map o if (location == null) { throw new IllegalStateException(String.format("No IPythonLocation found for line %s", token.getLine())); } - Map escapeCharsMap = location.colOffset(); - int startCol = computeColWithEscapes(token.getColumn(), escapeCharsMap, location.column()); - int escapedCharInToken 
= computeEscapeCharsInToken(token.getValue()); + List escapeCharPositionInfos = location.colOffsets(); + int startCol = token.getColumn(); + int endCol = token.getColumn() + token.getValue().length(); + int ipynbStartCol = computeColWithEscapes(location.column(), startCol, escapeCharPositionInfos); + int escapedCharInToken = computeEscapeCharsInToken(escapeCharPositionInfos, startCol, endCol); List trivia = token.getTrivia().stream() - .map(t -> computeTriviaLocation(t, location.line(), startCol, token.getLine(), offsetMap)) + .map(t -> computeTriviaLocation(t, location.line(), ipynbStartCol, token.getLine(), offsetMap)) .toList(); - return new TokenImpl(token, location.line(), startCol, escapedCharInToken, trivia, location.isCompresssed()); + return new TokenImpl(token, location.line(), ipynbStartCol, escapedCharInToken, trivia, location.isCompresssed()); } return new TokenImpl(token); } - private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine, Map offsetMap) { - int escapedCharInToken = computeEscapeCharsInToken(trivia.getToken().getValue()); + private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine, + Map offsetMap) { var line = parentLine; + int escapedCharInToken = computeEscapeCharsInTrivia(trivia, offsetMap); var col = parentCol - escapedCharInToken - trivia.getToken().getValue().length(); var isCompressed = false; if (parentPythonLine != trivia.getToken().getLine()) { IPythonLocation location = offsetMap.get(trivia.getToken().getLine()); line = location.line(); - Map escapeCharsMap = location.colOffset(); - col = computeColWithEscapes(trivia.getToken().getColumn(), escapeCharsMap, location.column()); + List escapeCharPositionInfos = location.colOffsets(); + col = computeColWithEscapes(location.column(), trivia.getToken().getColumn(), escapeCharPositionInfos); isCompressed = location.isCompresssed(); } return 
new TriviaImpl(new TokenImpl(trivia.getToken(), line, col, escapedCharInToken, List.of(), isCompressed)); } - private static int computeEscapeCharsInToken(String tokenValue) { - int escapedCharInToken = 0; - for (int i = 0; i < tokenValue.length(); i++) { - if (ESCAPED_CHARS.contains(tokenValue.charAt(i))) { - escapedCharInToken++; - } - } - return escapedCharInToken; + private static int computeColWithEscapes(int offsetColumn, int currentCol, List escapeCharPositionInfos) { + int escapedCharsOffset = computeEscapeCharsInToken(escapeCharPositionInfos, 0, currentCol); + return offsetColumn + currentCol + escapedCharsOffset; + } + private static int computeEscapeCharsInTrivia(com.sonar.sslr.api.Trivia trivia, Map offsetMap) { + IPythonLocation location = offsetMap.get(trivia.getToken().getLine()); + Token token = trivia.getToken(); + int startCol = token.getColumn(); + int endCol = token.getColumn() + token.getValue().length(); + return computeEscapeCharsInToken(location.colOffsets(), startCol, endCol); } - private static int computeColWithEscapes(int currentCol, Map escapes, int offsetColumn) { - return (int) escapes.keySet().stream().filter(k -> k > 0 && k < currentCol).count() + offsetColumn + currentCol; + private static int computeEscapeCharsInToken(List escapeCharPositionInfos, int startCol, int endCol) { + return escapeCharPositionInfos.stream() + .filter(entry -> entry.columnInIpynbFile() >= startCol && entry.columnInIpynbFile() < endCol) + .mapToInt(EscapeCharPositionInfo::numberOfExtraChars) + .sum(); } } diff --git a/python-frontend/src/test/java/org/sonar/python/FileLinesVisitorTest.java b/python-frontend/src/test/java/org/sonar/python/FileLinesVisitorTest.java index ae61189b8c..6ee8c591f6 100644 --- a/python-frontend/src/test/java/org/sonar/python/FileLinesVisitorTest.java +++ b/python-frontend/src/test/java/org/sonar/python/FileLinesVisitorTest.java @@ -76,10 +76,10 @@ void notebook_locs_single_line_file() { def foo(): return 3 """; - var locations = 
Map.of(1, new IPythonLocation(1, 383, Map.of(-1, 0)), - 2, new IPythonLocation(1, 390, Map.of(-1, 0)), - 3, new IPythonLocation(1, 402, Map.of(-1, 0)), - 4, new IPythonLocation(1, 402, Map.of(-1, 0))); + var locations = Map.of(1, new IPythonLocation(1, 383), + 2, new IPythonLocation(1, 390), + 3, new IPythonLocation(1, 402), + 4, new IPythonLocation(1, 402)); TestPythonVisitorRunner.scanNotebookFile(new File(BASE_DIR, "notebook_locs_single_line.ipynb"), locations, content, visitor); assertThat(visitor.getExecutableLines()).isEmpty(); assertThat(visitor.getLinesOfCode()).hasSize(3); diff --git a/python-frontend/src/test/java/org/sonar/python/PythonTestUtils.java b/python-frontend/src/test/java/org/sonar/python/PythonTestUtils.java index e3628ba18b..c310fbeb7d 100644 --- a/python-frontend/src/test/java/org/sonar/python/PythonTestUtils.java +++ b/python-frontend/src/test/java/org/sonar/python/PythonTestUtils.java @@ -23,6 +23,7 @@ import java.nio.file.Files; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.function.Predicate; import javax.annotation.CheckForNull; import org.mockito.Mockito; @@ -196,4 +197,11 @@ public static Symbol lastSymbolFromDef(String... 
code) { } return ((FunctionDef) tree).name().symbol(); } + + public static List mapToColumnMappingList(Map map) { + return map.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .map(entry -> new EscapeCharPositionInfo(entry.getKey(), entry.getValue())) + .toList(); + } } diff --git a/python-frontend/src/test/java/org/sonar/python/tree/IPythonTreeMakerTest.java b/python-frontend/src/test/java/org/sonar/python/tree/IPythonTreeMakerTest.java index 49444e9613..ef03ac51af 100644 --- a/python-frontend/src/test/java/org/sonar/python/tree/IPythonTreeMakerTest.java +++ b/python-frontend/src/test/java/org/sonar/python/tree/IPythonTreeMakerTest.java @@ -31,12 +31,14 @@ import org.sonar.plugins.python.api.tree.LineMagic; import org.sonar.plugins.python.api.tree.Statement; import org.sonar.plugins.python.api.tree.Tree; +import org.sonar.python.EscapeCharPositionInfo; import org.sonar.python.IPythonLocation; import org.sonar.python.api.PythonGrammar; import org.sonar.python.parser.RuleTest; import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.sonar.python.PythonTestUtils.mapToColumnMappingList; class IPythonTreeMakerTest extends RuleTest { @@ -308,7 +310,8 @@ void assignmentRhs() { @Test void enrichTokens() { - var offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(4, 15, 8, 20, -1, 2))); + List colOffsets = mapToColumnMappingList(Map.of(4, 1, 8, 1)); + var offsetMap = Map.of(1, new IPythonLocation(7, 10, colOffsets)); var statementList = parseIPython( "a = \"123\"", new IPythonTreeMaker(offsetMap)::fileInput).statements(); assertThat(statementList).isNotNull(); @@ -319,7 +322,7 @@ void enrichTokens() { assertThat(stringLiteral.get(0).firstToken().line()).isEqualTo(7); assertThat(stringLiteral.get(0).firstToken().column()).isEqualTo(14); - offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(-1, 0)), 2, new IPythonLocation(8, 10, Map.of(-1, 0))); + offsetMap = Map.of(1, 
new IPythonLocation(7, 10), 2, new IPythonLocation(8, 10)); statementList = parseIPython( "def foo(): # comment \n pass", new IPythonTreeMaker(offsetMap)::fileInput).statements(); assertThat(statementList).isNotNull(); diff --git a/python-frontend/src/test/java/org/sonar/python/tree/TokenEnricherTest.java b/python-frontend/src/test/java/org/sonar/python/tree/TokenEnricherTest.java index 996dccb2d1..76c0d375f1 100644 --- a/python-frontend/src/test/java/org/sonar/python/tree/TokenEnricherTest.java +++ b/python-frontend/src/test/java/org/sonar/python/tree/TokenEnricherTest.java @@ -21,7 +21,6 @@ import com.sonar.sslr.api.Token; import com.sonar.sslr.impl.Lexer; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import org.junit.jupiter.api.BeforeAll; @@ -32,6 +31,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.sonar.python.PythonTestUtils.mapToColumnMappingList; class TokenEnricherTest { private static TestLexer lexer; @@ -78,8 +78,8 @@ void shouldThrowIllegalStateException() { //when the mapping is not present for the current line var code = "a = 1\n\nb=3"; var offsetMap = Map.of( - 1, new IPythonLocation(200, 23, Map.of()), - 2, new IPythonLocation(201, 23, Map.of())); + 1, new IPythonLocation(200, 23), + 2, new IPythonLocation(201, 23)); var originalTokens = lexer.lex(code); Throwable throwable = assertThrows(IllegalStateException.class, () -> TokenEnricher.enrichTokens(originalTokens, offsetMap)); assertThat(throwable.getMessage()).isEqualTo("No IPythonLocation found for line 3"); @@ -89,26 +89,29 @@ void shouldThrowIllegalStateException() { void shouldProvideOffsetForEscapeChar() { var code = "a = \"1\""; var expectedTokens = lexer.lex(code); - var escapedChars = new LinkedHashMap(); - escapedChars.put(4, 305); - escapedChars.put(6, 308); + var escapedChars = mapToColumnMappingList(Map.of(4, 1, 6, 1)); var tokens = 
TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars))); var stringToken = tokens.get(2); assertThat(stringToken.line()).isEqualTo(100); assertThat(stringToken.column()).isEqualTo(304); assertThat(stringToken.includedEscapeChars()).isEqualTo(2); + + var eofToken = tokens.get(3); + assertThat(eofToken.line()).isEqualTo(100); + assertThat(eofToken.column()).isEqualTo(309); } @Test void shouldComputeColCorrectly() { var code = "a = f\"{b} \\n test\" + \"1\""; var expectedTokens = lexer.lex(code); - var escapedChars = new LinkedHashMap(); - escapedChars.put(5, 305); - escapedChars.put(10, 311); - escapedChars.put(17, 319); - escapedChars.put(21, 324); - escapedChars.put(23, 327); + var escapedChars = mapToColumnMappingList(Map.ofEntries( + Map.entry(5, 1), + Map.entry(10, 1), + Map.entry(17, 1), + Map.entry(21, 1), + Map.entry(23, 1) + )); var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars))); var stringToken = tokens.get(tokens.size() - 2); assertThat(stringToken.line()).isEqualTo(100); @@ -121,11 +124,33 @@ void shouldComputeColCorrectly() { assertThat(eofToken.includedEscapeChars()).isZero(); } + @Test + void shouldComputeTabColCorrectly() { + var code = "\ta"; + var expectedTokens = lexer.lex(code); + var escapedChars = mapToColumnMappingList(Map.of(0, 1)); + var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars))); + var tabToken = tokens.get(0); + assertThat(tabToken.line()).isEqualTo(100); + assertThat(tabToken.column()).isEqualTo(300); + assertThat(tabToken.includedEscapeChars()).isEqualTo(1); + + var idToken = tokens.get(1); + assertThat(idToken.line()).isEqualTo(100); + assertThat(idToken.column()).isEqualTo(302); + assertThat(idToken.includedEscapeChars()).isZero(); + + var eofToken = tokens.get(2); + assertThat(eofToken.line()).isEqualTo(100); + assertThat(eofToken.column()).isEqualTo(303); + 
assertThat(eofToken.includedEscapeChars()).isZero(); + } + @Test void shouldComputeColCorrectlyForTrivia() { var code = "a = 3 # comment"; var expectedTokens = lexer.lex(code); - var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0)))); + var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300))); var trivias = tokens.get(tokens.size() - 1).trivia(); assertThat(trivias).hasSize(1); assertThat(trivias.get(0).token().line()).isEqualTo(100); @@ -137,7 +162,8 @@ void shouldComputeColCorrectlyForTrivia() { void shouldComputeColCorrectlyForTriviaWithEscapeChar() { var code = "a = 3 # test\\n"; var expectedTokens = lexer.lex(code); - var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 1, 12, 13)))); + var escapedChars = mapToColumnMappingList(Map.of(12, 1)); + var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars))); var trivias = tokens.get(tokens.size() - 1).trivia(); assertThat(trivias).hasSize(1); assertThat(trivias.get(0).token().line()).isEqualTo(100); @@ -149,7 +175,8 @@ void shouldComputeColCorrectlyForTriviaWithEscapeChar() { void shouldComputeColCorrectlyForTriviaOnDifferentLine() { var code = "# comment\na = 3"; var expectedTokens = lexer.lex(code); - var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0)), 2, new IPythonLocation(101, 300, Map.of(-1, 0)))); + var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300), 2, + new IPythonLocation(101, 300))); assertThat(tokens.get(0).line()).isEqualTo(101); var trivias = tokens.get(0).trivia(); assertThat(trivias).hasSize(1); @@ -162,10 +189,7 @@ void shouldComputeColCorrectlyForTriviaOnDifferentLine() { void shouldComputeCorrectlyForSingleQuote() { var code = "a = '1'"; var expectedTokens = lexer.lex(code); - var 
escapedChars = new LinkedHashMap(); - escapedChars.put(4, 305); - escapedChars.put(6, 308); - var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars))); + var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300))); var stringToken = tokens.get(2); assertThat(stringToken.line()).isEqualTo(100); assertThat(stringToken.column()).isEqualTo(304); diff --git a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java index 9f44c2458b..1cd4da1aca 100644 --- a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java +++ b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/IpynbNotebookParser.java @@ -26,10 +26,11 @@ import java.io.IOException; import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.LinkedList; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; +import org.sonar.python.EscapeCharPositionInfo; import org.sonar.python.IPythonLocation; public class IpynbNotebookParser { @@ -38,6 +39,7 @@ public class IpynbNotebookParser { private static final Set ACCEPTED_LANGUAGE = Set.of("python", "ipython"); + public static Optional parseNotebook(PythonInputFile inputFile) { try { return new IpynbNotebookParser(inputFile).parse(); @@ -181,7 +183,7 @@ private static NotebookParsingData parseSourceArray(int startLine, JsonParser jP while (jParser.nextToken() != JsonToken.END_ARRAY) { String sourceLine = jParser.getValueAsString(); var newTokenLocation = jParser.currentTokenLocation(); - var countEscapedChar = countEscapeCharacters(sourceLine, newTokenLocation.getColumnNr()); + var countEscapedChar = countEscapeCharacters(sourceLine); cellData.addLineToSource(sourceLine, newTokenLocation.getLineNr(), newTokenLocation.getColumnNr(), countEscapedChar, isCompressed); 
lastSourceLine = sourceLine; tokenLocation = newTokenLocation; @@ -203,8 +205,8 @@ private static NotebookParsingData parseSourceMultilineString(int startLine, Jso var isFirstLine = true; for (String line : sourceLine.lines().toList()) { - var countEscapedChar = countEscapeCharacters(line, previousLen + previousExtraChars + tokenLocation.getColumnNr()); - var currentCount = countEscapedChar.get(-1); + var countEscapedChar = countEscapeCharacters(line); + var currentCount = countEscapedChar.stream().mapToInt(EscapeCharPositionInfo::numberOfExtraChars).sum(); cellData.addLineToSource(line, new IPythonLocation(tokenLocation.getLineNr(), tokenLocation.getColumnNr() + previousLen + previousExtraChars, countEscapedChar, true)); cellData.appendToSource("\n"); @@ -220,28 +222,16 @@ private static NotebookParsingData parseSourceMultilineString(int startLine, Jso return cellData; } - private static Map countEscapeCharacters(String sourceLine, int colOffSet) { - Map colMap = new LinkedHashMap<>(); - int count = 0; - var numberOfExtraChars = 0; + private static List countEscapeCharacters(String sourceLine) { + List escapeCharPositionInfoList = new LinkedList<>(); var arr = sourceLine.toCharArray(); - for (int i = 0; i < sourceLine.length(); ++i) { - char c = arr[i]; - switch (c) { - case '"', '\\': - numberOfExtraChars++; - colMap.put(i, i + colOffSet + count + numberOfExtraChars); - break; + for (int col = 0; col < sourceLine.length(); ++col) { + char c = arr[col]; + if (c == '"' || c == '\\' || c == '\t' || c == '\b' || c == '\f') { + escapeCharPositionInfoList.add(new EscapeCharPositionInfo(col, 1)); // we never encounter \n or \r as the lines are split at these characters - case '\b', '\f', '\t': - // we increase the count of one char as we count the \ but not the t or b - count += 1; - break; - default: - break; } } - colMap.put(-1, numberOfExtraChars); - return colMap; + return escapeCharPositionInfoList; } } diff --git 
a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/NotebookParsingData.java b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/NotebookParsingData.java index 7f10bda730..326471dc3e 100644 --- a/sonar-python-plugin/src/main/java/org/sonar/plugins/python/NotebookParsingData.java +++ b/sonar-python-plugin/src/main/java/org/sonar/plugins/python/NotebookParsingData.java @@ -20,8 +20,10 @@ package org.sonar.plugins.python; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map; import java.util.Objects; +import org.sonar.python.EscapeCharPositionInfo; import org.sonar.python.IPythonLocation; public class NotebookParsingData { @@ -70,8 +72,8 @@ public void appendToSource(String str) { aggregatedSource.append(str); } - public void addLineToSource(String sourceLine, int lineNr, int columnNr, Map colOffset, boolean isCompressed) { - addLineToSource(sourceLine, new IPythonLocation(lineNr, columnNr, colOffset, isCompressed)); + public void addLineToSource(String sourceLine, int lineNr, int columnNr, List colOffsets, boolean isCompressed) { + addLineToSource(sourceLine, new IPythonLocation(lineNr, columnNr, colOffsets, isCompressed)); } private void appendLine(String line) { @@ -90,7 +92,7 @@ public void addDelimiterToSource(String delimiter, int lineNr, int columnNr) { } public void addDefaultLocation(int line, int lineNr, int columnNr) { - locationMap.putIfAbsent(line, new IPythonLocation(lineNr, columnNr, Map.of(-1, 0))); + locationMap.putIfAbsent(line, new IPythonLocation(lineNr, columnNr)); } public void removeTrailingExtraLine() { diff --git a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java index 1cab879b0b..3db74e57ec 100644 --- a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java +++ 
b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/IpynbNotebookParserTest.java @@ -21,6 +21,7 @@ import java.io.File; import java.io.IOException; +import java.util.List; import java.util.Map; import org.junit.jupiter.api.Test; import org.sonar.api.batch.fs.InputFile; @@ -30,6 +31,7 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; import static org.sonar.plugins.python.TestUtils.createInputFile; +import static org.sonar.plugins.python.TestUtils.mapToColumnMappingList; class IpynbNotebookParserTest { private final File baseDir = new File("src/test/resources/org/sonar/plugins/python").getAbsoluteFile(); @@ -48,22 +50,55 @@ void testParseNotebook() throws IOException { assertThat(result.contents()).hasLineCount(27); assertThat(StringUtils.countMatches(result.contents(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER)) .isEqualTo(7); - assertThat(result.locationMap()).extracting(map -> map.get(1)).isEqualTo(new IPythonLocation(17, 5, Map.of(-1, 0))); + assertThat(result.locationMap()).extracting(map -> map.get(1)).isEqualTo(new IPythonLocation(17, 5)); //" print \"not none\"\n" - assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(19, 5, Map.of(10, 16, 19, 26, -1, 2))); + assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(19, 5, + mapToColumnMappingList(Map.of(10, 1, 19, 1)))); //"source": "#Some code\nprint(\"hello world\\n\")", - assertThat(result.locationMap()).extracting(map -> map.get(16)).isEqualTo(new IPythonLocation(64, 14, Map.of(-1, 0), true)); - assertThat(result.locationMap()).extracting(map -> map.get(17)).isEqualTo(new IPythonLocation(64, 27, Map.of(6, 34, 18, 47, 20, 50, -1, 3), true)); + assertThat(result.locationMap()).extracting(map -> map.get(16)).isEqualTo(new IPythonLocation(64, 14, List.of(), true)); + assertThat(result.locationMap()).extracting(map -> 
map.get(17)).isEqualTo(new IPythonLocation(64, 27, mapToColumnMappingList(Map.of(6 + , 1, 18, 1, 20, 1)), true)); //"source": "print(\"My\\ntext\")\nprint(\"Something else\\n\")" - assertThat(result.locationMap()).extracting(map -> map.get(22)).isEqualTo(new IPythonLocation(83, 14, Map.of(6, 21, 9, 25, 15, 32, -1, 3), true)); - assertThat(result.locationMap()).extracting(map -> map.get(23)).isEqualTo(new IPythonLocation(83, 37, Map.of(6, 44, 21, 60, 23, 63, -1, 3), true)); + assertThat(result.locationMap()).extracting(map -> map.get(22)).isEqualTo(new IPythonLocation(83, 14, mapToColumnMappingList(Map.of(6 + , 1, 9, 1, 15, 1)), true)); + assertThat(result.locationMap()).extracting(map -> map.get(23)).isEqualTo(new IPythonLocation(83, 37, mapToColumnMappingList(Map.of(6 + , 1, 21, 1, 23, 1)), true)); //"source": "a = \"A bunch of characters \\n \\f \\r \\ \"\nb = None" assertThat(result.locationMap()).extracting(map -> map.get(25)) - .isEqualTo(new IPythonLocation(90, 14, Map.of(4,19, 27, 43, 30, 47, 33, 51, 36, 55, 39, 59, -1, 6), true)); - assertThat(result.locationMap()).extracting(map -> map.get(26)).isEqualTo(new IPythonLocation(90, 63, Map.of(-1, 0), true)); - // last line with the cell delimiter which contains the EOF token - assertThat(result.locationMap()).extracting(map -> map.get(27)).isEqualTo(new IPythonLocation(90, 14, Map.of(-1, 0))); + .isEqualTo(new IPythonLocation(90, 14, mapToColumnMappingList(Map.of(4, 1, 27, 1, 30, 1, 33, 1, 36, 1, 39, 1)), true)); + assertThat(result.locationMap()).extracting(map -> map.get(26)).isEqualTo(new IPythonLocation(90, 63, List.of(), true)); + // last line with the cell delimiter which contains the EOF token + assertThat(result.locationMap()).extracting(map -> map.get(27)).isEqualTo(new IPythonLocation(90, 14, List.of())); + } + + @Test + void testParseNotebookWithEscapedChars() throws IOException { + var inputFile = createInputFile(baseDir, "notebook_with_escaped_chars.ipynb", InputFile.Status.CHANGED, 
InputFile.Type.MAIN); + + var resultOptional = IpynbNotebookParser.parseNotebook(inputFile); + + assertThat(resultOptional).isPresent(); + + var result = resultOptional.get(); + + assertThat(result.locationMap().keySet()).hasSize(2); + assertThat(result.contents()).hasLineCount(2); + assertThat(StringUtils.countMatches(result.contents(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER)) + .isEqualTo(1); + + //"source": "\t \b \f \"\"" + assertThat(result.locationMap()).extracting(map -> map.get(1)) + .isEqualTo(new IPythonLocation(14, 15, mapToColumnMappingList( + Map.ofEntries( + Map.entry(0, 1), + Map.entry(2, 1), + Map.entry(4, 1), + Map.entry(6, 1), + Map.entry(7, 1) + ) + ), true)); + } @Test @@ -80,10 +115,10 @@ void testParseNotebookWithEmptyLines() throws IOException { assertThat(result.contents()).hasLineCount(4); assertThat(StringUtils.countMatches(result.contents(), IpynbNotebookParser.SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER)) .isEqualTo(1); - assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(11, 5, Map.of(-1, 0))); + assertThat(result.locationMap()).extracting(map -> map.get(3)).isEqualTo(new IPythonLocation(11, 5)); // last line with the cell delimiter which contains the EOF token - assertThat(result.locationMap()).extracting(map -> map.get(4)).isEqualTo(new IPythonLocation(11, 5, Map.of(-1, 0))); + assertThat(result.locationMap()).extracting(map -> map.get(4)).isEqualTo(new IPythonLocation(11, 5)); } @Test @@ -141,12 +176,12 @@ void testParseNotebookSingleLine() throws IOException { assertThat(result.locationMap().get(4).column()).isEqualTo(452); // First and second line - assertThat(result.locationMap()).containsEntry(1, new IPythonLocation(1, 382, Map.of(-1, 0), true)); - assertThat(result.locationMap()).containsEntry(2, new IPythonLocation(1, 429, Map.of(-1, 0), true)); + assertThat(result.locationMap()).containsEntry(1, new IPythonLocation(1, 382, List.of(), true)); + 
assertThat(result.locationMap()).containsEntry(2, new IPythonLocation(1, 429, List.of(), true)); - assertThat(result.locationMap()).containsEntry(6, new IPythonLocation(1, 559, Map.of(-1, 3, 0, 560, 1, 562, 2, 564), true)); - assertThat(result.locationMap()).containsEntry(7, new IPythonLocation(1, 610, Map.of(-1, 0), true)); - assertThat(result.locationMap()).containsEntry(8, new IPythonLocation(1, 637, Map.of(-1, 3, 1, 640, 2, 642, 0, 638), true)); + assertThat(result.locationMap()).containsEntry(6, new IPythonLocation(1, 559, mapToColumnMappingList(Map.of(0, 1, 1, 1, 2, 1)), true)); + assertThat(result.locationMap()).containsEntry(7, new IPythonLocation(1, 610, List.of(), true)); + assertThat(result.locationMap()).containsEntry(8, new IPythonLocation(1, 637, mapToColumnMappingList(Map.of(1, 1, 2, 1, 0, 1)), true)); } @Test diff --git a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/NotebookParsingDataTest.java b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/NotebookParsingDataTest.java index de96c77167..c409b3c4d7 100644 --- a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/NotebookParsingDataTest.java +++ b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/NotebookParsingDataTest.java @@ -45,7 +45,7 @@ void testAddDelimiterToSource() { void testAddLineToSource() { var data = new NotebookParsingData(new StringBuilder().append("First line"), new LinkedHashMap<>(), 5); - data.addLineToSource("Test", new IPythonLocation(1, 2, Map.of(-1, 0))); + data.addLineToSource("Test", new IPythonLocation(1, 2)); assertThat(data).extracting(d -> d.getAggregatedSource().toString()).isEqualTo("First lineTest"); assertThat(data).extracting(d -> d.getAggregatedSourceLine()).isEqualTo(6); assertThat(data).extracting(d -> d.getLocationMap().size()).isEqualTo(1); @@ -65,11 +65,11 @@ void testCombineEmpty() { @Test void testCombine() { var location1 = new LinkedHashMap(); - location1.put(1, new IPythonLocation(0, 1, Map.of())); + 
location1.put(1, new IPythonLocation(0, 1)); var data = new NotebookParsingData(new StringBuilder().append("a"), location1, 4); var location2 = new LinkedHashMap(); - location2.put(3, new IPythonLocation(2, 1, Map.of())); + location2.put(3, new IPythonLocation(2, 1)); var data2 = new NotebookParsingData(new StringBuilder().append("b"), location2, 3); data.combine(data2); diff --git a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/PythonHighlighterTest.java b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/PythonHighlighterTest.java index c85f42587b..d054c6cec6 100644 --- a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/PythonHighlighterTest.java +++ b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/PythonHighlighterTest.java @@ -34,6 +34,7 @@ import org.sonar.python.TestPythonVisitorRunner; import static org.assertj.core.api.Assertions.assertThat; +import static org.sonar.plugins.python.TestUtils.mapToColumnMappingList; class PythonHighlighterTest { @@ -211,15 +212,24 @@ void number() { @Test void highlightingNotebooks() { - String pythonContent = "def foo():\n pass\na = \"test\" # comment\n# test \\n \\\\n test\nb = 3J\n#SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER"; + String pythonContent = """ + def foo(): + pass + a = "test" # comment + # test \\n \\\\n test + b = 3J + c = \t4.2 + #SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER"""; var locations = Map.of( - 1, new IPythonLocation(9, 5, Map.of(-1, 0)), - 2, new IPythonLocation(10, 5, Map.of(-1, 0)), - 3, new IPythonLocation(11, 5, Map.of(-1, 2, 9, 10, 14, 16)), - 4, new IPythonLocation(12, 5, Map.of(-1, 3, 7, 12, 10, 16, 11, 18)), - 5, new IPythonLocation(13, 5, Map.of(-1, 0)), - 6, new IPythonLocation(13, 5, Map.of(-1, 0))); //EOF Token - PythonHighlighter pythonHighlighter = new PythonHighlighter(context, new GeneratedIPythonFile(notebookInputFile, pythonContent, locations)); + 1, new IPythonLocation(9, 5), + 2, new IPythonLocation(10, 5), + 3, new IPythonLocation(11, 5, 
mapToColumnMappingList(Map.of(-1, 2, 4, 1, 9, 1))), + 4, new IPythonLocation(12, 5, mapToColumnMappingList(Map.of(-1, 3, 7, 1, 10, 1, 11, 1))), + 5, new IPythonLocation(13, 5), + 6, new IPythonLocation(14, 5, mapToColumnMappingList(Map.of(4, 1))), + 7, new IPythonLocation(14, 5)); //EOF Token + PythonHighlighter pythonHighlighter = new PythonHighlighter(context, new GeneratedIPythonFile(notebookInputFile, pythonContent, + locations)); TestPythonVisitorRunner.scanNotebookFile(notebookFile, locations, pythonContent, pythonHighlighter); // def checkOnRange(9, 5, 3, notebookFile, TypeOfText.KEYWORD); @@ -233,17 +243,20 @@ void highlightingNotebooks() { checkOnRange(12, 5, 21, notebookFile, TypeOfText.COMMENT); // 3J checkOnRange(13, 9, 2, notebookFile, TypeOfText.CONSTANT); + // 4.2 + checkOnRange(14, 11, 3, notebookFile, TypeOfText.CONSTANT); } @Test void highlightingNotebooksSingleLine() { String pythonContent = "def foo():\n pass\na = 2 # comment\n#SONAR_PYTHON_NOTEBOOK_CELL_DELIMITER"; var locations = Map.of( - 1, new IPythonLocation(1, 93, Map.of(-1, 0), true), - 2, new IPythonLocation(1, 108, Map.of(-1, 0), true), - 3, new IPythonLocation(1, 121, Map.of(-1, 0), true), - 4, new IPythonLocation(1, 93, Map.of(-1, 0), true)); //EOF Token - PythonHighlighter pythonHighlighter = new PythonHighlighter(context, new GeneratedIPythonFile(notebookInputFileSingleLine, pythonContent, locations)); + 1, new IPythonLocation(1, 93, List.of(), true), + 2, new IPythonLocation(1, 108, List.of(), true), + 3, new IPythonLocation(1, 121, List.of(), true), + 4, new IPythonLocation(1, 93, List.of(), true)); //EOF Token + PythonHighlighter pythonHighlighter = new PythonHighlighter(context, new GeneratedIPythonFile(notebookInputFileSingleLine, + pythonContent, locations)); TestPythonVisitorRunner.scanNotebookFile(notebookFileSingleLine, locations, pythonContent, pythonHighlighter); // def checkOnRange(1, 93, 3, notebookFileSingleLine, TypeOfText.KEYWORD); @@ -252,7 +265,7 @@ void 
highlightingNotebooksSingleLine() { // 2 checkOnRange(1, 125, 1, notebookFileSingleLine, TypeOfText.CONSTANT); // # comment - checkOnRange(1, 127 , 9, notebookFileSingleLine, TypeOfText.COMMENT); + checkOnRange(1, 127, 9, notebookFileSingleLine, TypeOfText.COMMENT); } /** diff --git a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/TestUtils.java b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/TestUtils.java index 9b3857105f..e906802627 100644 --- a/sonar-python-plugin/src/test/java/org/sonar/plugins/python/TestUtils.java +++ b/sonar-python-plugin/src/test/java/org/sonar/plugins/python/TestUtils.java @@ -24,6 +24,8 @@ import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; +import java.util.List; +import java.util.Map; import org.sonar.api.SonarEdition; import org.sonar.api.SonarQubeSide; import org.sonar.api.SonarRuntime; @@ -31,6 +33,7 @@ import org.sonar.api.batch.fs.internal.TestInputFileBuilder; import org.sonar.api.internal.SonarRuntimeImpl; import org.sonar.api.utils.Version; +import org.sonar.python.EscapeCharPositionInfo; public final class TestUtils { @@ -59,4 +62,11 @@ public static PythonInputFile createInputFile(File baseDir, String name, InputFi .build()); } + public static List mapToColumnMappingList(Map map) { + return map.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .map(entry -> new EscapeCharPositionInfo(entry.getKey(), entry.getValue())) + .toList(); + } + } diff --git a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebookHighlighter.ipynb b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebookHighlighter.ipynb index 8738a272f7..19e76be50a 100644 --- a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebookHighlighter.ipynb +++ b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebookHighlighter.ipynb @@ -10,7 +10,8 @@ " pass\n", "a = \"test\" # comment\n", "# test \\n \\\\n test\n", - "b 
= 3J" + "b = 3J\n", + "c = \t4.2" ] } ], diff --git a/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_with_escaped_chars.ipynb b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_with_escaped_chars.ipynb new file mode 100644 index 0000000000..06a3f7af73 --- /dev/null +++ b/sonar-python-plugin/src/test/resources/org/sonar/plugins/python/notebook_with_escaped_chars.ipynb @@ -0,0 +1,38 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ ] + } + ], + "source": "\t \b \f \"\"" + } + ], + "metadata": { + "kernelspec": { + "display_name": "jupyter-experiment_venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}