Skip to content

Commit

Permalink
SONARPY-2068: Tabulations (\t) should be handled correctly as an esca…
Browse files Browse the repository at this point in the history
…pe character (#2050)

* SONARPY-2068 add testcase for TokenEnricher

* SONARPY-2068 store how many chars are skipped in the hashmap

* SONARPY-2068 consider \t, \b, \f as two char characters

* SONARPY-2068 refactoring/simplifiying

* SONARPY-2068 update tests

* SONARPY-2068 change Map<Integer, Integer> to list of ColumnMapping

* SONARPY-2068 fix review
  • Loading branch information
Seppli11 authored Oct 10, 2024
1 parent 40449b9 commit 3690cb2
Show file tree
Hide file tree
Showing 15 changed files with 269 additions and 109 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* SonarQube Python Plugin
* Copyright (C) 2011-2024 SonarSource SA
* mailto:info AT sonarsource DOT com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 3 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.sonar.python;

public record EscapeCharPositionInfo(int columnInIpynbFile, int numberOfExtraChars) {
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@
*/
package org.sonar.python;

import java.util.Map;
import java.util.List;

public record IPythonLocation(int line, int column, Map<Integer, Integer> colOffset, boolean isCompresssed) {
public IPythonLocation(int line, int column, Map<Integer, Integer> colOffset) {
this(line, column, colOffset, false);
public record IPythonLocation(int line, int column, List<EscapeCharPositionInfo> colOffsets, boolean isCompresssed) {
public IPythonLocation(int line, int column, List<EscapeCharPositionInfo> colOffsets) {
this(line, column, colOffsets, false);
}

public IPythonLocation(int line, int column) {
this(line, column, List.of(), false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.util.Map;
import java.util.Set;
import org.sonar.plugins.python.api.tree.Trivia;
import org.sonar.python.EscapeCharPositionInfo;
import org.sonar.python.IPythonLocation;

public class TokenEnricher {
Expand All @@ -43,47 +44,55 @@ public static TokenImpl enrichToken(Token token, Map<Integer, IPythonLocation> o
if (location == null) {
throw new IllegalStateException(String.format("No IPythonLocation found for line %s", token.getLine()));
}
Map<Integer, Integer> escapeCharsMap = location.colOffset();
int startCol = computeColWithEscapes(token.getColumn(), escapeCharsMap, location.column());
int escapedCharInToken = computeEscapeCharsInToken(token.getValue());
List<EscapeCharPositionInfo> escapeCharPositionInfos = location.colOffsets();
int startCol = token.getColumn();
int endCol = token.getColumn() + token.getValue().length();
int ipynbStartCol = computeColWithEscapes(location.column(), startCol, escapeCharPositionInfos);
int escapedCharInToken = computeEscapeCharsInToken(escapeCharPositionInfos, startCol, endCol);
List<Trivia> trivia = token.getTrivia().stream()
.map(t -> computeTriviaLocation(t, location.line(), startCol, token.getLine(), offsetMap))
.map(t -> computeTriviaLocation(t, location.line(), ipynbStartCol, token.getLine(), offsetMap))
.toList();

return new TokenImpl(token, location.line(), startCol, escapedCharInToken, trivia, location.isCompresssed());
return new TokenImpl(token, location.line(), ipynbStartCol, escapedCharInToken, trivia, location.isCompresssed());
}
return new TokenImpl(token);
}

private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine, Map<Integer, IPythonLocation> offsetMap) {
int escapedCharInToken = computeEscapeCharsInToken(trivia.getToken().getValue());
private static Trivia computeTriviaLocation(com.sonar.sslr.api.Trivia trivia, int parentLine, int parentCol, int parentPythonLine,
Map<Integer, IPythonLocation> offsetMap) {
var line = parentLine;
int escapedCharInToken = computeEscapeCharsInTrivia(trivia, offsetMap);
var col = parentCol - escapedCharInToken - trivia.getToken().getValue().length();
var isCompressed = false;
if (parentPythonLine != trivia.getToken().getLine()) {
IPythonLocation location = offsetMap.get(trivia.getToken().getLine());
line = location.line();
Map<Integer, Integer> escapeCharsMap = location.colOffset();
col = computeColWithEscapes(trivia.getToken().getColumn(), escapeCharsMap, location.column());
List<EscapeCharPositionInfo> escapeCharPositionInfos = location.colOffsets();
col = computeColWithEscapes(location.column(), trivia.getToken().getColumn(), escapeCharPositionInfos);
isCompressed = location.isCompresssed();
}
return new TriviaImpl(new TokenImpl(trivia.getToken(), line, col,
escapedCharInToken, List.of(), isCompressed));
}

private static int computeEscapeCharsInToken(String tokenValue) {
int escapedCharInToken = 0;
for (int i = 0; i < tokenValue.length(); i++) {
if (ESCAPED_CHARS.contains(tokenValue.charAt(i))) {
escapedCharInToken++;
}
}
return escapedCharInToken;
private static int computeColWithEscapes(int offsetColumn, int currentCol, List<EscapeCharPositionInfo> escapeCharPositionInfos) {
int escapedCharsOffset = computeEscapeCharsInToken(escapeCharPositionInfos, 0, currentCol);
return offsetColumn + currentCol + escapedCharsOffset;
}

private static int computeEscapeCharsInTrivia(com.sonar.sslr.api.Trivia trivia, Map<Integer, IPythonLocation> offsetMap) {
IPythonLocation location = offsetMap.get(trivia.getToken().getLine());
Token token = trivia.getToken();
int startCol = token.getColumn();
int endCol = token.getColumn() + token.getValue().length();
return computeEscapeCharsInToken(location.colOffsets(), startCol, endCol);
}

private static int computeColWithEscapes(int currentCol, Map<Integer, Integer> escapes, int offsetColumn) {
return (int) escapes.keySet().stream().filter(k -> k > 0 && k < currentCol).count() + offsetColumn + currentCol;
private static int computeEscapeCharsInToken(List<EscapeCharPositionInfo> escapeCharPositionInfos, int startCol, int endCol) {
return escapeCharPositionInfos.stream()
.filter(entry -> entry.columnInIpynbFile() >= startCol && entry.columnInIpynbFile() < endCol)
.mapToInt(EscapeCharPositionInfo::numberOfExtraChars)
.sum();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,10 @@ void notebook_locs_single_line_file() {
def foo():
return 3
""";
var locations = Map.of(1, new IPythonLocation(1, 383, Map.of(-1, 0)),
2, new IPythonLocation(1, 390, Map.of(-1, 0)),
3, new IPythonLocation(1, 402, Map.of(-1, 0)),
4, new IPythonLocation(1, 402, Map.of(-1, 0)));
var locations = Map.of(1, new IPythonLocation(1, 383),
2, new IPythonLocation(1, 390),
3, new IPythonLocation(1, 402),
4, new IPythonLocation(1, 402));
TestPythonVisitorRunner.scanNotebookFile(new File(BASE_DIR, "notebook_locs_single_line.ipynb"), locations, content, visitor);
assertThat(visitor.getExecutableLines()).isEmpty();
assertThat(visitor.getLinesOfCode()).hasSize(3);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.function.Predicate;
import javax.annotation.CheckForNull;
import org.mockito.Mockito;
Expand Down Expand Up @@ -196,4 +197,11 @@ public static Symbol lastSymbolFromDef(String... code) {
}
return ((FunctionDef) tree).name().symbol();
}

public static List<EscapeCharPositionInfo> mapToColumnMappingList(Map<Integer, Integer> map) {
return map.entrySet().stream()
.sorted(Map.Entry.comparingByKey())
.map(entry -> new EscapeCharPositionInfo(entry.getKey(), entry.getValue()))
.toList();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@
import org.sonar.plugins.python.api.tree.LineMagic;
import org.sonar.plugins.python.api.tree.Statement;
import org.sonar.plugins.python.api.tree.Tree;
import org.sonar.python.EscapeCharPositionInfo;
import org.sonar.python.IPythonLocation;
import org.sonar.python.api.PythonGrammar;
import org.sonar.python.parser.RuleTest;

import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.sonar.python.PythonTestUtils.mapToColumnMappingList;

class IPythonTreeMakerTest extends RuleTest {

Expand Down Expand Up @@ -308,7 +310,8 @@ void assignmentRhs() {

@Test
void enrichTokens() {
var offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(4, 15, 8, 20, -1, 2)));
List<EscapeCharPositionInfo> colOffsets = mapToColumnMappingList(Map.of(4, 1, 8, 1));
var offsetMap = Map.of(1, new IPythonLocation(7, 10, colOffsets));
var statementList = parseIPython(
"a = \"123\"", new IPythonTreeMaker(offsetMap)::fileInput).statements();
assertThat(statementList).isNotNull();
Expand All @@ -319,7 +322,7 @@ void enrichTokens() {
assertThat(stringLiteral.get(0).firstToken().line()).isEqualTo(7);
assertThat(stringLiteral.get(0).firstToken().column()).isEqualTo(14);

offsetMap = Map.of(1, new IPythonLocation(7, 10, Map.of(-1, 0)), 2, new IPythonLocation(8, 10, Map.of(-1, 0)));
offsetMap = Map.of(1, new IPythonLocation(7, 10), 2, new IPythonLocation(8, 10));
statementList = parseIPython(
"def foo(): # comment \n pass", new IPythonTreeMaker(offsetMap)::fileInput).statements();
assertThat(statementList).isNotNull();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import com.sonar.sslr.api.Token;
import com.sonar.sslr.impl.Lexer;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import org.junit.jupiter.api.BeforeAll;
Expand All @@ -32,6 +31,7 @@

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.sonar.python.PythonTestUtils.mapToColumnMappingList;

class TokenEnricherTest {
private static TestLexer lexer;
Expand Down Expand Up @@ -78,8 +78,8 @@ void shouldThrowIllegalStateException() {
//when the mapping is not present for the current line
var code = "a = 1\n\nb=3";
var offsetMap = Map.of(
1, new IPythonLocation(200, 23, Map.of()),
2, new IPythonLocation(201, 23, Map.of()));
1, new IPythonLocation(200, 23),
2, new IPythonLocation(201, 23));
var originalTokens = lexer.lex(code);
Throwable throwable = assertThrows(IllegalStateException.class, () -> TokenEnricher.enrichTokens(originalTokens, offsetMap));
assertThat(throwable.getMessage()).isEqualTo("No IPythonLocation found for line 3");
Expand All @@ -89,26 +89,29 @@ void shouldThrowIllegalStateException() {
void shouldProvideOffsetForEscapeChar() {
var code = "a = \"1\"";
var expectedTokens = lexer.lex(code);
var escapedChars = new LinkedHashMap<Integer, Integer>();
escapedChars.put(4, 305);
escapedChars.put(6, 308);
var escapedChars = mapToColumnMappingList(Map.of(4, 1, 6, 1));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
var stringToken = tokens.get(2);
assertThat(stringToken.line()).isEqualTo(100);
assertThat(stringToken.column()).isEqualTo(304);
assertThat(stringToken.includedEscapeChars()).isEqualTo(2);

var eofToken = tokens.get(3);
assertThat(eofToken.line()).isEqualTo(100);
assertThat(eofToken.column()).isEqualTo(309);
}

@Test
void shouldComputeColCorrectly() {
var code = "a = f\"{b} \\n test\" + \"1\"";
var expectedTokens = lexer.lex(code);
var escapedChars = new LinkedHashMap<Integer, Integer>();
escapedChars.put(5, 305);
escapedChars.put(10, 311);
escapedChars.put(17, 319);
escapedChars.put(21, 324);
escapedChars.put(23, 327);
var escapedChars = mapToColumnMappingList(Map.ofEntries(
Map.entry(5, 1),
Map.entry(10, 1),
Map.entry(17, 1),
Map.entry(21, 1),
Map.entry(23, 1)
));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
var stringToken = tokens.get(tokens.size() - 2);
assertThat(stringToken.line()).isEqualTo(100);
Expand All @@ -121,11 +124,33 @@ void shouldComputeColCorrectly() {
assertThat(eofToken.includedEscapeChars()).isZero();
}

@Test
void shouldComputeTabColCorrectly() {
var code = "\ta";
var expectedTokens = lexer.lex(code);
var escapedChars = mapToColumnMappingList(Map.of(0, 1));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
var tabToken = tokens.get(0);
assertThat(tabToken.line()).isEqualTo(100);
assertThat(tabToken.column()).isEqualTo(300);
assertThat(tabToken.includedEscapeChars()).isEqualTo(1);

var idToken = tokens.get(1);
assertThat(idToken.line()).isEqualTo(100);
assertThat(idToken.column()).isEqualTo(302);
assertThat(idToken.includedEscapeChars()).isZero();

var eofToken = tokens.get(2);
assertThat(eofToken.line()).isEqualTo(100);
assertThat(eofToken.column()).isEqualTo(303);
assertThat(eofToken.includedEscapeChars()).isZero();
}

@Test
void shouldComputeColCorrectlyForTrivia() {
var code = "a = 3 # comment";
var expectedTokens = lexer.lex(code);
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0))));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300)));
var trivias = tokens.get(tokens.size() - 1).trivia();
assertThat(trivias).hasSize(1);
assertThat(trivias.get(0).token().line()).isEqualTo(100);
Expand All @@ -137,7 +162,8 @@ void shouldComputeColCorrectlyForTrivia() {
void shouldComputeColCorrectlyForTriviaWithEscapeChar() {
var code = "a = 3 # test\\n";
var expectedTokens = lexer.lex(code);
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 1, 12, 13))));
var escapedChars = mapToColumnMappingList(Map.of(12, 1));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
var trivias = tokens.get(tokens.size() - 1).trivia();
assertThat(trivias).hasSize(1);
assertThat(trivias.get(0).token().line()).isEqualTo(100);
Expand All @@ -149,7 +175,8 @@ void shouldComputeColCorrectlyForTriviaWithEscapeChar() {
void shouldComputeColCorrectlyForTriviaOnDifferentLine() {
var code = "# comment\na = 3";
var expectedTokens = lexer.lex(code);
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, Map.of(-1, 0)), 2, new IPythonLocation(101, 300, Map.of(-1, 0))));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300), 2,
new IPythonLocation(101, 300)));
assertThat(tokens.get(0).line()).isEqualTo(101);
var trivias = tokens.get(0).trivia();
assertThat(trivias).hasSize(1);
Expand All @@ -162,10 +189,7 @@ void shouldComputeColCorrectlyForTriviaOnDifferentLine() {
void shouldComputeCorrectlyForSingleQuote() {
var code = "a = '1'";
var expectedTokens = lexer.lex(code);
var escapedChars = new LinkedHashMap<Integer, Integer>();
escapedChars.put(4, 305);
escapedChars.put(6, 308);
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300, escapedChars)));
var tokens = TokenEnricher.enrichTokens(expectedTokens, Map.of(1, new IPythonLocation(100, 300)));
var stringToken = tokens.get(2);
assertThat(stringToken.line()).isEqualTo(100);
assertThat(stringToken.column()).isEqualTo(304);
Expand Down
Loading

0 comments on commit 3690cb2

Please sign in to comment.