From e20d0c07b1da445f2363aa3d267f8f8fd56e8de1 Mon Sep 17 00:00:00 2001 From: Eric Traut Date: Sat, 30 Nov 2024 04:53:13 -0800 Subject: [PATCH] Added check for `\u` and `\N` escapes within bytes literals, which are illegal. --- .../pyright-internal/src/analyzer/checker.ts | 4 +++- .../src/localization/localize.ts | 1 + .../src/localization/package.nls.en-us.json | 4 ++++ .../src/parser/stringTokenUtils.ts | 20 +++++++++++++------ .../src/tests/samples/strings2.py | 2 ++ .../src/tests/typeEvaluator8.test.ts | 2 +- 6 files changed, 25 insertions(+), 8 deletions(-) diff --git a/packages/pyright-internal/src/analyzer/checker.ts b/packages/pyright-internal/src/analyzer/checker.ts index 6cc9a62120cc..0f33c72d49f9 100644 --- a/packages/pyright-internal/src/analyzer/checker.ts +++ b/packages/pyright-internal/src/analyzer/checker.ts @@ -1351,7 +1351,9 @@ export class Checker extends ParseTreeWalker { this._evaluator.addDiagnosticForTextRange( this._fileInfo, DiagnosticRule.reportInvalidStringEscapeSequence, - LocMessage.stringUnsupportedEscape(), + node.d.strings.some((string) => (string.d.token.flags & StringTokenFlags.Bytes) !== 0) + ? LocMessage.bytesUnsupportedEscape() + : LocMessage.stringUnsupportedEscape(), { start: start + error.offset, length: error.length } ); } diff --git a/packages/pyright-internal/src/localization/localize.ts b/packages/pyright-internal/src/localization/localize.ts index 9bfb57553a34..f68f88e411a2 100644 --- a/packages/pyright-internal/src/localization/localize.ts +++ b/packages/pyright-internal/src/localization/localize.ts @@ -286,6 +286,7 @@ export namespace Localizer { ); export const breakInExceptionGroup = () => getRawString('Diagnostic.breakInExceptionGroup'); export const breakOutsideLoop = () => getRawString('Diagnostic.breakOutsideLoop'); + export const bytesUnsupportedEscape = () => getRawString('Diagnostic.bytesUnsupportedEscape'); export const callableExtraArgs = () => getRawString('Diagnostic.callableExtraArgs'); export const callableFirstArg = () => getRawString('Diagnostic.callableFirstArg'); export const callableNotInstantiable = () => diff --git a/packages/pyright-internal/src/localization/package.nls.en-us.json b/packages/pyright-internal/src/localization/package.nls.en-us.json index 283e8361f2b0..244673a11bde 100644 --- a/packages/pyright-internal/src/localization/package.nls.en-us.json +++ b/packages/pyright-internal/src/localization/package.nls.en-us.json @@ -115,6 +115,10 @@ "message": "\"break\" can be used only within a loop", "comment": "{Locked='break'}" }, + "bytesUnsupportedEscape": { + "message": "Unsupported escape sequence in bytes literal", + "comment": "{Locked='bytes'}" + }, "callableExtraArgs": { "message": "Expected only two type arguments to \"Callable\"", "comment": "{Locked='Callable'}" diff --git a/packages/pyright-internal/src/parser/stringTokenUtils.ts b/packages/pyright-internal/src/parser/stringTokenUtils.ts index 0bcd123d175b..c53dda0dd96b 100644 --- a/packages/pyright-internal/src/parser/stringTokenUtils.ts +++ b/packages/pyright-internal/src/parser/stringTokenUtils.ts @@ -41,8 +41,9 @@ interface IncompleteUnescapedString { function completeUnescapedString(incomplete: IncompleteUnescapedString, originalString: string): UnescapedString { const newValue = incomplete.valueParts.join(''); - // Use the original string if it's identical. This prevents us from allocating memory to hold - // a copy (a copy is made because the original string is a 'slice' of another, so it doesn't exist in the cache yet). + // Use the original string if it's identical. This prevents us from allocating + // memory to hold a copy. A copy is made because the original string is a + // 'slice' of another, so it doesn't exist in the cache yet. const value = originalString !== newValue ? newValue : originalString; return { ...incomplete, @@ -224,6 +225,12 @@ export function getUnescapedString(stringToken: StringToken | FStringMiddleToken case Char.N: { let foundIllegalChar = false; let charCount = 1; + + // This type of escape isn't allowed for bytes. + if (isBytes) { + foundIllegalChar = true; + } + if (getEscapedCharacter(charCount) !== Char.OpenBrace) { foundIllegalChar = true; } else { @@ -260,11 +267,12 @@ export function getUnescapedString(stringToken: StringToken | FStringMiddleToken } case Char.u: - localValue = scanHexEscape(4); - break; - case Char.U: - localValue = scanHexEscape(8); + // This type of escape isn't allowed for bytes. + if (isBytes) { + addInvalidEscapeOffset(); + } + localValue = scanHexEscape(curChar === Char.u ? 4 : 8); break; default: diff --git a/packages/pyright-internal/src/tests/samples/strings2.py b/packages/pyright-internal/src/tests/samples/strings2.py index 6a8f6a76f02a..7604d278c279 100644 --- a/packages/pyright-internal/src/tests/samples/strings2.py +++ b/packages/pyright-internal/src/tests/samples/strings2.py @@ -12,3 +12,5 @@ # This should generate an error. v4 = b"a" f"" +# This should generate a warning. +v5 = b"\u00FF" diff --git a/packages/pyright-internal/src/tests/typeEvaluator8.test.ts b/packages/pyright-internal/src/tests/typeEvaluator8.test.ts index 32ad5f1c9c01..cfbba767a084 100644 --- a/packages/pyright-internal/src/tests/typeEvaluator8.test.ts +++ b/packages/pyright-internal/src/tests/typeEvaluator8.test.ts @@ -830,7 +830,7 @@ test('PseudoGeneric3', () => { test('Strings2', () => { const analysisResults = TestUtils.typeAnalyzeSampleFiles(['strings2.py']); - TestUtils.validateResults(analysisResults, 2); + TestUtils.validateResults(analysisResults, 2, 1); }); test('LiteralString1', () => {