From e20d0c07b1da445f2363aa3d267f8f8fd56e8de1 Mon Sep 17 00:00:00 2001
From: Eric Traut <eric@traut.com>
Date: Sat, 30 Nov 2024 04:53:13 -0800
Subject: [PATCH] Added check for `\u`, `\U`, and `\N` escapes within bytes
 literals, which are illegal.

---
 .../pyright-internal/src/analyzer/checker.ts  |  4 +++-
 .../src/localization/localize.ts              |  1 +
 .../src/localization/package.nls.en-us.json   |  4 ++++
 .../src/parser/stringTokenUtils.ts            | 20 +++++++++++++------
 .../src/tests/samples/strings2.py             |  2 ++
 .../src/tests/typeEvaluator8.test.ts          |  2 +-
 6 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/packages/pyright-internal/src/analyzer/checker.ts b/packages/pyright-internal/src/analyzer/checker.ts
index 6cc9a62120cc..0f33c72d49f9 100644
--- a/packages/pyright-internal/src/analyzer/checker.ts
+++ b/packages/pyright-internal/src/analyzer/checker.ts
@@ -1351,7 +1351,9 @@ export class Checker extends ParseTreeWalker {
                         this._evaluator.addDiagnosticForTextRange(
                             this._fileInfo,
                             DiagnosticRule.reportInvalidStringEscapeSequence,
-                            LocMessage.stringUnsupportedEscape(),
+                            node.d.strings.some((string) => (string.d.token.flags & StringTokenFlags.Bytes) !== 0)
+                                ? LocMessage.bytesUnsupportedEscape()
+                                : LocMessage.stringUnsupportedEscape(),
                             { start: start + error.offset, length: error.length }
                         );
                     }
diff --git a/packages/pyright-internal/src/localization/localize.ts b/packages/pyright-internal/src/localization/localize.ts
index 9bfb57553a34..f68f88e411a2 100644
--- a/packages/pyright-internal/src/localization/localize.ts
+++ b/packages/pyright-internal/src/localization/localize.ts
@@ -286,6 +286,7 @@ export namespace Localizer {
             );
         export const breakInExceptionGroup = () => getRawString('Diagnostic.breakInExceptionGroup');
         export const breakOutsideLoop = () => getRawString('Diagnostic.breakOutsideLoop');
+        export const bytesUnsupportedEscape = () => getRawString('Diagnostic.bytesUnsupportedEscape');
         export const callableExtraArgs = () => getRawString('Diagnostic.callableExtraArgs');
         export const callableFirstArg = () => getRawString('Diagnostic.callableFirstArg');
         export const callableNotInstantiable = () =>
diff --git a/packages/pyright-internal/src/localization/package.nls.en-us.json b/packages/pyright-internal/src/localization/package.nls.en-us.json
index 283e8361f2b0..244673a11bde 100644
--- a/packages/pyright-internal/src/localization/package.nls.en-us.json
+++ b/packages/pyright-internal/src/localization/package.nls.en-us.json
@@ -115,6 +115,10 @@
             "message": "\"break\" can be used only within a loop",
             "comment": "{Locked='break'}"
         },
+        "bytesUnsupportedEscape": {
+            "message": "Unsupported escape sequence in bytes literal",
+            "comment": "{Locked='bytes'}"
+        },
         "callableExtraArgs": {
             "message": "Expected only two type arguments to \"Callable\"",
             "comment": "{Locked='Callable'}"
diff --git a/packages/pyright-internal/src/parser/stringTokenUtils.ts b/packages/pyright-internal/src/parser/stringTokenUtils.ts
index 0bcd123d175b..c53dda0dd96b 100644
--- a/packages/pyright-internal/src/parser/stringTokenUtils.ts
+++ b/packages/pyright-internal/src/parser/stringTokenUtils.ts
@@ -41,8 +41,9 @@ interface IncompleteUnescapedString {
 
 function completeUnescapedString(incomplete: IncompleteUnescapedString, originalString: string): UnescapedString {
     const newValue = incomplete.valueParts.join('');
-    // Use the original string if it's identical. This prevents us from allocating memory to hold
-    // a copy (a copy is made because the original string is a 'slice' of another, so it doesn't exist in the cache yet).
+    // Use the original string if it's identical. This prevents us from allocating
+    // memory to hold a copy. A copy is made because the original string is a
+    // 'slice' of another, so it doesn't exist in the cache yet.
     const value = originalString !== newValue ? newValue : originalString;
     return {
         ...incomplete,
@@ -224,6 +225,12 @@ export function getUnescapedString(stringToken: StringToken | FStringMiddleToken
                         case Char.N: {
                             let foundIllegalChar = false;
                             let charCount = 1;
+
+                            // Named-character escapes (\N{...}) aren't allowed for bytes.
+                            if (isBytes) {
+                                foundIllegalChar = true;
+                            }
+
                             if (getEscapedCharacter(charCount) !== Char.OpenBrace) {
                                 foundIllegalChar = true;
                             } else {
@@ -260,11 +267,12 @@ export function getUnescapedString(stringToken: StringToken | FStringMiddleToken
                         }
 
                         case Char.u:
-                            localValue = scanHexEscape(4);
-                            break;
-
                         case Char.U:
-                            localValue = scanHexEscape(8);
+                            // Unicode hex escapes (\u and \U) aren't allowed for bytes.
+                            if (isBytes) {
+                                addInvalidEscapeOffset();
+                            }
+                            localValue = scanHexEscape(curChar === Char.u ? 4 : 8);
                             break;
 
                         default:
diff --git a/packages/pyright-internal/src/tests/samples/strings2.py b/packages/pyright-internal/src/tests/samples/strings2.py
index 6a8f6a76f02a..7604d278c279 100644
--- a/packages/pyright-internal/src/tests/samples/strings2.py
+++ b/packages/pyright-internal/src/tests/samples/strings2.py
@@ -12,3 +12,5 @@
 # This should generate an error.
 v4 = b"a" f""
 
+# This should generate a warning.
+v5 = b"\u00FF"
diff --git a/packages/pyright-internal/src/tests/typeEvaluator8.test.ts b/packages/pyright-internal/src/tests/typeEvaluator8.test.ts
index 32ad5f1c9c01..cfbba767a084 100644
--- a/packages/pyright-internal/src/tests/typeEvaluator8.test.ts
+++ b/packages/pyright-internal/src/tests/typeEvaluator8.test.ts
@@ -830,7 +830,7 @@ test('PseudoGeneric3', () => {
 test('Strings2', () => {
     const analysisResults = TestUtils.typeAnalyzeSampleFiles(['strings2.py']);
 
-    TestUtils.validateResults(analysisResults, 2);
+    TestUtils.validateResults(analysisResults, 2, 1);
 });
 
 test('LiteralString1', () => {