Skip to content

Add support for byte and unicode Literal strings #6087

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions mypy/checkexpr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1784,11 +1784,17 @@ def visit_str_expr(self, e: StrExpr) -> Type:

def visit_bytes_expr(self, e: BytesExpr) -> Type:
"""Type check a bytes literal (trivial)."""
return self.named_type('builtins.bytes')
typ = self.named_type('builtins.bytes')
if is_literal_type_like(self.type_context[-1]):
return LiteralType(value=e.value, fallback=typ)
return typ

def visit_unicode_expr(self, e: UnicodeExpr) -> Type:
"""Type check a unicode literal (trivial)."""
return self.named_type('builtins.unicode')
typ = self.named_type('builtins.unicode')
if is_literal_type_like(self.type_context[-1]):
return LiteralType(value=e.value, fallback=typ)
return typ

def visit_float_expr(self, e: FloatExpr) -> Type:
"""Type check a float literal (trivial)."""
Expand Down
13 changes: 10 additions & 3 deletions mypy/exprtotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
ListExpr, StrExpr, BytesExpr, UnicodeExpr, EllipsisExpr, CallExpr,
get_member_expr_fullname
)
from mypy.fastparse import parse_type_comment, parse_type_string
from mypy.fastparse import parse_type_string
from mypy.types import (
Type, UnboundType, TypeList, EllipsisType, AnyType, Optional, CallableArgument, TypeOfAny,
RawLiteralType,
Expand Down Expand Up @@ -111,8 +111,15 @@ def expr_to_unanalyzed_type(expr: Expression, _parent: Optional[Expression] = No
elif isinstance(expr, ListExpr):
return TypeList([expr_to_unanalyzed_type(t, expr) for t in expr.items],
line=expr.line, column=expr.column)
elif isinstance(expr, (StrExpr, BytesExpr, UnicodeExpr)):
return parse_type_string(expr.value, expr.line, expr.column)
elif isinstance(expr, StrExpr):
return parse_type_string(expr.value, 'builtins.str', expr.line, expr.column,
assume_str_is_unicode=expr.from_python_3)
elif isinstance(expr, BytesExpr):
return parse_type_string(expr.value, 'builtins.bytes', expr.line, expr.column,
assume_str_is_unicode=False)
elif isinstance(expr, UnicodeExpr):
return parse_type_string(expr.value, 'builtins.unicode', expr.line, expr.column,
assume_str_is_unicode=True)
elif isinstance(expr, UnaryExpr):
typ = expr_to_unanalyzed_type(expr.expr)
if isinstance(typ, RawLiteralType) and isinstance(typ.value, int) and expr.op == '-':
Expand Down
86 changes: 70 additions & 16 deletions mypy/fastparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
NameConstant,
Expression as ast3_Expression,
Str,
Bytes,
Index,
Num,
UnaryOp,
Expand Down Expand Up @@ -140,7 +141,11 @@ def parse(source: Union[str, bytes],
return tree


def parse_type_comment(type_comment: str, line: int, errors: Optional[Errors]) -> Optional[Type]:
def parse_type_comment(type_comment: str,
line: int,
errors: Optional[Errors],
assume_str_is_unicode: bool = True,
) -> Optional[Type]:
try:
typ = ast3.parse(type_comment, '<type_comment>', 'eval')
except SyntaxError as e:
Expand All @@ -151,24 +156,39 @@ def parse_type_comment(type_comment: str, line: int, errors: Optional[Errors]) -
raise
else:
assert isinstance(typ, ast3_Expression)
return TypeConverter(errors, line=line).visit(typ.body)
return TypeConverter(errors, line=line,
assume_str_is_unicode=assume_str_is_unicode).visit(typ.body)


def parse_type_string(expr_string: str, line: int, column: int) -> Type:
"""Parses a type that was originally present inside of an explicit string.
def parse_type_string(expr_string: str, expr_fallback_name: str,
line: int, column: int, assume_str_is_unicode: bool = True) -> Type:
"""Parses a type that was originally present inside of an explicit string,
byte string, or unicode string.

For example, suppose we have the type `Foo["blah"]`. We should parse the
string expression "blah" using this function.

If `assume_str_is_unicode` is set to true, this function will assume that
`Foo["blah"]` is equivalent to `Foo[u"blah"]`. Otherwise, it assumes it's
equivalent to `Foo[b"blah"]`.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is misleading. IIUC on Python 3 Literal["blah"] is equivalent to Literal[u"blah"], but from the last sentence it looks like it is Literal[b"blah"].

Probably my confusion stems from using unicode_literals for both future import and the argument name. Maybe make them different?


The caller is responsible for keeping track of the context in which the
type string was encountered (e.g. in Python 3 code, Python 2 code, Python 2
code with unicode_literals...) and setting `assume_str_is_unicode` accordingly.
"""
try:
node = parse_type_comment(expr_string.strip(), line=line, errors=None)
node = parse_type_comment(expr_string.strip(), line=line, errors=None,
assume_str_is_unicode=assume_str_is_unicode)
if isinstance(node, UnboundType) and node.original_str_expr is None:
node.original_str_expr = expr_string
node.original_str_fallback = expr_fallback_name
return node
else:
return RawLiteralType(expr_string, 'builtins.str', line, column)
except SyntaxError:
return RawLiteralType(expr_string, 'builtins.str', line, column)
return RawLiteralType(expr_string, expr_fallback_name, line, column)
except (SyntaxError, ValueError):
# Note: the parser will raise a `ValueError` instead of a SyntaxError if
# the string happens to contain things like \x00.
return RawLiteralType(expr_string, expr_fallback_name, line, column)


def is_no_type_check_decorator(expr: ast3.expr) -> bool:
Expand Down Expand Up @@ -966,10 +986,7 @@ def visit_FormattedValue(self, n: ast3.FormattedValue) -> Expression:

# Bytes(bytes s)
def visit_Bytes(self, n: ast3.Bytes) -> Union[BytesExpr, StrExpr]:
# The following line is a bit hacky, but is the best way to maintain
# compatibility with how mypy currently parses the contents of bytes literals.
contents = str(n.s)[2:-1]
e = BytesExpr(contents)
e = BytesExpr(bytes_to_human_readable_repr(n.s))
return self.set_line(e, n)

# NameConstant(singleton value)
Expand Down Expand Up @@ -1042,10 +1059,15 @@ def visit_Index(self, n: Index) -> Node:


class TypeConverter:
def __init__(self, errors: Optional[Errors], line: int = -1) -> None:
def __init__(self,
errors: Optional[Errors],
line: int = -1,
assume_str_is_unicode: bool = True,
) -> None:
self.errors = errors
self.line = line
self.node_stack = [] # type: List[AST]
self.assume_str_is_unicode = assume_str_is_unicode

@overload
def visit(self, node: ast3.expr) -> Type: ...
Expand Down Expand Up @@ -1090,8 +1112,11 @@ def visit_raw_str(self, s: str) -> Type:
# An escape hatch that allows the AST walker in fastparse2 to
# directly hook into the Python 3.5 type converter in some cases
# without needing to create an intermediary `Str` object.
return (parse_type_comment(s.strip(), self.line, self.errors) or
AnyType(TypeOfAny.from_error))
return (parse_type_comment(s.strip(),
self.line,
self.errors,
self.assume_str_is_unicode)
or AnyType(TypeOfAny.from_error))

def visit_Call(self, e: Call) -> Type:
# Parse the arg constructor
Expand Down Expand Up @@ -1190,7 +1215,22 @@ def visit_Num(self, n: Num) -> Type:

# Str(string s)
def visit_Str(self, n: Str) -> Type:
return parse_type_string(n.s, line=self.line, column=-1)
# Note: we transform these fallback types into the correct types in
# 'typeanal.py' -- specifically in the named_type_with_normalized_str method.
# If we're analyzing Python 3, that function will translate 'builtins.unicode'
# into 'builtins.str'. In contrast, if we're analyzing Python 2 code, we'll
# translate 'builtins.bytes' in the method below into 'builtins.str'.
if 'u' in n.kind or self.assume_str_is_unicode:
return parse_type_string(n.s, 'builtins.unicode', self.line, n.col_offset,
assume_str_is_unicode=self.assume_str_is_unicode)
else:
return parse_type_string(n.s, 'builtins.str', self.line, n.col_offset,
assume_str_is_unicode=self.assume_str_is_unicode)

# Bytes(bytes s)
def visit_Bytes(self, n: Bytes) -> Type:
    """Turn a bytes literal node into a raw literal type.

    The bytes payload is rendered in its escaped, human-readable form and
    paired with the 'builtins.bytes' fallback name.
    """
    return RawLiteralType(
        bytes_to_human_readable_repr(n.s),
        'builtins.bytes',
        self.line,
        column=n.col_offset,
    )

# Subscript(expr value, slice slice, expr_context ctx)
def visit_Subscript(self, n: ast3.Subscript) -> Type:
Expand Down Expand Up @@ -1246,3 +1286,17 @@ def stringify_name(n: AST) -> Optional[str]:
if sv is not None:
return "{}.{}".format(sv, n.attr)
return None # Can't do it.


def bytes_to_human_readable_repr(b: bytes) -> str:
    r"""Convert bytes into a human-readable, escaped string.

    The result is the body of the bytes literal that ``repr`` produces, i.e.
    without the leading ``b`` prefix and the surrounding quotes. Unprintable
    bytes such as the nul byte are escaped. For example:

        >>> b = bytes([102, 111, 111, 10, 0])
        >>> s = bytes_to_human_readable_repr(b)
        >>> print(s)
        foo\n\x00
        >>> print(repr(s))
        'foo\\n\\x00'
    """
    # Use repr() rather than str(): both yield "b'...'" for a bytes object,
    # but str(bytes) emits a BytesWarning when Python runs with `-b` (and
    # raises when it runs with `-bb`).
    return repr(b)[2:-1]
69 changes: 46 additions & 23 deletions mypy/fastparse2.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
)
from mypy import messages
from mypy.errors import Errors
from mypy.fastparse import TypeConverter, parse_type_comment
from mypy.fastparse import TypeConverter, parse_type_comment, bytes_to_human_readable_repr
from mypy.options import Options

try:
Expand Down Expand Up @@ -113,7 +113,6 @@ def parse(source: Union[str, bytes],
assert options.python_version[0] < 3 and not is_stub_file
ast = ast27.parse(source, fnam, 'exec')
tree = ASTConverter(options=options,
is_stub=is_stub_file,
errors=errors,
).visit(ast)
assert isinstance(tree, MypyFile)
Expand Down Expand Up @@ -141,15 +140,32 @@ def is_no_type_check_decorator(expr: ast27.expr) -> bool:
class ASTConverter:
def __init__(self,
options: Options,
is_stub: bool,
errors: Errors) -> None:
self.class_nesting = 0
self.imports = [] # type: List[ImportBase]

self.options = options
self.is_stub = is_stub
self.errors = errors

# Indicates whether this file is being parsed with unicode_literals enabled.
# Note: typed_ast already naturally takes unicode_literals into account when
# parsing so we don't have to worry when analyzing strings within this class.
#
# The only place where we use this field is when we call fastparse's TypeConverter
# and any related methods. That class accepts a Python 3 AST instead of a Python 2
# AST: as a result, it doesn't special-case the `unicode_literals` import and won't know
# exactly whether to parse some string as bytes or unicode.
#
# This distinction is relevant mostly when handling Literal types -- Literal[u"foo"]
# is not the same type as Literal[b"foo"], and Literal["foo"] could mean either the
# former or the latter based on context.
#
# This field is set in the 'visit_ImportFrom' method: it's ok to delay computing it
# because any `from __future__ import blah` import must be located at the top of the
# file, with the exception of the docstring. This means we're guaranteed to correctly
# set this field before we encounter any type hints.
self.unicode_literals = False

# Cache of visit_X methods keyed by type of visited object
self.visitor_cache = {} # type: Dict[type, Callable[[Optional[AST]], Any]]

Expand Down Expand Up @@ -306,7 +322,8 @@ def visit_Module(self, mod: ast27.Module) -> MypyFile:
# arg? kwarg, expr* defaults)
def visit_FunctionDef(self, n: ast27.FunctionDef) -> Statement:
lineno = n.lineno
converter = TypeConverter(self.errors, line=lineno)
converter = TypeConverter(self.errors, line=lineno,
assume_str_is_unicode=self.unicode_literals)
args, decompose_stmts = self.transform_args(n.args, lineno)

arg_kinds = [arg.kind for arg in args]
Expand Down Expand Up @@ -413,7 +430,8 @@ def transform_args(self,
line: int,
) -> Tuple[List[Argument], List[Statement]]:
type_comments = n.type_comments # type: Sequence[Optional[str]]
converter = TypeConverter(self.errors, line=line)
converter = TypeConverter(self.errors, line=line,
assume_str_is_unicode=self.unicode_literals)
decompose_stmts = [] # type: List[Statement]

n_args = n.args
Expand Down Expand Up @@ -532,7 +550,8 @@ def visit_Delete(self, n: ast27.Delete) -> DelStmt:
def visit_Assign(self, n: ast27.Assign) -> AssignmentStmt:
typ = None
if n.type_comment:
typ = parse_type_comment(n.type_comment, n.lineno, self.errors)
typ = parse_type_comment(n.type_comment, n.lineno, self.errors,
assume_str_is_unicode=self.unicode_literals)

stmt = AssignmentStmt(self.translate_expr_list(n.targets),
self.visit(n.value),
Expand All @@ -549,7 +568,8 @@ def visit_AugAssign(self, n: ast27.AugAssign) -> OperatorAssignmentStmt:
# For(expr target, expr iter, stmt* body, stmt* orelse, string? type_comment)
def visit_For(self, n: ast27.For) -> ForStmt:
if n.type_comment is not None:
target_type = parse_type_comment(n.type_comment, n.lineno, self.errors)
target_type = parse_type_comment(n.type_comment, n.lineno, self.errors,
assume_str_is_unicode=self.unicode_literals)
else:
target_type = None
stmt = ForStmt(self.visit(n.target),
Expand All @@ -576,7 +596,8 @@ def visit_If(self, n: ast27.If) -> IfStmt:
# With(withitem* items, stmt* body, string? type_comment)
def visit_With(self, n: ast27.With) -> WithStmt:
if n.type_comment is not None:
target_type = parse_type_comment(n.type_comment, n.lineno, self.errors)
target_type = parse_type_comment(n.type_comment, n.lineno, self.errors,
assume_str_is_unicode=self.unicode_literals)
else:
target_type = None
stmt = WithStmt([self.visit(n.context_expr)],
Expand Down Expand Up @@ -680,9 +701,12 @@ def visit_ImportFrom(self, n: ast27.ImportFrom) -> ImportBase:
mod = n.module if n.module is not None else ''
i = ImportAll(mod, n.level) # type: ImportBase
else:
i = ImportFrom(self.translate_module_id(n.module) if n.module is not None else '',
n.level,
[(a.name, a.asname) for a in n.names])
module_id = self.translate_module_id(n.module) if n.module is not None else ''
i = ImportFrom(module_id, n.level, [(a.name, a.asname) for a in n.names])

# See comments in the constructor for more information about this field.
if module_id == '__future__' and any(a.name == 'unicode_literals' for a in n.names):
self.unicode_literals = True
self.imports.append(i)
return self.set_line(i, n)

Expand Down Expand Up @@ -900,18 +924,17 @@ def visit_Num(self, n: ast27.Num) -> Expression:

# Str(string s)
def visit_Str(self, n: ast27.Str) -> Expression:
# Hack: assume all string literals in Python 2 stubs are normal
# strs (i.e. not unicode). All stubs are parsed with the Python 3
# parser, which causes unprefixed string literals to be interpreted
# as unicode instead of bytes. This hack is generally okay,
# because mypy considers str literals to be compatible with
# unicode.
# Note: typed_ast.ast27 will handle unicode_literals for us. If
# n.s is of type 'bytes', we know unicode_literals was not enabled;
# otherwise we know it was.
#
# Note that the following code is NOT run when parsing Python 2.7 stubs:
# we always parse stub files (no matter what version) using the Python 3
# parser. This is also why string literals in Python 2.7 stubs are assumed
# to be unicode.
if isinstance(n.s, bytes):
value = n.s
# The following line is a bit hacky, but is the best way to maintain
# compatibility with how mypy currently parses the contents of bytes literals.
contents = str(value)[2:-1]
e = StrExpr(contents) # type: Union[StrExpr, UnicodeExpr]
contents = bytes_to_human_readable_repr(n.s)
e = StrExpr(contents, from_python_3=False) # type: Union[StrExpr, UnicodeExpr]
return self.set_line(e, n)
else:
e = UnicodeExpr(n.s)
Expand Down
2 changes: 1 addition & 1 deletion mypy/literals.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def visit_int_expr(self, e: IntExpr) -> Key:
return ('Literal', e.value)

def visit_str_expr(self, e: StrExpr) -> Key:
return ('Literal', e.value)
return ('Literal', e.value, e.from_python_3)

def visit_bytes_expr(self, e: BytesExpr) -> Key:
return ('Literal', e.value)
Expand Down
Loading