Skip to content

Commit

Permalink
fea: improve error messages (#2806)
Browse files Browse the repository at this point in the history
Use Levenshtein distance to improve error messages when a similar word
can be found in the namespace
  • Loading branch information
tserg authored Apr 17, 2022
1 parent ead39ce commit 857bc86
Show file tree
Hide file tree
Showing 7 changed files with 130 additions and 6 deletions.
4 changes: 3 additions & 1 deletion vyper/semantics/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
StructureException,
UndeclaredDefinition,
)
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions


class Namespace(dict):
Expand Down Expand Up @@ -42,7 +43,8 @@ def __setitem__(self, attr, obj):

def __getitem__(self, key):
if key not in self:
raise UndeclaredDefinition(f"'{key}' has not been declared")
suggestions_str = get_levenshtein_error_suggestions(key, self, 0.2)
raise UndeclaredDefinition(f"'{key}' has not been declared. {suggestions_str}")
return super().__getitem__(key)

def __enter__(self):
Expand Down
4 changes: 3 additions & 1 deletion vyper/semantics/types/bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
UnknownAttribute,
)
from vyper.semantics.types.abstract import AbstractDataType
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions


class DataLocation(Enum):
Expand Down Expand Up @@ -585,7 +586,8 @@ def get_member(self, key: str, node: vy_ast.VyperNode) -> BaseTypeDefinition:
type_.location = self.location
type_.is_constant = self.is_constant
return type_
raise UnknownAttribute(f"{self} has no member '{key}'", node)
suggestions_str = get_levenshtein_error_suggestions(key, self.members, 0.3)
raise UnknownAttribute(f"{self} has no member '{key}'. {suggestions_str}", node)

def __repr__(self):
return f"{self._id}"
Expand Down
7 changes: 6 additions & 1 deletion vyper/semantics/types/user/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from vyper.semantics.types.bases import DataLocation, MemberTypeDefinition, ValueTypeDefinition
from vyper.semantics.types.indexable.mapping import MappingDefinition
from vyper.semantics.types.utils import get_type_from_annotation
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions
from vyper.semantics.validation.utils import validate_expected_type


Expand Down Expand Up @@ -97,7 +98,11 @@ def fetch_call_return(self, node: vy_ast.Call) -> StructDefinition:
keys = list(self.members.keys())
for i, (key, value) in enumerate(zip(node.args[0].keys, node.args[0].values)):
if key is None or key.get("id") not in members:
raise UnknownAttribute("Unknown or duplicate struct member", key or value)
suggestions_str = get_levenshtein_error_suggestions(key.get("id"), members, 1.0)
raise UnknownAttribute(
f"Unknown or duplicate struct member. {suggestions_str}",
key or value,
)
expected_key = keys[i]
if key.id != expected_key:
raise InvalidAttribute(
Expand Down
7 changes: 6 additions & 1 deletion vyper/semantics/types/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from vyper.semantics.namespace import get_namespace
from vyper.semantics.types.bases import BaseTypeDefinition, DataLocation
from vyper.semantics.types.indexable.sequence import ArrayDefinition, TupleDefinition
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions
from vyper.semantics.validation.utils import get_exact_type_from_node, get_index_value


Expand Down Expand Up @@ -154,7 +155,11 @@ def get_type_from_annotation(
try:
type_obj = namespace[type_name]
except UndeclaredDefinition:
raise UnknownType(f"No builtin or user-defined type named '{type_name}'", node) from None
suggestions_str = get_levenshtein_error_suggestions(type_name, namespace, 0.3)
raise UnknownType(
f"No builtin or user-defined type named '{type_name}'. {suggestions_str}",
node,
) from None

if getattr(type_obj, "_as_array", False) and isinstance(node, vy_ast.Subscript):
# TODO: handle `is_immutable` for arrays
Expand Down
104 changes: 104 additions & 0 deletions vyper/semantics/validation/levenshtein_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
from typing import Any, Dict


def levenshtein_norm(source: str, target: str) -> float:
    """Return the normalized Levenshtein distance between two strings.

    The raw edit distance is divided by the length of the longer string,
    which is the largest distance two strings of these lengths can have.
    The result is therefore a float in the range [0.0, 1.0], with 1.0
    signifying the biggest possible distance between strings with these
    lengths.

    Two empty strings are identical, so their distance is reported as 0.0
    instead of dividing by zero.

    Adapted from jazzband/docopt-ng
    https://github.com/jazzband/docopt-ng/blob/bbed40a2335686d2e14ac0e6c3188374dc4784da/docopt.py

    :param source: The first string
    :param target: The second string
    :return: The edit distance scaled into [0.0, 1.0]
    """

    # The maximum possible distance is the length of the longer string, so
    # it is used as the normalization factor
    longest = max(len(source), len(target))
    if longest == 0:
        # Both strings are empty: nothing to edit, and nothing to divide by
        return 0.0

    distance = levenshtein(source, target)
    return float(distance) / longest


def levenshtein(source: str, target: str) -> int:
    """Compute the Levenshtein distance between two strings.

    The Levenshtein distance
    (https://en.wikipedia.org/wiki/Levenshtein_distance) is the minimum
    number of single-character insertions, deletions and substitutions
    needed to turn ``source`` into ``target``. Transpositions are NOT treated
    as a single edit (this is not the Damerau-Levenshtein distance), so
    swapping two adjacent characters costs 2.

    The distance is defined recursively: the distance between two strings is
    the cost of adjusting the last character plus the distance between the
    prefixes that exclude it (e.g. the distance between "tester" and
    "tested" is 1 + the distance between "teste" and "teste"). The
    Wagner-Fischer algorithm
    (https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm)
    retains this idea but eliminates redundant computations by filling in a
    table of prefix distances iteratively. Each table row only depends on
    the row directly above it, so only two rows are kept in memory.

    Adapted from jazzband/docopt-ng
    https://github.com/jazzband/docopt-ng/blob/bbed40a2335686d2e14ac0e6c3188374dc4784da/docopt.py

    :param source: The string to transform
    :param target: The string to transform ``source`` into
    :return: The number of single-character edits required
    """

    # Row for the empty source prefix: turning "" into target[:j] takes j
    # insertions
    previous_row = list(range(len(target) + 1))

    for i, source_char in enumerate(source, start=1):
        # First column: turning source[:i] into "" takes i deletions
        current_row = [i]

        for j, target_char in enumerate(target, start=1):
            # The options for the last pair of characters are deletion,
            # insertion, and substitution, which amount to dropping the
            # source character, the target character, or both and then
            # reusing the distance already computed for the resulting prefix
            # combination. Equal characters are a free substitution
            del_dist = previous_row[j] + 1
            ins_dist = current_row[j - 1] + 1
            sub_cost = 0 if source_char == target_char else 1
            sub_dist = previous_row[j - 1] + sub_cost

            # Choose option that produces smallest distance
            current_row.append(min(del_dist, ins_dist, sub_dist))

        previous_row = current_row

    # The last cell of the final row covers the complete strings
    return previous_row[-1]


def get_levenshtein_error_suggestions(key: str, namespace: Dict[str, Any], threshold: float) -> str:
    """
    Build a "Did you mean ...?" snippet for an unknown identifier.

    Every name in ``namespace`` is scored by its normalized Levenshtein distance
    from ``key``. The closest name is suggested if its distance does not exceed
    ``threshold``; the second closest is added if it also qualifies. When nothing
    qualifies, or ``key`` is ``None`` or empty, an empty string is returned.

    As a heuristic, the threshold value is inversely correlated to the size of the
    namespace. For a small namespace (e.g. struct members), the threshold value can
    be the maximum of 1.0 since the key must be one of the defined struct members.
    For a large namespace (e.g. types, builtin functions and state variables), the
    threshold value should be lower to ensure the matches are relevant.

    :param key: A string of the identifier being accessed
    :param namespace: A dictionary of the possible identifiers
    :param threshold: A floating value between 0.0 and 1.0
    :return: The error message snippet if the closest distance is within the
        threshold, or an empty string.
    """

    # Nothing meaningful can be suggested for a missing or empty identifier
    if key is None:
        return ""
    if key == "":
        return ""

    ranked = [(candidate, levenshtein_norm(key, candidate)) for candidate in namespace]
    if not ranked:
        return ""

    # Stable sort on the distance only, so ties keep the namespace's own order
    ranked.sort(key=lambda pair: pair[1])

    best_name, best_distance = ranked[0]
    if not best_distance <= threshold:
        return ""

    if len(ranked) > 1:
        second_name, second_distance = ranked[1]
        if second_distance <= threshold:
            return f"Did you mean '{best_name}', or maybe '{second_name}'?"

    return f"Did you mean '{best_name}'?"
4 changes: 3 additions & 1 deletion vyper/semantics/validation/module.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from vyper.semantics.types.user.event import Event
from vyper.semantics.types.utils import check_constant, get_type_from_annotation
from vyper.semantics.validation.base import VyperNodeVisitorBase
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions
from vyper.semantics.validation.utils import validate_expected_type, validate_unique_method_ids
from vyper.typing import InterfaceDict

Expand Down Expand Up @@ -305,7 +306,8 @@ def _add_import(
if module == "vyper.interfaces":
interface_codes = _get_builtin_interfaces()
if name not in interface_codes:
raise UndeclaredDefinition(f"Unknown interface: {name}", node)
suggestions_str = get_levenshtein_error_suggestions(name, _get_builtin_interfaces(), 1.0)
raise UndeclaredDefinition(f"Unknown interface: {name}. {suggestions_str}", node)

if interface_codes[name]["type"] == "vyper":
interface_ast = vy_ast.parse_to_ast(interface_codes[name]["code"], contract_name=name)
Expand Down
6 changes: 5 additions & 1 deletion vyper/semantics/validation/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
)
from vyper.semantics.types.value.array_value import BytesArrayDefinition, StringDefinition
from vyper.semantics.types.value.boolean import BoolDefinition
from vyper.semantics.validation.levenshtein_utils import get_levenshtein_error_suggestions


def _validate_op(node, types_list, validation_fn_name):
Expand Down Expand Up @@ -151,8 +152,11 @@ def types_from_Attribute(self, node):
f"'{name}' is not a storage variable, it should not be prepended with self",
node,
) from None

suggestions_str = get_levenshtein_error_suggestions(name, var.members, 0.4)
raise UndeclaredDefinition(
f"Storage variable '{name}' has not been declared", node
f"Storage variable '{name}' has not been declared. {suggestions_str}",
node,
) from None

def types_from_BinOp(self, node):
Expand Down

0 comments on commit 857bc86

Please sign in to comment.