Skip to content

Commit

Permalink
Check for duplicate dictionary keys (#72)
Browse files Browse the repository at this point in the history
  • Loading branch information
geokala authored and jayvdb committed Jul 24, 2016
1 parent 2ab47d7 commit 152ca18
Show file tree
Hide file tree
Showing 3 changed files with 333 additions and 1 deletion.
101 changes: 100 additions & 1 deletion pyflakes/checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,17 @@ def __missing__(self, node_class):
return fields


def counter(items):
"""
Simplest required implementation of collections.Counter. Required as 2.6
does not have Counter in collections.
"""
results = {}
for item in items:
results[item] = results.get(item, 0) + 1
return results


def iter_child_nodes(node, omit=None, _fields_order=_FieldsOrder()):
"""
Yield all direct child nodes of *node*, that is, all fields that
Expand All @@ -97,6 +108,33 @@ def iter_child_nodes(node, omit=None, _fields_order=_FieldsOrder()):
yield item


def convert_to_value(item):
if isinstance(item, ast.Str):
return item.s
elif hasattr(ast, 'Bytes') and isinstance(item, ast.Bytes):
return item.s
elif isinstance(item, ast.Tuple):
return tuple(convert_to_value(i) for i in item.elts)
elif isinstance(item, ast.Num):
return item.n
elif isinstance(item, ast.Name):
result = VariableKey(item=item)
constants_lookup = {
'True': True,
'False': False,
'None': None,
}
return constants_lookup.get(
result.name,
result,
)
elif (not PY33) and isinstance(item, ast.NameConstant):
# None, True, False are nameconstants in python3, but names in 2
return item.value
else:
return UnhandledKeyType()


class Binding(object):
"""
Represents the binding of a value to a name.
Expand Down Expand Up @@ -133,6 +171,31 @@ class Definition(Binding):
"""


class UnhandledKeyType(object):
"""
A dictionary key of a type that we cannot or do not check for duplicates.
"""


class VariableKey(object):
"""
A dictionary key which is a variable.
@ivar item: The variable AST object.
"""
def __init__(self, item):
self.name = item.id

def __eq__(self, compare):
return (
compare.__class__ == self.__class__
and compare.name == self.name
)

def __hash__(self):
return hash(self.name)


class Importation(Definition):
"""
A binding created by an import statement.
Expand Down Expand Up @@ -855,7 +918,7 @@ def ignore(self, node):
PASS = ignore

# "expr" type nodes
BOOLOP = BINOP = UNARYOP = IFEXP = DICT = SET = \
BOOLOP = BINOP = UNARYOP = IFEXP = SET = \
COMPARE = CALL = REPR = ATTRIBUTE = SUBSCRIPT = \
STARRED = NAMECONSTANT = handleChildren

Expand All @@ -876,6 +939,42 @@ def ignore(self, node):
# additional node types
COMPREHENSION = KEYWORD = FORMATTEDVALUE = handleChildren

def DICT(self, node):
# Complain if there are duplicate keys with different values
# If they have the same value it's not going to cause potentially
# unexpected behaviour so we'll not complain.
keys = [
convert_to_value(key) for key in node.keys
]

key_counts = counter(keys)
duplicate_keys = [
key for key, count in key_counts.items()
if count > 1
]

for key in duplicate_keys:
key_indices = [i for i, i_key in enumerate(keys) if i_key == key]

values = counter(
convert_to_value(node.values[index])
for index in key_indices
)
if any(count == 1 for value, count in values.items()):
for key_index in key_indices:
key_node = node.keys[key_index]
if isinstance(key, VariableKey):
self.report(messages.MultiValueRepeatedKeyVariable,
key_node,
key.name)
else:
self.report(
messages.MultiValueRepeatedKeyLiteral,
key_node,
key,
)
self.handleChildren(node)

def ASSERT(self, node):
if isinstance(node.test, ast.Tuple) and node.test.elts != []:
self.report(messages.AssertTuple, node)
Expand Down
16 changes: 16 additions & 0 deletions pyflakes/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,22 @@ def __init__(self, filename, loc, name):
self.message_args = (name,)


class MultiValueRepeatedKeyLiteral(Message):
message = 'dictionary key %r repeated with different values'

def __init__(self, filename, loc, key):
Message.__init__(self, filename, loc)
self.message_args = (key,)


class MultiValueRepeatedKeyVariable(Message):
message = 'dictionary key variable %s repeated with different values'

def __init__(self, filename, loc, key):
Message.__init__(self, filename, loc)
self.message_args = (key,)


class LateFutureImport(Message):
message = 'from __future__ imports must occur at the beginning of the file'

Expand Down
217 changes: 217 additions & 0 deletions pyflakes/test/test_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
"""
Tests for dict duplicate keys Pyflakes behavior.
"""

from sys import version_info

from pyflakes import messages as m
from pyflakes.test.harness import TestCase, skipIf


class Test(TestCase):

def test_duplicate_keys(self):
self.flakes(
"{'yes': 1, 'yes': 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

@skipIf(version_info < (3,),
"bytes and strings with same 'value' are not equal in python3")
@skipIf(version_info[0:2] == (3, 2),
"python3.2 does not allow u"" literal string definition")
def test_duplicate_keys_bytes_vs_unicode_py3(self):
self.flakes("{b'a': 1, u'a': 2}")

@skipIf(version_info < (3,),
"bytes and strings with same 'value' are not equal in python3")
@skipIf(version_info[0:2] == (3, 2),
"python3.2 does not allow u"" literal string definition")
def test_duplicate_values_bytes_vs_unicode_py3(self):
self.flakes(
"{1: b'a', 1: u'a'}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

@skipIf(version_info >= (3,),
"bytes and strings with same 'value' are equal in python2")
def test_duplicate_keys_bytes_vs_unicode_py2(self):
self.flakes(
"{b'a': 1, u'a': 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

@skipIf(version_info >= (3,),
"bytes and strings with same 'value' are equal in python2")
def test_duplicate_values_bytes_vs_unicode_py2(self):
self.flakes("{1: b'a', 1: u'a'}")

def test_multiple_duplicate_keys(self):
self.flakes(
"{'yes': 1, 'yes': 2, 'no': 2, 'no': 3}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_in_function(self):
self.flakes(
'''
def f(thing):
pass
f({'yes': 1, 'yes': 2})
''',
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_in_lambda(self):
self.flakes(
"lambda x: {(0,1): 1, (0,1): 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_tuples(self):
self.flakes(
"{(0,1): 1, (0,1): 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_tuples_int_and_float(self):
self.flakes(
"{(0,1): 1, (0,1.0): 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_ints(self):
self.flakes(
"{1: 1, 1: 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_bools(self):
self.flakes(
"{True: 1, True: 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_bools_false(self):
# Needed to ensure 2.x correctly coerces these from variables
self.flakes(
"{False: 1, False: 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_keys_none(self):
self.flakes(
"{None: 1, None: 2}",
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_variable_keys(self):
self.flakes(
'''
a = 1
{a: 1, a: 2}
''',
m.MultiValueRepeatedKeyVariable,
m.MultiValueRepeatedKeyVariable,
)

def test_duplicate_variable_values(self):
self.flakes(
'''
a = 1
b = 2
{1: a, 1: b}
''',
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_variable_values_same_value(self):
# Current behaviour is not to look up variable values. This is to
# confirm that.
self.flakes(
'''
a = 1
b = 1
{1: a, 1: b}
''',
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_duplicate_key_float_and_int(self):
"""
These do look like different values, but when it comes to their use as
keys, they compare as equal and so are actually duplicates.
The literal dict {1: 1, 1.0: 1} actually becomes {1.0: 1}.
"""
self.flakes(
'''
{1: 1, 1.0: 2}
''',
m.MultiValueRepeatedKeyLiteral,
m.MultiValueRepeatedKeyLiteral,
)

def test_no_duplicate_key_error_same_value(self):
self.flakes('''
{'yes': 1, 'yes': 1}
''')

def test_no_duplicate_key_errors(self):
self.flakes('''
{'yes': 1, 'no': 2}
''')

def test_no_duplicate_keys_tuples_same_first_element(self):
self.flakes("{(0,1): 1, (0,2): 1}")

def test_no_duplicate_key_errors_func_call(self):
self.flakes('''
def test(thing):
pass
test({True: 1, None: 2, False: 1})
''')

def test_no_duplicate_key_errors_bool_or_none(self):
self.flakes("{True: 1, None: 2, False: 1}")

def test_no_duplicate_key_errors_ints(self):
self.flakes('''
{1: 1, 2: 1}
''')

def test_no_duplicate_key_errors_vars(self):
self.flakes('''
test = 'yes'
rest = 'yes'
{test: 1, rest: 2}
''')

def test_no_duplicate_key_errors_tuples(self):
self.flakes('''
{(0,1): 1, (0,2): 1}
''')

def test_no_duplicate_key_errors_instance_attributes(self):
self.flakes('''
class Test():
pass
f = Test()
f.a = 1
{f.a: 1, f.a: 1}
''')

0 comments on commit 152ca18

Please sign in to comment.