1414"""
1515
1616
17- def jaccard_similarity (set_a , set_b , alternative_union = False ):
17+ def jaccard_similarity (
18+ set_a : set [str ] | list [str ] | tuple [str ],
19+ set_b : set [str ] | list [str ] | tuple [str ],
20+ alternative_union = False ,
21+ ):
1822 """
1923 Finds the jaccard similarity between two sets.
2024 Essentially, its intersection over union.
@@ -37,41 +41,52 @@ def jaccard_similarity(set_a, set_b, alternative_union=False):
3741 >>> set_b = {'c', 'd', 'e', 'f', 'h', 'i'}
3842 >>> jaccard_similarity(set_a, set_b)
3943 0.375
40-
4144 >>> jaccard_similarity(set_a, set_a)
4245 1.0
43-
4446 >>> jaccard_similarity(set_a, set_a, True)
4547 0.5
46-
4748 >>> set_a = ['a', 'b', 'c', 'd', 'e']
4849 >>> set_b = ('c', 'd', 'e', 'f', 'h', 'i')
4950 >>> jaccard_similarity(set_a, set_b)
5051 0.375
52+ >>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
53+ >>> set_b = ['a', 'b', 'c', 'd', 'e']
54+ >>> jaccard_similarity(set_a, set_b)
55+ 0.375
56+ >>> set_a = ('c', 'd', 'e', 'f', 'h', 'i')
57+ >>> set_b = ['a', 'b', 'c', 'd']
58+ >>> jaccard_similarity(set_a, set_b, True)
59+ 0.2
60+ >>> set_a = {'a', 'b'}
61+ >>> set_b = ['c', 'd']
62+ >>> jaccard_similarity(set_a, set_b)
63+ Traceback (most recent call last):
64+ ...
65+ ValueError: Set a and b must either both be sets or be either a list or a tuple.
5166 """
5267
5368 if isinstance (set_a , set ) and isinstance (set_b , set ):
54- intersection = len (set_a .intersection (set_b ))
69+ intersection_length = len (set_a .intersection (set_b ))
5570
5671 if alternative_union :
57- union = len (set_a ) + len (set_b )
72+ union_length = len (set_a ) + len (set_b )
5873 else :
59- union = len (set_a .union (set_b ))
74+ union_length = len (set_a .union (set_b ))
6075
61- return intersection / union
76+ return intersection_length / union_length
6277
63- if isinstance (set_a , (list , tuple )) and isinstance (set_b , (list , tuple )):
78+ elif isinstance (set_a , (list , tuple )) and isinstance (set_b , (list , tuple )):
6479 intersection = [element for element in set_a if element in set_b ]
6580
6681 if alternative_union :
67- union = len (set_a ) + len (set_b )
68- return len (intersection ) / union
82+ return len (intersection ) / (len (set_a ) + len (set_b ))
6983 else :
70- union = set_a + [element for element in set_b if element not in set_a ]
84+ # Cast set_a to list because tuples cannot be mutated
85+ union = list (set_a ) + [element for element in set_b if element not in set_a ]
7186 return len (intersection ) / len (union )
72-
73- return len ( intersection ) / len ( union )
74- return None
87+ raise ValueError (
88+ "Set a and b must either both be sets or be either a list or a tuple."
89+ )
7590
7691
7792if __name__ == "__main__" :
0 commit comments