Skip to content

Commit

Permalink
Merge pull request #1021 from simonwoerpel/fix/levensthein-segfault
Browse files Browse the repository at this point in the history
Fix Levenshtein segmentation fault in invalid name picking
  • Loading branch information
pudo authored Feb 27, 2023
2 parents 741972d + 788458a commit 58ba901
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 3 deletions.
9 changes: 6 additions & 3 deletions followthemoney/types/name.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from typing import Dict, List, Optional, Sequence, TYPE_CHECKING, Union
from banal import first
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, Union

from Levenshtein import distance, setmedian
from normality import slugify
from normality.cleaning import collapse_spaces, strip_quotes
from Levenshtein import distance, setmedian

from followthemoney.types.common import PropertyType
from followthemoney.util import dampen
Expand Down Expand Up @@ -57,6 +57,9 @@ def pick(self, values: Sequence[str]) -> Optional[str]:
lookup.setdefault(norm, [])
lookup[norm].append(value)

if not normalised:
return None

norm = setmedian(normalised)
if norm is None:
return None
Expand Down
6 changes: 6 additions & 0 deletions tests/types/test_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,12 @@ def test_pick(self):
values = ["Robert Smith", "Rob Smith", "Robert SMITH"]
self.assertEqual(names.pick(values), "Robert SMITH")

# handle dirty edge cases
values = ["", "(", "Peter"]
self.assertEqual(names.pick(values), "Peter")
values = ["", "("]
self.assertEqual(names.pick(values), None)

def test_domain_validity(self):
self.assertTrue(names.validate("huhu"))
self.assertFalse(names.validate(""))
Expand Down

0 comments on commit 58ba901

Please sign in to comment.