-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathProblem1.py
54 lines (41 loc) · 1.8 KB
/
Problem1.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import re
def clean_names(rawData):
    """Split raw company strings into (name, trade name) pairs.

    Each string is split on a "doing business as" marker -- the letters
    d, b, a in any case, optionally separated by non-word characters and
    followed by one non-word character (e.g. "DBA:", "d.b.a ", "D. B. A.",
    "d-b-a "). Both halves are then cleaned with ``parse``.

    Args:
        rawData: iterable of raw name strings.

    Returns:
        A list with one 2-tuple per input string, in input order. The
        second element is ``None`` when the string contains no marker.
    """
    # Raw string: '\W' and '\b' are regex escapes, not Python string escapes.
    # The leading \b stops the marker from matching in the middle of a word
    # (e.g. the "d B.A." inside "Gold B.A. Tutors").
    dba_marker = re.compile(r'\bd+\W*b+\W*a\W', re.IGNORECASE)

    pairs = []
    for entry in rawData:
        # maxsplit=1 guarantees at most two parts, so every result is a pair
        # even if the marker appears more than once in the string.
        parts = parse(dba_marker.split(entry, maxsplit=1))
        if len(parts) == 1:
            parts.append(None)
        pairs.append(tuple(parts))
    return pairs
def parse(data):
    """Clean each name fragment and return the cleaned fragments as a list.

    For every string in ``data``: underscores are replaced by spaces, then
    all leading and trailing non-word characters (whitespace, punctuation)
    are removed. Characters inside the fragment are left untouched.

    Args:
        data: iterable of strings.

    Returns:
        A new list of cleaned strings, in input order. A fragment made up
        only of non-word characters becomes the empty string.
    """
    # One pass strips the noise from both ends; raw string so that \A, \W
    # and \Z reach the regex engine instead of being invalid string escapes.
    edge_noise = re.compile(r'\A\W+|\W+\Z')

    # Underscores are word characters as far as \W is concerned, so they are
    # turned into spaces *before* stripping; otherwise an underscore at the
    # edge would shield the punctuation next to it from being removed.
    return [edge_noise.sub('', fragment.replace('_', ' ')) for fragment in data]
if __name__ == "__main__":
    # Sample raw inputs covering the marker spellings and decorations the
    # cleaner is expected to handle.
    rawData = [
        'SPV Inc., DBA: Super Company',
        'Michael Forsky LLC d.b.a F/B Burgers .',
        '*** Youthful You Aesthetics ***',
        'Aruna Indika (dba. NGXess)',
        'Diot SA, - D. B. A. *Diot-Technologies*',
        'PERFECT PRIVACY, LLC, d-b-a Perfection,',
        'PostgreSQL DB Analytics',
        '/JAYE INC/',
        ' ETABLISSEMENTS SCHEPENS /D.B.A./ ETS_SCHEPENS',
        'DUIKERSTRAINING OOSTENDE | D.B.A.: D.T.O. ',
    ]

    # Expected result for each entry of rawData, in the same order; the
    # second element is None where the input has no DBA marker.
    CLEANED_NAME_PAIRS = [
        ('SPV Inc', 'Super Company'),
        ('Michael Forsky LLC', 'F/B Burgers'),
        ('Youthful You Aesthetics', None),
        ('Aruna Indika', 'NGXess'),
        ('Diot SA', 'Diot-Technologies'),
        ('PERFECT PRIVACY, LLC', 'Perfection'),
        ('PostgreSQL DB Analytics', None),
        ('JAYE INC', None),
        ('ETABLISSEMENTS SCHEPENS', 'ETS SCHEPENS'),
        ('DUIKERSTRAINING OOSTENDE', 'D.T.O'),
    ]

    # Self-check when run as a script: the cleaner must reproduce the
    # expected pairs exactly.
    assert clean_names(rawData) == CLEANED_NAME_PAIRS