-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathtransform.py
60 lines (50 loc) · 1.28 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python
#coding=utf-8
#Program:
# Transform the full names in pos* files into their short names.
#Date:
# 2016-3-16
from entity_match import *
# Path of the lookup table used for entity linking (presumably full company
# name -> short name, judging by the file name -- confirm the column layout).
ENTITY_FILE = "company_full_short.csv"
# Built once at import time; transformFile() passes it to link().
# NOTE(review): this call executes before the `def loaddict` further down has
# run, so it only resolves if `from entity_match import *` already provides a
# `loaddict`; otherwise importing this module raises NameError -- confirm.
entity_dict = loaddict(ENTITY_FILE)
def loaddict(filename):
    """Load a full-name -> short-name mapping from a whitespace-separated file.

    The first line is treated as a header and skipped.  Every following line
    is split on whitespace: the first token becomes the key (full name) and
    the second token the value (short name).  Any further tokens are ignored,
    and a later line with the same full name overwrites an earlier one.

    Lines with fewer than two tokens (for example blank lines) are skipped
    instead of aborting the whole load with an IndexError.

    Args:
        filename: Path of the mapping file.

    Returns:
        A dict mapping each full name to its short name; empty when the file
        holds nothing but a header (or nothing at all).

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    mapping = {}
    # The context manager closes the handle even if reading fails part-way.
    with open(filename, "r") as handle:
        # Skip the header row; the default keeps an empty file from raising.
        next(handle, None)
        for line in handle:
            fields = line.split()
            if len(fields) < 2:
                # Blank or incomplete row: there is no pair to record.
                continue
            mapping[fields[0]] = fields[1]
    return mapping
def transformFile(filename, dict):
    """Print linked ``first,second`` pairs for every line of a comma-separated file.

    Each line of *filename* is split on commas.  The first two fields are each
    passed to ``link`` together with the module-level ``entity_dict`` and the
    resulting pair is printed to stdout as ``first,second``.

    A pair is skipped when ``link`` returns ``None`` or an empty string for
    either field.  When ``link`` raises, the field is kept as read from the
    file (best effort) and the pair is still printed.  Lines that do not
    contain at least two comma-separated fields are skipped.

    Args:
        filename: Path of the comma-separated input file.
        dict: Currently unused.  NOTE(review): lookups go through the
            module-level ``entity_dict`` rather than this argument -- confirm
            whether callers expect their own mapping to be honoured.

    Returns:
        None.  Results are written to stdout only.

    Raises:
        IOError/OSError: If the file cannot be opened.
    """
    # Company-type words and bracket symbols used to be stripped from both
    # fields before linking; that step is disabled, so the raw field text is
    # what gets handed to link().
    with open(filename, "r") as fin:
        for line in fin:
            # Drop the line terminator so it cannot ride along in the second
            # field and show up as a blank line in the output.
            coms = line.rstrip("\r\n").split(",")
            if len(coms) < 2:
                # Blank or malformed line: nothing to pair up.
                continue
            com1 = coms[0]
            com2 = coms[1]

            try:
                com1 = link(com1, entity_dict)
            except Exception:
                # Best effort: keep the original text when linking fails.
                pass
            else:
                if com1 is None or com1 == '':
                    continue

            # NOTE(review): com2 came out of a split on ',' and therefore can
            # never contain one, so this loop runs exactly once -- confirm
            # whether a different inner delimiter was intended.
            for c in com2.split(','):
                try:
                    c = link(c, entity_dict)
                except Exception:
                    # Best effort: keep the original text when linking fails.
                    pass
                else:
                    if c is None or c == '':
                        continue
                # A single parenthesised argument prints identically under
                # Python 2 and Python 3.
                print(com1 + ',' + c)