-
Notifications
You must be signed in to change notification settings - Fork 24
/
Copy path60.build_dic.py
executable file
·50 lines (41 loc) · 956 Bytes
/
60.build_dic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
#!/usr/bin/env python
#
# (C) Copyright 2017 Hojin Choi <hojin.choi@gmail.com>
#
from __future__ import print_function, unicode_literals
from io import open
import re
import sys
import os
# Best-effort creation of the working directories at import time.  Only
# OSError (e.g. the directory already exists) is ignored, so interrupts and
# programming errors are no longer silently swallowed.
try:
    os.mkdir('logs')
except OSError:
    pass

# Maps a tag name to the open dictionary file that collects its words.
fds = {}

try:
    os.mkdir('dictionary')
except OSError:
    pass

# Paths of the dictionary files opened so far, in order of creation.
dictionaries = []
def get_fd(pos):
    """Return the output file for tag *pos*, opening it on first request.

    The first call for a given tag records ``dictionary/<pos>.dic`` in
    ``dictionaries``, opens that file for text writing and caches the handle
    in ``fds``; later calls for the same tag hand back the cached handle.
    """
    # Fast path: this tag already has an open file.
    if pos in fds:
        return fds[pos]

    target = 'dictionary/%s.dic' % pos
    # The path is recorded before the file is opened.
    dictionaries.append(target)
    handle = open(target, "wt")
    fds[pos] = handle
    return handle
def extract(path):
    """Distribute the tagged words of the corpus file *path* into dictionaries.

    Each line is split on runs of whitespace and ``+`` characters.  A token
    shaped like ``word/TAG`` (split at its last ``/``) whose tag consists
    only of the capital letters A-Z has its word written, one per line, to
    the file returned by ``get_fd(TAG)``.  Tokens without a ``/`` or with any
    other kind of tag are skipped.

    Two progress lines are printed to standard output: the path being loaded
    and, once the file is exhausted, a "Word count" figure that is the number
    of lines read.

    Args:
        path: Name of the text file to read.  It is decoded with the
            platform's default encoding because none is passed to ``open``.
    """
    print("Loading: %s" % path)

    # Raw strings keep the backslash in ``\s`` from being treated as an
    # (invalid) string escape.  The ``+`` inside the character class is
    # literal, so plus signs separate tokens just like whitespace does.
    separators = re.compile(r'[\s+]+')
    tag_pattern = re.compile(r'^[A-Z]+$')

    count = 0
    # The with-block closes the input file even if processing fails midway.
    with open(path) as f:
        for line in f:
            count += 1
            for w in separators.split(line):
                # Split at the last slash so a word may itself contain '/'.
                word, slash, pos = w.rpartition('/')
                if not slash:
                    continue
                word = word.strip()
                pos = pos.strip()
                if tag_pattern.match(pos):
                    get_fd(pos).write(word + "\n")

    print("Word count: %d" % (count,))
if __name__ == '__main__':
    # Every command-line argument names a corpus file to process.
    try:
        for p in sys.argv[1:]:
            extract(p)
    finally:
        # get_fd() leaves its output files open in `fds`; close them here so
        # buffered words reach disk even when a later input file fails.
        for fd in fds.values():
            fd.close()
# vim: ts=4 noexpandtab sw=4 sts=4