-
Notifications
You must be signed in to change notification settings - Fork 0
/
EZName.py
433 lines (378 loc) · 17.7 KB
/
EZName.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
try:
from bs4 import BeautifulSoup
except ImportError:
print("[!_!]ERROR INFO: You have to install bs4 module.")
exit()
try:
import requests
except ImportError:
print("[!_!]ERROR INFO: You have to install requests module.")
exit()
try:
import ngender
except ImportError:
print("[!_!]ERROR INFO: You have to install ngender module.")
exit()
try:
from pypinyin import lazy_pinyin
except ImportError:
print("[!_!]ERROR INFO: You have to install pypinyin module.")
exit()
import argparse
import sys
import signal
import csv
import random
from util.boxcalendar import *
# Default cutoff score: main() uses it as the default ``cutoff_score`` and the
# script entry point passes it to select_name(); names scoring below it are skipped.
SCORE_LINE = 70
# Upper bound on how many candidate names select_name() examines in one call.
SEARCH_LIMIT = 1000
def compute_wuxing(year, month, day, hour):
    """
    Count how often each of the five elements occurs in the birth horoscope.

    The year/month/day pillars come from ``lunarday``; the hour pillar is
    derived here from the day stem and the hour. The characters of all four
    pillars are then tallied per element.

    :param year: year of birth, passed through to ``lunarday``
    :param month: month of birth, passed through to ``lunarday``
    :param day: day of birth, passed through to ``lunarday``
    :param hour: hour of birth, 0 ~ 23
    :return: dict mapping each element (金, 木, 水, 火, 土) to its count
    """
    horoscope = lunarday(year, month, day)
    # assumes horoscope[2] is the year-month-day stem/branch pairs joined by '-'
    # (that is how it is indexed here) -- TODO confirm against util.boxcalendar
    day_stem = horoscope[2].split('-')[2][0]
    # Hour pillars: one row per hour branch (子 .. 亥), one column per day-stem
    # group (甲/己, 乙/庚, 丙/辛, 丁/壬, 戊/癸). Within a column the stems advance
    # by one per row.
    time_stem_branch_list = [
        [u"甲子", u"丙子", u"戊子", u"庚子", u"壬子"],
        [u"乙丑", u"丁丑", u"己丑", u"辛丑", u"癸丑"],
        [u"丙寅", u"戊寅", u"庚寅", u"壬寅", u"甲寅"],
        [u"丁卯", u"己卯", u"辛卯", u"癸卯", u"乙卯"],
        [u"戊辰", u"庚辰", u"壬辰", u"甲辰", u"丙辰"],
        # Fixed: the fourth entry used to repeat 己巳; between 甲辰 and 丙午 it must be 乙巳.
        [u"己巳", u"辛巳", u"癸巳", u"乙巳", u"丁巳"],
        [u"庚午", u"壬午", u"甲午", u"丙午", u"戊午"],
        [u"辛未", u"癸未", u"乙未", u"丁未", u"己未"],
        [u"壬申", u"甲申", u"丙申", u"戊申", u"庚申"],
        [u"癸酉", u"乙酉", u"丁酉", u"己酉", u"辛酉"],
        [u"甲戌", u"丙戌", u"戊戌", u"庚戌", u"壬戌"],
        [u"乙亥", u"丁亥", u"己亥", u"辛亥", u"癸亥"],
    ]
    heavenly_stems = [u'甲', u'乙', u'丙', u'丁', u'戊', u'己', u'庚', u'辛', u'壬', u'癸']
    # An unrecognised stem falls back to the last stem, exactly as the original
    # search loop did when it ran to completion without a match.
    if day_stem in heavenly_stems:
        stem_index = heavenly_stems.index(day_stem)
    else:
        stem_index = len(heavenly_stems) - 1
    # Stems five apart share a column (甲 with 己, 乙 with 庚, ...).
    index_X = stem_index % 5
    # NOTE(review): hours are grouped in two-hour blocks starting at 00:00
    # (0-1 -> 子, 2-3 -> 丑, ...). The traditional 子 hour starts at 23:00;
    # confirm this simplification is intended.
    index_Y = int(hour / 2)
    # Append the hour pillar to the year-month-day pillars.
    horoscope = horoscope[2] + '-' + time_stem_branch_list[index_Y][index_X]
    wuxing_dic = {
        u"金": [u"申", u"酉", u"庚", u"辛"],
        u"木": [u"寅", u"卯", u"甲", u"乙"],
        u"水": [u"子", u"亥", u"壬", u"癸"],
        u"火": [u"巳", u"午", u"丙", u"丁"],
        u"土": [u"辰", u"戌", u"丑", u"未", u"戊", u"己"],
    }
    horoscope_chars = horoscope.replace('-', '')
    return {
        element: sum(1 for char in horoscope_chars if char in members)
        for element, members in wuxing_dic.items()
    }
def name_score(name, sur_type=1):
    """
    Get score of name from 1518.com
    :param name: full name
    :param sur_type: surname single(1) or double(2)
    :return: name score as an int; 0 when the page has no score element or
             its text is not a number
    """
    # Imported locally so the block does not depend on a new file-level import.
    from urllib.parse import quote

    print('Getting score for name: {0} from http://1518.com...'.format(name))
    header = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
              'Accept-Encoding': 'gzip, deflate',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'User-Agent': 'Chrome/63.0.3239.84 Safari/537.36'
              }
    # The query value is the percent-encoded GBK bytes of the name. quote()
    # encodes every byte that needs it, unlike rewriting repr(bytes), which
    # breaks when a GBK trail byte is printable ASCII such as a backslash.
    encoded_name = quote(name.encode('gbk'))
    name_url = 'http://m.1518.com/xingming_view.php?word={}&submit1=&FrontType={}'
    response = requests.get(name_url.format(encoded_name, sur_type), headers=header)
    parse_file = BeautifulSoup(response.content, 'lxml')
    score_nodes = parse_file.select('dt > u strong')
    try:
        # The score is the text in front of the '分' suffix.
        return int(score_nodes[0].text.split('分')[0])
    except (IndexError, ValueError):
        # No score element, or text that is not a number: treat as 0 instead
        # of raising from the int() conversion.
        return 0
def output_wuxing(year, month, day, hour):
    """
    Print the birth datetime and an element summary, then return the elements
    that occur fewer than two times in the horoscope.

    :param year: year of birth
    :param month: month of birth
    :param day: day of birth
    :param hour: hour of birth
    :return: list of elements whose count from compute_wuxing is below 2
    """
    element_counts = compute_wuxing(year, month, day, hour)
    print("[*] 出生日期:%s年%s月%s日, %s时" % (year, month, day, hour))

    # Elements that appear less than twice.
    attr_list = []
    for element in element_counts:
        if element_counts[element] < 2:
            attr_list.append(element)

    # The printed summary lists the remaining elements, i.e. those that appear
    # at least twice.
    all_elements = set(['金', '木', '水', '火', '土'])
    name_attr = list(all_elements - set(attr_list))
    print("[*] 五行属性:%s\n" % ', '.join(name_attr))
    return attr_list
def select_name(surname, gender, hour, attr, name_source, wuxing_dict, difficulty_dict,
                modal_particles, enableScoring, easy_mode, cutoff_score, num_of_matches=5):
    '''
    Select names based on Wuxing attributes and the difficulty of the words' pinyin syllables.

    Candidates come from get_name_from_wuxing() and are examined in order until
    num_of_matches names are accepted, SEARCH_LIMIT candidates have been
    examined, or the candidates run out. Progress and results are printed.

    surname: the family name; it is prepended to every candidate given name
    gender: 'M' or 'F'; compared with the upper-cased first letter of ngender's guess
            and passed on to get_name_from_wuxing()
    hour: the hour the baby was born; only copied into the returned rows
    attr: list of wuxing attributes, passed on to get_name_from_wuxing()
    name_source: the name source, passed on to get_name_from_wuxing()
    wuxing_dict: wuxing dictionary, input - Chinese word; output - wuxing
    difficulty_dict: pinyin syllable difficulty dictionary, input - pinyin syllable; output - difficulty level
                     e.g., 'Hao' - Low, 'Zuo' - High, 'Xuan' - Very High
    modal_particles: passed on to get_name_from_wuxing()
    enableScoring: True - get the name score via name_score()
    easy_mode: True - turn on easy-to-pronounce mode
    cutoff_score: int - the cutoff value below which the name will not be considered
    num_of_matches: how many accepted names to collect before stopping

    Returns a list of [hour, full name, pinyin joined by '-', score] rows. With
    scoring enabled the rows are sorted by score in descending order and the
    score is a string; otherwise the score field is 'N/A'.
    '''
    # 1 for a single-character surname, otherwise 2; forwarded to name_score().
    sur_type = 1 if len(surname) == 1 else 2
    match_count = 0
    name_tuples = []
    # full_names, name_syllables and (when scoring) name_scores are parallel lists.
    full_names = []
    name_syllables = []
    name_scores = []
    found_names = get_name_from_wuxing(gender, attr, name_source, wuxing_dict, modal_particles)
    count = 0
    while match_count < num_of_matches and count < SEARCH_LIMIT and count < len(found_names):
        #name = found_names[random.randint(0, len(found_names) - 1)] # randomly pick a name from the matched names
        name = found_names[count] # exhaust all found names or until while condition is not met
        count += 1
        full_name = surname + name
        # Match gender and general name word.
        if gender != ngender.guess(full_name)[0][0].upper():
            continue
        # if name already exists, skip it
        if full_name in full_names:
            continue
        print('picked name: {}'.format(full_name))
        name_vec = lazy_pinyin(name)
        # Total pinyin length of the given name, and whether any syllable is rated hard.
        letters = 0
        isHard = False
        for n in name_vec:
            letters += len(n)
            # Keys of difficulty_dict are looked up with a capitalized first letter.
            n = capitalize_first_letter(n)
            if n in difficulty_dict:
                # Only syllables rated 'High' or above count as hard, and only in easy mode.
                if is_no_easier_than(difficulty_dict[n], 'High') and easy_mode:
                    isHard = True
        if letters > 6 or isHard: # if any of the syllables is hard for English speakers, just skip it
            print('pinyin is too long or too hard')
            continue
        if enableScoring:
            score = name_score(full_name, sur_type)
            print('score is {0:2d}'.format(score))
            if score < cutoff_score:
                print('Score is below the cutoff value. Continue searching...')
                continue # skip those score is below the cutoff value
            name_scores.append(score)
        # The candidate passed every filter: record it.
        full_names.append(full_name)
        name_syllables.append('-'.join(lazy_pinyin(name)))
        match_count += 1
        print('no. of matches = {}'.format(match_count))
    if (count == SEARCH_LIMIT): # if the search limit is hit, show the warning
        print('Search limit {} is hit! Stop searching!'.format(SEARCH_LIMIT))
    print('Searching Done!\nName, Pinyin, Score(Optional)')
    if enableScoring:
        # sort the name_scores in a descending order
        # print out the names whose score higher than the threshold in a descending order of their scores
        indices = [index for index, value in sorted(enumerate(name_scores), reverse=True, key=lambda x: x[1])]
        # The bare string below is disabled code that wrote the results to a CSV
        # file; as a string expression it is evaluated and discarded.
        """with open('./name/babyname_{0}_{1}.csv'.format(surname, hour), 'w') as f:
            for index in indices:
                f.write(full_names[index] + ', ' + name_syllables[index] + ', ' + str(name_scores[index]))
                f.write('\n')"""
        for index in indices:
            name_tuples.append([hour, full_names[index], name_syllables[index], str(name_scores[index])])
            print('{}, {}, {}, {}'.format(hour, full_names[index], name_syllables[index], str(name_scores[index])))
    else:
        # if name scoring is not requested, simply return the name tuples
        for i, name in enumerate(full_names):
            name_tuples.append([hour, name, name_syllables[i], 'N/A'])
            print('{}, {}, {}, N/A'.format(hour, full_names[i], name_syllables[i]))
    return name_tuples
def is_no_easier_than(lvl1, lvl2):
    """
    Tell whether difficulty level lvl1 is at least as hard as lvl2.

    :param lvl1: one of 'Low', 'Medium', 'High', 'Very High'
    :param lvl2: one of 'Low', 'Medium', 'High', 'Very High'
    :return: True when lvl1 ranks equal to or above lvl2, otherwise False
    :raises Exception: when either level is not one of the known levels
    """
    rank_of = {'Low': 1, 'Medium': 2, 'High': 3, 'Very High': 4}
    if not (lvl1 in rank_of and lvl2 in rank_of):
        raise Exception('Unknown difficulty level detected: {} or {}'.format(lvl1, lvl2))
    return rank_of[lvl1] >= rank_of[lvl2]
def get_name_from_wuxing(gender, wuxing_attrib_list, name_source, wuxing_dict, modal_particles):
    '''
    get names from word cells based on the wuxing attributes in the given list

    Every line of the source text is one word cell. Characters listed in
    modal_particles are dropped, two of the remaining characters are picked at
    random, and the pair is kept when the wuxing of both is in wuxing_attrib_list.

    gender: 'M' or 'F' (any case); only used when name_source contains both
            'Chuci' and 'Shijing' (M reads chuci.txt, F reads shijing.txt)
    wuxing_attrib_list: the accepted wuxing attributes
    name_source: name of the source; anything from the first '(' on is dropped
                 and the rest is used as the file name under ./input/
    wuxing_dict: word -> wuxing; updated in place with words looked up online
    modal_particles: characters to leave out of the word cells
    returns: list of two-character names
    '''
    def read_word_cells(path):
        # Read one stripped word cell per line; the file is closed on return.
        with open(path, 'r', encoding='utf8') as source_file:
            return [line.strip() for line in source_file]

    selected_names = []
    word_cells_list = []
    online_wuxing_dict = {}
    if 'Chuci' in name_source and 'Shijing' in name_source:
        if gender.upper() == 'M':
            word_cells_list = read_word_cells('./input/chuci.txt')
        elif gender.upper() == 'F':
            word_cells_list = read_word_cells('./input/shijing.txt')
        else:
            # No source is read; the function goes on with an empty list.
            print('Sorry. LGBTQ is not supported. ;-(')
    else:
        if '(' in name_source:
            name_source = name_source.split('(')[0]
        word_cells_list = read_word_cells('./input/{}.txt'.format(name_source))

    for word_cells in word_cells_list:
        filtered_word_cells = [word for word in word_cells if word not in modal_particles]
        if len(filtered_word_cells) < 2:  # fewer than two usable characters, skip this cell
            continue
        refined_word_cells = random.sample(filtered_word_cells, 2)  # pick two words from the cell randomly
        is_match = True
        for word in refined_word_cells:
            if word not in wuxing_dict:
                # word not found in wuxing dictionary, get it from some online website instead
                wuxing = get_wuxing_online(word)
                online_wuxing_dict[word] = wuxing  # save the new word dictionary for file writing
                wuxing_dict[word] = wuxing
            else:
                wuxing = wuxing_dict[word]
            # No early exit on a mismatch: the second word is still looked up
            # so that its wuxing gets cached as well.
            if wuxing not in wuxing_attrib_list:
                is_match = False
        if is_match:
            selected_names.append(''.join(refined_word_cells))
    # add the new word wuxing to the dictionary file for next run
    if online_wuxing_dict:
        add_new_word_wuxing_to_dict_file(newdict=online_wuxing_dict)
    return selected_names
def add_new_word_wuxing_to_dict_file(newdict):
    """
    Append every word of newdict to ./input/wuxing_dict.csv as a
    'word,wuxing' line, echoing each line while it is written.

    :param newdict: dict mapping a word to its wuxing
    """
    dict_path = './input/wuxing_dict.csv'
    with open(dict_path, 'a', encoding='utf8') as dict_file:
        print('adding new words and their wuxing...\n')
        for word, wuxing in newdict.items():
            entry = '{0},{1}\n'.format(word, wuxing)
            print(entry)
            dict_file.write(entry)
        print('done\n')
def get_wuxing_online(word):
    """
    Get Wuxing attribute from 5156edu.com
    :param word: the word to be looked up
    :return: Wuxing attribute, i.e., '金 木 水 火 土'; an empty string when
             the page does not contain the expected paragraph or separator
    """
    # Imported locally so the block does not depend on a new file-level import.
    from urllib.parse import quote

    print('Getting wuxing for word: {0} from http://5156edu.com...\n'.format(word))
    header = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
              'Accept-Encoding': 'gzip, deflate',
              'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
              'User-Agent': 'Chrome/63.0.3239.84 Safari/537.36'
              }
    # The query value is the percent-encoded GBK bytes of the word. quote()
    # encodes every byte that needs it, unlike rewriting repr(bytes), which
    # breaks when a GBK trail byte is printable ASCII such as a backslash.
    encoded_word = quote(word.encode('gbk'))
    query_url = 'http://xh.5156edu.com/sowx.php?f_key={}&B1='
    response = requests.get(query_url.format(encoded_word), headers=header)
    parse_file = BeautifulSoup(response.content, 'lxml')
    try:
        # The value is read from the third <p> of the page, after the ':'.
        # Both lookups sit inside the try so that a page with fewer
        # paragraphs yields '' instead of an uncaught IndexError.
        wuxing = parse_file.select('p')[2].text.split(':')[1]
    except IndexError:
        wuxing = ''
    return wuxing
def load_difficulty_dict():
    """
    Load input/pinyin_syllable_difficulty.csv.

    :return: dict mapping the 'Syllable' column to the
             'Difficulty for English speakers' column
    """
    with open('input/pinyin_syllable_difficulty.csv', newline='', encoding='utf8') as csv_file:
        return {
            record['Syllable']: record['Difficulty for English speakers']
            for record in csv.DictReader(csv_file)
        }
def capitalize_first_letter(str):
    """
    Return the given text with its first letter upper-cased.

    The remaining letters are kept as they are (unlike ``str.capitalize``,
    which would lower-case them).

    :param str: the text to capitalize
    :return: the capitalized text; '' for empty input
    """
    if not str:
        return ''
    return str[0].upper() + ''.join(str[1:])
def sigint_handler(signum, frame):
    """
    Signal handler: print a notice and leave the program with exit status 0.

    :param signum: signal number (not used)
    :param frame: current stack frame (not used)
    """
    print('You pressed the Ctrl+C.')
    raise SystemExit(0)
def load_wuxing_dict():
    """
    Load input/wuxing_dict.csv.

    :return: dict mapping the 'Word' column to the 'Wuxing' column
    """
    with open('input/wuxing_dict.csv', newline='', encoding='utf8') as csv_file:
        records = csv.DictReader(csv_file)
        return dict((record['Word'], record['Wuxing']) for record in records)
def load_modal_particles():
    """
    Load input/modal_particles_dict.csv.

    :return: list with one entry per line of the file, surrounding whitespace stripped
    """
    # The context manager closes the file; the original left the handle open.
    with open('input/modal_particles_dict.csv', encoding='utf8') as particles_file:
        return [line.strip() for line in particles_file]
def _parse_bool_flag(text):
    # Convert a command-line word such as 'true' / 'False' / '1' / '0' to a bool.
    normalized = text.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got {!r}'.format(text))


def init_args_parser():
    """
    Build the command-line parser.

    Required options: -s surname, -g gender (F/M), -y year, -m month, -d day.
    Optional options: -H hour, -n name scoring switch (int), -i name source,
    -e easy-to-pronounce mode (boolean word). Optional options that are not
    given are None in the parsed namespace.

    :return: the configured argparse.ArgumentParser
    """
    parser = argparse.ArgumentParser(description="Name children with birth datetime and WuXing balance.")
    parser.add_argument("-s", metavar="surname", required=True, help="Surname.")
    parser.add_argument("-g", metavar="gender", choices=('F', 'M'), required=True, help="Gender(F/M).")
    parser.add_argument("-y", type=int, choices=range(1901, 2049), metavar="year", required=True,
                        help="Year of birth date.")
    parser.add_argument("-m", type=int, choices=range(1, 13), metavar="month", required=True,
                        help="Month of birth date.")
    parser.add_argument("-d", type=int, choices=range(1, 32), metavar="day", required=True,
                        help="Day of birth date.")
    parser.add_argument("-H", type=int, choices=range(0, 24), metavar="hour", required=False,
                        help="Hour of birth datetime.")
    parser.add_argument("-n", type=int, metavar="namescore", required=False, help="Get name score?")
    parser.add_argument("-i", metavar="namesource", required=False, help="Specify the name source file")
    # type=bool would turn every non-empty word, including 'False', into True,
    # so the value is parsed explicitly.
    parser.add_argument("-e", type=_parse_bool_flag, metavar="easy-to-pronounce mode", required=False,
                        help="Turn on easy-to-pronounce mode?")
    return parser
def load_dictionaries():
    """
    Load the dictionaries that are required before names can be searched.

    :return: tuple (wuxing_dict, difficulty_dict, modal_particles)
    """
    return (load_wuxing_dict(), load_difficulty_dict(), load_modal_particles())
def main(args, num_of_matches, cutoff_score=SCORE_LINE, name_source='M-Chuci,F-Shijing', easy_mode=True):
    """
    Programmatic entry point: parse ``args`` like a command line and select names.

    :param args: list of command-line style arguments (see init_args_parser);
                 unknown arguments are ignored
    :param num_of_matches: number of names to collect per hour
    :param cutoff_score: names scoring below this value are skipped
    :param name_source: the name source handed to select_name
    :param easy_mode: True - turn on easy-to-pronounce mode
    :return: with -H given, the list of rows returned by select_name for that
             hour; without -H, a list holding one such list for each hour 0 ~ 23
    """
    signal.signal(signal.SIGINT, sigint_handler)
    parser = init_args_parser()
    options, _unknown_args = parser.parse_known_args(args=args)
    wuxing_dict, difficulty_dict, modal_particles = load_dictionaries()
    # Scoring is on unless -n is given with a value that is not positive. The
    # comparison with None matters: a truthiness test treated "-n 0" as
    # "option not given" and left scoring on.
    nameScoring = True if options.n is None else options.n > 0

    def names_for_hour(hour):
        # Compute the wuxing attributes for one hour and select names for it.
        attr_list = output_wuxing(options.y, options.m, options.d, hour)
        return select_name(options.s, options.g, hour, attr_list, name_source, wuxing_dict, difficulty_dict,
                           modal_particles, enableScoring=nameScoring, easy_mode=easy_mode,
                           cutoff_score=cutoff_score, num_of_matches=num_of_matches)

    # Hour 0 (midnight) is a valid hour, so test for None rather than truthiness.
    if options.H is not None:
        return names_for_hour(options.H)
    # no hour is specified, select names for all hours of that day
    return [names_for_hour(hour) for hour in range(0, 24)]
if __name__ == '__main__':
    # Script entry point: parse the command line, load the dictionaries and
    # print name suggestions for the given hour, or for every hour of the day.
    signal.signal(signal.SIGINT, sigint_handler)
    parser = init_args_parser()
    args = parser.parse_args()
    wuxing_dict, difficulty_dict, modal_particles = load_dictionaries()
    # Options that were not given are None. Comparing with None (instead of
    # testing truthiness) keeps the explicit values 0 / False meaningful:
    # "-n 0" turns scoring off, "-H 0" selects midnight, and a false -e value
    # turns easy mode off.
    nameScoring = True if args.n is None else args.n > 0
    name_source = args.i if args.i else 'M-Chuci,F-Shijing'
    easy_mode = True if args.e is None else args.e
    if args.H is not None:
        hours = [args.H]
    else:  # no hour is specified, select names for all hours of that day
        hours = range(0, 24)
    for hour in hours:
        attr_list = output_wuxing(args.y, args.m, args.d, hour)
        select_name(args.s, args.g, hour, attr_list, name_source, wuxing_dict, difficulty_dict,
                    modal_particles, enableScoring=nameScoring, easy_mode=easy_mode, cutoff_score=SCORE_LINE)