forked from rfg1024/GlossaryGenerator
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenxlsx_from_wordstxt.py
executable file
·183 lines (129 loc) · 5.07 KB
/
genxlsx_from_wordstxt.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#!/usr/bin/env python3
'''通过每行一词或短语的文本文件生成含两个sheet的xlsx单词表
从生字表文件(每单词一行)获取单词短语,输出包含两个sheet的excel文件,一个sheet
含单词、词根、音标和中文释义。另一个sheet由前者词根生成,含单词,音标和中文释义。
Example:
示例::
if not Path(MYSQLITE).exists():
init_ecdict_sqlite() # 只需运行一次,生成sqlite3 db文件
write_from_file('FOO.txt')
Note:
别忘修改MYSQLITE变量
'''
import sqlite3
from pathlib import Path, PurePath
from openpyxl import Workbook, load_workbook
from ECDICT import stardict
MYSQLITE = 'ecdictSqlite.db'
def exchange2dict(orig_word, exchange):
"""ECDICT exchange字段的转为字典
Note:
ecdict exchange列中0: 代表Lemma,如 perceived 的 Lemma 是 perceive
类型 说明
p 过去式(did)
d 过去分词(done)
i 现在分词(doing)
3 第三人称单数(does)
r 形容词比较级(-er)
t 形容词最高级(-est)
s 名词复数形式
0 Lemma,如 perceived 的 Lemma 是 perceive
1 Lemma 的变换形式,比如 s 代表 apples 是其 lemma 的复数形式
此外,还有f: b: z:等分段
https://github.com/skywind3000/ECDICT/issues/23
"""
lists = exchange.split('/')
inflection = {}
for trans in lists:
if len(trans) < 3:
continue
inflection[trans[0]] = trans[2:]
if not inflection.get('0'):
inflection['0'] = orig_word
return inflection
def init_ecdict_sqlite():
"""将ecdict.csv转换为sqlite数据库,并新增lemma列
"""
stardict.convert_dict(MYSQLITE, 'ECDICT/ecdict.csv')
con = sqlite3.connect(MYSQLITE)
cur = con.cursor()
cur.execute("ALTER TABLE stardict ADD lemma TEXT")
# 查询所有行的数据
select_query = "SELECT word, exchange FROM stardict"
cur.execute(select_query)
rows = cur.fetchall()
# 遍历每一行,根据 word和exchange 的值更新新建列lemma
for row in rows:
word = row[0]
exchange = row[1]
lemma = exchange2dict(word, exchange)['0']
# 更新 lemma 列的值
update_query = "UPDATE stardict SET lemma = ? WHERE word = ?"
cur.execute(update_query, (lemma, word))
con.commit()
con.close()
def xlsx_write(word_lines, columns, filename, sheetname='sheet1'):
"""将列表文件写入filename文件
"""
if Path(filename).exists():
wb = load_workbook(filename)
else:
wb = Workbook()
# 如果sheetname已存在的话,openpyxl会依次在sheetname后追加1,2,3...
worksheet = wb.create_sheet(sheetname)
rows = len(word_lines)
print('行数有:', rows)
if rows <= 1:
print(f'word_lines行数为{rows},有误,不写入文件,退出')
return
cols = len(columns)
if cols != len(word_lines[0]):
print(
'参数 columns 有 {cols}列,但word_lines第一行有{len(word_lines[0])}列\n列数匹配有误,程序退出'
)
for j in range(cols):
worksheet.cell(1, j + 1, columns[j])
# Iterate over the data and write it out row by row.
for i in range(rows):
for j in range(cols):
worksheet.cell(i + 2, j + 1, word_lines[i][j])
wb.save(filename)
wb.close()
def write_from_words(words, out_file):
"""从单词表(每单词一行)获取单词,输出单词、音标和中文释义
"""
con = sqlite3.connect(MYSQLITE)
cur = con.cursor()
# 查询所有行的数据
holders = ', '.join('?' for _ in words)
select_query = f"SELECT word, phonetic, translation FROM stardict WHERE word IN ({holders})"
cur.execute(select_query, words)
rows = cur.fetchall()
con.commit()
con.close()
xlsx_write(rows, ['word', 'phonetic', 'translation'], out_file,
'Words_Lemma_Sheet')
def write_from_file(input_file):
"""从生字表文件(每单词一行)获取单词短语,输出包含两个sheet的excel文件,一个
sheet含单词、词根、音标和中文释义。另一个sheet由前者词根生成,含单词,音标和
中文释义。
"""
with open(input_file, 'r') as f:
words = [line.strip() for line in f.readlines()]
con = sqlite3.connect(MYSQLITE)
cur = con.cursor()
# 查询所有行的数据
holders = ', '.join('?' for _ in words)
select_query = f"SELECT word, lemma, phonetic, translation FROM stardict WHERE word IN ({holders})"
cur.execute(select_query, words)
rows = cur.fetchall()
con.commit()
con.close()
out_file = PurePath(input_file).stem + '_dict.xlsx'
xlsx_write(rows, ['word', 'lemma', 'phonetic', 'translation'], out_file,
'Words_Sheet')
lemmas = [j[1] for j in rows if j[1] is not None]
write_from_words(lemmas, out_file)
if not Path(MYSQLITE).exists():
init_ecdict_sqlite() # 只需运行一次,生成sqlite3 db文件
write_from_file('1984_glossary.txt')