-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWarhammerRosterTradutorReadCat.py
155 lines (126 loc) · 7.48 KB
/
WarhammerRosterTradutorReadCat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
# encoding: utf-8
# encoding: iso-8859-1
# encoding: win-1252
# encoding=utf8
########################################################################################################################
# Le o arquivo cat e traduz com o google tradutor
# gera o carquivo cat traduzido na pasta wh40kBR-master
########################################################################################################################
import sys
reload(sys)
sys.setdefaultencoding('utf8')
import json
from bs4 import BeautifulSoup
from googletrans import Translator
translator = Translator()
import os
def translate(text,dest):
textTransl=''
try:
textTransl = translator.translate(text, dest=dest).text
print "Traduzindo>>>\n", text, "\n", textTransl
return textTransl
except Exception:
print "falha na traducao", Exception.message
sys.exc_clear()
return textTransl
#filesToRead = ["./wh40k-master/Imperium - Space Marines.cat", "./wh40k-master/Imperium - Space Wolves.cat", "./wh40k-master/Warhammer 40,000 8th Edition.gst"]
filesToRead = ["Imperium - Space Marines.cat", "Imperium - Space Wolves.cat", "Warhammer 40,000 8th Edition.gst"]
#Traduzirá somente novas entradas
onlyNew = 1
originPath = "./wh40k-master/"
destPath = "./wh40kBR-master/"
if not os.path.exists(destPath):
os.makedirs(destPath)
catFiles = os.listdir(originPath)
#catFiles = filter(lambda x: x in filesToRead, catFiles)
catFiles = filter(lambda x: os.path.splitext(x)[1] in [".cat",".gst"], catFiles)
with open('dicionario.json', "r") as file:
dicionarioOrigem = json.loads(file.read())
if onlyNew == 1:
dicionario = {}
else:
dicionario = dicionarioOrigem
for catFilesToRead in catFiles:
with open(originPath+catFilesToRead) as catFile:
print originPath + catFilesToRead
x = catFile.read()
catFile.close()
cat = BeautifulSoup(x, "xml")
# extrai texto do arquivo cat
for catalogues in cat.find_all("catalogue"):
for catalogue in catalogues.children:
#print catalogue.name, type(catalogue)
if catalogue.name <> None:
#print 'n1',catalogue.name
if catalogue.name in ["sharedrules", "sharedprofiles", "selectionentries", "sharedselectionentries",
"sharedselectionentrygroups","sharedRules","sharedProfiles","sharedSelectionEntryGroups",
"sharedSelectionEntries","selectionEntries"]:
for catElement in catalogue.children:
if catElement.name <> None:
#print 'n2\t',catElement.name
#print '____'
#print "desc",catElement.description
#print '____'
for ruleTag in catElement.find_all("description"):
rule = ruleTag.get_text()#
#print rule
print '------------------------------------------------------------------'
if rule not in dicionarioOrigem.keys() and len(rule) > 1:
rulePT = translate(rule, dest='pt')
dicionario[rule] = rulePT
#else:
# print "ja existe no dicionario:", rule
for characteristicsTag in catElement.find_all("characteristic"):
if characteristicsTag["name"] in["Abilities", "Description","Details"]:
#print profiles["name"], characteristic["name"], characteristic["value"],characteristic
#if characteristicsTag.has_attr('value'):
#description=characteristicsTag["value"]
description=characteristicsTag.text.upper()
#print description
print '------------------------------------------------------------------'
if description not in dicionarioOrigem.keys() and len(description) > 1:
descriptionPT = translate(description, dest='pt')
dicionario[description] = descriptionPT
#else:
# print "ja existe no dicionario:", description
# extrai texto do arquivo gst
for catalogues in cat.find_all("gameSystem"):
for catalogue in catalogues.children:
#print catalogue.name, type(catalogue)
if catalogue.name <> None:
print 'n1',catalogue.name
if catalogue.name in ["sharedrules", "sharedprofiles", "selectionentries", "sharedselectionentries",
"sharedselectionentrygroups","sharedRules","sharedProfiles","sharedSelectionEntryGroups",
"sharedSelectionEntries","selectionEntries"]:
for catElement in catalogue.children:
if catElement.name <> None:
#print 'n2\t',catElement.name
#print '____'
#print "desc",catElement.description
#print '____'
for ruleTag in catElement.find_all("description"):
rule = ruleTag.get_text()#
#print rule
print '------------------------------------------------------------------'
if rule not in dicionarioOrigem.keys() and len(rule) > 1:
rulePT = translate(rule, dest='pt')
dicionario[rule] = rulePT
#else:
# print "ja existe no dicionario", rule
for characteristicsTag in catElement.find_all("characteristic"):
if characteristicsTag["name"] in["Abilities", "Description","Details"]:
#print profiles["name"], characteristic["name"], characteristic["value"],characteristic
if characteristicsTag.has_attr('value'):
description=characteristicsTag["value"]
#print description
print '------------------------------------------------------------------'
if description not in dicionarioOrigem.keys() and len(description) > 1:
descriptionPT = translate(description, dest='pt')
dicionario[description] = descriptionPT
#else:
# print "ja existe no dicionario", description
#
newFile = "dicionario_new.json"
with open(newFile, "wb") as file:
file.write(json.dumps(dicionario,indent=4, sort_keys=True, ensure_ascii=False))