-
Notifications
You must be signed in to change notification settings - Fork 2
/
xl_rencode_badly_encoded_files.py
48 lines (36 loc) · 1.43 KB
/
xl_rencode_badly_encoded_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# stolen from https://stackoverflow.com/questions/191359/how-to-convert-a-file-to-utf-8-in-python
# this is just a script to re-encode non-UTF-8 files as UTF-8
import os
import pathlib
from chardet import detect
# When True, any file that raises UnicodeDecodeError or UnicodeEncodeError
# during conversion is deleted from disk (via remove_file) right after the
# error is printed. When False, such files are only reported and left in place.
# NOTE: the default is the destructive setting.
REMOVE_BADLY_ENCODED_FILES = True
# guess the character encoding of a file from its raw bytes
def get_encoding_type(file):
    """Return the encoding name chardet reports for the contents of *file*.

    The whole file is read into memory in binary mode and handed to
    ``chardet.detect``; only the ``'encoding'`` entry of its result is
    returned. Per the chardet documentation that entry can be ``None`` when
    no encoding could be determined, so callers should be prepared for it.
    """
    with open(file, mode='rb') as stream:
        guess = detect(stream.read())
    return guess['encoding']
def remove_file(file):
    """Print a short notice to stdout, then delete *file* from disk.

    Any OSError raised by the deletion (missing file, permission denied, ...)
    propagates to the caller.
    """
    print("Removing it...")
    # os.unlink is the same operation as os.remove under its POSIX name.
    os.unlink(file)
# Re-encode every *.json file below the mods folder to UTF-8, in place.
#
# Each file is decoded with the encoding reported by get_encoding_type,
# written as UTF-8 to a scratch file, and the scratch file is then moved over
# the original. Files that fail to decode/encode are reported and, when
# REMOVE_BADLY_ENCODED_FILES is set, deleted.
mods_folder_path = "C:/Games/CDDA_MODDING_BN_dummy/cdda/data/mods/"

# Scratch file that receives the UTF-8 copy before it replaces the original.
# Raw string: the backslashes are literal path separators, not escapes. The
# path is constant, so it is built once rather than on every iteration. It
# sits outside mods_folder_path, so the glob below never picks it up.
trg_file = r"C:\Games\CDDA_MODDING_BN_dummy\cdda\whatever.json"

# Take a snapshot of the matches up front: the loop deletes and replaces
# files inside the tree, which should not happen underneath a lazy glob.
every_mod_files = list(pathlib.Path(mods_folder_path).glob('**/*.json'))

for mod_file in every_mod_files:
    src_file = mod_file
    from_codec = get_encoding_type(src_file)

    # No detected encoding (e.g. an empty file): open(encoding=None) would
    # silently fall back to the platform default and could rewrite the file
    # as garbage, so leave such files untouched.
    if from_codec is None:
        print('Unknown encoding, skipping ' + str(src_file))
        continue

    try:
        with open(src_file, 'r', encoding=from_codec) as f, open(trg_file, 'w', encoding='utf-8') as e:
            text = f.read()  # for small files, for big use chunks
            e.write(text)
        # Both handles are closed here. os.replace overwrites the original in
        # one step, so there is no window in which the original is already
        # deleted but the re-encoded copy is not yet in place.
        os.replace(trg_file, src_file)
    except UnicodeDecodeError:
        print('Decode Error ' + str(src_file))
        if REMOVE_BADLY_ENCODED_FILES:
            remove_file(src_file)
    except UnicodeEncodeError:
        print('Encode Error ' + str(src_file))
        if REMOVE_BADLY_ENCODED_FILES:
            remove_file(src_file)
    finally:
        # After a failed conversion the scratch file holds a partial copy;
        # after a successful one it has already been moved away.
        if os.path.exists(trg_file):
            os.remove(trg_file)