Skip to content

Commit

Permalink
add txt encode option
Browse files (browse the repository at this point in the history)
  • Loading branch information
dtlnor committed May 9, 2023
1 parent e8c9436 commit 0e41c15
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 8 deletions.
14 changes: 10 additions & 4 deletions src/REMSGUtil.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,19 +234,25 @@ def importCSV(msgObj: MSG, filename: str, version: int = None, langCount: int =
return msg


def exportTXT(msg: MSG, filename: str, lang: int, encode="utf-8"):
def exportTXT(msg: MSG, filename: str, lang: int, encode=None):
"""write txt file from MSG object with specified language"""

with io.open(filename, "w", encoding=encode) as txtf:
with io.open(filename, "w", encoding=encode if encode is not None else 'utf-8') as txtf:
txtf.writelines(['<string>'+entry.langs[lang].replace('\r\n','<lf>')+'\n' for entry in msg.entrys])


def importTXT(msgObj: MSG, filename: str, lang: int) -> MSG:
def importTXT(msgObj: MSG, filename: str, lang: int, encode=None) -> MSG:
"""read txt file, modify the provided msg object, and return the new MSG object"""
if encode is None:
encode = getEncoding(filename)
elif 'utf' in encode and 'sig' not in encode:
testEncode = getEncoding(filename)
if testEncode.endswith('sig'):
encode = testEncode

msg = copy.deepcopy(msgObj)
lines = None
with io.open(filename, mode="r", encoding=getEncoding(filename)) as txtf:
with io.open(filename, mode="r", encoding=encode) as txtf:
lines = list([s.rstrip('\n').rstrip('\r').removeprefix("<string>").replace('<lf>','\r\n') for s in txtf.readlines() if s.startswith("<string>")])

assert len(lines) == len(msg.entrys), "Invalid number of entry"
Expand Down
9 changes: 5 additions & 4 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ def fillList(path: str, filetype = 'msg'):
else:
return []

def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT_LANG_LU["ja"], attrSum=""):
def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT_LANG_LU["ja"], **kwargs):
try:
filenameFull = os.path.abspath(item)
print("processing:"+filenameFull)
Expand All @@ -108,9 +108,9 @@ def worker(item, mode = "csv", modFile: str = None, lang : int = REMSGUtil.SHORT

elif mode == "txt":
if modFile is None:
REMSGUtil.exportTXT(msg, filenameFull+'.'+mode, lang)
REMSGUtil.exportTXT(msg, filenameFull+'.'+mode, lang, encode=kwargs["txtformat"])
else:
REMSGUtil.exportMSG(msg=REMSGUtil.importTXT(msg, modFile, lang), filename=filenameFull+'.new')
REMSGUtil.exportMSG(msg=REMSGUtil.importTXT(msg, modFile, lang, encode=kwargs["txtformat"]), filename=filenameFull+'.new')

elif mode == "json":
if modFile is None:
Expand All @@ -137,6 +137,7 @@ def main():
parser.add_argument('-m', '--mode', type=str, choices=['csv','txt','json'], default='csv', help='choose output file format.\n txt = msg tool style txt.\n csv = all lang in one csv with rich info.\n json = all lang in one json with rich info in mhrice format')
parser.add_argument('-e', '--edit', type=str, help='input (csv/txt/json) file to edit the content.\n if input as folder, the filename and number of files\n should be same as original .msg file\n (with corresponding (.txt/.csv/.json) extension)')
parser.add_argument('-l', '--lang', type=str, default='ja', choices=REMSGUtil.SHORT_LANG_LU.keys(), help='input the lang you want to export for txt mode (default ja)\n')
parser.add_argument('-f', '--txtformat', type=str, default=None, choices=['utf-8', 'utf-8-sig'], help="force txt read/write format to be 'utf-8' or 'utf-8-sig'(BOM).\n")
parser.add_argument('args', nargs=argparse.REMAINDER)
args = parser.parse_args()

Expand Down Expand Up @@ -218,7 +219,7 @@ def main():
sys.exit(1)

executor = concurrent.futures.ProcessPoolExecutor(args.multiprocess)
futures = [executor.submit(worker, file, mode = args.mode, modFile = edit, lang = REMSGUtil.SHORT_LANG_LU[args.lang]) for file, edit in zip(filenameList, editList)]
futures = [executor.submit(worker, file, mode = args.mode, modFile = edit, lang = REMSGUtil.SHORT_LANG_LU[args.lang], txtformat=args.txtformat) for file, edit in zip(filenameList, editList)]
concurrent.futures.wait(futures)

if __name__ == "__main__":
Expand Down

0 comments on commit 0e41c15

Please sign in to comment.