Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

warn on -d option of python cli #278

Merged
merged 2 commits into from
Nov 11, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 26 additions & 16 deletions python/py_src/sudachipy/command_line.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2019 Works Applications Co., Ltd.
# Copyright (c) 2019-2024 Works Applications Co., Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -24,6 +24,13 @@
from . import sudachipy


logging.basicConfig(
style="{",
format='{levelname} {asctime} [{module}:{funcName}:{lineno}] {message}',
datefmt="%m-%d-%Y %H:%M:%S",
)


def _set_default_subparser(self, name, args=None):
"""
copy and modify code from https://bitbucket.org/ruamel/std.argparse
Expand Down Expand Up @@ -51,7 +58,7 @@ def _set_default_subparser(self, name, args=None):
argparse.ArgumentParser.set_default_subparser = _set_default_subparser


def run(tokenizer, input_, output, print_all, morphs, is_stdout):
def run(tokenizer, input_, output, print_all, pos_list, is_stdout):
# get an empty MorphemeList for memory reuse
mlist = tokenizer.tokenize("")
for line in input_:
Expand All @@ -60,7 +67,7 @@ def run(tokenizer, input_, output, print_all, morphs, is_stdout):
for m in tokenizer.tokenize(line, out=mlist):
list_info = [
m.surface(),
morphs[m.part_of_speech_id()],
pos_list[m.part_of_speech_id()],
m.normalized_form()]
if print_all:
list_info += [
Expand Down Expand Up @@ -97,27 +104,27 @@ def _command_tokenize(args, print_usage):
if args.fpath_out:
output = open(args.fpath_out, "w", encoding="utf-8")

stdout_logger = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.DEBUG)
stdout_logger.addHandler(handler)
stdout_logger.setLevel(logging.DEBUG)
stdout_logger.propagate = False
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

print_all = args.a
debug = args.d
if debug:
logger.warning("-d option is not implemented in python.")

try:
dict_ = Dictionary(config_path=args.fpath_setting,
dict_type=args.system_dict_type)
# empty matcher - get all POS tags
all_morphs = dict_.pos_matcher([()])
all_pos_matcher = dict_.pos_matcher([()])
# precompute output POS strings
morphs = [",".join(ms) for ms in all_morphs]
pos_list = [",".join(ms) for ms in all_pos_matcher]

tokenizer_obj = dict_.create(mode=args.mode)
input_ = fileinput.input(
args.in_files, openhook=fileinput.hook_encoded("utf-8"))
run(tokenizer_obj, input_, output, print_all, morphs, is_stdout=args.fpath_out is None)
run(tokenizer_obj, input_, output, print_all,
pos_list, is_stdout=args.fpath_out is None)
finally:
if args.fpath_out:
output.close()
Expand All @@ -139,7 +146,8 @@ def _command_build(args, print_usage):

out_file = Path(args.out_file)
if out_file.exists():
print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr)
print("File", out_file,
"already exists, refusing to overwrite it", file=sys.stderr)
return

description = args.description or ""
Expand All @@ -161,7 +169,8 @@ def _command_build(args, print_usage):
def _command_user_build(args, print_usage):
system = Path(args.system_dic)
if not system.exists():
print("System dictionary file", system, "does not exist", file=sys.stderr)
print("System dictionary file", system,
"does not exist", file=sys.stderr)
return print_usage()

in_files = []
Expand All @@ -174,7 +183,8 @@ def _command_user_build(args, print_usage):

out_file = Path(args.out_file)
if out_file.exists():
print("File", out_file, "already exists, refusing to overwrite it", file=sys.stderr)
print("File", out_file,
"already exists, refusing to overwrite it", file=sys.stderr)
return

description = args.description or ""
Expand Down Expand Up @@ -217,7 +227,7 @@ def main():
parser_tk.add_argument("-a", action="store_true",
help="print all of the fields")
parser_tk.add_argument("-d", action="store_true",
help="print the debug information")
help="print the debug information (not implemented yet)")
parser_tk.add_argument("-v", "--version", action="store_true",
dest="version", help="print sudachipy version")
parser_tk.add_argument("in_files", metavar="file",
Expand Down