diff --git a/ginza/command_line.py b/ginza/command_line.py
index 856b574..8db72e6 100644
--- a/ginza/command_line.py
+++ b/ginza/command_line.py
@@ -62,6 +62,12 @@ def run(
     files: List[str] = None,
 ):
     assert model_path is None or ensure_model is None
+    # Warn on stderr when JSON output is requested with a hash_comment mode other than "analyze".
+    if output_format in ["3", "json"] and hash_comment != "analyze":
+        print(
+            f'hash_comment="{hash_comment}" not permitted for JSON output. Forced to use hash_comment="analyze".',
+            file=sys.stderr
+        )
 
     if parallel_level <= 0:
         level = max(1, cpu_count() + parallel_level)
diff --git a/ginza/tests/test_command_line.py b/ginza/tests/test_command_line.py
index 2bd5484..b9ea14e 100644
--- a/ginza/tests/test_command_line.py
+++ b/ginza/tests/test_command_line.py
@@ -56,7 +56,7 @@ def output_file(tmpdir: Path) -> Path:
 
 def _conllu_parsable(result: str):
     for line in result.split("\n"):
-        if line.startswith("# text = ") or line.strip() == "":
+        if line.startswith("#") or line.strip() == "":
             continue
         if not len(line.strip().split("\t")) == 10:
             raise Exception
@@ -64,7 +64,7 @@ def _conllu_parsable(result: str):
 
 def _cabocha_parsable(result: str):
     for line in result.split("\n"):
-        if line.strip() in ("", "EOS") or line.startswith("*"):
+        if line.strip() in ("", "EOS") or line.startswith(("*", "#")):
             continue
         if not len(line.split("\t")) == 3:
             raise Exception
@@ -74,7 +74,7 @@ def _cabocha_parsable(result: str):
 
 def _mecab_parsable(result: str):
     for line in result.split("\n"):
-        if line.strip() in ("", "EOS"):
+        if line.strip() in ("", "EOS") or line.startswith("#"):
             continue
         if not len(line.split("\t")) == 2:
             raise Exception
@@ -210,6 +210,19 @@ def test_output_format(self, output_format, result_parsable, input_file):
         assert p.returncode == 0
         result_parsable(p.stdout.strip())
 
+    @pytest.mark.parametrize(
+        "hash_comment", ["print", "skip"]
+    )
+    def test_warn_if_json_hash_comment_not_analyze(self, hash_comment, input_file):
+        """JSON output with a non-"analyze" hash_comment must succeed and warn on stderr."""
+        p = run_cmd(["ginza", "-c", hash_comment, "-f", "json", input_file], stderr=sp.PIPE)
+        assert p.returncode == 0
+        msg = (
+            f'hash_comment="{hash_comment}" not permitted for JSON output. '
+            'Forced to use hash_comment="analyze".'
+        )
+        assert msg in p.stderr
+
     def test_require_gpu(self, input_file):
         p = run_cmd(["ginza", "-g", input_file])
         gpu_available = int(os.environ.get("CUDA_VISIBLE_DEVICES", -1)) > 0