refactor(tokenizer): change file pairs extension
tduigou committed Aug 4, 2023
1 parent fe73fc0 commit 576139c
Showing 1 changed file with 3 additions and 3 deletions.
6 changes: 3 additions & 3 deletions src/paper/dataset/tokenizer.py
@@ -199,7 +199,7 @@ def tokenize(src_file: str, model_prefix: str, vocab_size: int = -1):
     # SMILES - SIG
     df_pretokenized[["SMILES", "SIG"]].to_csv(
         os.path.join(
-            args.output_directory_str, PAIRS_DIR, f"sig.smiles.{type_}.txt"
+            args.output_directory_str, PAIRS_DIR, f"sig.smiles.{type_}"
         ),
         sep="\t",
         index=False,
@@ -208,7 +208,7 @@ def tokenize(src_file: str, model_prefix: str, vocab_size: int = -1):
     # SIG - ECFP4
     df_pretokenized[["SIG", "ECFP4"]].to_csv(
         os.path.join(
-            args.output_directory_str, PAIRS_DIR, f"ecfp4.sig.{type_}.txt"
+            args.output_directory_str, PAIRS_DIR, f"ecfp4.sig.{type_}"
         ),
         sep="\t",
         index=False,
@@ -217,7 +217,7 @@ def tokenize(src_file: str, model_prefix: str, vocab_size: int = -1):
     # SMILES - ECFP4
     df_pretokenized[["SMILES", "ECFP4"]].to_csv(
        os.path.join(
-            args.output_directory_str, PAIRS_DIR, f"ecfp4.smiles.{type_}.txt"
+            args.output_directory_str, PAIRS_DIR, f"ecfp4.smiles.{type_}"
         ),
         sep="\t",
         index=False,
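
In effect, the three tokenized pair files are now written without the trailing ".txt" suffix. A minimal sketch of the resulting paths, assuming PAIRS_DIR = "pairs", an output directory named "data", and split names such as "train"/"valid"/"test" for type_ (these example values are illustrative and not taken from the diff):

import os

# Assumed example values, for illustration only.
output_directory_str = "data"
PAIRS_DIR = "pairs"

for type_ in ("train", "valid", "test"):
    for pair in ("sig.smiles", "ecfp4.sig", "ecfp4.smiles"):
        # Before this commit the file name ended in ".txt", e.g. "sig.smiles.train.txt";
        # after this commit it is simply "sig.smiles.train".
        print(os.path.join(output_directory_str, PAIRS_DIR, f"{pair}.{type_}"))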
