Skip to content

Commit

Permalink
Enforce string-formatting with f-strings (huggingface#10980)
Browse files Browse the repository at this point in the history
* First third

* Styling and fix mistake

* Quality

* All the rest

* Treat %s and %d

* typo

* Missing )

* Apply suggestions from code review

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>

Co-authored-by: Lysandre Debut <lysandre@huggingface.co>
  • Loading branch information
2 people authored and Iwontbecreative committed Jul 15, 2021
1 parent 27f3666 commit 735e5ed
Show file tree
Hide file tree
Showing 224 changed files with 984 additions and 1,312 deletions.
2 changes: 1 addition & 1 deletion examples/language-modeling/run_clm.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
2 changes: 1 addition & 1 deletion examples/language-modeling/run_mlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
12 changes: 4 additions & 8 deletions examples/language-modeling/run_mlm_flax.py
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ def step_fn(step):
progress = jnp.maximum(0.0, (step - warmup_steps) / float(steps_per_cycle))
ret *= jnp.maximum(0.0, 0.5 * (1.0 + jnp.cos(jnp.pi * (progress % 1.0))))
else:
raise ValueError("Unknown factor %s." % name)
raise ValueError(f"Unknown factor {name}.")
return jnp.asarray(ret, dtype=jnp.float32)

return step_fn
Expand All @@ -332,9 +332,7 @@ def accuracy(logits, targets, weights=None):
Tuple of scalar loss and batch normalizing factor.
"""
if logits.ndim != targets.ndim + 1:
raise ValueError(
"Incorrect shapes. Got shape %s logits and %s targets" % (str(logits.shape), str(targets.shape))
)
raise ValueError(f"Incorrect shapes. Got shape {logits.shape} logits and {targets.shape} targets")

loss = jnp.equal(jnp.argmax(logits, axis=-1), targets)
loss *= weights
Expand All @@ -353,9 +351,7 @@ def cross_entropy(logits, targets, weights=None, label_smoothing=0.0):
Tuple of scalar loss and batch normalizing factor.
"""
if logits.ndim != targets.ndim + 1:
raise ValueError(
"Incorrect shapes. Got shape %s logits and %s targets" % (str(logits.shape), str(targets.shape))
)
raise ValueError(f"Incorrect shapes. Got shape {logits.shape} logits and {targets.shape} targets")

vocab_size = logits.shape[-1]
confidence = 1.0 - label_smoothing
Expand Down Expand Up @@ -463,7 +459,7 @@ def generate_batch_splits(samples_idx: jnp.ndarray, batch_size: int) -> jnp.ndar
)

# Set the verbosity to info of the Transformers logger (on main process only):
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
2 changes: 1 addition & 1 deletion examples/language-modeling/run_plm.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
2 changes: 1 addition & 1 deletion examples/multiple-choice/run_swag.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
14 changes: 6 additions & 8 deletions examples/multiple-choice/run_tf_multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,10 @@ def main():
level=logging.INFO,
)
logger.warning(
"device: %s, n_replicas: %s, 16-bits training: %s",
training_args.device,
training_args.n_replicas,
training_args.fp16,
f"device: {training_args.device}, n_replicas: {training_args.n_replicas}, "
f"16-bits training: {training_args.fp16}"
)
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed
set_seed(training_args.seed)
Expand All @@ -131,7 +129,7 @@ def main():
label_list = processor.get_labels()
num_labels = len(label_list)
except KeyError:
raise ValueError("Task not found: %s" % (data_args.task_name))
raise ValueError(f"Task not found: {data_args.task_name}")

# Load pretrained model and tokenizer
#
Expand Down Expand Up @@ -210,8 +208,8 @@ def compute_metrics(p: EvalPrediction) -> Dict:
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****")
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")

results.update(result)

Expand Down
58 changes: 26 additions & 32 deletions examples/multiple-choice/utils_multiple_choice.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,7 @@ def __init__(
processor = processors[task]()

cached_features_file = os.path.join(
data_dir,
"cached_{}_{}_{}_{}".format(
mode.value,
tokenizer.__class__.__name__,
str(max_seq_length),
task,
),
data_dir, f"cached_{mode.value}_{tokenizer.__class__.__name__}_{max_seq_length}_{task}"
)

# Make sure only the first process in distributed training processes the dataset,
Expand All @@ -125,14 +119,14 @@ def __init__(
examples = processor.get_test_examples(data_dir)
else:
examples = processor.get_train_examples(data_dir)
logger.info("Training examples: %s", len(examples))
logger.info(f"Training examples: {len(examples)}")
self.features = convert_examples_to_features(
examples,
label_list,
max_seq_length,
tokenizer,
)
logger.info("Saving features into cached file %s", cached_features_file)
logger.info(f"Saving features into cached file {cached_features_file}")
torch.save(self.features, cached_features_file)

def __len__(self):
Expand Down Expand Up @@ -172,7 +166,7 @@ def __init__(
examples = processor.get_test_examples(data_dir)
else:
examples = processor.get_train_examples(data_dir)
logger.info("Training examples: %s", len(examples))
logger.info(f"Training examples: {len(examples)}")

self.features = convert_examples_to_features(
examples,
Expand All @@ -184,7 +178,7 @@ def __init__(
def gen():
for (ex_index, ex) in tqdm.tqdm(enumerate(self.features), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
logger.info(f"Writing example {ex_index} of {len(examples)}")

yield (
{
Expand Down Expand Up @@ -255,7 +249,7 @@ class RaceProcessor(DataProcessor):

def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
high = os.path.join(data_dir, "train/high")
middle = os.path.join(data_dir, "train/middle")
high = self._read_txt(high)
Expand All @@ -264,7 +258,7 @@ def get_train_examples(self, data_dir):

def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
high = os.path.join(data_dir, "dev/high")
middle = os.path.join(data_dir, "dev/middle")
high = self._read_txt(high)
Expand All @@ -273,7 +267,7 @@ def get_dev_examples(self, data_dir):

def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} test".format(data_dir))
logger.info(f"LOOKING AT {data_dir} test")
high = os.path.join(data_dir, "test/high")
middle = os.path.join(data_dir, "test/middle")
high = self._read_txt(high)
Expand All @@ -298,7 +292,7 @@ def _create_examples(self, lines, set_type):
"""Creates examples for the training and dev sets."""
examples = []
for (_, data_raw) in enumerate(lines):
race_id = "%s-%s" % (set_type, data_raw["race_id"])
race_id = f"{set_type}-{data_raw['race_id']}"
article = data_raw["article"]
for i in range(len(data_raw["answers"])):
truth = str(ord(data_raw["answers"][i]) - ord("A"))
Expand All @@ -322,17 +316,17 @@ class SynonymProcessor(DataProcessor):

def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_csv(os.path.join(data_dir, "mctrain.csv")), "train")

def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_csv(os.path.join(data_dir, "mchp.csv")), "dev")

def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")

return self._create_examples(self._read_csv(os.path.join(data_dir, "mctest.csv")), "test")

Expand Down Expand Up @@ -368,17 +362,17 @@ class SwagProcessor(DataProcessor):

def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_csv(os.path.join(data_dir, "train.csv")), "train")

def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_csv(os.path.join(data_dir, "val.csv")), "dev")

def get_test_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
raise ValueError(
"For swag testing, the input file does not contain a label column. It can not be tested in current code"
"setting!"
Expand Down Expand Up @@ -419,16 +413,16 @@ class ArcProcessor(DataProcessor):

def get_train_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} train".format(data_dir))
logger.info(f"LOOKING AT {data_dir} train")
return self._create_examples(self._read_json(os.path.join(data_dir, "train.jsonl")), "train")

def get_dev_examples(self, data_dir):
"""See base class."""
logger.info("LOOKING AT {} dev".format(data_dir))
logger.info(f"LOOKING AT {data_dir} dev")
return self._create_examples(self._read_json(os.path.join(data_dir, "dev.jsonl")), "dev")

def get_test_examples(self, data_dir):
logger.info("LOOKING AT {} test".format(data_dir))
logger.info(f"LOOKING AT {data_dir} test")
return self._create_examples(self._read_json(os.path.join(data_dir, "test.jsonl")), "test")

def get_labels(self):
Expand All @@ -450,7 +444,7 @@ def normalize(truth):
elif truth in "1234":
return int(truth) - 1
else:
logger.info("truth ERROR! %s", str(truth))
logger.info(f"truth ERROR! {truth}")
return None

examples = []
Expand Down Expand Up @@ -496,11 +490,11 @@ def normalize(truth):
if type == "train":
assert len(examples) > 1
assert examples[0].label is not None
logger.info("len examples: %s}", str(len(examples)))
logger.info("Three choices: %s", str(three_choice))
logger.info("Five choices: %s", str(five_choice))
logger.info("Other choices: %s", str(other_choices))
logger.info("four choices: %s", str(four_choice))
logger.info(f"len examples: {len(examples)}")
logger.info(f"Three choices: {three_choice}")
logger.info(f"Five choices: {five_choice}")
logger.info(f"Other choices: {other_choices}")
logger.info(f"four choices: {four_choice}")

return examples

Expand All @@ -520,7 +514,7 @@ def convert_examples_to_features(
features = []
for (ex_index, example) in tqdm.tqdm(enumerate(examples), desc="convert examples to features"):
if ex_index % 10000 == 0:
logger.info("Writing example %d of %d" % (ex_index, len(examples)))
logger.info(f"Writing example {ex_index} of {len(examples)}")
choices_inputs = []
for ending_idx, (context, ending) in enumerate(zip(example.contexts, example.endings)):
text_a = context
Expand Down Expand Up @@ -570,7 +564,7 @@ def convert_examples_to_features(

for f in features[:2]:
logger.info("*** Example ***")
logger.info("feature: %s" % f)
logger.info(f"feature: {f}")

return features

Expand Down
2 changes: 1 addition & 1 deletion examples/question-answering/run_qa.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
2 changes: 1 addition & 1 deletion examples/question-answering/run_qa_beam_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ def main():
transformers.utils.logging.set_verbosity_info()
transformers.utils.logging.enable_default_handler()
transformers.utils.logging.enable_explicit_format()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
8 changes: 3 additions & 5 deletions examples/question-answering/run_tf_squad.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,12 +148,10 @@ def main():
level=logging.INFO,
)
logger.info(
"n_replicas: %s, distributed training: %s, 16-bits training: %s",
training_args.n_replicas,
bool(training_args.n_replicas > 1),
training_args.fp16,
f"n_replicas: {training_args.n_replicas}, distributed training: {bool(training_args.n_replicas > 1)}, "
f"16-bits training: {training_args.fp16}"
)
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Prepare Question-Answering task
# Load pretrained model and tokenizer
Expand Down
2 changes: 1 addition & 1 deletion examples/seq2seq/run_summarization.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def main():
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
transformers.utils.logging.set_verbosity_info()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
2 changes: 1 addition & 1 deletion examples/seq2seq/run_translation.py
Original file line number Diff line number Diff line change
Expand Up @@ -264,7 +264,7 @@ def main():
# Set the verbosity to info of the Transformers logger (on main process only):
if is_main_process(training_args.local_rank):
transformers.utils.logging.set_verbosity_info()
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

# Set seed before initializing model.
set_seed(training_args.seed)
Expand Down
14 changes: 6 additions & 8 deletions examples/text-classification/run_tf_glue.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,18 +160,16 @@ def main():
level=logging.INFO,
)
logger.info(
"n_replicas: %s, distributed training: %s, 16-bits training: %s",
training_args.n_replicas,
bool(training_args.n_replicas > 1),
training_args.fp16,
f"n_replicas: {training_args.n_replicas}, distributed training: {bool(training_args.n_replicas > 1)}, "
f"16-bits training: {training_args.fp16}",
)
logger.info("Training/evaluation parameters %s", training_args)
logger.info(f"Training/evaluation parameters {training_args}")

try:
num_labels = glue_tasks_num_labels["mnli" if data_args.task_name == "mnli-mm" else data_args.task_name]
output_mode = glue_output_modes[data_args.task_name]
except KeyError:
raise ValueError("Task not found: %s" % (data_args.task_name))
raise ValueError(f"Task not found: {data_args.task_name}")

# Load pretrained model and tokenizer
#
Expand Down Expand Up @@ -255,8 +253,8 @@ def compute_metrics(p: EvalPrediction) -> Dict:
logger.info("***** Eval results *****")

for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))
logger.info(f" {key} = {value}")
writer.write(f"{key} = {value}\n")

results.update(result)

Expand Down
Loading

0 comments on commit 735e5ed

Please sign in to comment.