Skip to content

Commit

Permalink
Remove warnings (open-mmlab#2402)
Browse files Browse the repository at this point in the history
* replace iteritems with items

* replace iteritems with items

* remove seqeval warning

* Update metric.py

* set num_workers to 0 by default

Some Hugging Face checkpoints, such as DeBERTa-v3, mDeBERTa-v3, RoBERTa, and Flan-T5-XL, do not work with ddp_spawn when num_workers is set to 2.

* update ner presets

* add fixme

* Update presets.py
  • Loading branch information
cheungdaven authored Nov 16, 2022
1 parent 7d46666 commit ce1ac12
Show file tree
Hide file tree
Showing 5 changed files with 8 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ env:
eval_batch_size_ratio: 4 # per_gpu_batch_size_evaluation = per_gpu_batch_size * eval_batch_size_ratio
per_gpu_batch_size_evaluation: # This is deprecated. Use eval_batch_size_ratio instead.
precision: 16 # training precision. Refer to https://pytorch-lightning.readthedocs.io/en/latest/advanced/mixed_precision.html
num_workers: 2 # pytorch training dataloader workers
num_workers: 0 # pytorch training dataloader workers. FIXME: setting it to 0 to avoid ProcessExitedException for deberta-v3, flan-t5-xl, etc.
num_workers_evaluation: 2 # pytorch prediction/test dataloader workers
fast_dev_run: False
deterministic: False
Expand Down
2 changes: 1 addition & 1 deletion multimodal/src/autogluon/multimodal/data/infer_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def infer_problem_type_output_shape(
elif provided_problem_type == NER:
unique_entity_groups = [
annot[ENTITY_GROUP]
for annotation in data[label_column].iteritems()
for annotation in data[label_column].items()
for annot in json.loads(annotation[-1])
]
return provided_problem_type, len(set(unique_entity_groups)) + 2
Expand Down
4 changes: 2 additions & 2 deletions multimodal/src/autogluon/multimodal/data/labelencoder_ner.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def extract_ner_annotations(self, y: pd.Series):
}
all_annotations = []
all_entity_groups = []
for ner_annotations in y.iteritems():
for ner_annotations in y.items():
json_ner_annotations = json.loads(ner_annotations[-1]) # load the json annotations
try:
jsonschema.validate(json_ner_annotations, schema) # verify the json schema
Expand Down Expand Up @@ -142,7 +142,7 @@ def transform_label_for_metric(self, y: pd.Series, x: pd.Series, tokenizer):
"""
all_annotations, _ = self.extract_ner_annotations(y)
transformed_y = []
for annotation, text_snippet in zip(all_annotations, x.iteritems()):
for annotation, text_snippet in zip(all_annotations, x.items()):
word_label, _, _, _ = process_ner_annotations(annotation, text_snippet[-1], tokenizer, is_eval=True)
word_label_invers = []
for l in word_label:
Expand Down
8 changes: 3 additions & 5 deletions multimodal/src/autogluon/multimodal/presets.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,25 +395,23 @@ def medium_quality_faster_inference_image_text_similarity():
def best_quality_ner():
return {
"model.names": ["ner_text"],
"model.ner_text.checkpoint_name": "google/flan-t5-base",
"env.precision": "bf16",
"model.ner_text.checkpoint_name": "microsoft/deberta-v3-base",
}


@automm_presets.register()
def medium_quality_faster_inference_ner():
return {
"model.names": ["ner_text"],
"model.ner_text.checkpoint_name": "google/flan-t5-small",
"env.precision": "bf16",
"model.ner_text.checkpoint_name": "google/electra-small-discriminator",
}


@automm_presets.register()
def high_quality_fast_inference_ner():
return {
"model.names": ["ner_text"],
"model.ner_text.checkpoint_name": "bert-base-cased",
"model.ner_text.checkpoint_name": "microsoft/deberta-v3-base",
}


Expand Down
1 change: 1 addition & 0 deletions multimodal/src/autogluon/multimodal/utils/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ def compute_score(
"""
if metric_name == OVERALL_ACCURACY:
metric = evaluate.load("seqeval")
warnings.filterwarnings("ignore")
return metric.compute(references=metric_data[Y_TRUE], predictions=metric_data[Y_PRED])

metric = get_metric(metric_name)
Expand Down

0 comments on commit ce1ac12

Please sign in to comment.