From 698f762197fab9bbb1d3a236a0ac5a5be88c06ed Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Fri, 9 Aug 2024 12:03:06 +0200 Subject: [PATCH 1/2] Ignore FutureWarning --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index b3ed1eaedd..78d1692a09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,6 +31,7 @@ filterwarnings = [ 'ignore:distutils Version classes are deprecated.', # faiss uses deprecated distutils. 'ignore:`resume_download` is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use `force_download=True`.', # transformers calls deprecated hf_hub "ignore:`torch.cuda.amp.GradScaler", # GradScaler changes in torch 2.3.0 but we want to be backwards compatible. + "ignore:`clean_up_tokenization_spaces` was not set", # Default behavior changes in transformers v4.45, raising irrelevant FutureWarning for serialized models. ] markers = [ "integration", From cdfa0ee097c2b8a663c8027d912125d8124a2b7e Mon Sep 17 00:00:00 2001 From: Alan Akbik Date: Fri, 9 Aug 2024 12:51:39 +0200 Subject: [PATCH 2/2] Remove some large dataset tests --- tests/test_datasets.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 2d0391b264..25a99f87e0 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -418,7 +418,7 @@ def test_load_universal_dependencies_conllu_corpus(tasks_base_path): _assert_universal_dependencies_conllu_dataset(corpus.train) -@pytest.mark.integration() +@pytest.mark.skip() def test_hipe_2022_corpus(tasks_base_path): # This test covers the complete HIPE 2022 dataset. 
# https://github.com/hipe-eval/HIPE-2022-data @@ -682,7 +682,7 @@ def test_hipe_2022(dataset_version="v2.1", add_document_separator=True): test_hipe_2022(dataset_version="v2.1", add_document_separator=False) -@pytest.mark.integration() +@pytest.mark.skip() def test_icdar_europeana_corpus(tasks_base_path): # This test covers the complete ICDAR Europeana corpus: # https://github.com/stefan-it/historic-domain-adaptation-icdar @@ -700,7 +700,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str): check_number_sentences(len(corpus.test), gold_stats[language]["test"], "test") -@pytest.mark.integration() +@pytest.mark.skip() def test_masakhane_corpus(tasks_base_path): # This test covers the complete MasakhaNER dataset, including support for v1 and v2. supported_versions = ["v1", "v2"] @@ -784,7 +784,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) -@pytest.mark.integration() +@pytest.mark.skip() def test_nermud_corpus(tasks_base_path): # This test covers the NERMuD dataset. Official stats can be found here: # https://github.com/dhfbk/KIND/tree/main/evalita-2023 @@ -803,6 +803,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str): check_number_sentences(len(corpus.dev), stats["dev"], "dev") +@pytest.mark.skip() def test_german_ler_corpus(tasks_base_path): corpus = flair.datasets.NER_GERMAN_LEGAL() @@ -812,7 +813,7 @@ def test_german_ler_corpus(tasks_base_path): assert len(corpus.test) == 6673, "Mismatch in number of sentences for test split" -@pytest.mark.integration() +@pytest.mark.skip() def test_masakha_pos_corpus(tasks_base_path): # This test covers the complete MasakhaPOS dataset. 
supported_versions = ["v1"] @@ -881,7 +882,7 @@ def check_number_sentences(reference: int, actual: int, split_name: str, languag check_number_sentences(len(corpus.test), gold_stats["test"], "test", language, version) -@pytest.mark.integration() +@pytest.mark.skip() def test_german_mobie(tasks_base_path): corpus = flair.datasets.NER_GERMAN_MOBIE() @@ -966,7 +967,7 @@ def test_jsonl_corpus_loads_metadata(tasks_base_path): assert dataset.sentences[2].get_metadata("from") == 125 -@pytest.mark.integration() +@pytest.mark.skip() def test_ontonotes_download(): from urllib.parse import urlparse @@ -974,6 +975,7 @@ def test_ontonotes_download(): assert all([res.scheme, res.netloc]) +@pytest.mark.skip() def test_ontonotes_extraction(tasks_base_path): import os import tempfile