Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix Utils CI #1867

Merged
merged 10 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 0 additions & 37 deletions .github/workflows/test_dummy_inputs.yml

This file was deleted.

40 changes: 40 additions & 0 deletions .github/workflows/test_utils.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CI workflow: runs the `tests/utils` test suite on every push and pull request
# targeting `main`, across a small OS x Python version matrix.
name: Utils / Python - Test

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# One active run per workflow and PR branch: a newer push cancels the run that
# is still in progress. `github.head_ref` is only set for pull requests, so
# push-triggered runs fall back to the unique `github.run_id`.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    strategy:
      # Let every matrix cell finish so one failure does not hide the others.
      fail-fast: false
      matrix:
        # NOTE(review): ubuntu-20.04 hosted runners appear to have been retired
        # by GitHub - confirm and move to a supported image if so.
        os: [ubuntu-20.04, macos-13]
        # Quoted on purpose: unquoted versions are YAML floats, so a future
        # `3.10` would be parsed as `3.1`.
        python-version: ["3.8", "3.9"]

    runs-on: ${{ matrix.os }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Setup Python ${{ matrix.python-version }}
        # v5 runs on the Node 20 runtime; v3 relied on the deprecated Node 16.
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
          pip install ".[tests]"

      - name: Test with pytest
        working-directory: tests
        run: |
          python -m pytest -s -vvvv utils
6 changes: 3 additions & 3 deletions optimum/utils/preprocessing/task_processors_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@


if TYPE_CHECKING:
from .base import DatasetProcessing
from .base import TaskProcessor


class TaskProcessorsManager:
Expand All @@ -35,7 +35,7 @@ class TaskProcessorsManager:
}

@classmethod
def get_task_processor_class_for_task(cls, task: str) -> Type:
def get_task_processor_class_for_task(cls, task: str) -> Type["TaskProcessor"]:
IlyasMoutawwakil marked this conversation as resolved.
Show resolved Hide resolved
if task not in cls._TASK_TO_DATASET_PROCESSING_CLASS:
supported_tasks = ", ".join(cls._TASK_TO_DATASET_PROCESSING_CLASS.keys())
raise KeyError(
Expand All @@ -45,5 +45,5 @@ def get_task_processor_class_for_task(cls, task: str) -> Type:
return cls._TASK_TO_DATASET_PROCESSING_CLASS[task]

@classmethod
def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "DatasetProcessing":
def for_task(cls, task: str, *dataset_processing_args, **dataset_processing_kwargs: Any) -> "TaskProcessor":
return cls.get_task_processor_class_for_task(task)(*dataset_processing_args, **dataset_processing_kwargs)
8 changes: 4 additions & 4 deletions tests/utils/test_dummpy_input_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,13 +31,13 @@
from optimum.utils.input_generators import DummyInputGenerator


TEXT_ENCODER_MODELS = {"distilbert": "distilbert-base-cased"}
TEXT_ENCODER_MODELS = {"distilbert": "hf-internal-testing/tiny-random-DistilBertModel"}

VISION_MODELS = {"resnet": "hf-internal-testing/tiny-random-resnet"}

SEQ2SEQ_MODELS = {"t5": "t5-small"}
SEQ2SEQ_MODELS = {"t5": "hf-internal-testing/tiny-random-T5Model"}

AUDIO_MODELS = {"whisper": "openai/whisper-tiny.en"}
AUDIO_MODELS = {"whisper": "hf-internal-testing/tiny-random-WhisperModel"}

DUMMY_SHAPES = {
"batch_size": [2, 4],
Expand All @@ -60,7 +60,7 @@ class GenerateDummy(TestCase):
"np": tuple,
}
if is_tf_available():
import tensorflow as tf
import tensorflow as tf # type: ignore[import]

_FRAMEWORK_TO_SHAPE_CLS["tf"] = tf.TensorShape

Expand Down
17 changes: 15 additions & 2 deletions tests/utils/test_task_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@
},
}

LOAD_SMALLEST_SPLIT = True
NUM_SAMPLES = 10


# Taken from https://pynative.com/python-generate-random-string/
def get_random_string(length: int) -> str:
Expand Down Expand Up @@ -148,7 +151,11 @@ def _test_load_dataset(
)
dataset_with_all_columns = None
if default_dataset:
dataset = task_processor.load_default_dataset(only_keep_necessary_columns=only_keep_necessary_columns)
dataset = task_processor.load_default_dataset(
only_keep_necessary_columns=only_keep_necessary_columns,
load_smallest_split=LOAD_SMALLEST_SPLIT,
num_samples=NUM_SAMPLES,
)
if only_keep_necessary_columns:
dataset_with_all_columns = task_processor.load_default_dataset()
else:
Expand All @@ -157,11 +164,17 @@ def _test_load_dataset(
path,
data_keys=data_keys,
only_keep_necessary_columns=only_keep_necessary_columns,
load_smallest_split=LOAD_SMALLEST_SPLIT,
num_samples=NUM_SAMPLES,
**load_dataset_kwargs,
)
if only_keep_necessary_columns:
dataset_with_all_columns = task_processor.load_dataset(
path, data_keys=data_keys, **load_dataset_kwargs
path,
data_keys=data_keys,
load_smallest_split=LOAD_SMALLEST_SPLIT,
num_samples=NUM_SAMPLES,
**load_dataset_kwargs,
)

# We only check if the column names of the dataset with the not necessary columns removed are a strict subset
Expand Down
Loading