Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix import for HuggingFace Dataset Provider #2085

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/pytorch/language-modeling/train_api_hf_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
+" HuggingFaceDatasetParams,\n",
")\n",
"from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n",
"from peft import LoraConfig\n",
Expand Down Expand Up @@ -70,7 +70,7 @@
" ),\n",
" # it is assumed for text related tasks, you have 'text' column in the dataset.\n",
" # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n",
-" dataset_provider_parameters=HfDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n",
+" dataset_provider_parameters=HuggingFaceDatasetParams(repo_id=\"imdatta0/ultrachat_1k\"),\n",
" trainer_parameters=HuggingFaceTrainerParams(\n",
" lora_config=LoraConfig(\n",
" r=8,\n",
Expand Down
16 changes: 7 additions & 9 deletions examples/pytorch/language-modeling/train_api_s3_dataset.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
")\n",
+"from kubeflow.storage_initializer.s3 import S3DatasetParams\n",
"from kubeflow.storage_initializer.constants import INIT_CONTAINER_MOUNT_PATH\n",
"from peft import LoraConfig\n",
"import transformers\n",
Expand Down Expand Up @@ -81,14 +81,12 @@
" # it is assumed for text related tasks, you have 'text' column in the dataset.\n",
" # for more info on how dataset is loaded check load_and_preprocess_data function in sdk/python/kubeflow/trainer/hf_llm_training.py\n",
" dataset_provider_parameters=S3DatasetParams(\n",
-" {\n",
-" \"endpoint_url\": \"http://10.117.63.3\",\n",
-" \"bucket_name\": \"test\",\n",
-" \"file_key\": \"imdatta0___ultrachat_1k\",\n",
-" \"region_name\": \"us-east-1\",\n",
-" \"access_key\": s3_access_key,\n",
-" \"secret_key\": s3_secret_key,\n",
-" }\n",
+" endpoint_url=\"http://10.117.63.3\",\n",
+" bucket_name=\"test\",\n",
+" file_key=\"imdatta0___ultrachat_1k\",\n",
+" region_name=\"us-east-1\",\n",
+" access_key=s3_access_key,\n",
+" secret_key=s3_secret_key,\n",
" ),\n",
" trainer_parameters=HuggingFaceTrainerParams(\n",
" lora_config=LoraConfig(\n",
Expand Down
4 changes: 2 additions & 2 deletions examples/pytorch/text-classification/Fine-Tune-BERT-LLM.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -614,7 +614,7 @@
"from kubeflow.storage_initializer.hugging_face import (\n",
" HuggingFaceModelParams,\n",
" HuggingFaceTrainerParams,\n",
-" HfDatasetParams,\n",
+" HuggingFaceDatasetParams,\n",
")\n",
"\n",
"import transformers\n",
Expand Down Expand Up @@ -646,7 +646,7 @@
" \"access_modes\": [\"ReadWriteOnce\"] # Since we use 1 Worker, PVC access mode is ReadWriteOnce.\n",
" },\n",
" # Use 3000 samples from Yelp dataset.\n",
-" dataset_provider_parameters=HfDatasetParams(\n",
+" dataset_provider_parameters=HuggingFaceDatasetParams(\n",
" repo_id=\"yelp_review_full\",\n",
" split=\"train[:3000]\",\n",
" ),\n",
Expand Down
Loading