HuggingFaceModel #21

Open

wants to merge 23 commits into base: main
Changes from 4 commits
Commits
23 commits
c582ac8
Draft models and tests
simple-easydev Apr 9, 2024
e761491
update HuggingFace model
simple-easydev Apr 10, 2024
365927a
Finish first version of HuggingFaceModel
simple-easydev Apr 11, 2024
d9c19b7
Fix tiny bugs
simple-easydev Apr 11, 2024
ab22045
Address review feedback
simple-easydev Apr 11, 2024
4fc31b4
Address remaining review feedback
simple-easydev Apr 11, 2024
d40be9d
[wip] gpu support
jjleng Mar 26, 2024
5af3e16
feat(gpu): run models on cuda GPUs
jjleng Apr 5, 2024
615cc4d
feat(gpu): make nvidia device plugin tolerate model group taints
jjleng Apr 6, 2024
8181314
feat(gpu): set n_gpu_layers to offload work to gpu for the llama.cpp …
jjleng Apr 9, 2024
91e4571
feat(gpu): larger disk for gpu nodes
jjleng Apr 9, 2024
28075b7
feat(gpu): make model group node disk size configurable
jjleng Apr 10, 2024
ac8c726
feat(gpu): be able to request a number of GPUs through config
jjleng Apr 10, 2024
a945de8
docs: update README with the GPU support message
jjleng Apr 10, 2024
62e9a62
docs: add llama2 chat template for the invoice extraction example
jjleng Apr 10, 2024
c842495
docs: README for the invoice extraction example
jjleng Apr 10, 2024
ed40b64
docs(invoice_extraction): gpu_cluster.yaml for GPU inferences
jjleng Apr 10, 2024
0aadc74
feat: remove finalizers before tearing down a cluster
jjleng Apr 10, 2024
4e2bdf7
chore: bump version
jjleng Apr 10, 2024
6f88d8a
docs: instructions for installing the pack CLI
jjleng Apr 11, 2024
c1bcd37
update the progress status logging for downloading
simple-easydev Apr 13, 2024
a0f0ad4
docs: add pulumi CLI as a dependency
jjleng Apr 13, 2024
5863ad0
Fix test case for HuggingFaceModel.upload_file_to_s3
simple-easydev Apr 14, 2024
paka/kube_resources/model_group/model.py: 90 changes (51 additions, 39 deletions)
@@ -8,7 +8,10 @@
 from botocore.exceptions import ClientError

 from paka.kube_resources.model_group.manifest import Manifest
-from paka.kube_resources.model_group.supported_models import SUPPORTED_MODELS
+from paka.kube_resources.model_group.supported_models import (
+    SUPPORTED_MODELS,
+    SUPPORTED_MODELS_V2,
+)
 from paka.logger import logger
 from paka.utils import read_current_cluster_data, to_yaml

@@ -204,47 +207,56 @@ def download_model(name: str) -> None:
     Returns:
         None
     """
-    if name not in SUPPORTED_MODELS:
-        logger.error(
-            f"Model {name} is not supported."
-            f"Available models are: {', '.join(SUPPORTED_MODELS.keys())}"
-        )
-        raise Exception(f"Model {name} is not supported.")
-
-    model = SUPPORTED_MODELS[name]
-
-    logger.info(f"Downloading model from {model.url}...")
-    # Get the model name from the URL
-    model_file_name = model.url.split("/")[-1]
-    model_path = f"{MODEL_PATH_PREFIX}/{name}"
-
-    full_model_file_path = f"{model_path}/{model_file_name}"
-    bucket = read_current_cluster_data("bucket")
-
-    if s3_file_prefix_exists(bucket, f"{model_path}/"):
-        logger.info(f"Model {name} already exists.")
-        return
-
-    sha256 = download_file_to_s3(model.url, bucket, full_model_file_path)
-    if sha256 != model.sha256:
-        logger.error(f"SHA256 hash of the downloaded file does not match.")
-        # Delete the file
-        delete_s3_file(bucket, full_model_file_path)
-        raise Exception(f"SHA256 hash of the downloaded file does not match.")
-
-    # Save model manifest
-    manifest = Manifest(
-        name=name,
-        sha256=model.sha256,
-        url=model.url,
-        type="gguf",  # TODO: hard-coded for now
-        file=model_file_name,
-    )
+    if name in SUPPORTED_MODELS_V2:
+        # new version of the model class
+        new_model = SUPPORTED_MODELS_V2[name]
+        if str(new_model) == "HuggingFaceModel":
+            new_model.upload_files()
+        return
+    else:
+        # old version
+        if name not in SUPPORTED_MODELS:
+            logger.error(
+                f"Model {name} is not supported."
+                f"Available models are: {', '.join(SUPPORTED_MODELS.keys())}"
+            )
+            raise Exception(f"Model {name} is not supported.")
+
+        model = SUPPORTED_MODELS[name]
+
+        logger.info(f"Downloading model from {model.url}...")
+        # Get the model name from the URL
+        model_file_name = model.url.split("/")[-1]
+        model_path = f"{MODEL_PATH_PREFIX}/{name}"
+
+        full_model_file_path = f"{model_path}/{model_file_name}"
+        bucket = read_current_cluster_data("bucket")
+
+        if s3_file_prefix_exists(bucket, f"{model_path}/"):
+            logger.info(f"Model {name} already exists.")
+            return
+
+        sha256 = download_file_to_s3(model.url, bucket, full_model_file_path)
+        if sha256 != model.sha256:
+            logger.error(f"SHA256 hash of the downloaded file does not match.")
+            # Delete the file
+            delete_s3_file(bucket, full_model_file_path)
+            raise Exception(f"SHA256 hash of the downloaded file does not match.")
+
+        # Save model manifest
+        manifest = Manifest(
+            name=name,
+            sha256=model.sha256,
+            url=model.url,
+            type="gguf",  # TODO: hard-coded for now
+            file=model_file_name,
+        )

-    manifest_yaml = to_yaml(manifest.model_dump(exclude_none=True))
-    save_string_to_s3(bucket, f"{model_path}/manifest.yaml", manifest_yaml)
+        manifest_yaml = to_yaml(manifest.model_dump(exclude_none=True))
+        save_string_to_s3(bucket, f"{model_path}/manifest.yaml", manifest_yaml)

-    logger.info(f"Model {name} downloaded successfully.")
+        logger.info(f"Model {name} downloaded successfully.")


 def get_model_file_name(model_name: str) -> str:
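
The new branch in download_model dispatches on SUPPORTED_MODELS_V2 and expects each v2 entry to expose an upload_files() method; the commit list also mentions HuggingFaceModel.upload_file_to_s3. The PR's actual HuggingFaceModel implementation is not shown in this hunk, so the sketch below is only a hypothetical minimal shape of such a registry entry: the fields (repo_id, files) and the upload helper's signature are invented for illustration, and only the __str__/upload_files contract mirrors what download_model relies on.

# Hypothetical sketch of a SUPPORTED_MODELS_V2 entry; not the PR's actual class.
from dataclasses import dataclass, field
from typing import List


@dataclass
class HuggingFaceModelSketch:
    name: str
    repo_id: str  # assumed: the Hugging Face repo the files come from
    files: List[str] = field(default_factory=list)  # assumed: file names within the repo

    def __str__(self) -> str:
        # download_model() dispatches on str(model) == "HuggingFaceModel"
        return "HuggingFaceModel"

    def upload_files(self) -> None:
        # The real class presumably pushes each model file to the cluster bucket
        # (see the "Fix test case for HuggingFaceModel.upload_file_to_s3" commit).
        for file_name in self.files:
            self.upload_file_to_s3(file_name)

    def upload_file_to_s3(self, file_name: str) -> None:
        # Placeholder: the real method's signature and behavior are not shown in this diff.
        print(f"uploading {self.repo_id}/{file_name} to the cluster bucket")


# Registry keyed by model name, mirroring how download_model() looks entries up.
SUPPORTED_MODELS_V2_SKETCH = {
    "llama2-7b": HuggingFaceModelSketch(
        name="llama2-7b",
        repo_id="TheBloke/Llama-2-7B-GGUF",  # illustrative repo id
        files=["llama-2-7b.Q4_0.gguf"],  # illustrative file name
    ),
}

Keeping instances in the registry and overriding __str__ makes the string comparison in download_model work, though an isinstance check against the model class would be the more idiomatic dispatch.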