HuggingFaceModel #21

Open · wants to merge 23 commits into base: main

Changes from 1 commit

Commits (23)
c582ac8  Draft models and tests (simple-easydev, Apr 9, 2024)
e761491  update huggleface model (simple-easydev, Apr 10, 2024)
365927a  Done first version of HuggingFaceModel (simple-easydev, Apr 11, 2024)
d9c19b7  Fix tiny bugs (simple-easydev, Apr 11, 2024)
ab22045  Fix feedbacks (simple-easydev, Apr 11, 2024)
4fc31b4  Fix missing feedback (simple-easydev, Apr 11, 2024)
d40be9d  [wip] gpu support (jjleng, Mar 26, 2024)
5af3e16  feat(gpu): run models on cuda GPUs (jjleng, Apr 5, 2024)
615cc4d  feat(gpu): make nvidia device plugin tolerate model group taints (jjleng, Apr 6, 2024)
8181314  feat(gpu): set n_gpu_layers to offload work to gpu for the llama.cpp … (jjleng, Apr 9, 2024)
91e4571  feat(gpu): larger disk for gpu nodes (jjleng, Apr 9, 2024)
28075b7  feat(gpu): make model group node disk size configerable (jjleng, Apr 10, 2024)
ac8c726  feat(gpu): be able to request a number of GPUs through config (jjleng, Apr 10, 2024)
a945de8  docs: update README with the GPU support message (jjleng, Apr 10, 2024)
62e9a62  docs: add llama2 chat template for the invoice extraction example (jjleng, Apr 10, 2024)
c842495  docs: README for the invoice extraction example (jjleng, Apr 10, 2024)
ed40b64  docs(invoice_extraction): gpu_cluster.yaml for GPU inferences (jjleng, Apr 10, 2024)
0aadc74  feat: remove finalizers before tearing down a cluster (jjleng, Apr 10, 2024)
4e2bdf7  chore: bump version (jjleng, Apr 10, 2024)
6f88d8a  docs: instructions for installing the pack CLI (jjleng, Apr 11, 2024)
c1bcd37  update the progress status logging for downloading (simple-easydev, Apr 13, 2024)
a0f0ad4  docs: add pulumi CLI as a dependency (jjleng, Apr 13, 2024)
5863ad0  Fix test case for HuggingFaceModel.upload_file_to_s3 (simple-easydev, Apr 14, 2024)
Viewing changes from commit ab220453729f80e6b761ba3271099b63fc6a1238 ("Fix feedbacks")
simple-easydev committed Apr 11, 2024
@@ -8,7 +8,7 @@
 from botocore.exceptions import ClientError
 
 from paka.logger import logger
-from paka.utils import get_item, read_current_cluster_data
+from paka.utils import read_current_cluster_data
 
 MODEL_PATH_PREFIX = "models"
 
@@ -191,26 +191,23 @@ def upload_to_s3(
         return upload_id, sha256_value
 
     def upload_fs_to_s3(
-        self, fs: Any, total_size: int, s3_file_name: str
-    ) -> tuple[Any, str]:
+        self, fs: Any, total_size: int, s3_file_name: str, upload_id: str
+    ) -> str:
         """
         Uploads a single file to S3.
 
         Args:
             fs (Any): The file stream object.
             total_size (int): The total size of the file.
             s3_file_name (str): The name of the file in S3.
+            upload_id (str): The upload ID of the multipart upload.
 
         Returns:
-            tuple: A tuple containing the upload ID and the SHA256 hash of the file.
+            str: The SHA256 hash of the file.
         """
         logger.info(f"Uploading model to {s3_file_name}")
         sha256 = hashlib.sha256()
         processed_size = 0
-        upload = self.s3.create_multipart_upload(
-            Bucket=self.s3_bucket, Key=s3_file_name
-        )
-        upload_id = upload["UploadId"]
         parts = []
 
         with concurrent.futures.ThreadPoolExecutor(
@@ -259,7 +256,7 @@ def upload_fs_to_s3(
         logger.info(f"File uploaded to S3: {s3_file_name}")
         sha256_value = sha256.hexdigest()
         logger.info(f"SHA256 hash of the file: {sha256_value}")
-        return upload_id, sha256_value
+        return sha256_value
 
     def upload_part(
         self,
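These hunks move ownership of the multipart upload out of `upload_fs_to_s3`: the caller now creates the upload and passes `upload_id` in, and `upload_fs_to_s3` only streams parts and returns the SHA256 digest. A minimal sketch of that caller-owns-the-upload pattern, assuming a plain boto3 S3 client; the bucket and key names are illustrative, not from this repo:

```python
import boto3

s3 = boto3.client("s3")
bucket, key = "example-bucket", "models/example.gguf"  # illustrative names

# The caller creates the multipart upload and owns its lifecycle.
upload = s3.create_multipart_upload(Bucket=bucket, Key=key)
upload_id = upload["UploadId"]
try:
    # Stream the parts here (upload_fs_to_s3's job), collecting ETags and
    # computing the SHA256 digest as the data flows through.
    ...
except Exception:
    # On failure the caller aborts, so S3 keeps no orphaned parts around.
    s3.abort_multipart_upload(Bucket=bucket, Key=key, UploadId=upload_id)
    raise
```

Splitting it this way lets `upload_file_to_s3` abort the same upload from its `finally` block, which is what the hugging_face_model.py hunks below do.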
48 changes: 37 additions & 11 deletions paka/kube_resources/model_group/models/hugging_face_model.py
@@ -1,17 +1,18 @@
 import concurrent.futures
 from typing import Any
 
+import boto3
 from huggingface_hub import HfFileSystem
 from pydantic import BaseModel
 
-from paka.kube_resources.model_group.models.abstract import Model
+from paka.kube_resources.model_group.models.base import Model
 from paka.logger import logger
 from paka.utils import to_yaml
 
 
 class Manifest(BaseModel):
     repo_id: str
-    files: list[str]
+    files: list[tuple[str, str]]
     inference_devices: list[str]
     quantization: str
     runtime: str
@@ -47,6 +48,7 @@ def __init__(
         self.repo_id = repo_id
         self.fs = HfFileSystem()
         self.files = self.validate_files(files)
+        self.completed_files: list[tuple[str, str]] = []
 
     def validate_files(self, files: list[str]) -> list[str]:
         """
@@ -94,32 +96,37 @@ def upload_file_to_s3(self, hf_file_path: str) -> None:
             return
 
         self.logging_for_class(f"Downloading huggingface model from {hf_file_path}")
-        completed_upload_id = None
+        upload_id = None
+        file_uploaded = False
         file_info = self.get_file_info(hf_file_path)
         total_size = file_info["size"]
         sha256 = file_info["lfs"]["sha256"] if "lfs" in file_info else None
         try:
             with self.fs.open(hf_file_path, "rb") as hf_file:
-                upload_id, sha256_value = self.upload_fs_to_s3(
-                    hf_file, total_size, full_model_file_path
+                upload = self.s3.create_multipart_upload(
+                    Bucket=self.s3_bucket, Key=full_model_file_path
                 )
+                upload_id = upload["UploadId"]
+                sha256_value = self.upload_fs_to_s3(
+                    hf_file, total_size, full_model_file_path, upload_id
+                )
                 if sha256 is not None and sha256 != sha256_value:
                     self.delete_s3_file(full_model_file_path)
                     raise Exception(
                         f"SHA256 hash of the downloaded file does not match the expected value. {full_model_file_path}"
                     )
-                completed_upload_id = upload_id
+                file_uploaded = True
         except Exception as e:
             self.logging_for_class(f"An error occurred, download: {str(e)}", "error")
             raise e
         finally:
             # If an error occurred and upload was not completed
-            if completed_upload_id is None:
+            if upload_id is not None and not file_uploaded:
                 self.s3.abort_multipart_upload(
                     Bucket=self.s3_bucket, Key=full_model_file_path, UploadId=upload_id
                 )
             else:
-                self.save_manifest_yml()
+                self.completed_files.append((hf_file_path, sha256))
                 self.logging_for_class(
                     f"Model file {full_model_file_path} uploaded successfully."
                 )
@@ -139,15 +146,16 @@ def save_manifest_yml(self) -> None:
         """
         manifest = Manifest(
             repo_id=self.repo_id,
-            files=self.files,
+            files=self.completed_files,
             inference_devices=self.inference_devices,
             quantization=self.quantization,
             runtime=self.runtime,
             prompt_template=self.prompt_template,
         )
         manifest_yaml = to_yaml(manifest.model_dump(exclude_none=True))
         file_path = self.get_s3_file_path(f"{self.repo_id}/manifest.yml")
-        self.s3.Object(self.s3_bucket, file_path).put(Body=manifest_yaml)
+        s3 = boto3.resource("s3")
+        s3.Object(self.s3_bucket, file_path).put(Body=manifest_yaml)
         self.logging_for_class(f"Manifest file saved to {file_path}")
 
     def upload_files(self) -> None:
@@ -159,7 +167,21 @@ def upload_files(self) -> None:
         with concurrent.futures.ThreadPoolExecutor(
             max_workers=self.download_max_concurrency
         ) as executor:
-            executor.map(self.upload_file_to_s3, self.files)
+            futures = [
+                executor.submit(self.upload_file_to_s3, file) for file in self.files
+            ]
+            concurrent.futures.wait(futures)
+            # Runs only after every worker has finished
+            self.handle_upload_completion()
+
+    def handle_upload_completion(self) -> None:
+        """
+        Called once all files have been uploaded: saves the manifest and
+        resets the list of completed files.
+        """
+        self.save_manifest_yml()
+        self.completed_files = []
+        self.logging_for_class("All files have been uploaded.")
 
     def logging_for_class(self, message: str, type: str = "info") -> None:
         """
10 changes: 0 additions & 10 deletions paka/utils.py
@@ -317,13 +317,3 @@ def random_str(length: int = 5) -> str:
         str: The generated random string.
     """
     return "".join(random.choices(string.ascii_letters + string.digits, k=length))
-
-
-def get_item(data: list[Any], index: int) -> Any:
-    """
-    Get an item from a list by index and return None if the index is out of bounds.
-    """
-    try:
-        return data[index]
-    except IndexError:
-        return None
@@ -1,15 +1,15 @@
 import unittest
 from unittest.mock import ANY, MagicMock, Mock, patch
 
-from paka.kube_resources.model_group.models.abstract import Model
+from paka.kube_resources.model_group.models.base import Model
 
 
 class TestModel(unittest.TestCase):
     def setUp(self) -> None:
         self.model = Model("TheBloke/Llama-2-7B-Chat-GGUF")
 
-    @patch("paka.kube_resources.model_group.models.abstract.read_current_cluster_data")
-    @patch("paka.kube_resources.model_group.models.abstract.boto3.client")
+    @patch("paka.kube_resources.model_group.models.base.read_current_cluster_data")
+    @patch("paka.kube_resources.model_group.models.base.boto3.client")
     def test_init(
         self, mock_boto3_client: Mock, mock_read_current_cluster_data: Mock
     ) -> None:
@@ -28,11 +28,11 @@ def test_init(
         mock_read_current_cluster_data.assert_called_once_with("bucket")
         # mock_boto3_client.assert_called_once_with("s3", config=MagicMock(signature_version="s3v4"))
 
-    @patch("paka.kube_resources.model_group.models.abstract.logger")
-    @patch("paka.kube_resources.model_group.models.abstract.requests.get")
-    @patch("paka.kube_resources.model_group.models.abstract.Model.s3_file_exists")
-    @patch("paka.kube_resources.model_group.models.abstract.Model.upload_part")
-    @patch("paka.kube_resources.model_group.models.abstract.Model.upload_to_s3")
+    @patch("paka.kube_resources.model_group.models.base.logger")
+    @patch("paka.kube_resources.model_group.models.base.requests.get")
+    @patch("paka.kube_resources.model_group.models.base.Model.s3_file_exists")
+    @patch("paka.kube_resources.model_group.models.base.Model.upload_part")
+    @patch("paka.kube_resources.model_group.models.base.Model.upload_to_s3")
     def test_download(
         self,
         mock_upload_to_s3: Mock,
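The only change in this test module is the patch-target strings, tracking the module rename from `models/abstract.py` to `models/base.py`. That is forced by how `unittest.mock.patch` works: it replaces a name in the namespace where it is looked up, not where it is defined. A stdlib-only illustration:

```python
import os
from unittest.mock import patch

# patch() takes the lookup path of the name: os.getcwd lives on the os
# module, so "os.getcwd" is the string to patch.
with patch("os.getcwd", return_value="/tmp"):
    assert os.getcwd() == "/tmp"

# Outside the context manager the real function is restored.
print(os.getcwd())
```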
43 changes: 25 additions & 18 deletions tests/model_group/models/test_hugging_face_model.py
@@ -1,5 +1,5 @@
 import unittest
-from unittest.mock import ANY, Mock, patch
+from unittest.mock import ANY, MagicMock, Mock, patch
 
 from paka.kube_resources.model_group.models.hugging_face_model import (
     HuggingFaceModel,  # replace with the actual module name
@@ -8,19 +8,18 @@
 
 class TestHuggingFaceModel(unittest.TestCase):
     def setUp(self) -> None:
-        self.model = HuggingFaceModel(
-            "TheBloke/Llama-2-7B-Chat-GGUF",
-            files=[
-                "llama-2-7b-chat.Q4_0.gguf",
-                "llama-2-7b-chat.Q2_K.gguf",
-            ],
-        )
-
-    def test_validate_files(self) -> None:
-        # Assert
-        self.assertEqual(len(self.model.files), 2)
-        for file in self.model.files:
-            self.assertTrue(file.startswith(self.model.repo_id))
+        with patch.object(HuggingFaceModel, "validate_files") as mock_validate_files:
+            mock_validate_files.return_value = [
+                "TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_0.gguf",
+                "TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q2_K.gguf",
+            ]
+            self.model = HuggingFaceModel(
+                "TheBloke/Llama-2-7B-Chat-GGUF",
+                files=[
+                    "llama-2-7b-chat.Q4_0.gguf",
+                    "llama-2-7b-chat.Q2_K.gguf",
+                ],
+            )
 
     @patch.object(HuggingFaceModel, "get_file_info")
     @patch.object(HuggingFaceModel, "upload_fs_to_s3")
@@ -35,9 +34,9 @@ def test_upload_file_to_s3(
     ) -> None:
         mock_s3_file_exists.return_value = False
         mock_upload_fs_to_s3.return_value = (
-            "test_upload_id",
-            "9958ee9b670594147b750bbc7d0540b928fa12dcc5dd4c58cc56ed2eb85e371b",
+            "9958ee9b670594147b750bbc7d0540b928fa12dcc5dd4c58cc56ed2eb85e371b"
         )
 
         hf_file_path = "TheBloke/Llama-2-7B-Chat-GGUF/llama-2-7b-chat.Q4_0.gguf"
         full_model_file_path = self.model.get_s3_file_path(hf_file_path)
         mock_get_file_info.return_value = {
@@ -50,14 +49,22 @@ def test_upload_file_to_s3(
         }
 
         # Act
+        self.model.s3 = MagicMock()
+        self.model.s3.create_multipart_upload = MagicMock()
+        self.model.s3.create_multipart_upload.return_value = {"UploadId": "test"}
         self.model.upload_file_to_s3(hf_file_path)
 
         # Assert
         mock_get_file_info.assert_called_once_with(hf_file_path)
-        mock_upload_fs_to_s3.assert_called_once_with(ANY, 1024, full_model_file_path)
+        mock_upload_fs_to_s3.assert_called_once_with(
+            ANY, 1024, full_model_file_path, "test"
+        )
 
     @patch.object(HuggingFaceModel, "upload_file_to_s3")
-    def test_upload_files(self, mock_upload_file_to_s3: Mock) -> None:
+    @patch.object(HuggingFaceModel, "save_manifest_yml")
+    def test_upload_files(
+        self, mock_save_manifest_yml: Mock, mock_upload_file_to_s3: Mock
+    ) -> None:
        # Act
        self.model.upload_files()
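The updated test stubs the model's S3 client with a `MagicMock` whose `create_multipart_upload` returns a fixed `UploadId`, then asserts that the same ID is threaded through to `upload_fs_to_s3`. A self-contained sketch of the technique; `Uploader` is a hypothetical stand-in for `HuggingFaceModel`, not code from this repo:

```python
from typing import Any
from unittest.mock import MagicMock

class Uploader:
    """Hypothetical stand-in for the class under test."""

    def __init__(self, s3: Any, bucket: str) -> None:
        self.s3 = s3
        self.bucket = bucket

    def upload(self, key: str) -> str:
        # Mirrors the production flow: create the upload, pass the ID on.
        upload = self.s3.create_multipart_upload(Bucket=self.bucket, Key=key)
        return upload["UploadId"]

s3 = MagicMock()
s3.create_multipart_upload.return_value = {"UploadId": "test"}
uploader = Uploader(s3, "example-bucket")

assert uploader.upload("models/x.gguf") == "test"
s3.create_multipart_upload.assert_called_once_with(
    Bucket="example-bucket", Key="models/x.gguf"
)
```

Because `MagicMock` auto-creates attributes, the explicit `self.model.s3.create_multipart_upload = MagicMock()` line in the test is redundant; setting `return_value` on the auto-created attribute is enough.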