Skip to content

Commit

Permalink
Add model_type to the model registry
Browse files Browse the repository at this point in the history
  • Loading branch information
kushal-10 committed Apr 28, 2024
1 parent ea68246 commit d9002ea
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
24 changes: 13 additions & 11 deletions backends/huggingface_multimodal_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
import backends
from PIL import Image
import requests
from transformers import AutoProcessor, AutoModelForVision2Seq, IdeficsForVisionText2Text
from transformers import AutoProcessor, AutoModelForVision2Seq, IdeficsForVisionText2Text, AutoModelForCausalLM
from jinja2 import Template

# Maps the registry's 'model_type' string to the transformers model class
# used to instantiate the multimodal model from pretrained weights.
MODEL_TYPE_MAP = dict(
    Idefics=IdeficsForVisionText2Text,
    Vision2Seq=AutoModelForVision2Seq,
)

logger = backends.get_logger(__name__)

def load_processor(model_spec: backends.ModelSpec) -> AutoProcessor:
Expand Down Expand Up @@ -37,12 +43,12 @@ def load_model(model_spec: backends.ModelSpec) -> AutoModelForVision2Seq:
logger.info(f'Start loading huggingface model weights: {model_spec.model_name}')
hf_model_str = model_spec['huggingface_id'] # Get the model name

if model_spec['model_name'] != 'idefics-80b-instruct':
model = AutoModelForVision2Seq.from_pretrained(hf_model_str, device_map="auto", torch_dtype="auto")
else:
model = IdeficsForVisionText2Text.from_pretrained(hf_model_str, device_map="auto", torch_dtype=torch.bfloat16)
model_type = MODEL_TYPE_MAP[model_spec['model_type']] # Use the appropriate Auto class to load the model

model = model_type.from_pretrained(hf_model_str, device_map="auto", torch_dtype="auto") # Load the model

logger.info(f"Finished loading huggingface model: {model_spec.model_name}")
logger.info(f"Device Map: {model.hf_device_map}")

return model

Expand Down Expand Up @@ -139,6 +145,7 @@ def __init__(self, model_spec: backends.ModelSpec):
self.template = model_spec["custom_chat_template"]
self.assistant_tag = model_spec["assistant"]
self.image_placeholder = model_spec["placeholder"]

self.padding = False
self.IDEFICS = False
if model_spec['model_name'] == 'idefics-80b-instruct':
Expand Down Expand Up @@ -166,9 +173,6 @@ def generate_response(self, messages: List[Dict],
template = Template(template_str)
prompt_text = template.render(messages=messages)

print("### PROMPT TEXT ###")
print(prompt_text)

# Get a list of images that will be passed to the Processor
images = get_images(prompt_text, messages, self.image_placeholder)
if self.padding:
Expand Down Expand Up @@ -204,9 +208,7 @@ def generate_response(self, messages: List[Dict],

# Store generated text
response = {'response': generated_text}
print("### GENERATED RESPONSE ###")
print(response)

response_text = generated_text[0].split(self.assistant_tag)[-1] # Get the last assistant response

return prompt, response, response_text
return prompt, response, response_text
8 changes: 8 additions & 0 deletions backends/model_registry.json
Original file line number Diff line number Diff line change
Expand Up @@ -374,6 +374,7 @@
"model_name": "llava-1.5-7b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-1.5-7b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "ASSISTANT",
"padding": false,
Expand All @@ -384,6 +385,7 @@
"model_name": "llava-1.5-13b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-1.5-13b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "ASSISTANT",
"padding": false,
Expand All @@ -394,6 +396,7 @@
"model_name": "vip-llava-7b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/vip-llava-7b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "ASSISTANT",
"padding": false,
Expand All @@ -404,6 +407,7 @@
"model_name": "llava-v1.6-34b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-v1.6-34b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "assistant",
"padding": true,
Expand All @@ -414,6 +418,7 @@
"model_name": "llava-v1.6-mistral-7b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-v1.6-mistral-7b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "[/INST]",
"padding": true,
Expand All @@ -424,6 +429,7 @@
"model_name": "llava-v1.6-vicuna-13b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-v1.6-vicuna-13b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "ASSISTANT",
"padding": true,
Expand All @@ -434,6 +440,7 @@
"model_name": "llava-v1.6-vicuna-7b-hf",
"backend": "huggingface_multimodal",
"huggingface_id": "llava-hf/llava-v1.6-vicuna-7b-hf",
"model_type": "Vision2Seq",
"placeholder": "<image>",
"assistant": "ASSISTANT",
"padding": true,
Expand All @@ -444,6 +451,7 @@
"model_name": "idefics-80b-instruct",
"backend": "huggingface_multimodal",
"huggingface_id": "HuggingFaceM4/idefics-80b-instruct",
"model_type": "Idefics",
"placeholder": "",
"assistant": "ASSISTANT:",
"padding": false,
Expand Down

0 comments on commit d9002ea

Please sign in to comment.