@@ -104,7 +104,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device
device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=device)
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

past_key_values = DynamicCache()
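For context on this first hunk: the `DynamicCache` created here is meant to be handed to `generate()` and reused across calls. A minimal sketch follows; the prompt string and `max_new_tokens` value are illustrative assumptions, not part of the change.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device

device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
# `dtype` is the argument name this commit switches to (formerly `torch_dtype`).
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

past_key_values = DynamicCache()

# Illustrative prompt; generate() fills the cache object in place, so the same
# object can be passed again on a follow-up call instead of being recomputed.
inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, past_key_values=past_key_values, max_new_tokens=20)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```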
@@ -150,7 +150,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache, infer_device
device = f"{infer_device()}:0"

model_id = "meta-llama/Llama-2-7b-chat-hf"
-model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16, device_map=device)
+model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [{"role": "user", "content": "You are a helpful assistant."}]
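The second hunk sets up a chat-style `messages` list. A hedged sketch of the usual next steps (rendering the chat template and generating) is below; `add_generation_prompt` and the generation length are assumptions for illustration.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, infer_device

device = f"{infer_device()}:0"
model_id = "meta-llama/Llama-2-7b-chat-hf"
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16, device_map=device)
tokenizer = AutoTokenizer.from_pretrained(model_id)

messages = [{"role": "user", "content": "You are a helpful assistant."}]

# Render the chat messages into token ids; add_generation_prompt appends the
# assistant prefix so the model continues as the assistant.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

# max_new_tokens is an illustrative choice, not taken from the diff.
outputs = model.generate(inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```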
@@ -176,7 +176,7 @@ import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, DynamicCache

tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-chat-hf")
-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", torch_dtype=torch.float16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-chat-hf", dtype=torch.float16, device_map="auto")
inputs = tokenizer("Hello, my name is", return_tensors="pt").to(model.device)

# `return_dict_in_generate=True` is required to return the cache, and `return_legacy_cache` converts the returned cache
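The comment that closes this hunk continues past the hunk boundary. A minimal sketch of the call it describes is below, continuing from the `model` and `inputs` defined above; `max_new_tokens` is an illustrative value.

```python
# Return the cache as part of the generate() output and force it into the
# legacy tuple-of-(key, value) format.
generation_outputs = model.generate(
    **inputs,
    return_dict_in_generate=True,  # required so past_key_values is returned
    return_legacy_cache=True,      # convert the returned cache to the legacy format
    max_new_tokens=5,              # illustrative value, not from the diff
)

# With return_legacy_cache=True this is a tuple of per-layer (key, value) pairs;
# it can be re-wrapped with DynamicCache.from_legacy_cache() if needed.
past_key_values = generation_outputs.past_key_values
print(type(past_key_values))
```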