67 changes: 36 additions & 31 deletions code/ChatHaruhi/ChatHaruhi.py
@@ -90,6 +90,16 @@ def __init__(self, system_prompt = None, \
self.llm, self.tokenizer = self.get_models(llm)
elif "llama" in llm:
self.llm, self.tokenizer = self.get_models(llm)
elif "phi" in llm:
self.llm, self.tokenizer = self.get_models(llm)
elif "Mixtral" in llm:
self.llm, self.tokenizer = self.get_models(llm)
elif "Qwen-118k" in llm:
self.llm, self.tokenizer = self.get_models(llm)
elif "mistral" in llm:
self.llm, self.tokenizer = self.get_models(llm)
elif "openChat" in llm:
self.llm, self.tokenizer = self.get_models(llm)
else:
print(f'warning! undefined llm {llm}, use openai instead.')
self.llm, self.tokenizer = self.get_models('openai')
@@ -306,30 +316,26 @@ def get_models(self, model_name):
from .llama2 import ChatLLaMA
return (ChatLLaMA(), tiktokenizer)
elif "qwen" in model_name:
if model_name == "qwen118k_raw":
from .Qwen118k2GPT import Qwen118k2GPT, Qwen_tokenizer
return (Qwen118k2GPT(model = "Qwen/Qwen-1_8B-Chat"), Qwen_tokenizer)
from huggingface_hub import HfApi
from huggingface_hub.hf_api import ModelFilter
qwen_api = HfApi()
qwen_models = qwen_api.list_models(
filter = ModelFilter(model_name=model_name),
author = "silk-road"
)
qwen_models_id = []
for qwen_model in qwen_models:
qwen_models_id.append(qwen_model.id)
# print(model.id)
if "silk-road/" + model_name in qwen_models_id:
from .Qwen118k2GPT import Qwen118k2GPT, Qwen_tokenizer
return (Qwen118k2GPT(model = "silk-road/" + model_name), Qwen_tokenizer)
else:
print(f'warning! undefined model {model_name}, use openai instead.')
from .LangChainGPT import LangChainGPT
return (LangChainGPT(), tiktokenizer)
from .qwen import ChatQwen
return (ChatQwen(), tiktokenizer)
# print(models_id)
elif model_name == "phi":
from .phi import Chatphi
return (Chatphi(), tiktokenizer)
elif "Mixtral" in model_name:
from .Mixtral import ChatMixtral
return (ChatMixtral(), tiktokenizer)
elif model_name == "Qwen-118k":
from .Qwen118k2GPT import Qwen118k2GPT
return (Qwen118k2GPT(), tiktokenizer)
elif "mistral" in model_name:
from .mistral import ChatMistral
return (ChatMistral(), tiktokenizer)
elif "openChat" in model_name:
from .openChat import ChatOpenChat
return (ChatOpenChat(), tiktokenizer)
else:
print(f'warning! undefined model {model_name}, use openai instead.')
print(f'warning! undefined model {model_name}, use openai instead.')
from .LangChainGPT import LangChainGPT
return (LangChainGPT(), tiktokenizer)

@@ -448,14 +454,11 @@ def chat(self, text, role, nth_test):

# add system prompt
self.llm.initialize_message()

if not 'no_description' in self.llm_type.split('='):
self.llm.system_message(self.system_prompt)

self.llm.system_message(self.system_prompt)

# add story
query = self.get_query_string(text, role)
if not 'no_retrieve' in self.llm_type.split('='):
# add story
self.add_story( query )
self.add_story( query )

# add history
self.add_history()
@@ -467,10 +470,12 @@ def chat(self, text, role, nth_test):
response_raw = self.llm.get_response()

response = response_postprocess(response_raw, self.dialogue_bra_token, self.dialogue_ket_token)

# record dialogue history
self.dialogue_history.append((query, response))



return response

def get_query_string(self, text, role):
@@ -485,7 +490,7 @@ def add_story(self, query):
return

query_vec = self.embedding(query)

stories = self.db.search(query_vec, self.k_search)

story_string = self.story_prefix_prompt
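For reviewers, a hedged usage sketch of how the new llm strings route through get_models(); the constructor's role/story-database arguments are omitted and the prompt text is illustrative, not taken from this PR.

from ChatHaruhi import ChatHaruhi  # assuming the package under code/ChatHaruhi is importable

# Any of the new branch strings ("phi", "Mixtral", "Qwen-118k", "mistral", "openChat")
# selects the matching backend; unrecognized names fall back to openai with a warning.
chatbot = ChatHaruhi(system_prompt="You are Haruhi Suzumiya.", llm="mistral")
reply = chatbot.chat(text="Hello!", role="Kyon", nth_test=0)  # chat() signature as shown in this diff
print(reply)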
53 changes: 53 additions & 0 deletions code/ChatHaruhi/Mixtral.py
@@ -0,0 +1,53 @@
from .BaseLLM import BaseLLM
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import LlamaTokenizer, MixtralForCausalLM
import bitsandbytes, flash_attn  # not used directly; presumably imported so missing quantization / flash-attention deps fail fast
tokenizer_LLaMA = None
model_LLaMA = None

def initialize_Mixtral():
global model_LLaMA, tokenizer_LLaMA

if model_LLaMA is None:
model_LLaMA = MixtralForCausalLM.from_pretrained(
"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
torch_dtype=torch.float16,
device_map="auto"
)

if tokenizer_LLaMA is None:
tokenizer_LLaMA = LlamaTokenizer.from_pretrained('NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO', trust_remote_code=True)

return model_LLaMA, tokenizer_LLaMA

def LLaMA_tokenizer(text):
return len(tokenizer_LLaMA.encode(text))

class ChatMixtral(BaseLLM):
def __init__(self, model="Mixtral"):
super(ChatMixtral, self).__init__()
self.model, self.tokenizer = initialize_Mixtral()
self.messages = ""

def initialize_message(self):
self.messages = ""

def ai_message(self, payload):
self.messages = self.messages + "\n " + payload

def system_message(self, payload):
self.messages = self.messages + "\n " + payload

def user_message(self, payload):
self.messages = self.messages + "\n " + payload

def get_response(self):
with torch.no_grad():
input_ids = self.tokenizer(self.messages, return_tensors="pt").input_ids.to("cuda")
generated_ids = self.model.generate(input_ids, max_new_tokens=750, temperature=0.8, repetition_penalty=1.1, do_sample=True, eos_token_id=self.tokenizer.eos_token_id)
response = self.tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_spaces=True)
return response

def print_prompt(self):
print(self.messages)
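A hedged usage sketch of the new backend class, based only on the methods defined above; the prompt strings are illustrative and not part of this PR.

from ChatHaruhi.Mixtral import ChatMixtral

llm = ChatMixtral()
llm.initialize_message()                                  # reset the accumulated prompt string
llm.system_message("You are a role-playing assistant.")
llm.user_message("Introduce yourself in one sentence.")
print(llm.get_response())                                 # generates with the Nous-Hermes-2 Mixtral checkpoint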
50 changes: 14 additions & 36 deletions code/ChatHaruhi/Qwen118k2GPT.py
@@ -1,21 +1,16 @@
import torch
import torch
from .BaseLLM import BaseLLM
from transformers import AutoTokenizer, AutoModel
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.generation import GenerationConfig

from transformers import AutoTokenizer, AutoModelForCausalLM
import pdb
tokenizer_qwen = None
model_qwen = None



def initialize_Qwen2LORA(model):
# Load model directly
def initialize_Qwen2LORA():
global model_qwen, tokenizer_qwen

if model_qwen is None:
model_qwen = AutoModelForCausalLM.from_pretrained(
model,
"silk-road/ChatHaruhi_RolePlaying_qwen_7b",
device_map="auto",
trust_remote_code=True
)
@@ -27,49 +22,34 @@ def initialize_Qwen2LORA(model):

if tokenizer_qwen is None:
tokenizer_qwen = AutoTokenizer.from_pretrained(
model,
"silk-road/ChatHaruhi_RolePlaying_qwen_7b",
# use_fast=True,
trust_remote_code=True
)

return model_qwen, tokenizer_qwen

def Qwen_tokenizer(text):

def LLaMA_tokenizer(text):
return len(tokenizer_qwen.encode(text))

class Qwen118k2GPT(BaseLLM):
def __init__(self, model):
def __init__(self, model="qwen-118k"):
super(Qwen118k2GPT, self).__init__()
global model_qwen, tokenizer_qwen
if model == "Qwen/Qwen-1_8B-Chat":
tokenizer_qwen = AutoTokenizer.from_pretrained(
"Qwen/Qwen-1_8B-Chat",
trust_remote_code=True
)
model_qwen = AutoModelForCausalLM.from_pretrained(
"Qwen/Qwen-1_8B-Chat",
device_map="auto",
trust_remote_code=True
).eval()
self.model = model_qwen
self.tokenizer = tokenizer_qwen
elif "silk-road/" in model :
self.model, self.tokenizer = initialize_Qwen2LORA(model)
else:
raise Exception("Unknown Qwen model")
self.model, self.tokenizer = initialize_Qwen2LORA()
self.messages = ""

def initialize_message(self):
self.messages = ""

def ai_message(self, payload):
self.messages = "AI: " + self.messages + "\n " + payload
self.messages = self.messages + "\n " + payload

def system_message(self, payload):
self.messages = "SYSTEM PROMPT: " + self.messages + "\n " + payload
self.messages = self.messages + "\n " + payload

def user_message(self, payload):
self.messages = "User: " + self.messages + "\n " + payload
self.messages = self.messages + "\n " + payload

def get_response(self):
with torch.no_grad():
@@ -80,5 +60,3 @@ def get_response(self):
def print_prompt(self):
print(type(self.messages))
print(self.messages)


53 changes: 53 additions & 0 deletions code/ChatHaruhi/mistral.py
@@ -0,0 +1,53 @@
from .BaseLLM import BaseLLM
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import bitsandbytes, flash_attn  # not used directly; presumably imported so missing quantization / flash-attention deps fail fast
tokenizer_LLaMA = None
model_LLaMA = None

def initialize_Mistral():
global model_LLaMA, tokenizer_LLaMA

if model_LLaMA is None:
model_LLaMA = AutoModelForCausalLM.from_pretrained(
"mistralai/Mistral-7B-Instruct-v0.2",
torch_dtype=torch.float16,
device_map="auto"
)

if tokenizer_LLaMA is None:
tokenizer_LLaMA = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", trust_remote_code=True)

return model_LLaMA, tokenizer_LLaMA

def LLaMA_tokenizer(text):
return len(tokenizer_LLaMA.encode(text))

class ChatMistral(BaseLLM):
def __init__(self, model="Mistral"):
super(ChatMistral, self).__init__()
self.model, self.tokenizer = initialize_Mistral()
self.messages = ""

def initialize_message(self):
self.messages = "[INST]"

def ai_message(self, payload):
self.messages = self.messages + "\n " + payload

def system_message(self, payload):
self.messages = self.messages + "\n " + payload

def user_message(self, payload):
self.messages = self.messages + "\n " + payload

def get_response(self):
with torch.no_grad():
encodeds = self.tokenizer.encode(self.messages+"[/INST]", return_tensors="pt")
generated_ids = self.model.generate(encodeds, max_new_tokens=2000, do_sample=True)
decoded = self.tokenizer.batch_decode(generated_ids)

return decoded[0].split("[/INST]")[1]

def print_prompt(self):
print(self.messages)
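The class above wraps the whole accumulated prompt in a single [INST] ... [/INST] block, which matches Mistral-Instruct's expected format. For comparison only (not what this PR implements), a minimal sketch using the chat-template helper that transformers provides for this model:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

tok = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.2", torch_dtype=torch.float16, device_map="auto"
)
# Mistral-Instruct's template only accepts user/assistant turns, so any system text
# would need to be folded into the user message.
messages = [{"role": "user", "content": "Introduce yourself in one sentence."}]
input_ids = tok.apply_chat_template(messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
out = model.generate(input_ids, max_new_tokens=200, do_sample=True)
print(tok.decode(out[0][input_ids.shape[-1]:], skip_special_tokens=True))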
66 changes: 66 additions & 0 deletions code/ChatHaruhi/phi.py
@@ -0,0 +1,66 @@
import torch
from .BaseLLM import BaseLLM
from transformers import AutoTokenizer, PhiForCausalLM
tokenizer_phi = None
model_phi = None
# Load model directly
def initialize_phi():
global model_phi, tokenizer_phi

if model_phi is None:
model_phi = PhiForCausalLM.from_pretrained(
"cognitivecomputations/dolphin-2_6-phi-2",
local_files_only=True,
torch_dtype=torch.float16,
device_map="auto",
)

if tokenizer_phi is None:
tokenizer_phi = AutoTokenizer.from_pretrained(
"cognitivecomputations/dolphin-2_6-phi-2",
local_files_only=True,
use_fast=True,
)




return model_phi, tokenizer_phi

def LLaMA_tokenizer(text):
return len(tokenizer_phi.encode(text))

class Chatphi(BaseLLM):
def __init__(self, model="phi"):
super(Chatphi, self).__init__()
self.model, self.tokenizer = initialize_phi()
self.messages = ""

def initialize_message(self):
self.messages = ""

def ai_message(self, payload):
self.messages = self.messages + "\n " + payload

def system_message(self, payload):
self.messages = self.messages + "\n " + payload

def user_message(self, payload):
self.messages = self.messages + "\n " + payload

def get_response(self):
with torch.no_grad():
# Prepare the model input with attention mask
inputs = self.tokenizer(self.messages, return_tensors="pt", padding=True, truncation=True)
attention_mask = inputs['attention_mask']

# Generate the model output using the prepared input and attention mask
outputs = self.model.generate(input_ids=inputs['input_ids'], attention_mask=attention_mask, max_length=114514)
response = self.tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

return response


def print_prompt(self):
print(type(self.messages))
print(self.messages)
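Note that the loader above passes local_files_only=True, so the dolphin-2_6-phi-2 weights must already sit in the local Hugging Face cache. A hedged, run-once pre-download step (assuming network access and huggingface_hub installed) could look like:

from huggingface_hub import snapshot_download

# Populate the local cache so PhiForCausalLM.from_pretrained(..., local_files_only=True) can find the files.
snapshot_download("cognitivecomputations/dolphin-2_6-phi-2")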