fix: context switch of RR scheduler #57

Merged · 1 commit · Apr 11, 2024

7 changes: 2 additions & 5 deletions src/agents/agent_config/MathAgent.json

```diff
@@ -1,11 +1,8 @@
 {
     "name": "MathAgent",
     "description": [
-        "You are an expert who is good at solving mathematical problems. You are provided with several mathematical tools to help you solve the problem.",
-        "You should be truly honest about whether you can solve the problems or part of it by yourself, or else you need to select and call the provided tools.",
-        "It is extremely important that if you can not solve it by yourself but claim that you can, this could cause serious problems.",
-        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve.",
-        "Give the final solution of the problem with explanations."
+        "You are an expert who is good at solving mathematical problems. ",
+        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve."
     ],
     "flow": [
```

2 changes: 1 addition & 1 deletion src/agents/native_agents/math_agent/math_agent.py

```diff
@@ -55,7 +55,7 @@ def run(self):
         # predefined steps
         steps = [
             "identify and outline the sub-problems that need to be solved as stepping stones toward the solution. ",
-            "apply mathematical theorems, formulas to solve each sub-problem. ",
+            "solve each sub-problem. ",
             "integrate the solutions to these sub-problems in the previous step to get the final solution. "
         ]
         for i, step in enumerate(steps):
```

30 changes: 18 additions & 12 deletions src/llms/llms.py

```diff
@@ -104,7 +104,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 self.model_name,
                 use_auth_token = hf_token,
                 cache_dir = cache_dir,
-                torch_dtype=torch.float16,
+                # torch_dtype=torch.float16,
                 # load_in_8bit = True,
                 device_map="auto",
                 max_memory = self.max_gpu_memory
@@ -116,7 +116,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 cache_dir = cache_dir
             )
             # print(f"EOS token id: {self.model.config.eos_token_id}")
-            self.tokenizer.pad_token_id = self.model.config.eos_token_id
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
             # print(self.tokenizer.pad_token_id)
         else:
             if re.search(r'gpt', self.model_name, re.IGNORECASE):
```

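The pad-token change is worth calling out: `model.config.eos_token_id` can be unset (`None`) on some checkpoints even when the tokenizer defines an EOS token, which leaves `pad_token_id` invalid. A minimal sketch of the pattern, using a placeholder model name rather than anything from this repo:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # placeholder for illustration only
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Take the EOS id from the tokenizer itself: model.config.eos_token_id
# may be None for some checkpoints even when the tokenizer defines one.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id
```
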
```diff
@@ -213,6 +213,7 @@ def gpt_process(self,
         temperature=0.0
     ):
         prompt = agent_process.prompt,
+        print(f"Prompt: {prompt}")
         response = self.model.chat.completions.create(
             model=self.model_name,
             messages=[
```

```diff
@@ -287,11 +288,12 @@ def beam_search(self,
         idx = start_idx

         for step in range(start_idx, max_new_tokens):
-            # print(step)
             candidate_beams = []
             candidate_scores = []
             candidate_attention_masks = []

+            # print(step)
+
             for beam, score, beam_attention_mask in zip(beams, beam_scores, beam_attention_masks):
                 with torch.no_grad():
                     outputs = self.model(
```

```diff
@@ -325,6 +327,9 @@
                     break

         # Break if all beams end with the end-of-sequence token
+        # print(self.tokenizer.eos_token_id)
+        # print(f"Step: {step}, End: {all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams)}")
+
         if all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams):
             idx = max_new_tokens
             finished_flag = True
```

```diff
@@ -346,7 +351,7 @@
             "beams": beams,
             "beam_scores": beam_scores,
             "beam_attention_masks": beam_attention_masks,
-            "result": best_beam
+            "result": best_beam if finished_flag else None
         }

         return outputs
```

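Returning `None` instead of a partially generated beam makes the suspended case explicit: a caller can no longer decode a half-finished sequence by accident. A sketch of how a caller might branch on the returned dict (the helper below is illustrative, not repo code):

```python
def consume(outputs: dict):
    """Branch on the suspended case; assumes the dict shape produced by
    beam_search after this PR (illustrative helper, not part of the repo)."""
    if outputs["finished_flag"]:
        return outputs["result"]  # completed beam, safe to decode
    # Suspended: "result" is now None, so do not decode it; the beam state
    # below is what gets snapshotted for later restoration.
    return {
        "beams": outputs["beams"],
        "beam_scores": outputs["beam_scores"],
        "beam_attention_masks": outputs["beam_attention_masks"],
    }
```
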
```diff
@@ -396,14 +401,15 @@ def open_llm_process(self,
             timestamp = agent_process.get_time_limit()
         )

-        output_ids = outputs["result"]
-
-        print(f"Output ID: {output_ids}")
-        prompt = agent_process.prompt
-        result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        result = result[len(prompt)+1: ]
-
-        if outputs["finished_flag"]: # finished flag is set as True
+        if outputs["finished_flag"]: # finished flag is set as True
+            output_ids = outputs["result"]
+
+            # print(f"Output ID: {output_ids}")
+            prompt = agent_process.prompt
+            result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            # print(result)
+            result = result[len(prompt)+1: ]
+
             if self.context_manager.check_restoration(
                 agent_process.get_pid()):
                 self.context_manager.clear_restoration(
```

```diff
@@ -415,6 +421,7 @@

         else:
             # print(f"{agent_process.agent_name} suspended: {result}")
+            self.logger.info(f"[{agent_process.agent_name}] is suspended due to the time limit.")
             self.context_manager.gen_snapshot(
                 pid = agent_process.get_pid(),
                 context = {
```

```diff
@@ -424,7 +431,6 @@
                     "beam_attention_masks": outputs["beam_attention_masks"]
                 }
             )
-            agent_process.set_status("suspending")
             agent_process.set_response(result)
             agent_process.set_status("suspended")

         agent_process.set_end_time(time.time())
```

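Together these hunks implement the actual context switch: decoding now happens only when `finished_flag` is set, while an unfinished generation is snapshotted and marked `suspended` instead of flowing through the previously unconditional decode path (which crashed once `result` became `None`). A stub sketching the snapshot contract the code relies on — the method names mirror the calls visible above, but the implementation is illustrative, not the repo's actual context manager:

```python
class ContextManagerStub:
    """Illustrative stand-in for the context manager used by open_llm_process."""

    def __init__(self):
        self.snapshots = {}

    def gen_snapshot(self, pid, context):
        # Save the in-flight beam state, keyed by agent process id.
        self.snapshots[pid] = context

    def check_restoration(self, pid):
        # True if this process was previously suspended mid-generation.
        return pid in self.snapshots

    def clear_restoration(self, pid):
        # Drop the snapshot once generation has run to completion.
        self.snapshots.pop(pid, None)
```
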
4 changes: 3 additions & 1 deletion src/scheduler/rr_scheduler.py

```diff
@@ -22,7 +22,9 @@ def run(self):
                 agent_request = self.agent_process_queue.get(block=True, timeout=1)
                 agent_request.set_time_limit(self.time_limit)
                 self.execute_request(agent_request)
-                if agent_request.get_status() is not "done":
+                if agent_request.get_status() != "done":
+                    time.sleep(2)
+                    agent_request.set_created_time(time.time())
                     self.agent_process_queue.put(agent_request)

             except Empty:
```

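The scheduler change fixes two things: `is not "done"` tested object identity rather than string equality (CPython 3.8+ even emits `SyntaxWarning: "is not" with a literal` for this), and a requeued request now gets a fresh creation timestamp before going back on the queue. A self-contained sketch of the resulting round-robin loop, with illustrative names:

```python
import queue
import time

def rr_loop(q: "queue.Queue", time_limit: float, execute_request):
    """Round-robin over agent requests: run each for one time slice and
    requeue it if it did not finish. Names here are illustrative."""
    while True:
        try:
            req = q.get(block=True, timeout=1)
        except queue.Empty:
            continue
        req.set_time_limit(time_limit)
        execute_request(req)  # may suspend the request at the time limit
        if req.get_status() != "done":  # '!=' compares values; 'is not' compared identity
            time.sleep(2)  # brief back-off before requeueing, as in the PR
            req.set_created_time(time.time())  # re-stamp so queue ordering stays fair
            q.put(req)  # requeue for another slice — the actual context switch
```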