From 52be3a23a82ed5a5d8319051ebe5eb41aa97a00c Mon Sep 17 00:00:00 2001
From: dongyuanjushi
Date: Thu, 11 Apr 2024 12:53:25 -0400
Subject: [PATCH] fix: context switch of RR scheduler

---
 src/agents/agent_config/MathAgent.json       |  7 ++---
 .../native_agents/math_agent/math_agent.py   |  2 +-
 src/llms/llms.py                             | 30 +++++++++++--------
 src/scheduler/rr_scheduler.py                |  4 ++-
 4 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/src/agents/agent_config/MathAgent.json b/src/agents/agent_config/MathAgent.json
index 3f8934c5..4dc3ce3d 100644
--- a/src/agents/agent_config/MathAgent.json
+++ b/src/agents/agent_config/MathAgent.json
@@ -1,11 +1,8 @@
 {
     "name": "MathAgent",
     "description": [
-        "You are an expert who is good at solving mathematical problems. You are provided with several mathematical tools to help you solve the problem.",
-        "You should be truly honest about whether you can solve the problems or part of it by yourself, or else you need to select and call the provided tools.",
-        "It is extremely important that if you can not solve it by yourself but claim that you can, this could cause serious problems.",
-        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve.",
-        "Give the final solution of the problem with explanations."
+        "You are an expert who is good at solving mathematical problems. ",
+        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve."
     ],
 
     "flow": [
diff --git a/src/agents/native_agents/math_agent/math_agent.py b/src/agents/native_agents/math_agent/math_agent.py
index f9c204b6..2edc1da6 100644
--- a/src/agents/native_agents/math_agent/math_agent.py
+++ b/src/agents/native_agents/math_agent/math_agent.py
@@ -55,7 +55,7 @@ def run(self):
         # predefined steps
         steps = [
             "identify and outline the sub-problems that need to be solved as stepping stones toward the solution. ",
-            "apply mathematical theorems, formulas to solve each sub-problem. ",
+            "solve each sub-problem. ",
             "integrate the solutions to these sub-problems in the previous step to get the final solution. "
         ]
         for i, step in enumerate(steps):
diff --git a/src/llms/llms.py b/src/llms/llms.py
index 8d7e05a6..329d5093 100644
--- a/src/llms/llms.py
+++ b/src/llms/llms.py
@@ -104,7 +104,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 self.model_name,
                 use_auth_token = hf_token,
                 cache_dir = cache_dir,
-                torch_dtype=torch.float16,
+                # torch_dtype=torch.float16,
                 # load_in_8bit = True,
                 device_map="auto",
                 max_memory = self.max_gpu_memory
@@ -116,7 +116,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 cache_dir = cache_dir
             )
             # print(f"EOS token id: {self.model.config.eos_token_id}")
-            self.tokenizer.pad_token_id = self.model.config.eos_token_id
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
             # print(self.tokenizer.pad_token_id)
         else:
             if re.search(r'gpt', self.model_name, re.IGNORECASE):
@@ -213,6 +213,7 @@ def gpt_process(self,
             temperature=0.0
         ):
         prompt = agent_process.prompt,
+        print(f"Prompt: {prompt}")
         response = self.model.chat.completions.create(
             model=self.model_name,
             messages=[
@@ -287,11 +288,12 @@ def beam_search(self,
         idx = start_idx

         for step in range(start_idx, max_new_tokens):
-            # print(step)
             candidate_beams = []
             candidate_scores = []
             candidate_attention_masks = []

+            # print(step)
+
             for beam, score, beam_attention_mask in zip(beams, beam_scores, beam_attention_masks):
                 with torch.no_grad():
                     outputs = self.model(
@@ -325,6 +327,9 @@ def beam_search(self,
                     break

             # Break if all beams end with the end-of-sequence token
+            # print(self.tokenizer.eos_token_id)
+            # print(f"Step: {step}, End: {all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams)}")
+
             if all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams):
                 idx = max_new_tokens
                 finished_flag = True
@@ -346,7 +351,7 @@ def beam_search(self,
             "beams": beams,
             "beam_scores": beam_scores,
             "beam_attention_masks": beam_attention_masks,
-            "result": best_beam
+            "result": best_beam if finished_flag else None
         }

         return outputs
@@ -396,14 +401,15 @@ def open_llm_process(self,
             timestamp = agent_process.get_time_limit()
         )

-        output_ids = outputs["result"]
+        if outputs["finished_flag"]: # finished flag is set as True
+            output_ids = outputs["result"]

-        print(f"Output ID: {output_ids}")
-        prompt = agent_process.prompt
-        result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        result = result[len(prompt)+1: ]
+            # print(f"Output ID: {output_ids}")
+            prompt = agent_process.prompt
+            result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            # print(result)
+            result = result[len(prompt)+1: ]

-        if outputs["finished_flag"]: # finished flag is set as True
             if self.context_manager.check_restoration(
                 agent_process.get_pid()):
                 self.context_manager.clear_restoration(
@@ -415,6 +421,7 @@ def open_llm_process(self,

         else:
             # print(f"{agent_process.agent_name} suspended: {result}")
+            self.logger.info(f"[{agent_process.agent_name}] is suspended due to the time limit.")
             self.context_manager.gen_snapshot(
                 pid = agent_process.get_pid(),
                 context = {
@@ -424,7 +431,6 @@ def open_llm_process(self,
                     "beam_attention_masks": outputs["beam_attention_masks"]
                 }
             )
-            agent_process.set_status("suspending")
-            agent_process.set_response(result)
+            agent_process.set_status("suspended")

         agent_process.set_end_time(time.time())
diff --git a/src/scheduler/rr_scheduler.py b/src/scheduler/rr_scheduler.py
index de31d41e..71970885 100644
--- a/src/scheduler/rr_scheduler.py
+++ b/src/scheduler/rr_scheduler.py
@@ -22,7 +22,9 @@ def run(self):
                 agent_request = self.agent_process_queue.get(block=True,
                                                              timeout=1)
                 agent_request.set_time_limit(self.time_limit)
                 self.execute_request(agent_request)
-                if agent_request.get_status() is not "done":
+                if agent_request.get_status() != "done":
+                    time.sleep(2)
+                    agent_request.set_created_time(time.time())
                     self.agent_process_queue.put(agent_request)
             except Empty: