From 52be3a23a82ed5a5d8319051ebe5eb41aa97a00c Mon Sep 17 00:00:00 2001
From: dongyuanjushi
Date: Thu, 11 Apr 2024 12:53:25 -0400
Subject: [PATCH] fix: context switch of RR scheduler

---
 src/agents/agent_config/MathAgent.json       |  7 ++---
 .../native_agents/math_agent/math_agent.py   |  2 +-
 src/llms/llms.py                             | 30 +++++++++++--------
 src/scheduler/rr_scheduler.py                |  4 ++-
 4 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/src/agents/agent_config/MathAgent.json b/src/agents/agent_config/MathAgent.json
index 3f8934c5..4dc3ce3d 100644
--- a/src/agents/agent_config/MathAgent.json
+++ b/src/agents/agent_config/MathAgent.json
@@ -1,11 +1,8 @@
 {
     "name": "MathAgent",
     "description": [
-        "You are an expert who is good at solving mathematical problems. You are provided with several mathematical tools to help you solve the problem.",
-        "You should be truly honest about whether you can solve the problems or part of it by yourself, or else you need to select and call the provided tools.",
-        "It is extremely important that if you can not solve it by yourself but claim that you can, this could cause serious problems.",
-        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve.",
-        "Give the final solution of the problem with explanations."
+        "You are an expert who is good at solving mathematical problems. ",
+        "Given a mathematical problem, if the problem is complex, you need to break down this problem into smaller sub-problems and solve."
     ],
 
     "flow": [
diff --git a/src/agents/native_agents/math_agent/math_agent.py b/src/agents/native_agents/math_agent/math_agent.py
index f9c204b6..2edc1da6 100644
--- a/src/agents/native_agents/math_agent/math_agent.py
+++ b/src/agents/native_agents/math_agent/math_agent.py
@@ -55,7 +55,7 @@ def run(self):
         # predefined steps
         steps = [
             "identify and outline the sub-problems that need to be solved as stepping stones toward the solution. ",
-            "apply mathematical theorems, formulas to solve each sub-problem. ",
+            "solve each sub-problem. ",
             "integrate the solutions to these sub-problems in the previous step to get the final solution. "
         ]
         for i, step in enumerate(steps):
diff --git a/src/llms/llms.py b/src/llms/llms.py
index 8d7e05a6..329d5093 100644
--- a/src/llms/llms.py
+++ b/src/llms/llms.py
@@ -104,7 +104,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 self.model_name,
                 use_auth_token = hf_token,
                 cache_dir = cache_dir,
-                torch_dtype=torch.float16,
+                # torch_dtype=torch.float16,
                 # load_in_8bit = True,
                 device_map="auto",
                 max_memory = self.max_gpu_memory
@@ -116,7 +116,7 @@ def load_llm_and_tokenizer(self): # load model from config
                 cache_dir = cache_dir
             )
             # print(f"EOS token id: {self.model.config.eos_token_id}")
-            self.tokenizer.pad_token_id = self.model.config.eos_token_id
+            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
             # print(self.tokenizer.pad_token_id)
         else:
             if re.search(r'gpt', self.model_name, re.IGNORECASE):
@@ -213,6 +213,7 @@ def gpt_process(self,
             temperature=0.0
         ):
         prompt = agent_process.prompt,
+        print(f"Prompt: {prompt}")
         response = self.model.chat.completions.create(
             model=self.model_name,
             messages=[
@@ -287,11 +288,12 @@ def beam_search(self,
         idx = start_idx

         for step in range(start_idx, max_new_tokens):
-            # print(step)
             candidate_beams = []
             candidate_scores = []
             candidate_attention_masks = []

+            # print(step)
+
             for beam, score, beam_attention_mask in zip(beams, beam_scores, beam_attention_masks):
                 with torch.no_grad():
                     outputs = self.model(
@@ -325,6 +327,9 @@ def beam_search(self,
                     break

             # Break if all beams end with the end-of-sequence token
+            # print(self.tokenizer.eos_token_id)
+            # print(f"Step: {step}, End: {all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams)}")
+
             if all(beam[-1, -1].item() == self.tokenizer.eos_token_id for beam in beams):
                 idx = max_new_tokens
                 finished_flag = True
@@ -346,7 +351,7 @@ def beam_search(self,
             "beams": beams,
             "beam_scores": beam_scores,
             "beam_attention_masks": beam_attention_masks,
-            "result": best_beam
+            "result": best_beam if finished_flag else None
         }

         return outputs
@@ -396,14 +401,15 @@ def open_llm_process(self,
             timestamp = agent_process.get_time_limit()
         )

-        output_ids = outputs["result"]
+        if outputs["finished_flag"]: # finished flag is set as True
+            output_ids = outputs["result"]

-        print(f"Output ID: {output_ids}")
-        prompt = agent_process.prompt
-        result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
-        result = result[len(prompt)+1: ]
+            # print(f"Output ID: {output_ids}")
+            prompt = agent_process.prompt
+            result = self.tokenizer.decode(output_ids[0], skip_special_tokens=True)
+            # print(result)
+            result = result[len(prompt)+1: ]

-        if outputs["finished_flag"]: # finished flag is set as True
             if self.context_manager.check_restoration(
                 agent_process.get_pid()):
                 self.context_manager.clear_restoration(
@@ -415,6 +421,7 @@ def open_llm_process(self,

         else:
             # print(f"{agent_process.agent_name} suspended: {result}")
+            self.logger.info(f"[{agent_process.agent_name}] is suspended due to the time limit.")
             self.context_manager.gen_snapshot(
                 pid = agent_process.get_pid(),
                 context = {
@@ -424,7 +431,6 @@ def open_llm_process(self,
                     "beam_attention_masks": outputs["beam_attention_masks"]
                 }
             )
-            agent_process.set_status("suspending")
-            agent_process.set_response(result)
+            agent_process.set_status("suspended")

         agent_process.set_end_time(time.time())
diff --git a/src/scheduler/rr_scheduler.py b/src/scheduler/rr_scheduler.py
index de31d41e..71970885 100644
--- a/src/scheduler/rr_scheduler.py
+++ b/src/scheduler/rr_scheduler.py
@@ -22,7 +22,9 @@ def run(self):
                 agent_request = self.agent_process_queue.get(block=True,
                                                              timeout=1)
                 agent_request.set_time_limit(self.time_limit)
                 self.execute_request(agent_request)
-                if agent_request.get_status() is not "done":
+                if agent_request.get_status() != "done":
+                    time.sleep(2)
+                    agent_request.set_created_time(time.time())
                     self.agent_process_queue.put(agent_request)
             except Empty: