From 749fb52d20ae091516366ee63c8fb7dadfebc3c2 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Sat, 28 Oct 2023 20:16:49 +0200
Subject: [PATCH 1/4] Enable streaming support for openai ChatCompletion #217

---
 autogen/oai/chat_completion_proxy.py | 57 ++++++++++++++++++++++++++++
 autogen/oai/completion.py            |  3 +-
 setup.py                             |  1 +
 3 files changed, 60 insertions(+), 1 deletion(-)
 create mode 100644 autogen/oai/chat_completion_proxy.py

diff --git a/autogen/oai/chat_completion_proxy.py b/autogen/oai/chat_completion_proxy.py
new file mode 100644
index 000000000000..5ac64e5ce9af
--- /dev/null
+++ b/autogen/oai/chat_completion_proxy.py
@@ -0,0 +1,57 @@
+import openai
+import tiktoken
+
+class ChatCompletionProxy():
+    @classmethod
+    def _prompt_tokens(cls, messages):
+        # Get the encoding for OpenAI's "cl100k_base" model
+        encoding = tiktoken.get_encoding("cl100k_base")
+
+        # Calculate the total number of tokens in the prompt
+        # by iterating over each message in the 'messages' list,
+        # encoding its content, and summing up the token counts.
+        return sum([len(encoding.encode(msg['content'])) for msg in messages])
+
+    @classmethod
+    def create(cls, *args, **kwargs):
+        # Check if streaming is enabled in the function arguments
+        if kwargs.get("stream", False):
+            response_content = ""
+            completion_tokens = 0
+
+            # Set the terminal text color to green for better visibility
+            print("\033[32m", end='')
+
+            # Send the chat completion request to OpenAI's API and process the response in chunks
+            for chunk in openai.ChatCompletion.create(*args, **kwargs):
+                if chunk["choices"]:
+                    content = chunk["choices"][0].get("delta", {}).get("content")
+                    # If content is present, print it to the terminal and update response variables
+                    if content is not None:
+                        print(content, end='', flush=True)
+                        response_content += content
+                        completion_tokens += 1
+
+            # Reset the terminal text color
+            print("\033[0m\n")
+
+            # Prepare the final response object based on the accumulated data
+            response = chunk
+            response["choices"][0]["message"] = {
+                'role': 'assistant',
+                'content': response_content
+            }
+
+            prompt_tokens = cls._prompt_tokens(kwargs["messages"])
+            # Add usage information to the response
+            response["usage"] = {
+                "prompt_tokens": prompt_tokens,
+                "completion_tokens": completion_tokens,
+                "total_tokens": prompt_tokens + completion_tokens
+            }
+        else:
+            # If streaming is not enabled, send a regular chat completion request
+            response = openai.ChatCompletion.create(*args, **kwargs)
+
+        # Return the final response object
+        return response
diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py
index a720ccc24466..38d8cde4dee9 100644
--- a/autogen/oai/completion.py
+++ b/autogen/oai/completion.py
@@ -9,6 +9,7 @@
 from flaml.tune.space import is_constant
 from flaml.automl.logger import logger_formatter
 from .openai_utils import get_key
+from .chat_completion_proxy import ChatCompletionProxy
 from collections import defaultdict
 
 try:
@@ -207,7 +208,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
                 cls._book_keeping(config, response)
                 return response
         openai_completion = (
-            openai.ChatCompletion
+            ChatCompletionProxy  # Support streaming for chat models
             if config["model"].replace("gpt-35-turbo", "gpt-3.5-turbo") in cls.chat_models
             or issubclass(cls, ChatCompletion)
             else openai.Completion
diff --git a/setup.py b/setup.py
index d3308bf1ccc6..2b89fc8e291d 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,7 @@
 install_requires = [
     "openai<1",
+    "tiktoken",
     "diskcache",
     "termcolor",
     "flaml",

From d72a86828b2f95116f92b56a5c9f213803ec834f Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 22:27:33 +0100
Subject: [PATCH 2/4] Improve streaming implementation (openai v0.28)

---
 autogen/oai/chat_completion_proxy.py | 50 +++++++++++++++++++---------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/autogen/oai/chat_completion_proxy.py b/autogen/oai/chat_completion_proxy.py
index 5ac64e5ce9af..5a1d105715f1 100644
--- a/autogen/oai/chat_completion_proxy.py
+++ b/autogen/oai/chat_completion_proxy.py
@@ -1,6 +1,8 @@
 import openai
 import tiktoken
 
+from openai.openai_object import OpenAIObject
+
 class ChatCompletionProxy():
     @classmethod
     def _prompt_tokens(cls, messages):
@@ -15,8 +17,10 @@ def _prompt_tokens(cls, messages):
     @classmethod
     def create(cls, *args, **kwargs):
         # Check if streaming is enabled in the function arguments
-        if kwargs.get("stream", False):
-            response_content = ""
+        if kwargs.get('stream', False) and 'functions' not in kwargs:
+            # Prepare response array based on parameter 'n'
+            response_contents = [""] * kwargs.get('n', 1)
+            finish_reasons = [""] * kwargs.get('n', 1)
             completion_tokens = 0
 
             # Set the terminal text color to green for better visibility
@@ -25,32 +29,46 @@ def create(cls, *args, **kwargs):
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in openai.ChatCompletion.create(*args, **kwargs):
                 if chunk["choices"]:
-                    content = chunk["choices"][0].get("delta", {}).get("content")
-                    # If content is present, print it to the terminal and update response variables
-                    if content is not None:
-                        print(content, end='', flush=True)
-                        response_content += content
-                        completion_tokens += 1
+                    for choice in chunk["choices"]:
+                        content = choice.get("delta", {}).get("content")
+                        # If content is present, print it to the terminal and update response variables
+                        if content is not None:
+                            print(content, end='', flush=True)
+                            response_contents[choice.index] += content
+                            finish_reasons[choice.index] = choice.get("finish_reasons", None)
+                            completion_tokens += 1
+                        else:
+                            print()
 
             # Reset the terminal text color
             print("\033[0m\n")
 
             # Prepare the final response object based on the accumulated data
-            response = chunk
-            response["choices"][0]["message"] = {
-                'role': 'assistant',
-                'content': response_content
-            }
-
             prompt_tokens = cls._prompt_tokens(kwargs["messages"])
-            # Add usage information to the response
-            response["usage"] = {
+            response = OpenAIObject()
+            response.id = chunk['id']
+            response.object = 'chat.completion'
+            response.created = chunk['created']
+            response.model = chunk['model']
+            response.choices = []
+            response.usage = {
                 "prompt_tokens": prompt_tokens,
                 "completion_tokens": completion_tokens,
                 "total_tokens": prompt_tokens + completion_tokens
             }
+            for i in range(len(response_contents)):
+                response["choices"].append({
+                    "index": i,
+                    "finish_reason": finish_reasons[i],
+                    "message": {
+                        'role': 'assistant',
+                        'content': response_contents[i]
+                    }
+                })
         else:
             # If streaming is not enabled, send a regular chat completion request
+            # Ensure streaming is disabled
+            kwargs['stream'] = False
             response = openai.ChatCompletion.create(*args, **kwargs)
 
         # Return the final response object

From 1ee148c998891262919f0ec26cc01445f73a254b Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Mon, 30 Oct 2023 22:31:42 +0100
Subject: [PATCH 3/4] Improve streaming implementation (openai v0.28)

---
 autogen/oai/chat_completion_proxy.py | 50 +++++++++++++++++++---------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/autogen/oai/chat_completion_proxy.py b/autogen/oai/chat_completion_proxy.py
index 5ac64e5ce9af..5a1d105715f1 100644
--- a/autogen/oai/chat_completion_proxy.py
+++ b/autogen/oai/chat_completion_proxy.py
@@ -1,6 +1,8 @@
 import openai
 import tiktoken
 
+from openai.openai_object import OpenAIObject
+
 class ChatCompletionProxy():
     @classmethod
     def _prompt_tokens(cls, messages):
@@ -15,8 +17,10 @@ def _prompt_tokens(cls, messages):
     @classmethod
     def create(cls, *args, **kwargs):
         # Check if streaming is enabled in the function arguments
-        if kwargs.get("stream", False):
-            response_content = ""
+        if kwargs.get('stream', False) and 'functions' not in kwargs:
+            # Prepare response array based on parameter 'n'
+            response_contents = [""] * kwargs.get('n', 1)
+            finish_reasons = [""] * kwargs.get('n', 1)
             completion_tokens = 0
 
             # Set the terminal text color to green for better visibility
@@ -25,32 +29,46 @@ def create(cls, *args, **kwargs):
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in openai.ChatCompletion.create(*args, **kwargs):
                 if chunk["choices"]:
-                    content = chunk["choices"][0].get("delta", {}).get("content")
-                    # If content is present, print it to the terminal and update response variables
-                    if content is not None:
-                        print(content, end='', flush=True)
-                        response_content += content
-                        completion_tokens += 1
+                    for choice in chunk["choices"]:
+                        content = choice.get("delta", {}).get("content")
+                        # If content is present, print it to the terminal and update response variables
+                        if content is not None:
+                            print(content, end='', flush=True)
+                            response_contents[choice.index] += content
+                            finish_reasons[choice.index] = choice.get("finish_reasons", None)
+                            completion_tokens += 1
+                        else:
+                            print()
 
             # Reset the terminal text color
             print("\033[0m\n")
 
             # Prepare the final response object based on the accumulated data
-            response = chunk
-            response["choices"][0]["message"] = {
-                'role': 'assistant',
-                'content': response_content
-            }
-
             prompt_tokens = cls._prompt_tokens(kwargs["messages"])
-            # Add usage information to the response
-            response["usage"] = {
+            response = OpenAIObject()
+            response.id = chunk['id']
+            response.object = 'chat.completion'
+            response.created = chunk['created']
+            response.model = chunk['model']
+            response.choices = []
+            response.usage = {
                 "prompt_tokens": prompt_tokens,
                 "completion_tokens": completion_tokens,
                 "total_tokens": prompt_tokens + completion_tokens
             }
+            for i in range(len(response_contents)):
+                response["choices"].append({
+                    "index": i,
+                    "finish_reason": finish_reasons[i],
+                    "message": {
+                        'role': 'assistant',
+                        'content': response_contents[i]
+                    }
+                })
         else:
             # If streaming is not enabled, send a regular chat completion request
+            # Ensure streaming is disabled
+            kwargs['stream'] = False
             response = openai.ChatCompletion.create(*args, **kwargs)
 
         # Return the final response object

From 3afc70dafe58bfc01dbde35d28c3fbc990179b38 Mon Sep 17 00:00:00 2001
From: Alvaro Mateos
Date: Tue, 31 Oct 2023 11:23:47 +0100
Subject: [PATCH 4/4] Fixed code formatting issues with pre-commit

---
 autogen/oai/chat_completion_proxy.py | 52 ++++++++++++++--------------
 autogen/oai/completion.py            |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/autogen/oai/chat_completion_proxy.py b/autogen/oai/chat_completion_proxy.py
index 5a1d105715f1..a8f6c443400a 100644
--- a/autogen/oai/chat_completion_proxy.py
+++ b/autogen/oai/chat_completion_proxy.py
@@ -3,29 +3,30 @@
 
 from openai.openai_object import OpenAIObject
 
-class ChatCompletionProxy():
+
+class ChatCompletionProxy:
     @classmethod
     def _prompt_tokens(cls, messages):
         # Get the encoding for OpenAI's "cl100k_base" model
         encoding = tiktoken.get_encoding("cl100k_base")
-
+
         # Calculate the total number of tokens in the prompt
         # by iterating over each message in the 'messages' list,
        # encoding its content, and summing up the token counts.
-        return sum([len(encoding.encode(msg['content'])) for msg in messages])
+        return sum([len(encoding.encode(msg["content"])) for msg in messages])
 
     @classmethod
     def create(cls, *args, **kwargs):
         # Check if streaming is enabled in the function arguments
-        if kwargs.get('stream', False) and 'functions' not in kwargs:
+        if kwargs.get("stream", False) and "functions" not in kwargs:
             # Prepare response array based on parameter 'n'
-            response_contents = [""] * kwargs.get('n', 1)
-            finish_reasons = [""] * kwargs.get('n', 1)
+            response_contents = [""] * kwargs.get("n", 1)
+            finish_reasons = [""] * kwargs.get("n", 1)
             completion_tokens = 0
-
+
             # Set the terminal text color to green for better visibility
-            print("\033[32m", end='')
-
+            print("\033[32m", end="")
+
             # Send the chat completion request to OpenAI's API and process the response in chunks
             for chunk in openai.ChatCompletion.create(*args, **kwargs):
                 if chunk["choices"]:
@@ -33,43 +34,42 @@ def create(cls, *args, **kwargs):
                         content = choice.get("delta", {}).get("content")
                         # If content is present, print it to the terminal and update response variables
                         if content is not None:
-                            print(content, end='', flush=True)
+                            print(content, end="", flush=True)
                             response_contents[choice.index] += content
                             finish_reasons[choice.index] = choice.get("finish_reasons", None)
                             completion_tokens += 1
                         else:
                             print()
-
+
             # Reset the terminal text color
             print("\033[0m\n")
-
+
             # Prepare the final response object based on the accumulated data
             prompt_tokens = cls._prompt_tokens(kwargs["messages"])
             response = OpenAIObject()
-            response.id = chunk['id']
-            response.object = 'chat.completion'
-            response.created = chunk['created']
-            response.model = chunk['model']
+            response.id = chunk["id"]
+            response.object = "chat.completion"
+            response.created = chunk["created"]
+            response.model = chunk["model"]
             response.choices = []
             response.usage = {
                 "prompt_tokens": prompt_tokens,
                 "completion_tokens": completion_tokens,
-                "total_tokens": prompt_tokens + completion_tokens
+                "total_tokens": prompt_tokens + completion_tokens,
             }
             for i in range(len(response_contents)):
-                response["choices"].append({
-                    "index": i,
-                    "finish_reason": finish_reasons[i],
-                    "message": {
-                        'role': 'assistant',
-                        'content': response_contents[i]
+                response["choices"].append(
+                    {
+                        "index": i,
+                        "finish_reason": finish_reasons[i],
+                        "message": {"role": "assistant", "content": response_contents[i]},
                     }
-                })
+                )
         else:
             # If streaming is not enabled, send a regular chat completion request
             # Ensure streaming is disabled
-            kwargs['stream'] = False
+            kwargs["stream"] = False
             response = openai.ChatCompletion.create(*args, **kwargs)
-
+
         # Return the final response object
         return response
diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py
index 38d8cde4dee9..18239842a324 100644
--- a/autogen/oai/completion.py
+++ b/autogen/oai/completion.py
@@ -208,7 +208,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
                 cls._book_keeping(config, response)
                 return response
         openai_completion = (
-            ChatCompletionProxy # Support streaming for chat models
+            ChatCompletionProxy  # Support streaming for chat models
             if config["model"].replace("gpt-35-turbo", "gpt-3.5-turbo") in cls.chat_models
             or issubclass(cls, ChatCompletion)
             else openai.Completion
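
A minimal usage sketch of the streaming proxy introduced by this series, not part of the patches themselves. It assumes openai<1 and tiktoken are installed and OPENAI_API_KEY is set in the environment; the model name and prompt are illustrative only.

    from autogen.oai.chat_completion_proxy import ChatCompletionProxy

    # With stream=True (and no "functions" in kwargs) the proxy prints tokens to the
    # terminal as they arrive, then returns a chat.completion-shaped object whose
    # usage is estimated: prompt tokens via tiktoken, completion tokens per streamed chunk.
    response = ChatCompletionProxy.create(
        model="gpt-3.5-turbo",  # illustrative model name
        messages=[{"role": "user", "content": "Say hello in one short sentence."}],
        stream=True,
    )

    print(response["choices"][0]["message"]["content"])
    print(response["usage"])

Calls without stream=True, or with "functions" present, fall through to a regular non-streaming openai.ChatCompletion.create call, so existing callers behave as before.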