diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py index 174f62f4e156..fd481632ed76 100644 --- a/autogen/agentchat/groupchat.py +++ b/autogen/agentchat/groupchat.py @@ -120,7 +120,6 @@ def __init__( max_consecutive_auto_reply: Optional[int] = sys.maxsize, human_input_mode: Optional[str] = "NEVER", system_message: Optional[str] = "Group chat manager.", - # seed: Optional[int] = 4, **kwargs, ): super().__init__( @@ -136,8 +135,6 @@ def __init__( # Allow async chat if initiated using a_initiate_chat self.register_reply(Agent, GroupChatManager.a_run_chat, config=groupchat, reset_config=GroupChat.reset) - # self._random = random.Random(seed) - def run_chat( self, messages: Optional[List[Dict]] = None, diff --git a/autogen/code_utils.py b/autogen/code_utils.py index 5e65a65f11cb..616bcf6da576 100644 --- a/autogen/code_utils.py +++ b/autogen/code_utils.py @@ -509,11 +509,11 @@ def eval_function_completions( _FUNC_COMPLETION_PROMPT = "# Python 3{definition}" _FUNC_COMPLETION_STOP = ["\nclass", "\ndef", "\nif", "\nprint"] _IMPLEMENT_CONFIGS = [ - {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 0}, - {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "seed": 0}, - {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "seed": 1}, - {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "seed": 2}, - {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "seed": 2}, + {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 0}, + {"model": FAST_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 7, "cache_seed": 0}, + {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "temperature": 0, "cache_seed": 1}, + {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 2, "cache_seed": 2}, + {"model": DEFAULT_MODEL, "prompt": _FUNC_COMPLETION_PROMPT, "stop": _FUNC_COMPLETION_STOP, "n": 1, "cache_seed": 2}, ] diff --git a/autogen/oai/client.py b/autogen/oai/client.py index 46748207bec9..4b6a92c4dc2f 100644 --- a/autogen/oai/client.py +++ b/autogen/oai/client.py @@ -31,7 +31,7 @@ class OpenAIWrapper: """A wrapper class for openai client.""" cache_path_root: str = ".cache" - extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_version"} + extra_kwargs = {"cache_seed", "filter_func", "allow_format_str_template", "context", "api_version"} openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs) def __init__(self, *, config_list: List[Dict] = None, **base_config): @@ -191,8 +191,8 @@ def create(self, **config): The actual prompt will be: "Complete the following sentence: Today I feel". More examples can be found at [templating](/docs/Use-Cases/enhanced_inference#templating). - - `seed` (int | None) for the cache. Default to 41. - An integer seed is useful when implementing "controlled randomness" for the completion. + - `cache_seed` (int | None) for the cache. Default to 41. + An integer cache_seed is useful when implementing "controlled randomness" for the completion. None for no caching. - filter_func (Callable | None): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. 
E.g., @@ -219,12 +219,12 @@ def yes_or_no_filter(context, response): self._process_for_azure(create_config, extra_kwargs, "extra") # construct the create params params = self._construct_create_params(create_config, extra_kwargs) - # get the seed, filter_func and context - seed = extra_kwargs.get("seed", 41) + # get the cache_seed, filter_func and context + cache_seed = extra_kwargs.get("cache_seed", 41) filter_func = extra_kwargs.get("filter_func") context = extra_kwargs.get("context") - with diskcache.Cache(f"{self.cache_path_root}/{seed}") as cache: - if seed is not None: + with diskcache.Cache(f"{self.cache_path_root}/{cache_seed}") as cache: + if cache_seed is not None: # Try to get the response from cache key = get_key(params) response = cache.get(key, None) @@ -245,7 +245,7 @@ def yes_or_no_filter(context, response): if i == last: raise else: - if seed is not None: + if cache_seed is not None: # Cache the response cache.set(key, response) return response diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py index 88d53bca4c0b..2fe95e8f60c6 100644 --- a/autogen/oai/completion.py +++ b/autogen/oai/completion.py @@ -109,8 +109,8 @@ class Completion(openai_Completion): "prompt": "{prompt}", } - seed = 41 - cache_path = f".cache/{seed}" + cache_seed = 41 + cache_path = f".cache/{cache_seed}" # retry after this many seconds retry_wait_time = 10 # fail a request after hitting RateLimitError for this many seconds @@ -134,7 +134,7 @@ def set_cache(cls, seed: Optional[int] = 41, cache_path_root: Optional[str] = ". cache_path (str, Optional): The root path for the cache. The complete cache path will be {cache_path}/{seed}. """ - cls.seed = seed + cls.cache_seed = seed cls.cache_path = f"{cache_path_root}/{seed}" @classmethod @@ -145,7 +145,7 @@ def clear_cache(cls, seed: Optional[int] = None, cache_path_root: Optional[str] seed (int, Optional): The integer identifier for the pseudo seed. If omitted, all caches under cache_path_root will be cleared. cache_path (str, Optional): The root path for the cache. - The complete cache path will be {cache_path}/{seed}. + The complete cache path will be {cache_path}/{cache_seed}. """ if seed is None: shutil.rmtree(cache_path_root, ignore_errors=True) @@ -773,7 +773,7 @@ def yes_or_no_filter(context, config, response): Besides the parameters for the openai API call, it can also contain: - `max_retry_period` (int): the total time (in seconds) allowed for retrying failed requests. - `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request. - - `seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion. + - `cache_seed` (int) for the cache. This is useful when implementing "controlled randomness" for the completion. Returns: Responses from OpenAI API, with additional fields. 
@@ -831,11 +831,11 @@ def yes_or_no_filter(context, config, response): return cls._get_response( params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout, use_cache=False ) - seed = cls.seed - if "seed" in params: - cls.set_cache(params.pop("seed")) + cache_seed = cls.cache_seed + if "cache_seed" in params: + cls.set_cache(params.pop("cache_seed")) with diskcache.Cache(cls.cache_path) as cls._cache: - cls.set_cache(seed) + cls.set_cache(cache_seed) return cls._get_response(params, raise_on_ratelimit_or_timeout=raise_on_ratelimit_or_timeout) @classmethod diff --git a/autogen/version.py b/autogen/version.py index 24374a4e9d74..16bf2ffe042f 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.2.0b3" +__version__ = "0.2.0b4" diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index e0aa629cb758..faee4cf2bf5e 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -185,7 +185,7 @@ " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": config_list,\n", " },\n", ")\n", diff --git a/notebook/agentchat_auto_feedback_from_code_execution.ipynb b/notebook/agentchat_auto_feedback_from_code_execution.ipynb index 21543ffc2088..1f29c6d0aa56 100644 --- a/notebook/agentchat_auto_feedback_from_code_execution.ipynb +++ b/notebook/agentchat_auto_feedback_from_code_execution.ipynb @@ -45,7 +45,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -330,7 +330,7 @@ "assistant = autogen.AssistantAgent(\n", " name=\"assistant\",\n", " llm_config={\n", - " \"seed\": 42, # seed for caching and reproducibility\n", + " \"cache_seed\": 42, # seed for caching and reproducibility\n", " \"config_list\": config_list, # a list of OpenAI API configurations\n", " \"temperature\": 0, # temperature for sampling\n", " }, # configuration for autogen's enhanced inference API which is compatible with OpenAI API\n", @@ -806,7 +806,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.4" }, "vscode": { "interpreter": { diff --git a/notebook/agentchat_chess.ipynb b/notebook/agentchat_chess.ipynb index 06bee1d65913..6b2a64a2a37f 100644 --- a/notebook/agentchat_chess.ipynb +++ b/notebook/agentchat_chess.ipynb @@ -35,7 +35,7 @@ "outputs": [], "source": [ "%%capture --no-stderr\n", - "# %pip install \"pyautogen~=0.2.0b2\"\n", + "# %pip install \"pyautogen~=0.2.0b4\"\n", "%pip install chess -U" ] }, @@ -292,13 +292,13 @@ " color=\"black\",\n", " board_agent=board_agent,\n", " max_turns=max_turn,\n", - " llm_config={\"temperature\": 0.5, \"seed\": 1, \"config_list\": config_list_gpt4},\n", + " llm_config={\"temperature\": 0.5, \"cache_seed\": 1, \"config_list\": config_list_gpt4},\n", ")\n", "player_white = ChessPlayerAgent(\n", " color=\"white\",\n", " board_agent=board_agent,\n", " max_turns=max_turn,\n", - " llm_config={\"temperature\": 0.5, \"seed\": 2, \"config_list\": config_list_gpt4},\n", + " llm_config={\"temperature\": 0.5, \"cache_seed\": 2, \"config_list\": config_list_gpt4},\n", ")" ] }, diff --git a/notebook/agentchat_groupchat.ipynb b/notebook/agentchat_groupchat.ipynb index fccd7449921c..042aebf4b6ff 100644 --- a/notebook/agentchat_groupchat.ipynb +++ b/notebook/agentchat_groupchat.ipynb @@ -35,7 +35,7 @@ "outputs": [], "source": [ "%%capture 
--no-stderr\n", - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -124,7 +124,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n", + "llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n", "user_proxy = autogen.UserProxyAgent(\n", " name=\"User_proxy\",\n", " system_message=\"A human admin.\",\n", diff --git a/notebook/agentchat_groupchat_research.ipynb b/notebook/agentchat_groupchat_research.ipynb index abe408320ae0..b01e35f86921 100644 --- a/notebook/agentchat_groupchat_research.ipynb +++ b/notebook/agentchat_groupchat_research.ipynb @@ -33,7 +33,7 @@ "outputs": [], "source": [ "%%capture --no-stderr\n", - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -111,7 +111,7 @@ "outputs": [], "source": [ "gpt4_config = {\n", - " \"seed\": 42, # change the seed for different trials\n", + " \"cache_seed\": 42, # change the cache_seed for different trials\n", " \"temperature\": 0,\n", " \"config_list\": config_list_gpt4,\n", " \"timeout\": 120,\n", diff --git a/notebook/agentchat_groupchat_vis.ipynb b/notebook/agentchat_groupchat_vis.ipynb index 499e6cee4fcb..0a7b984a551c 100644 --- a/notebook/agentchat_groupchat_vis.ipynb +++ b/notebook/agentchat_groupchat_vis.ipynb @@ -33,7 +33,7 @@ "outputs": [], "source": [ "%%capture --no-stderr\n", - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -132,7 +132,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n", + "llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n", "user_proxy = autogen.UserProxyAgent(\n", " name=\"User_proxy\",\n", " system_message=\"A human admin.\",\n", diff --git a/notebook/agentchat_hierarchy_flow_using_select_speaker.ipynb b/notebook/agentchat_hierarchy_flow_using_select_speaker.ipynb index bf69277a2333..d03157b6dc6c 100644 --- a/notebook/agentchat_hierarchy_flow_using_select_speaker.ipynb +++ b/notebook/agentchat_hierarchy_flow_using_select_speaker.ipynb @@ -50,7 +50,7 @@ "outputs": [], "source": [ "%%capture --no-stderr\n", - "# %pip install pyautogen~=0.1.0" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -163,7 +163,7 @@ "metadata": {}, "outputs": [], "source": [ - "llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}" + "llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}" ] }, { @@ -359,7 +359,7 @@ "\n", "\n", "# Create the manager\n", - "llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42, \"use_cache\":False} # use_cache is False because we want to observe if there is any communication pattern difference if we reran the group chat.\n", + "llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": None} # cache_seed is None because we want to observe if there is any communication pattern difference if we reran the group chat.\n", "manager = autogen.GroupChatManager(groupchat=group_chat, llm_config=llm_config)\n", "\n", "\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index 031d4901e9ca..93b3afdf62d5 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -45,7 +45,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -123,7 +123,7 @@ "assistant = autogen.AssistantAgent(\n", " name=\"assistant\",\n", " llm_config={\n", - " \"seed\": 41,\n", + " 
\"cache_seed\": 41,\n", " \"config_list\": config_list,\n", " }\n", ")\n", diff --git a/notebook/agentchat_lmm_gpt-4v.ipynb b/notebook/agentchat_lmm_gpt-4v.ipynb index 50e018076bb1..d3ece5c66ddf 100644 --- a/notebook/agentchat_lmm_gpt-4v.ipynb +++ b/notebook/agentchat_lmm_gpt-4v.ipynb @@ -15,7 +15,7 @@ "source": [ "### Before everything starts, install AutoGen with the `lmm` option\n", "```bash\n", - "pip install pyautogen[lmm]\n", + "pip install \"pyautogen[lmm]~=0.2.0b4\"\n", "```" ] }, @@ -85,7 +85,7 @@ " },\n", ")\n", "\n", - "gpt4_llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}" + "gpt4_llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}" ] }, { @@ -699,7 +699,7 @@ "# },\n", "# )\n", "\n", - "# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"seed\": 42}\n", + "# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"cache_seed\": 42}\n", "\n", "\n", "creator = FigureCreator(\n", diff --git a/notebook/agentchat_lmm_llava.ipynb b/notebook/agentchat_lmm_llava.ipynb index 35a51f45ad98..e86a1007e68f 100644 --- a/notebook/agentchat_lmm_llava.ipynb +++ b/notebook/agentchat_lmm_llava.ipynb @@ -5,9 +5,9 @@ "id": "2c75da30", "metadata": {}, "source": [ - "# Agent Chat with Multimodal Models\n", + "# Agent Chat with Multimodal Models: LLaVA\n", "\n", - "We use **LLaVA** as an example for the multimodal feature. More information about LLaVA can be found in their [GitHub page](https://github.com/haotian-liu/LLaVA)\n", + "This notebook uses **LLaVA** as an example for the multimodal feature. More information about LLaVA can be found in their [GitHub page](https://github.com/haotian-liu/LLaVA)\n", "\n", "\n", "This notebook contains the following information and examples:\n", @@ -26,7 +26,7 @@ "source": [ "### Before everything starts, install AutoGen with the `lmm` option\n", "```bash\n", - "pip install pyautogen[lmm]\n", + "pip install \"pyautogen[lmm]~=0.2.0b4\"\n", "```" ] }, @@ -848,7 +848,7 @@ " },\n", ")\n", "\n", - "gpt4_llm_config = {\"config_list\": config_list_gpt4, \"seed\": 42}\n", + "gpt4_llm_config = {\"config_list\": config_list_gpt4, \"cache_seed\": 42}\n", "\n", "# config_list_gpt35 = autogen.config_list_from_json(\n", "# \"OAI_CONFIG_LIST\",\n", @@ -857,7 +857,7 @@ "# },\n", "# )\n", "\n", - "# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"seed\": 42}\n", + "# gpt35_llm_config = {\"config_list\": config_list_gpt35, \"cache_seed\": 42}\n", "\n", "\n", "creator = FigureCreator(\n", diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index 70f5ad150f5d..b1a6789560ea 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -45,7 +45,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2 docker" + "# %pip install pyautogen~=0.2.0b4 docker" ] }, { @@ -156,7 +156,7 @@ " llm_config={\n", " \"temperature\": 0,\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": config_list,\n", " \"functions\": [\n", " {\n", diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index d3f485305c83..3ea61fcc359d 100644 --- a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -45,7 +45,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -209,7 +209,7 @@ " name=\"assistant\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 41,\n", + " \"cache_seed\": 41,\n", " \"config_list\": config_list,\n", " 
\"temperature\": 0,\n", " },\n", diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index 0f29374f2920..b170d2ec63f4 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -44,7 +44,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2" + "# %pip install pyautogen~=0.2.0b4" ] }, { @@ -161,7 +161,7 @@ " system_message=\"You are a helpful assistant. Reply TERMINATE when the task is done.\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", " \"functions\": [\n", diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 849b4f4c2f5c..2dc5339544c4 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -49,7 +49,7 @@ }, "outputs": [], "source": [ - "# %pip install pyautogen~=0.2.0b2 docker" + "# %pip install pyautogen~=0.2.0b4 docker" ] }, { @@ -79,7 +79,7 @@ "\n", "llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", "}" diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb index 24973e3d9085..3c020b4ab1a9 100644 --- a/notebook/oai_openai_utils.ipynb +++ b/notebook/oai_openai_utils.ipynb @@ -39,7 +39,7 @@ " name=\"assistant\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", " },\n", @@ -60,7 +60,7 @@ "metadata": {}, "outputs": [], "source": [ - "# ! pip install pyautogen" + "# %pip install \"pyautogen~=0.2.0b4\"" ] }, { @@ -227,7 +227,7 @@ " name=\"3.5-assistant\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": cheap_config_list,\n", " \"temperature\": 0,\n", " },\n", @@ -238,7 +238,7 @@ " name=\"4-assistant\",\n", " llm_config={\n", " \"timeout\": 600,\n", - " \"seed\": 42,\n", + " \"cache_seed\": 42,\n", " \"config_list\": costly_config_list,\n", " \"temperature\": 0,\n", " },\n", diff --git a/test/agentchat/test_assistant_agent.py b/test/agentchat/test_assistant_agent.py index 61510a44f9c7..361c71f56fc1 100644 --- a/test/agentchat/test_assistant_agent.py +++ b/test/agentchat/test_assistant_agent.py @@ -31,7 +31,7 @@ def test_ai_user_proxy_agent(): system_message="You are a helpful assistant.", llm_config={ "timeout": 600, - "seed": 42, + "cache_seed": 42, "config_list": config_list, }, ) @@ -78,7 +78,7 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5): }, ) llm_config = { - "seed": 42, + "cache_seed": 42, "config_list": config_list, "max_tokens": 1024, } @@ -125,7 +125,7 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re # autogen.ChatCompletion.start_logging(conversations) llm_config = { "timeout": 600, - "seed": 42, + "cache_seed": 42, "config_list": config_list, } assistant = AssistantAgent( diff --git a/test/agentchat/test_async.py b/test/agentchat/test_async.py index ba332b253797..76176131015e 100644 --- a/test/agentchat/test_async.py +++ b/test/agentchat/test_async.py @@ -71,7 +71,7 @@ async def add_stock_price_data(): name="assistant", llm_config={ "timeout": 600, - "seed": 41, + "cache_seed": 41, "config_list": config_list, "temperature": 0, }, diff --git a/test/agentchat/test_async_get_human_input.py b/test/agentchat/test_async_get_human_input.py index 
fc8a8e315044..2daca19f2d90 100644 --- a/test/agentchat/test_async_get_human_input.py +++ b/test/agentchat/test_async_get_human_input.py @@ -16,7 +16,7 @@ async def test_async_get_human_input(): assistant = autogen.AssistantAgent( name="assistant", max_consecutive_auto_reply=2, - llm_config={"timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0}, + llm_config={"timeout": 600, "cache_seed": 41, "config_list": config_list, "temperature": 0}, ) user_proxy = autogen.UserProxyAgent(name="user", human_input_mode="ALWAYS", code_execution_config=False) diff --git a/test/agentchat/test_function_call_groupchat.py b/test/agentchat/test_function_call_groupchat.py index fefb620ad1c6..484149035621 100644 --- a/test/agentchat/test_function_call_groupchat.py +++ b/test/agentchat/test_function_call_groupchat.py @@ -30,7 +30,7 @@ def get_random_number(): ) llm_config = { "config_list": config_list_gpt4, - "seed": 42, + "cache_seed": 42, "functions": [ { "name": "get_random_number", diff --git a/test/agentchat/test_math_user_proxy_agent.py b/test/agentchat/test_math_user_proxy_agent.py index 2975e8d0f166..565aa80eb36c 100644 --- a/test/agentchat/test_math_user_proxy_agent.py +++ b/test/agentchat/test_math_user_proxy_agent.py @@ -38,7 +38,7 @@ def test_math_user_proxy_agent(): system_message="You are a helpful assistant.", llm_config={ "timeout": 600, - "seed": 42, + "cache_seed": 42, "config_list": config_list, }, ) diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 5a5be0379a12..712ed553f37d 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -25,7 +25,7 @@ def colored(x, *args, **kwargs): assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. -seed = None +cache_seed = None # If int, cached LLM calls will be skipped and responses pulled from cache. None exposes LLM non-determinism. # Specify the model to use by uncommenting one of the following lines. 
@@ -43,7 +43,7 @@ def create_teachable_agent(reset_db=False, verbosity=0): config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC) teachable_agent = TeachableAgent( name="teachableagent", - llm_config={"config_list": config_list, "timeout": 120, "seed": seed}, + llm_config={"config_list": config_list, "timeout": 120, "cache_seed": cache_seed}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, diff --git a/test/oai/test_client.py b/test/oai/test_client.py index c74332588b6a..83e01bceddf5 100644 --- a/test/oai/test_client.py +++ b/test/oai/test_client.py @@ -18,7 +18,11 @@ def test_aoai_chat_completion(): filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo"]}, ) client = OpenAIWrapper(config_list=config_list) - response = client.create(messages=[{"role": "user", "content": "2+2="}]) + # for config in config_list: + # print(config) + # client = OpenAIWrapper(**config) + # response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None) + response = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None) print(response) print(client.extract_text_or_function_call(response)) diff --git a/website/docs/Installation.md b/website/docs/Installation.md index d03e50c1b50c..4100f1ffc23f 100644 --- a/website/docs/Installation.md +++ b/website/docs/Installation.md @@ -65,7 +65,10 @@ print(client.extract_text_or_function_call(response)) ``` - Inference parameter tuning and inference logging features are currently unavailable in `OpenAIWrapper`. Logging will be added in a future release. Inference parameter tuning can be done via [`flaml.tune`](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function). -- `use_cache` is removed as a kwarg in `OpenAIWrapper.create()` for being automatically decided by `seed`: int | None. +- `seed` in autogen is renamed to `cache_seed` to accommodate the newly added `seed` param in the openai chat completion API. `use_cache` is removed as a kwarg in `OpenAIWrapper.create()` because it is now decided automatically by `cache_seed`: int | None. The difference between autogen's `cache_seed` and openai's `seed` is as follows: + * autogen uses a local disk cache to guarantee that exactly the same output is produced for the same input; when the cache is hit, no openai API call is made. + * openai's `seed` is best-effort deterministic sampling with no guarantee of determinism. When using openai's `seed` with `cache_seed` set to None, an openai API call is made even for the same input, and there is no guarantee of getting exactly the same output. + ### Optional Dependencies - #### docker diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md index 5b5a1e81101c..ffd4fd60f789 100644 --- a/website/docs/Use-Cases/enhanced_inference.md +++ b/website/docs/Use-Cases/enhanced_inference.md @@ -137,19 +137,23 @@ For local LLMs, one can spin up an endpoint using a package like [FastChat](http ## Caching -API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed" specified in `OpenAIWrapper.create()` or the constructor of `OpenAIWrapper`. +API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. 
It still allows controlled randomness by setting `cache_seed` in `OpenAIWrapper.create()` or in the constructor of `OpenAIWrapper`. ```python -client = OpenAIWrapper(seed=...) +client = OpenAIWrapper(cache_seed=...) client.create(...) ``` ```python client = OpenAIWrapper() -client.create(seed=..., ...) +client.create(cache_seed=..., ...) ``` -Caching is enabled by default with seed 41. To disable it please set `seed` to None. +Caching is enabled by default with `cache_seed` 41. To disable it, set `cache_seed` to None. + +_NOTE_. openai v1.1 introduces a new parameter, `seed`. The difference between autogen's `cache_seed` and openai's `seed` is as follows: +* autogen uses a local disk cache to guarantee that exactly the same output is produced for the same input; when the cache is hit, no openai API call is made. +* openai's `seed` is best-effort deterministic sampling with no guarantee of determinism. When using openai's `seed` with `cache_seed` set to None, an openai API call is made even for the same input, and there is no guarantee of getting exactly the same output. ## Error handling
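For reference, a minimal sketch of the renamed parameter in use after this change; the `OAI_CONFIG_LIST` file, the prompt, and the agent setup are placeholders rather than part of the diff above:

```python
import autogen
from autogen import OpenAIWrapper

# Placeholder config source; any valid OAI_CONFIG_LIST works here.
config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")

# cache_seed=42 stores and reads responses under .cache/42, so an identical
# request is served from the local disk cache without an API call.
client = OpenAIWrapper(config_list=config_list, cache_seed=42)
response = client.create(messages=[{"role": "user", "content": "2+2="}])
print(client.extract_text_or_function_call(response))

# cache_seed=None disables caching for this call; an API request is always made.
fresh = client.create(messages=[{"role": "user", "content": "2+2="}], cache_seed=None)

# Agents accept the same key inside llm_config (formerly "seed").
assistant = autogen.AssistantAgent(
    name="assistant",
    llm_config={"cache_seed": 42, "config_list": config_list, "temperature": 0},
)
```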