From 9b58f8d3b53f5ebe88c5a2f58f189b72bf4a59f9 Mon Sep 17 00:00:00 2001
From: cpcdoy
Date: Sun, 7 Jan 2024 17:15:05 +0100
Subject: [PATCH 1/6] Add: Usage of new debug output for easier prompt
 implementation

---
 notebooks/implement_new_model_prompt.ipynb | 312 +++++++++++++++++++++
 1 file changed, 312 insertions(+)
 create mode 100644 notebooks/implement_new_model_prompt.ipynb

diff --git a/notebooks/implement_new_model_prompt.ipynb b/notebooks/implement_new_model_prompt.ipynb
new file mode 100644
index 000000000..1fda781ae
--- /dev/null
+++ b/notebooks/implement_new_model_prompt.ipynb
@@ -0,0 +1,312 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Implementing a new model prompt with debug output"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## The new prompt format we want to use\n",
+    "\n",
+    "As an example, we will implement the [Orca Mini 3B](https://huggingface.co/pankajmathur/orca_mini_3b) prompt.\n",
+    "\n",
+    "The prompt looks like this:\n",
+    "\n",
+    "```Python\n",
+    "prompt = \"### System:\\n{system}\\n\\n### User:\\n{instruction}\\n\\n### Response:\\n\"\n",
+    "```\n",
+    "\n",
+    "Where `system` is the system prompt and `instruction` is the user's instruction."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 69,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from guidance import gen, system, user, assistant\n",
+    "\n",
+    "# Custom prompt implementation\n",
+    "from guidance.models.transformers._transformers import Transformers, TransformersChat\n",
+    "\n",
+    "class Orca(Transformers):\n",
+    "    pass\n",
+    "\n",
+    "class OrcaChat(TransformersChat, Orca):\n",
+    "    def get_role_start(self, role_name, **kwargs):\n",
+    "        if role_name == \"system\":\n",
+    "            return \"### System:\\n\"\n",
+    "        elif role_name == \"user\":\n",
+    "            if str(self).endswith(\"\\n\\n### User:\\n\"):\n",
+    "                return \"\" # the system role's closer already emitted the '### User:' header, so there is nothing to add\n",
+    "            else:\n",
+    "                return \"### System:\\n\"\n",
+    "        else:\n",
+    "            return \" \"\n",
+    "\n",
+    "    def get_role_end(self, role_name=None):\n",
+    "        if role_name == \"system\":\n",
+    "            return \"\\n\\n### User:\\n\"\n",
+    "        elif role_name == \"user\":\n",
+    "            return \"\\n\\n### Response:\\n\"\n",
+    "        else:\n",
+    "            return \" \"\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading the new OrcaChat model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 70,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "orca = OrcaChat('pankajmathur/orca_mini_3b', torch_dtype=torch.float16, device_map='auto')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### With full debug output enabled"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 87,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div style='
system
### System:\n", + "You are a cat expert.\n", + "\n", + "### User:\n", + "
user
What are the smallest cats?\n", + "\n", + "### Response:\n", + "
assistant
(This is a question that I have been asked many times, and I have not answered it
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system(debug=True):\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user(debug=True):\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant(debug=True):\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With no debug output enabled\n", + "\n", + "It is hard to see if our prompt is correctly being used without the debug output" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### With granular debug output enabled" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here we can try to activate only part of the prompt output, to see how Guidance actually generates it and from which blocks (system/user/assistant)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only System prompt debug:" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
system
### System:\n", + "You are a cat expert.\n", + "\n", + "### User:\n", + "
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system(debug=True):\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only User prompt debug:" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
system
You are a cat expert.
user
What are the smallest cats?\n", + "\n", + "### Response:\n", + "
assistant
(This is a question that I have been asked many times, and I have not answered it
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user(debug=True):\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Only Assistant prompt debug:" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant(debug=True):\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 02689d02b379a6533cf1fa218066b07c248c4bd9 Mon Sep 17 00:00:00 2001 From: cpcdoy Date: Sun, 7 Jan 2024 17:15:42 +0100 Subject: [PATCH 2/6] Add: debug parameter with a nice centered orange text display --- guidance/library/_role.py | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/guidance/library/_role.py b/guidance/library/_role.py index 220b32f6b..93cb3950c 100644 --- a/guidance/library/_role.py +++ b/guidance/library/_role.py @@ -2,16 +2,42 @@ from ._block import block @guidance -def role_opener(lm, role_name, **kwargs): +def role_opener(lm, role_name, debug=False, **kwargs): if not hasattr(lm, "get_role_start"): raise Exception(f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?") - lm += f"<||_html:
{role_name.lower()}
_||>" - lm += "<||_#NODISP_||>" + lm.get_role_start(role_name, **kwargs) + "<||_/NODISP_||>" + lm += f"<||_html:
{role_name.lower()}
_||>" + + if debug: + lm += f"<||_html:_||>" + else: + lm += "<||_#NODISP_||>" + + lm += lm.get_role_start(role_name, **kwargs) + + if debug: + lm += "<||_html:_||>" + else: + lm += "<||_/NODISP_||>" + return lm @guidance -def role_closer(lm, role_name, **kwargs): - lm += "<||_html:
_||>" + "<||_#NODISP_||>" + lm.get_role_end(role_name) + "<||_/NODISP_||>" +def role_closer(lm, role_name, debug=False, **kwargs): + + if debug: + lm += f"<||_html:_||>" + else: + lm += "<||_#NODISP_||>" + + lm += lm.get_role_end(role_name) + + if debug: + lm += "<||_html:_||>" + else: + lm += "<||_/NODISP_||>" + + lm += "<||_html:
_||>" + return lm def role(role_name, text=None, **kwargs): From 69acbbb21ef9b05929119ad2745fbefff5022111 Mon Sep 17 00:00:00 2001 From: cpcdoy Date: Sun, 7 Jan 2024 17:35:33 +0100 Subject: [PATCH 3/6] Add: nicer block color + formatting --- guidance/library/_role.py | 58 +++++++++++++++++++++++++++------------ 1 file changed, 41 insertions(+), 17 deletions(-) diff --git a/guidance/library/_role.py b/guidance/library/_role.py index 93cb3950c..794ad8070 100644 --- a/guidance/library/_role.py +++ b/guidance/library/_role.py @@ -1,63 +1,87 @@ import guidance from ._block import block +nodisp_start = "<||_#NODISP_||>" +nodisp_end = "<||_/NODISP_||>" +span_start = "<||_html:_||>" +span_end = "<||_html:_||>" + + @guidance def role_opener(lm, role_name, debug=False, **kwargs): if not hasattr(lm, "get_role_start"): - raise Exception(f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?") + raise Exception( + f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?" + ) + + # Block start container (centers elements) lm += f"<||_html:
{role_name.lower()}
_||>" - + + # Start of either debug or HTML no disp block if debug: - lm += f"<||_html:_||>" + lm += span_start else: - lm += "<||_#NODISP_||>" + lm += nodisp_start lm += lm.get_role_start(role_name, **kwargs) - + + # End of either debug or HTML no disp block if debug: - lm += "<||_html:_||>" + lm += span_end else: - lm += "<||_/NODISP_||>" + lm += nodisp_end return lm + @guidance def role_closer(lm, role_name, debug=False, **kwargs): - + # Start of either debug or HTML no disp block if debug: - lm += f"<||_html:_||>" + lm += span_start else: - lm += "<||_#NODISP_||>" - + lm += nodisp_start + lm += lm.get_role_end(role_name) - + + # End of either debug or HTML no disp block if debug: - lm += "<||_html:_||>" + lm += span_end else: - lm += "<||_/NODISP_||>" + lm += nodisp_end + # End of top container lm += "<||_html:
_||>" return lm + def role(role_name, text=None, **kwargs): if text is None: - return block(opener=role_opener(role_name, **kwargs), closer=role_closer(role_name, **kwargs)) + return block( + opener=role_opener(role_name, **kwargs), + closer=role_closer(role_name, **kwargs), + ) else: assert False - #return self.append(open_text + text + close_text) + # return self.append(open_text + text + close_text) + def system(text=None, **kwargs): return role("system", text, **kwargs) + def user(text=None, **kwargs): return role("user", text, **kwargs) + def assistant(text=None, **kwargs): return role("assistant", text, **kwargs) + def function(text=None, **kwargs): return role("function", text, **kwargs) + def instruction(text=None, **kwargs): - return role("instruction", text, **kwargs) \ No newline at end of file + return role("instruction", text, **kwargs) From 21848370bd707c1c634cf6702df76766685c2c26 Mon Sep 17 00:00:00 2001 From: cpcdoy Date: Sun, 7 Jan 2024 17:35:56 +0100 Subject: [PATCH 4/6] Add: render latest debug color --- notebooks/implement_new_model_prompt.ipynb | 60 ++++++++++++++-------- 1 file changed, 39 insertions(+), 21 deletions(-) diff --git a/notebooks/implement_new_model_prompt.ipynb b/notebooks/implement_new_model_prompt.ipynb index 1fda781ae..9a817934b 100644 --- a/notebooks/implement_new_model_prompt.ipynb +++ b/notebooks/implement_new_model_prompt.ipynb @@ -26,7 +26,7 @@ }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -68,13 +68,23 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/cpcdoy/.pyenv/versions/3.11.4/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import torch\n", "\n", - "orca = OrcaChat('pankajmathur/orca_mini_3b', torch_dtype=torch.float16, device_map='auto')" + "orca = OrcaChat('pankajmathur/orca_mini_3b', torch_dtype=torch.float16, device_map='auto')\n", + "# orca = OrcaChat('gpt2', device_map='auto') # Can use a small mock model while iterating on the prompt" ] }, { @@ -86,20 +96,20 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
system
### System:\n", - "You are a cat expert.\n", + "
system
### System:\n", + "You are a cat expert.\n", "\n", "### User:\n", - "
user
What are the smallest cats?\n", + "
user
What are the smallest cats?\n", "\n", "### Response:\n", - "
assistant
(This is a question that I have been asked many times, and I have not answered it
" + "
assistant
(This is a question that I have been asked many times, and I have not answered it
" ], "text/plain": [ "" @@ -121,17 +131,18 @@ ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 1, "metadata": {}, + "outputs": [], "source": [ - "### With no debug output enabled\n", - "\n", - "It is hard to see if our prompt is correctly being used without the debug output" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -181,14 +192,14 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
system
### System:\n", - "You are a cat expert.\n", + "
system
### System:\n", + "You are a cat expert.\n", "\n", "### User:\n", "
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" @@ -221,13 +232,13 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
system
You are a cat expert.
user
What are the smallest cats?\n", + "
system
You are a cat expert.
user
What are the smallest cats?\n", "\n", "### Response:\n", "
assistant
(This is a question that I have been asked many times, and I have not answered it
" @@ -260,13 +271,13 @@ }, { "cell_type": "code", - "execution_count": 95, + "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ - "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" + "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" ], "text/plain": [ "" @@ -286,6 +297,13 @@ "with assistant(debug=True):\n", " lm += gen(\"answer\", stop=\".\", max_tokens=20)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From 4584bb2cd13522475317a6de63b4c3b916663a0b Mon Sep 17 00:00:00 2001 From: Scott Lundberg Date: Wed, 10 Jan 2024 03:58:51 +0000 Subject: [PATCH 5/6] Convert the syntax to use with blocks, and fix some context block ordering bugs --- guidance/library/__init__.py | 3 +- guidance/library/_role.py | 40 ++- guidance/library/_set_var.py | 21 ++ guidance/models/_model.py | 57 ++-- guidance/models/transformers/__init__.py | 3 +- notebooks/implement_new_model_prompt.ipynb | 330 -------------------- notebooks/tutorials/adding_new_models.ipynb | 298 ++++++++++++++++++ 7 files changed, 381 insertions(+), 371 deletions(-) create mode 100644 guidance/library/_set_var.py delete mode 100644 notebooks/implement_new_model_prompt.ipynb create mode 100644 notebooks/tutorials/adding_new_models.ipynb diff --git a/guidance/library/__init__.py b/guidance/library/__init__.py index 59d52df98..45dd5a1b0 100644 --- a/guidance/library/__init__.py +++ b/guidance/library/__init__.py @@ -13,9 +13,10 @@ # context blocks from ._block import block -from ._role import role, system, assistant, user, function, instruction +from ._role import role, system, assistant, user, function, instruction, indent_roles from ._format import monospace from ._silent import silent +from ._set_var import set_var # from ..models._model import context_free # stateless library functions diff --git a/guidance/library/_role.py b/guidance/library/_role.py index 794ad8070..c2b4cffb3 100644 --- a/guidance/library/_role.py +++ b/guidance/library/_role.py @@ -1,5 +1,6 @@ import guidance from ._block import block +from ._set_var import set_var nodisp_start = "<||_#NODISP_||>" nodisp_end = "<||_/NODISP_||>" @@ -8,50 +9,54 @@ @guidance -def role_opener(lm, role_name, debug=False, **kwargs): +def role_opener(lm, role_name, **kwargs): + indent = lm.get("__role_indent", True) if not hasattr(lm, "get_role_start"): raise Exception( f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?" ) # Block start container (centers elements) - lm += f"<||_html:
{role_name.lower()}
_||>" + if indent: + lm += f"<||_html:
{role_name.lower()}
_||>" # Start of either debug or HTML no disp block - if debug: - lm += span_start - else: + if indent: lm += nodisp_start + else: + lm += span_start lm += lm.get_role_start(role_name, **kwargs) # End of either debug or HTML no disp block - if debug: - lm += span_end - else: + if indent: lm += nodisp_end + else: + lm += span_end return lm @guidance -def role_closer(lm, role_name, debug=False, **kwargs): +def role_closer(lm, role_name, **kwargs): + indent = lm.get("__role_indent", True) # Start of either debug or HTML no disp block - if debug: - lm += span_start - else: + if indent: lm += nodisp_start + else: + lm += span_start lm += lm.get_role_end(role_name) # End of either debug or HTML no disp block - if debug: - lm += span_end - else: + if indent: lm += nodisp_end + else: + lm += span_end # End of top container - lm += "<||_html:
_||>" + if indent: + lm += "<||_html:
_||>" return lm @@ -85,3 +90,6 @@ def function(text=None, **kwargs): def instruction(text=None, **kwargs): return role("instruction", text, **kwargs) + +def indent_roles(indent=True): + return set_var("__role_indent", indent) \ No newline at end of file diff --git a/guidance/library/_set_var.py b/guidance/library/_set_var.py new file mode 100644 index 000000000..0d39a2408 --- /dev/null +++ b/guidance/library/_set_var.py @@ -0,0 +1,21 @@ +import guidance +from ._block import block + +@guidance +def set_opener(lm, name, value): + if name in lm: + lm = lm.set("__save" + name, lm[name]) + return lm.set(name, value) + +@guidance +def set_closer(lm, name): + if "__save" + name in lm: + return lm.set(name, lm["__save" + name]).remove("__save" + name) + else: + return lm.remove(name) + +def set_var(name, value=True): + return block( + opener=set_opener(name, value), + closer=set_closer(name), + ) \ No newline at end of file diff --git a/guidance/models/_model.py b/guidance/models/_model.py index 605186fab..fee51a2cd 100644 --- a/guidance/models/_model.py +++ b/guidance/models/_model.py @@ -252,31 +252,44 @@ def __add__(self, value): # inside this context we are free to drop display calls that come too close together with throttle_refresh(): - # close any newly closed contexts + # find what new blocks need to be applied + new_blocks = [] + for context in Model.open_blocks: + if context not in lm.opened_blocks: + new_blocks.append(context) + + # mark this so we don't re-add when computing the opener or closer (even though we don't know the close text yet) + lm.opened_blocks[context] = (0, "") + + # find what old blocks need to be removed + old_blocks = [] for context in list(reversed(lm.opened_blocks)): if context not in Model.open_blocks and context in lm.opened_blocks: - pos, close_text = lm.opened_blocks[context] # save so we can delete it before adding it - if context.name is not None: - lm._variables[context.name] = format_pattern.sub("", lm._state[pos:]) + old_blocks.append((lm.opened_blocks[context], context)) + + # delete this so we don't re-close when computing the opener or closer del lm.opened_blocks[context] - lm._inplace_append(close_text) + + # close any newly closed contexts + for (pos, close_text), context in old_blocks: + if context.name is not None: + lm._variables[context.name] = format_pattern.sub("", lm._state[pos:]) + lm += context.closer # apply any newly opened contexts (new from this object's perspective) - for context in Model.open_blocks: - if context not in lm.opened_blocks: - lm.opened_blocks[context] = (0, "") # mark this so we don't readd when computing the opener (even though we don't know the close text yet) - lm += context.opener - with grammar_only(): - tmp = lm + context.closer - close_text = tmp._state[len(lm._state):] # get the new state added by calling the closer - lm.opened_blocks[context] = (len(lm._state), close_text) - - # clear out names that we override - if context.name is not None: - if context.name in lm._variables: - del lm._variables[context.name] - if context.name in lm._variables_log_probs: - del lm._variables_log_probs[context.name] + for context in new_blocks: + lm += context.opener + with grammar_only(): + tmp = lm + context.closer + close_text = tmp._state[len(lm._state):] # get the new state added by calling the closer + lm.opened_blocks[context] = (len(lm._state), close_text) + + # clear out names that we override + if context.name is not None: + if context.name in lm._variables: + del lm._variables[context.name] + if context.name in 
lm._variables_log_probs: + del lm._variables_log_probs[context.name] # wrap raw string values if isinstance(value, str): @@ -957,9 +970,7 @@ def __call__(self, grammar, max_tokens=1000000, n=1, top_p=1, temperature=0.0, e # self._cache_state["new_token_ids"].append(sampled_token_ind) # capture the named groups from the parse tree - new_captured_data, new_captured_log_prob_data = parser.get_captures() - captured_data.update(new_captured_data) - captured_log_prob_data.update(new_captured_log_prob_data) + parser.get_captures(captured_data, captured_log_prob_data) # we have no valid log prob data if we didn't compute it yield new_bytes[hidden_count:], is_generated, new_bytes_prob, captured_data, captured_log_prob_data, token_count - last_token_count diff --git a/guidance/models/transformers/__init__.py b/guidance/models/transformers/__init__.py index f6afc28b4..0b62d8483 100644 --- a/guidance/models/transformers/__init__.py +++ b/guidance/models/transformers/__init__.py @@ -1 +1,2 @@ -from ._llama import Llama, LlamaChat \ No newline at end of file +from ._llama import Llama, LlamaChat +from ._transformers import Transformers, TransformersChat \ No newline at end of file diff --git a/notebooks/implement_new_model_prompt.ipynb b/notebooks/implement_new_model_prompt.ipynb deleted file mode 100644 index 9a817934b..000000000 --- a/notebooks/implement_new_model_prompt.ipynb +++ /dev/null @@ -1,330 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Implementing a new model prompt with debug output" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## The new prompt format we want to use\n", - "\n", - "As an example, we will implement the [Orca Mini 3B](https://huggingface.co/pankajmathur/orca_mini_3b) prompt.\n", - "\n", - "The prompt looks like this:\n", - "\n", - "```Python\n", - "prompt = \"### System:\\n{system}\\n\\n### User:\\n{instruction}\\n\\n### Response:\\n\"\n", - "```\n", - "\n", - "Where `system` is the system prompt and `instruction` is the user input instructions." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from guidance import gen, system, user, assistant\n", - "\n", - "# Custom prompt implementation\n", - "from guidance.models.transformers._transformers import Transformers, TransformersChat\n", - "\n", - "class Orca(Transformers):\n", - " pass\n", - "\n", - "class OrcaChat(TransformersChat, Orca):\n", - " def get_role_start(self, role_name, **kwargs):\n", - " if role_name == \"system\":\n", - " return \"### System:\\n\"\n", - " elif role_name == \"user\":\n", - " if str(self).endswith(\"\\n\\n### User:\\n\"):\n", - " return \"\" # we don't need to start anything if we are starting with a top level unnested system tag\n", - " else:\n", - " return \"### System:\\n\"\n", - " else:\n", - " return \" \"\n", - "\n", - " def get_role_end(self, role_name=None):\n", - " if role_name == \"system\":\n", - " return \"\\n\\n### User:\\n\"\n", - " elif role_name == \"user\":\n", - " return \"\\n\\n### Response:\\n\"\n", - " else:\n", - " return \" \"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Loading the new OrcaChat model" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/cpcdoy/.pyenv/versions/3.11.4/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. 
Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "import torch\n", - "\n", - "orca = OrcaChat('pankajmathur/orca_mini_3b', torch_dtype=torch.float16, device_map='auto')\n", - "# orca = OrcaChat('gpt2', device_map='auto') # Can use a small mock model while iterating on the prompt" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### With full debug output enabled" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
system
### System:\n", - "You are a cat expert.\n", - "\n", - "### User:\n", - "
user
What are the smallest cats?\n", - "\n", - "### Response:\n", - "
assistant
(This is a question that I have been asked many times, and I have not answered it
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with system(debug=True):\n", - " lm = orca + \"You are a cat expert.\"\n", - "\n", - "with user(debug=True):\n", - " lm += \"What are the smallest cats?\"\n", - "\n", - "with assistant(debug=True):\n", - " lm += gen(\"answer\", stop=\".\", max_tokens=20)" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with system():\n", - " lm = orca + \"You are a cat expert.\"\n", - "\n", - "with user():\n", - " lm += \"What are the smallest cats?\"\n", - "\n", - "with assistant():\n", - " lm += gen(\"answer\", stop=\".\", max_tokens=20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### With granular debug output enabled" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Here we can try to activate only part of the prompt output, to see how Guidance actually generates it and from which blocks (system/user/assistant)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Only System prompt debug:" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
system
### System:\n", - "You are a cat expert.\n", - "\n", - "### User:\n", - "
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with system(debug=True):\n", - " lm = orca + \"You are a cat expert.\"\n", - "\n", - "with user():\n", - " lm += \"What are the smallest cats?\"\n", - "\n", - "with assistant():\n", - " lm += gen(\"answer\", stop=\".\", max_tokens=20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Only User prompt debug:" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
system
You are a cat expert.
user
What are the smallest cats?\n", - "\n", - "### Response:\n", - "
assistant
(This is a question that I have been asked many times, and I have not answered it
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with system():\n", - " lm = orca + \"You are a cat expert.\"\n", - "\n", - "with user(debug=True):\n", - " lm += \"What are the smallest cats?\"\n", - "\n", - "with assistant():\n", - " lm += gen(\"answer\", stop=\".\", max_tokens=20)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Only Assistant prompt debug:" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
system
You are a cat expert.
user
What are the smallest cats?
assistant
(This is a question that I have been asked many times, and I have not answered it
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "with system():\n", - " lm = orca + \"You are a cat expert.\"\n", - "\n", - "with user():\n", - " lm += \"What are the smallest cats?\"\n", - "\n", - "with assistant(debug=True):\n", - " lm += gen(\"answer\", stop=\".\", max_tokens=20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.4" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/tutorials/adding_new_models.ipynb b/notebooks/tutorials/adding_new_models.ipynb new file mode 100644 index 000000000..ac1539d21 --- /dev/null +++ b/notebooks/tutorials/adding_new_models.ipynb @@ -0,0 +1,298 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Adding support for a new models\n", + "\n", + "Different models are tuned with different role prompt formats. If the model you are using is not already a subclass of `guidance.Model`, you can define your own new subclass with whatever role prompt format you want. Then you can use the guidance role tags and they will get translated into the correct prompt format." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Orca Mini Chat Example\n", + "\n", + "As an example of how this works, below we implement the [Orca Mini 3B](https://huggingface.co/pankajmathur/orca_mini_3b) prompt. The prompt looks like this:\n", + "\n", + "`### System:\\n{system}\\n\\n### User:\\n{instruction}\\n\\n### Response:\\n\"`\n", + "\n", + "Where `system` is the system prompt and `instruction` is the user input instructions." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Define a new OrcaChat class" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from guidance import models\n", + "\n", + "# this is our new chat model that inherits from the TransformersChat model and redefines role starts and ends\n", + "class OrcaChat(models.transformers.TransformersChat):\n", + " def get_role_start(self, role_name, **kwargs):\n", + " if role_name == \"system\":\n", + " return \"### System:\\n\"\n", + " elif role_name == \"user\":\n", + " if str(self).endswith(\"\\n\\n### User:\\n\"):\n", + " return \"\" # we don't need to start anything if we are starting with a top level unnested system tag\n", + " else:\n", + " return \"### System:\\n\"\n", + " else:\n", + " return \" \"\n", + "\n", + " def get_role_end(self, role_name=None):\n", + " if role_name == \"system\":\n", + " return \"\\n\\n### User:\\n\"\n", + " elif role_name == \"user\":\n", + " return \"\\n\\n### Response:\\n\"\n", + " else:\n", + " return \" \"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load the model" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You are using the default legacy behaviour of the . This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. 
If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thouroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "5698dac461ba479ba684d12328f7947c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/3 [00:00### System:\n", + "You are a cat expert.\n", + "\n", + "### User:\n", + "What are the smallest cats?\n", + "\n", + "### Response:\n", + " The smallest cats are the dwarf cats, which include the following breeds:\n", + "\n", + "1
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from guidance import gen, system, user, assistant, indent_roles\n", + "\n", + "with indent_roles(False):\n", + " with system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + " with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + " with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Note that you can also just print the string representation of the model for a truely raw view of the model's context:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "### System:\n", + "You are a cat expert.\n", + "\n", + "### User:\n", + "What are the smallest cats?\n", + "\n", + "### Response:\n", + " The smallest cats are the dwarf cats, which include the following breeds:\n", + "\n", + "1 \n" + ] + } + ], + "source": [ + "print(str(lm))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can also change just a single role tag:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
### System:\n",
+       "You are a cat expert.\n",
+       "\n",
+       "### User:\n",
+       "
user
What are the smallest cats?
assistant
The smallest cats are the dwarf cats, which include the following breeds:\n", + "\n", + "1
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with indent_roles(False), system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Normal use\n", + "\n", + "When you are satisfied with the correctness of your role formatting you can then use the model like normal:" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
system
You are a cat expert.
user
What are the smallest cats?
assistant
The smallest cats are the dwarf cats, which include the following breeds:\n", + "\n", + "1
" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "with system():\n", + " lm = orca + \"You are a cat expert.\"\n", + "\n", + "with user():\n", + " lm += \"What are the smallest cats?\"\n", + "\n", + "with assistant():\n", + " lm += gen(\"answer\", stop=\".\", max_tokens=20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "
\n", + "
Have an idea for more helpful examples? Pull requests that add to this documentation notebook are encouraged!
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 47b3339e804a6b0b51fd04b9d9de6be70ea5214e Mon Sep 17 00:00:00 2001 From: Scott Lundberg Date: Wed, 10 Jan 2024 04:38:38 +0000 Subject: [PATCH 6/6] Switch to using attribute instead of variable to track indentation --- guidance/library/__init__.py | 1 + guidance/library/_role.py | 8 +++---- guidance/library/_set_attribute.py | 21 +++++++++++++++++ guidance/models/_model.py | 26 +++++++++++++++++++++ notebooks/tutorials/adding_new_models.ipynb | 2 +- 5 files changed, 53 insertions(+), 5 deletions(-) create mode 100644 guidance/library/_set_attribute.py diff --git a/guidance/library/__init__.py b/guidance/library/__init__.py index 45dd5a1b0..3afe63bf3 100644 --- a/guidance/library/__init__.py +++ b/guidance/library/__init__.py @@ -17,6 +17,7 @@ from ._format import monospace from ._silent import silent from ._set_var import set_var +from ._set_attribute import set_attribute # from ..models._model import context_free # stateless library functions diff --git a/guidance/library/_role.py b/guidance/library/_role.py index c2b4cffb3..f879a7ee8 100644 --- a/guidance/library/_role.py +++ b/guidance/library/_role.py @@ -1,6 +1,6 @@ import guidance from ._block import block -from ._set_var import set_var +from ._set_attribute import set_attribute nodisp_start = "<||_#NODISP_||>" nodisp_end = "<||_/NODISP_||>" @@ -10,7 +10,7 @@ @guidance def role_opener(lm, role_name, **kwargs): - indent = lm.get("__role_indent", True) + indent = getattr(lm, "indent_roles", True) if not hasattr(lm, "get_role_start"): raise Exception( f"You need to use a chat model in order the use role blocks like `with {role_name}():`! Perhaps you meant to use the {type(lm).__name__}Chat class?" 
@@ -39,7 +39,7 @@ def role_closer(lm, role_name, **kwargs):
 
 @guidance
 def role_closer(lm, role_name, **kwargs):
-    indent = lm.get("__role_indent", True)
+    indent = getattr(lm, "indent_roles", True)
     # Start of either debug or HTML no disp block
     if indent:
         lm += nodisp_start
@@ -92,4 +92,4 @@ def instruction(text=None, **kwargs):
     return role("instruction", text, **kwargs)
 
 def indent_roles(indent=True):
-    return set_var("__role_indent", indent)
\ No newline at end of file
+    return set_attribute("indent_roles", indent)
\ No newline at end of file
diff --git a/guidance/library/_set_attribute.py b/guidance/library/_set_attribute.py
new file mode 100644
index 000000000..c2b0b3292
--- /dev/null
+++ b/guidance/library/_set_attribute.py
@@ -0,0 +1,21 @@
+import guidance
+from ._block import block
+
+@guidance
+def set_attr_opener(lm, name, value):
+    if hasattr(lm, name):
+        lm = lm.setattr("__save" + name, getattr(lm, name))
+    return lm.setattr(name, value)
+
+@guidance
+def set_attr_closer(lm, name):
+    if hasattr(lm, "__save" + name):
+        return lm.setattr(name, getattr(lm, "__save" + name)).delattr("__save" + name)
+    else:
+        return lm.delattr(name)
+
+def set_attribute(name, value=True):
+    return block(
+        opener=set_attr_opener(name, value),
+        closer=set_attr_closer(name),
+    )
\ No newline at end of file
diff --git a/guidance/models/_model.py b/guidance/models/_model.py
index fee51a2cd..9d02833a1 100644
--- a/guidance/models/_model.py
+++ b/guidance/models/_model.py
@@ -380,6 +380,32 @@ def get(self, key, default=None):
             The value to return if the variable is not current set.
         '''
         return self._variables.get(key, default)
+    
+    def setattr(self, key, value):
+        '''Return a new model with the given model attribute set.
+
+        Parameters
+        ----------
+        key : str
+            The name of the attribute to be set.
+        value : any
+            The value to set the attribute to.
+        '''
+        copy = self.copy()
+        setattr(copy, key, value)
+        return copy
+
+    def delattr(self, key):
+        '''Return a new model with the given attribute deleted.
+
+        Parameters
+        ----------
+        key : str
+            The attribute name to remove.
+        '''
+        copy = self.copy()
+        delattr(copy, key)
+        return copy
 
     def set(self, key, value):
         '''Return a new model with the given variable value set.
diff --git a/notebooks/tutorials/adding_new_models.ipynb b/notebooks/tutorials/adding_new_models.ipynb
index ac1539d21..e7ba334be 100644
--- a/notebooks/tutorials/adding_new_models.ipynb
+++ b/notebooks/tutorials/adding_new_models.ipynb
@@ -81,7 +81,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5698dac461ba479ba684d12328f7947c",
+       "model_id": "3e2eb7c444ba4d92a5f29593faa919e9",
        "version_major": 2,
        "version_minor": 0
       },
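
Likewise, a short sketch of the `set_var` block introduced earlier in this series, which plays the same role for prompt variables instead of attributes; the variable name is illustrative:

```python
from guidance import models, user, set_var

# placeholder model path, as above
orca = models.transformers.TransformersChat("pankajmathur/orca_mini_3b")

with set_var("topic", "cats"):
    with user():
        lm = orca + "What are the smallest cats?"
    # the opener ran on the append above, so the variable is visible now
    assert lm.get("topic") == "cats"
# context blocks are applied lazily: on the next append outside the block the
# closer removes the variable (or restores a previously saved value)
```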