diff --git a/.github/workflows/pypi-publish.yml b/.github/workflows/pypi-publish.yml index 597e0db..d72a7d3 100644 --- a/.github/workflows/pypi-publish.yml +++ b/.github/workflows/pypi-publish.yml @@ -19,6 +19,9 @@ jobs: python -m pip install --upgrade pip pip install poetry poetry-dynamic-versioning poetry install + - name: Copy README + run: | + cp README.md doccano_mini/docs/ - name: Build a binary wheel and a source tarball run: | poetry build diff --git a/README.md b/README.md index c047fb9..6794994 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ The editor also supports pasting in tabular data from Google Sheets, Excel, and In this step, we will test your task. We can enter a new test to the text box and click the `Predict` button. Then, we can see the result of the test. -![Step2](https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.jpg) +![Step2](https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.gif) ### Step3: Download the config @@ -57,5 +57,5 @@ chain.run("YOUR TEXT") ```bash poetry install -streamlit run doccano_mini/app.py +streamlit run doccano_mini/home.py ``` diff --git a/doccano_mini/app.py b/doccano_mini/app.py deleted file mode 100644 index cc8f647..0000000 --- a/doccano_mini/app.py +++ /dev/null @@ -1,131 +0,0 @@ -import os - -import streamlit as st -from langchain.chains import LLMChain -from langchain.llms import OpenAI - -from doccano_mini.examples import select_example_df -from doccano_mini.prompts import select_prompt_maker -from doccano_mini.tasks import TaskType, options - -CODE = """from langchain.chains import load_chain - -chain = load_chain("chain.yaml") -chain.run("YOUR TEXT")""" - - -# https://platform.openai.com/docs/models/gpt-3-5 -AVAILABLE_MODELS = ( - "gpt-3.5-turbo", - "gpt-3.5-turbo-0301", - "text-davinci-003", - "text-davinci-002", - "code-davinci-002", -) - - -def display_download_button(): - st.header("Download LangChain's config") - with open("config.yaml", "r", encoding="utf-8") as f: - st.download_button( - label="Download", - 
data=f, - file_name="config.yaml", - ) - - -def task_classification(task: TaskType): - st.header("Annotate your data") - df = select_example_df(task) - edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) - examples = edited_df.to_dict(orient="records") - - # Create prompt - prompt = select_prompt_maker(task)(examples) - - st.header("Optional: Edit instruction") - with st.expander("See instruction"): - instruction = st.text_area(label="Instruction", value=prompt.prefix, height=200) - prompt.prefix = instruction - - st.header("Test") - api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password") - col1, col2 = st.columns([3, 1]) - text = col1.text_area(label="Please enter your text.", value="", height=300) - - # Use text-davinci-003 by default. - model_name = col2.selectbox("Model", AVAILABLE_MODELS, index=2) - temperature = col2.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) - top_p = col2.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) - - with st.expander("See your prompt"): - st.markdown(f"```\n{prompt.format(input=text)}\n```") - - if st.button("Predict"): - llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key) # type:ignore - chain = LLMChain(llm=llm, prompt=prompt) - response = chain.run(text) - label = response.split(":")[1] - st.text(label) - - chain.save("config.yaml") - display_download_button() - - -def task_free(task: TaskType): - st.header("Annotate your data") - - num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10) - columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, int(num_cols) + 1)] - - df = select_example_df(task) - df = df.reindex(columns, axis="columns", fill_value="") - edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) - examples = edited_df.to_dict(orient="records") - - prompt = 
select_prompt_maker(task)(examples) - - prompt.prefix = st.text_area( - label="Enter task instruction", - placeholder=f"Predict {columns[-1]} based on {', '.join(columns[:-1])}.", - height=200, - ) - - inputs = {column: st.text_input(f"Input for {column}:") for column in columns[:-1]} - - st.markdown(f"Your prompt\n```\n{prompt.format(**inputs)}\n```") - - # Use text-davinci-003 by default. - api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password") - model_name = st.selectbox("Model", AVAILABLE_MODELS, index=2) - temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) - top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) - if st.button("Predict"): - llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key) # type:ignore - chain = LLMChain(llm=llm, prompt=prompt) - response = chain.run(**inputs) - st.text(response) - - chain.save("config.yaml") - display_download_button() - - -def main(): - st.set_page_config(page_title="doccano-mini", page_icon=":memo:") - - st.title("doccano-mini") - - st.header("Select your task") - task = st.selectbox("", options=options, label_visibility="collapsed") - - if task == TaskType.TEXT_CLASSIFICATION.value: - task_classification(task) - else: - task_free(task) - - st.header("Usage") - st.code(CODE) - - -if __name__ == "__main__": - main() diff --git a/doccano_mini/cli.py b/doccano_mini/cli.py index bc6547e..e2f81cc 100644 --- a/doccano_mini/cli.py +++ b/doccano_mini/cli.py @@ -5,7 +5,7 @@ def main(): - filepath = str(Path(__file__).parent.resolve() / "app.py") + filepath = str(Path(__file__).parent.resolve() / "home.py") sys.argv = ["streamlit", "run", filepath, "--global.developmentMode=false"] sys.exit(stcli.main()) diff --git a/doccano_mini/components.py b/doccano_mini/components.py new file mode 100644 index 0000000..3bbb601 --- /dev/null +++ b/doccano_mini/components.py @@ -0,0 +1,21 
@@ +import streamlit as st + +CODE = """from langchain.chains import load_chain + +chain = load_chain("chain.yaml") +chain.run("YOUR TEXT")""" + + +def display_download_button(): + st.header("Download a config file") + with open("config.yaml", "r", encoding="utf-8") as f: + st.download_button( + label="Download", + data=f, + file_name="config.yaml", + ) + + +def display_usage(): + st.header("Usage") + st.code(CODE) diff --git a/doccano_mini/docs/.gitkeep b/doccano_mini/docs/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/doccano_mini/examples.py b/doccano_mini/examples.py index bafecbe..5d78531 100644 --- a/doccano_mini/examples.py +++ b/doccano_mini/examples.py @@ -1,20 +1,18 @@ import pandas as pd -from .tasks import TaskType +def make_classification_example() -> pd.DataFrame: + df = pd.DataFrame( + [ + {"text": "That would be awesome!", "label": "positive"}, + {"text": "This is awful!", "label": "negative"}, + {"text": "Today is hot day.", "label": "neutral"}, + ], + columns=["text", "label"], + ) + return df -def select_example_df(task: TaskType) -> pd.DataFrame: - if task == TaskType.TEXT_CLASSIFICATION.value: - df = pd.DataFrame( - [ - {"text": "That would be awesome!", "label": "positive"}, - {"text": "This is awful!", "label": "negative"}, - {"text": "Today is hot day.", "label": "neutral"}, - ], - columns=["text", "label"], - ) - return df - elif task == TaskType.TASK_FREE.value: - df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"]) - return df - raise ValueError(f"Task {task} is not supported.") + +def make_task_free_example() -> pd.DataFrame: + df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"]) + return df diff --git a/doccano_mini/home.py b/doccano_mini/home.py new file mode 100644 index 0000000..0fca07d --- /dev/null +++ b/doccano_mini/home.py @@ -0,0 +1,19 @@ +from pathlib import Path + +import streamlit as st + + +def main(): + 
st.set_page_config(page_title="doccano-mini", page_icon=":memo:") + filepath = Path(__file__).parent.resolve() / "docs" / "README.md" + + # Development + if not filepath.exists(): + filepath = Path(__file__).parent.parent.resolve() / "README.md" + + with filepath.open("r", encoding="utf-8") as f: + st.markdown(f.read(), unsafe_allow_html=True) + + +if __name__ == "__main__": + main() diff --git a/doccano_mini/models.py b/doccano_mini/models.py new file mode 100644 index 0000000..806b101 --- /dev/null +++ b/doccano_mini/models.py @@ -0,0 +1,8 @@ +# https://platform.openai.com/docs/models/gpt-3-5 +AVAILABLE_MODELS = ( + "gpt-3.5-turbo", + "gpt-3.5-turbo-0301", + "text-davinci-003", + "text-davinci-002", + "code-davinci-002", +) diff --git a/doccano_mini/pages/01_Text_Classification.py b/doccano_mini/pages/01_Text_Classification.py new file mode 100644 index 0000000..71285f2 --- /dev/null +++ b/doccano_mini/pages/01_Text_Classification.py @@ -0,0 +1,48 @@ +import os + +import streamlit as st +from langchain.chains import LLMChain +from langchain.llms import OpenAI + +from doccano_mini.components import display_download_button, display_usage +from doccano_mini.examples import make_classification_example +from doccano_mini.models import AVAILABLE_MODELS +from doccano_mini.prompts import make_classification_prompt + +st.title("Text Classification") +st.header("Annotate your data") +df = make_classification_example() +edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) +examples = edited_df.to_dict(orient="records") + +# Create prompt +prompt = make_classification_prompt(examples) + +st.header("Optional: Edit instruction") +with st.expander("See instruction"): + instruction = st.text_area(label="Instruction", value=prompt.prefix, height=200) + prompt.prefix = instruction + +st.header("Test") +api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password") +col1, col2 = st.columns([3, 1]) +text = 
col1.text_area(label="Please enter your text.", value="", height=300) + +# Use text-davinci-003 by default. +model_name = col2.selectbox("Model", AVAILABLE_MODELS, index=2) +temperature = col2.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) +top_p = col2.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) + +with st.expander("See your prompt"): + st.markdown(f"```\n{prompt.format(input=text)}\n```") + +if st.button("Predict"): + llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key) # type:ignore + chain = LLMChain(llm=llm, prompt=prompt) + response = chain.run(text) + label = response.split(":")[1] + st.text(label) + + chain.save("config.yaml") + display_download_button() +display_usage() diff --git a/doccano_mini/pages/09_Task_Free.py b/doccano_mini/pages/09_Task_Free.py new file mode 100644 index 0000000..2b9c34c --- /dev/null +++ b/doccano_mini/pages/09_Task_Free.py @@ -0,0 +1,47 @@ +import os + +import streamlit as st +from langchain.chains import LLMChain +from langchain.llms import OpenAI + +from doccano_mini.components import display_download_button, display_usage +from doccano_mini.examples import make_task_free_example +from doccano_mini.models import AVAILABLE_MODELS +from doccano_mini.prompts import make_task_free_prompt + +st.title("Task Free") +st.header("Annotate your data") +num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10) +columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, int(num_cols) + 1)] + +df = make_task_free_example() +df = df.reindex(columns, axis="columns", fill_value="") +edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000) +examples = edited_df.to_dict(orient="records") + +prompt = make_task_free_prompt(examples) + +prompt.prefix = st.text_area( + label="Enter task instruction", + placeholder=f"Predict {columns[-1]} based on {', '.join(columns[:-1])}.", + height=200, +) 
+ +inputs = {column: st.text_input(f"Input for {column}:") for column in columns[:-1]} + +st.markdown(f"Your prompt\n```\n{prompt.format(**inputs)}\n```") + +# Use text-davinci-003 by default. +api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password") +model_name = st.selectbox("Model", AVAILABLE_MODELS, index=2) +temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01) +top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01) +if st.button("Predict"): + llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key) # type:ignore + chain = LLMChain(llm=llm, prompt=prompt) + response = chain.run(**inputs) + st.text(response) + + chain.save("config.yaml") + display_download_button() +display_usage() diff --git a/doccano_mini/prompts.py b/doccano_mini/prompts.py index 17c0fca..786e614 100644 --- a/doccano_mini/prompts.py +++ b/doccano_mini/prompts.py @@ -3,8 +3,6 @@ from langchain.prompts.few_shot import FewShotPromptTemplate from langchain.prompts.prompt import PromptTemplate -from .tasks import TaskType - def make_classification_prompt(examples: List[dict]) -> FewShotPromptTemplate: unique_labels = set([example["label"] for example in examples]) @@ -39,11 +37,3 @@ def make_task_free_prompt(examples: List[dict]) -> FewShotPromptTemplate: input_variables=columns[:-1], ) return prompt - - -def select_prompt_maker(task: TaskType): - if task == TaskType.TEXT_CLASSIFICATION.value: - return make_classification_prompt - elif task == TaskType.TASK_FREE.value: - return make_task_free_prompt - raise ValueError(f"Task {task} is not supported.") diff --git a/doccano_mini/tasks.py b/doccano_mini/tasks.py deleted file mode 100644 index c00fb5c..0000000 --- a/doccano_mini/tasks.py +++ /dev/null @@ -1,9 +0,0 @@ -from enum import Enum - - -class TaskType(Enum): - TEXT_CLASSIFICATION = "Text Classification" - TASK_FREE = "Task Free" - - -options = 
[task_type.value for task_type in TaskType]