
Enhancement/multiple-pages #15

Merged · 3 commits · Mar 24, 2023
3 changes: 3 additions & 0 deletions .github/workflows/pypi-publish.yml
@@ -19,6 +19,9 @@ jobs:
        python -m pip install --upgrade pip
        pip install poetry poetry-dynamic-versioning
        poetry install
+   - name: Copy README
+     run: |
+       cp README.md doccano_mini/docs/
    - name: Build a binary wheel and a source tarball
      run: |
        poetry build
4 changes: 2 additions & 2 deletions README.md
@@ -40,7 +40,7 @@ The editor also supports pasting in tabular data from Google Sheets, Excel, and

In this step, we will test your task. Enter a new example in the text box and click the `Predict` button to see the result.

-![Step2](https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.jpg)
+<img src="https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.jpg" alt="Step2" width="700">

### Step3: Download the config

@@ -57,5 +57,5 @@ chain.run("YOUR TEXT")

```bash
poetry install
-streamlit run doccano_mini/app.py
+streamlit run doccano_mini/home.py
```
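
For reference, the config downloaded in Step 3 is an ordinary LangChain chain file, so it can be reused outside the app exactly as the README's usage snippet suggests. A minimal sketch of that reuse, assuming `chain.yaml` sits in the working directory and `OPENAI_API_KEY` is set in the environment:

```python
# Minimal sketch: reuse the chain config exported by doccano-mini.
# Assumes chain.yaml is in the current directory and OPENAI_API_KEY is set.
from langchain.chains import load_chain

chain = load_chain("chain.yaml")
print(chain.run("That would be awesome!"))
```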
131 changes: 0 additions & 131 deletions doccano_mini/app.py

This file was deleted.

2 changes: 1 addition & 1 deletion doccano_mini/cli.py
@@ -5,7 +5,7 @@


def main():
-    filepath = str(Path(__file__).parent.resolve() / "app.py")
+    filepath = str(Path(__file__).parent.resolve() / "home.py")
    sys.argv = ["streamlit", "run", filepath, "--global.developmentMode=false"]
    sys.exit(stcli.main())

21 changes: 21 additions & 0 deletions doccano_mini/components.py
@@ -0,0 +1,21 @@
import streamlit as st

CODE = """from langchain.chains import load_chain

chain = load_chain("chain.yaml")
chain.run("YOUR TEXT")"""


def display_download_button():
    st.header("Download a config file")
    with open("config.yaml", "r", encoding="utf-8") as f:
        st.download_button(
            label="Download",
            data=f,
            file_name="config.yaml",
        )


def display_usage():
    st.header("Usage")
    st.code(CODE)
Empty file added doccano_mini/docs/.gitkeep
Empty file.
30 changes: 14 additions & 16 deletions doccano_mini/examples.py
@@ -1,20 +1,18 @@
import pandas as pd

-from .tasks import TaskType

+def make_classification_example() -> pd.DataFrame:
+    df = pd.DataFrame(
+        [
+            {"text": "That would be awesome!", "label": "positive"},
+            {"text": "This is awful!", "label": "negative"},
+            {"text": "Today is hot day.", "label": "neutral"},
+        ],
+        columns=["text", "label"],
+    )
+    return df

-def select_example_df(task: TaskType) -> pd.DataFrame:
-    if task == TaskType.TEXT_CLASSIFICATION.value:
-        df = pd.DataFrame(
-            [
-                {"text": "That would be awesome!", "label": "positive"},
-                {"text": "This is awful!", "label": "negative"},
-                {"text": "Today is hot day.", "label": "neutral"},
-            ],
-            columns=["text", "label"],
-        )
-        return df
-    elif task == TaskType.TASK_FREE.value:
-        df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
-        return df
-    raise ValueError(f"Task {task} is not supported.")

+def make_task_free_example() -> pd.DataFrame:
+    df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
+    return df
19 changes: 19 additions & 0 deletions doccano_mini/home.py
@@ -0,0 +1,19 @@
from pathlib import Path

import streamlit as st


def main():
    st.set_page_config(page_title="doccano-mini", page_icon=":memo:")
    filepath = Path(__file__).parent.resolve() / "docs" / "README.md"

    # Development: fall back to the repository README when the packaged copy is absent
    if not filepath.exists():
        filepath = Path(__file__).parent.parent.resolve() / "README.md"

    with filepath.open("r", encoding="utf-8") as f:
        st.markdown(f.read(), unsafe_allow_html=True)


if __name__ == "__main__":
    main()
8 changes: 8 additions & 0 deletions doccano_mini/models.py
@@ -0,0 +1,8 @@
# https://platform.openai.com/docs/models/gpt-3-5
AVAILABLE_MODELS = (
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"text-davinci-003",
"text-davinci-002",
"code-davinci-002",
)
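
The pages below pick their default model by position (`st.selectbox(..., index=2)`), which resolves to `text-davinci-003` in this tuple. A hypothetical one-line sanity check of that mapping, not part of the PR:

```python
# Hypothetical sanity check (not part of the PR): index 2 of AVAILABLE_MODELS
# is "text-davinci-003", the default the pages select with index=2.
from doccano_mini.models import AVAILABLE_MODELS

assert AVAILABLE_MODELS[2] == "text-davinci-003"
```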
48 changes: 48 additions & 0 deletions doccano_mini/pages/01_Text_Classification.py
@@ -0,0 +1,48 @@
import os

import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from doccano_mini.components import display_download_button, display_usage
from doccano_mini.examples import make_classification_example
from doccano_mini.models import AVAILABLE_MODELS
from doccano_mini.prompts import make_classification_prompt

st.title("Text Classification")
st.header("Annotate your data")
df = make_classification_example()
edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000)
examples = edited_df.to_dict(orient="records")

# Create prompt
prompt = make_classification_prompt(examples)

st.header("Optional: Edit instruction")
with st.expander("See instruction"):
    instruction = st.text_area(label="Instruction", value=prompt.prefix, height=200)
    prompt.prefix = instruction

st.header("Test")
api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password")
col1, col2 = st.columns([3, 1])
text = col1.text_area(label="Please enter your text.", value="", height=300)

# Use text-davinci-003 by default.
model_name = col2.selectbox("Model", AVAILABLE_MODELS, index=2)
temperature = col2.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01)
top_p = col2.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01)

with st.expander("See your prompt"):
    st.markdown(f"```\n{prompt.format(input=text)}\n```")

if st.button("Predict"):
    llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key)  # type:ignore
    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(text)
    label = response.split(":")[1]
    st.text(label)

    chain.save("config.yaml")
    display_download_button()
    display_usage()
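
Note that the page keeps whatever follows the first `:` in the completion, which implies the few-shot prompt's suffix asks the model to answer in a `label: <value>` shape (the suffix itself is not shown in this diff). A hedged illustration of that parsing with an assumed completion string:

```python
# Hedged illustration (not repository code): parsing an assumed completion
# of the form "label: <value>", as the page's split(":")[1] expects.
response = "label: positive"  # assumed shape of the model's completion
label = response.split(":")[1]
print(label.strip())  # -> positive
```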
47 changes: 47 additions & 0 deletions doccano_mini/pages/09_Task_Free.py
@@ -0,0 +1,47 @@
import os

import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from doccano_mini.components import display_download_button, display_usage
from doccano_mini.examples import make_task_free_example
from doccano_mini.models import AVAILABLE_MODELS
from doccano_mini.prompts import make_task_free_prompt

st.title("Task Free")
st.header("Annotate your data")
num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10)
columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, int(num_cols) + 1)]

df = make_task_free_example()
df = df.reindex(columns, axis="columns", fill_value="")
edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000)
examples = edited_df.to_dict(orient="records")

prompt = make_task_free_prompt(examples)

prompt.prefix = st.text_area(
label="Enter task instruction",
placeholder=f"Predict {columns[-1]} based on {', '.join(columns[:-1])}.",
height=200,
)

inputs = {column: st.text_input(f"Input for {column}:") for column in columns[:-1]}

st.markdown(f"Your prompt\n```\n{prompt.format(**inputs)}\n```")

# Use text-davinci-003 by default.
api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password")
model_name = st.selectbox("Model", AVAILABLE_MODELS, index=2)
temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01)
if st.button("Predict"):
    llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key)  # type:ignore
    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(**inputs)
    st.text(response)

    chain.save("config.yaml")
    display_download_button()
    display_usage()
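
The key step on this page is `df.reindex(columns, axis="columns", fill_value="")`, which rebuilds the starter table around whatever column names the user typed, with the last column treated as the prediction target. A small standalone sketch of that reindex, using hypothetical column names:

```python
# Standalone sketch of the reindex step with hypothetical column names.
import pandas as pd

df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
columns = ["question", "answer"]  # e.g. what a user might type above
df = df.reindex(columns, axis="columns", fill_value="")
print(df.columns.tolist())  # -> ['question', 'answer']; old columns dropped, new ones empty
```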
10 changes: 0 additions & 10 deletions doccano_mini/prompts.py
@@ -3,8 +3,6 @@
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate

-from .tasks import TaskType


def make_classification_prompt(examples: List[dict]) -> FewShotPromptTemplate:
    unique_labels = set([example["label"] for example in examples])
@@ -39,11 +37,3 @@ def make_task_free_prompt(examples: List[dict]) -> FewShotPromptTemplate:
        input_variables=columns[:-1],
    )
    return prompt


-def select_prompt_maker(task: TaskType):
-    if task == TaskType.TEXT_CLASSIFICATION.value:
-        return make_classification_prompt
-    elif task == TaskType.TASK_FREE.value:
-        return make_task_free_prompt
-    raise ValueError(f"Task {task} is not supported.")
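
Only fragments of the prompt builders are visible in this hunk, so as a reading aid: a rough sketch of how a few-shot classification prompt like `make_classification_prompt` can be assembled with LangChain. The template strings and prefix wording here are assumptions; the repository's exact implementation may differ.

```python
# Rough sketch, not the repository's exact implementation. Builds a few-shot
# classification prompt from the rows edited in the data editor; the template
# and prefix wording are assumed.
from typing import List

from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate


def make_classification_prompt_sketch(examples: List[dict]) -> FewShotPromptTemplate:
    unique_labels = set(example["label"] for example in examples)
    example_prompt = PromptTemplate(
        input_variables=["text", "label"],
        template="text: {text}\nlabel: {label}",
    )
    return FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=f"Classify the text into one of the following labels: {', '.join(unique_labels)}.",
        suffix="text: {input}\nlabel:",
        input_variables=["input"],
    )
```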
9 changes: 0 additions & 9 deletions doccano_mini/tasks.py

This file was deleted.