
Enhancement/multiple-pages #15

Merged · 3 commits · Mar 24, 2023
3 changes: 3 additions & 0 deletions .github/workflows/pypi-publish.yml
@@ -19,6 +19,9 @@ jobs:
        python -m pip install --upgrade pip
        pip install poetry poetry-dynamic-versioning
        poetry install
+   - name: Copy README
+     run: |
+       cp README.md doccano_mini/docs/
    - name: Build a binary wheel and a source tarball
      run: |
        poetry build
4 changes: 2 additions & 2 deletions README.md
@@ -40,7 +40,7 @@ The editor also supports pasting in tabular data from Google Sheets, Excel, and

In this step, we will test your task. Enter a new example in the text box and click the `Predict` button to see the result.

-![Step2](https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.jpg)
+<img src="https://raw.githubusercontent.com/doccano/doccano-mini/master/docs/images/test_new_example.jpg" alt="Step2" width="700">

### Step3: Download the config

@@ -57,5 +57,5 @@ chain.run("YOUR TEXT")

```bash
poetry install
-streamlit run doccano_mini/app.py
+streamlit run doccano_mini/home.py
```
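
For reference, the config downloaded in Step 3 is an ordinary LangChain chain file, so it can be reused outside the app exactly as the README's usage snippet suggests. A minimal sketch of that reuse, assuming `chain.yaml` sits in the working directory and `OPENAI_API_KEY` is set in the environment:

```python
# Minimal sketch: reuse the chain config exported by doccano-mini.
# Assumes chain.yaml is in the current directory and OPENAI_API_KEY is set.
from langchain.chains import load_chain

chain = load_chain("chain.yaml")
print(chain.run("That would be awesome!"))
```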
131 changes: 0 additions & 131 deletions doccano_mini/app.py

This file was deleted.

2 changes: 1 addition & 1 deletion doccano_mini/cli.py
@@ -5,7 +5,7 @@


def main():
-    filepath = str(Path(__file__).parent.resolve() / "app.py")
+    filepath = str(Path(__file__).parent.resolve() / "home.py")
    sys.argv = ["streamlit", "run", filepath, "--global.developmentMode=false"]
    sys.exit(stcli.main())

21 changes: 21 additions & 0 deletions doccano_mini/components.py
@@ -0,0 +1,21 @@
import streamlit as st

CODE = """from langchain.chains import load_chain

chain = load_chain("chain.yaml")
chain.run("YOUR TEXT")"""


def display_download_button():
    st.header("Download a config file")
    with open("config.yaml", "r", encoding="utf-8") as f:
        st.download_button(
            label="Download",
            data=f,
            file_name="config.yaml",
        )


def display_usage():
    st.header("Usage")
    st.code(CODE)
Empty file added doccano_mini/docs/.gitkeep
Empty file.
30 changes: 14 additions & 16 deletions doccano_mini/examples.py
@@ -1,20 +1,18 @@
import pandas as pd

-from .tasks import TaskType

+def make_classification_example() -> pd.DataFrame:
+    df = pd.DataFrame(
+        [
+            {"text": "That would be awesome!", "label": "positive"},
+            {"text": "This is awful!", "label": "negative"},
+            {"text": "Today is hot day.", "label": "neutral"},
+        ],
+        columns=["text", "label"],
+    )
+    return df

-def select_example_df(task: TaskType) -> pd.DataFrame:
-    if task == TaskType.TEXT_CLASSIFICATION.value:
-        df = pd.DataFrame(
-            [
-                {"text": "That would be awesome!", "label": "positive"},
-                {"text": "This is awful!", "label": "negative"},
-                {"text": "Today is hot day.", "label": "neutral"},
-            ],
-            columns=["text", "label"],
-        )
-        return df
-    elif task == TaskType.TASK_FREE.value:
-        df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
-        return df
-    raise ValueError(f"Task {task} is not supported.")

+def make_task_free_example() -> pd.DataFrame:
+    df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
+    return df
19 changes: 19 additions & 0 deletions doccano_mini/home.py
@@ -0,0 +1,19 @@
from pathlib import Path

import streamlit as st


def main():
    st.set_page_config(page_title="doccano-mini", page_icon=":memo:")
    filepath = Path(__file__).parent.resolve() / "docs" / "README.md"

    # Development: fall back to the repository README when the packaged copy is absent
    if not filepath.exists():
        filepath = Path(__file__).parent.parent.resolve() / "README.md"

    with filepath.open("r", encoding="utf-8") as f:
        st.markdown(f.read(), unsafe_allow_html=True)


if __name__ == "__main__":
    main()
8 changes: 8 additions & 0 deletions doccano_mini/models.py
@@ -0,0 +1,8 @@
# https://platform.openai.com/docs/models/gpt-3-5
AVAILABLE_MODELS = (
"gpt-3.5-turbo",
"gpt-3.5-turbo-0301",
"text-davinci-003",
"text-davinci-002",
"code-davinci-002",
)
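
The pages below pick their default model by position (`st.selectbox(..., index=2)`), which resolves to `text-davinci-003` in this tuple. A hypothetical one-line sanity check of that mapping, not part of the PR:

```python
# Hypothetical sanity check (not part of the PR): index 2 of AVAILABLE_MODELS
# is "text-davinci-003", the default the pages select with index=2.
from doccano_mini.models import AVAILABLE_MODELS

assert AVAILABLE_MODELS[2] == "text-davinci-003"
```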
48 changes: 48 additions & 0 deletions doccano_mini/pages/01_Text_Classification.py
@@ -0,0 +1,48 @@
import os

import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from doccano_mini.components import display_download_button, display_usage
from doccano_mini.examples import make_classification_example
from doccano_mini.models import AVAILABLE_MODELS
from doccano_mini.prompts import make_classification_prompt

st.title("Text Classification")
st.header("Annotate your data")
df = make_classification_example()
edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000)
examples = edited_df.to_dict(orient="records")

# Create prompt
prompt = make_classification_prompt(examples)

st.header("Optional: Edit instruction")
with st.expander("See instruction"):
    instruction = st.text_area(label="Instruction", value=prompt.prefix, height=200)
    prompt.prefix = instruction

st.header("Test")
api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password")
col1, col2 = st.columns([3, 1])
text = col1.text_area(label="Please enter your text.", value="", height=300)

# Use text-davinci-003 by default.
model_name = col2.selectbox("Model", AVAILABLE_MODELS, index=2)
temperature = col2.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01)
top_p = col2.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01)

with st.expander("See your prompt"):
    st.markdown(f"```\n{prompt.format(input=text)}\n```")

if st.button("Predict"):
    llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key)  # type:ignore
    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(text)
    label = response.split(":")[1]
    st.text(label)

    chain.save("config.yaml")
    display_download_button()
    display_usage()
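
Note that the page keeps whatever follows the first `:` in the completion, which implies the few-shot prompt's suffix asks the model to answer in a `label: <value>` shape (the suffix itself is not shown in this diff). A hedged illustration of that parsing with an assumed completion string:

```python
# Hedged illustration (not repository code): parsing an assumed completion
# of the form "label: <value>", as the page's split(":")[1] expects.
response = "label: positive"  # assumed shape of the model's completion
label = response.split(":")[1]
print(label.strip())  # -> positive
```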
47 changes: 47 additions & 0 deletions doccano_mini/pages/09_Task_Free.py
@@ -0,0 +1,47 @@
import os

import streamlit as st
from langchain.chains import LLMChain
from langchain.llms import OpenAI

from doccano_mini.components import display_download_button, display_usage
from doccano_mini.examples import make_task_free_example
from doccano_mini.models import AVAILABLE_MODELS
from doccano_mini.prompts import make_task_free_prompt

st.title("Task Free")
st.header("Annotate your data")
num_cols = st.number_input("Set the number of columns", min_value=2, max_value=10)
columns = [st.text_input(f"Column {i}:", value=f"column {i}") for i in range(1, int(num_cols) + 1)]

df = make_task_free_example()
df = df.reindex(columns, axis="columns", fill_value="")
edited_df = st.experimental_data_editor(df, num_rows="dynamic", width=1000)
examples = edited_df.to_dict(orient="records")

prompt = make_task_free_prompt(examples)

prompt.prefix = st.text_area(
label="Enter task instruction",
placeholder=f"Predict {columns[-1]} based on {', '.join(columns[:-1])}.",
height=200,
)

inputs = {column: st.text_input(f"Input for {column}:") for column in columns[:-1]}

st.markdown(f"Your prompt\n```\n{prompt.format(**inputs)}\n```")

# Use text-davinci-003 by default.
api_key = st.text_input("Enter API key", value=os.environ.get("OPENAI_API_KEY", ""), type="password")
model_name = st.selectbox("Model", AVAILABLE_MODELS, index=2)
temperature = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.7, step=0.01)
top_p = st.slider("Top-p", min_value=0.0, max_value=1.0, value=1.0, step=0.01)
if st.button("Predict"):
    llm = OpenAI(model_name=model_name, temperature=temperature, top_p=top_p, openai_api_key=api_key)  # type:ignore
    chain = LLMChain(llm=llm, prompt=prompt)
    response = chain.run(**inputs)
    st.text(response)

    chain.save("config.yaml")
    display_download_button()
    display_usage()
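
The key step on this page is `df.reindex(columns, axis="columns", fill_value="")`, which rebuilds the starter table around whatever column names the user typed, with the last column treated as the prediction target. A small standalone sketch of that reindex, using hypothetical column names:

```python
# Standalone sketch of the reindex step with hypothetical column names.
import pandas as pd

df = pd.DataFrame([{"Column 1": "", "Column 2": ""}], columns=["Column 1", "Column 2"])
columns = ["question", "answer"]  # e.g. what a user might type above
df = df.reindex(columns, axis="columns", fill_value="")
print(df.columns.tolist())  # -> ['question', 'answer']; old columns dropped, new ones empty
```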
10 changes: 0 additions & 10 deletions doccano_mini/prompts.py
@@ -3,8 +3,6 @@
from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate

-from .tasks import TaskType


def make_classification_prompt(examples: List[dict]) -> FewShotPromptTemplate:
    unique_labels = set([example["label"] for example in examples])
@@ -39,11 +37,3 @@ def make_task_free_prompt(examples: List[dict]) -> FewShotPromptTemplate:
        input_variables=columns[:-1],
    )
    return prompt


-def select_prompt_maker(task: TaskType):
-    if task == TaskType.TEXT_CLASSIFICATION.value:
-        return make_classification_prompt
-    elif task == TaskType.TASK_FREE.value:
-        return make_task_free_prompt
-    raise ValueError(f"Task {task} is not supported.")
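
Only fragments of the prompt builders are visible in this hunk, so as a reading aid: a rough sketch of how a few-shot classification prompt like `make_classification_prompt` can be assembled with LangChain. The template strings and prefix wording here are assumptions; the repository's exact implementation may differ.

```python
# Rough sketch, not the repository's exact implementation. Builds a few-shot
# classification prompt from the rows edited in the data editor; the template
# and prefix wording are assumed.
from typing import List

from langchain.prompts.few_shot import FewShotPromptTemplate
from langchain.prompts.prompt import PromptTemplate


def make_classification_prompt_sketch(examples: List[dict]) -> FewShotPromptTemplate:
    unique_labels = set(example["label"] for example in examples)
    example_prompt = PromptTemplate(
        input_variables=["text", "label"],
        template="text: {text}\nlabel: {label}",
    )
    return FewShotPromptTemplate(
        examples=examples,
        example_prompt=example_prompt,
        prefix=f"Classify the text into one of the following labels: {', '.join(unique_labels)}.",
        suffix="text: {input}\nlabel:",
        input_variables=["input"],
    )
```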
9 changes: 0 additions & 9 deletions doccano_mini/tasks.py

This file was deleted.