diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..12d2c76 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +*egg-info +**/*/__pycache__ +**/.vscode +**/.pdm.toml +**/.venv +**/pdm.lock \ No newline at end of file diff --git a/README.md b/README.md index c56d45e..bd04bca 100644 --- a/README.md +++ b/README.md @@ -1 +1,12 @@ # 极纳 x 百度飞桨黑客马拉松 + +## 实现 + +1. [src/uie](./src/uie/) Jina 的 UIE Executor & Flow 实现、功能测试、可视化页面 + +![](https://user-images.githubusercontent.com/53158137/221882155-05a23b18-2007-4321-a0bb-961eebe7439b.png) +![](https://user-images.githubusercontent.com/53158137/221882528-14070e16-4829-4552-ad7e-88fe78ea1e23.png) + +2. [src/plato-mini](./src/plato-mini/) Jina 的 Plato-Mini Executor & Flow 实现、功能测试 + +![](https://user-images.githubusercontent.com/53158137/221911101-f6f053a9-6c78-40da-ba36-3be1c49cce3c.png) diff --git a/rfcs/.gitkeep b/rfcs/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/rfcs/plato-mini.md b/rfcs/plato-mini.md new file mode 100644 index 0000000..24bbb98 --- /dev/null +++ b/rfcs/plato-mini.md @@ -0,0 +1,27 @@ +# Jina X Plato + +实现一个 requests 以 schema 为参数,对输入的 DocArray 进行提取 history 对话信息 + +flows 中定义服务如下: + +| 协议名称 | 端口号 | +| --------- | ------ | +| grpc | 12344 | +| http | 12345 | +| websocket | 12346 | + +## 测试 + +1. 运行服务端 + +``` +pdm start +``` + +2. 运行客户端 + +``` +pdm test +``` + +即可进行对话 diff --git a/src/.gitkeep b/src/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/src/plato-mini/README.md b/src/plato-mini/README.md new file mode 100644 index 0000000..86dd534 --- /dev/null +++ b/src/plato-mini/README.md @@ -0,0 +1,41 @@ +## Jina X plato-mini + +## GetStart + +please install pdm first: `pip install pdm` (Pdm is a modern Python package and +dependency manager supporting the latest PEP standards. 
) + +after that: + +``` +pdm prepare +``` + +this command can auto install dependencies in virtual env + +then: + +``` +pdm start +``` + +this command starts a jina flow server + +then: + +``` +pdm test +``` + +this command starts a chat with plato-mini: + +``` +=====================Test calling===================== +Test http: 你好,你是做什么的? +Test grpc: 你好,你是做什么工作的? +Test websocket: 你好,你是哪里人啊? +======================Plato chat====================== +我: hi +Plato: 你好,你是做什么的? +我: +``` diff --git a/src/plato-mini/pyproject.toml b/src/plato-mini/pyproject.toml new file mode 100644 index 0000000..64e6786 --- /dev/null +++ b/src/plato-mini/pyproject.toml @@ -0,0 +1,27 @@ +[tool.pdm] + +[project] +name = "jina-paddle-hackathon-PlatoXL" +version = "0.1.0" +description = "" +authors = [ + {name = "", email = "gonorth@qq.com"}, +] +dependencies = [ + "jina>=3.14.1", + "paddlepaddle>=2.4.2", + "paddlenlp>=2.5.1", + "autopep8>=2.0.1", + "pip>=23.0.1", +] +requires-python = ">=3.10" +readme = "README.md" +license = {text = "MIT"} + +[tool.pdm.scripts] +dev = "" +start = "python src/flow.py" +test = "python src/main.py" +create_venv = "pdm venv create 3.10" +activate_venv = "pdm venv activate in-project" +prepare = {composite = ["create_venv", "activate_venv", "pdm install"]} diff --git a/src/plato-mini/src/flow.py b/src/plato-mini/src/flow.py new file mode 100644 index 0000000..a03c27a --- /dev/null +++ b/src/plato-mini/src/flow.py @@ -0,0 +1,8 @@ +from jina import Flow +from plato_executor.executor import PlatoXLExecutor + +f = Flow(port=[12345, 12344, 12346], protocol=["http", "grpc", "websocket"], cors=True).add( + name='uie', uses=PlatoXLExecutor) + +with f: + f.block() diff --git a/src/plato-mini/src/main.py b/src/plato-mini/src/main.py new file mode 100644 index 0000000..2a4c1fb --- /dev/null +++ b/src/plato-mini/src/main.py @@ -0,0 +1,24 @@ +from jina import Client, Document, DocumentArray + +ppmap = { + "http": 12345, + "grpc": 12344, + "websocket": 12346 +} + 
"""Jina Executor that answers a dialogue with the PaddleNLP ``plato-mini`` model."""
from typing import Any, Dict, List

import paddle
from jina import Deployment, Document, DocumentArray, Executor, requests
from paddlenlp.transformers import (UnifiedTransformerLMHeadModel,
                                    UnifiedTransformerTokenizer)

paddle.set_default_dtype("float32")
if paddle.device.cuda.device_count() > 0:
    # Paddle addresses CUDA devices as "gpu:<index>"; the string "cuda:0" is
    # not a device name that ``set_device`` accepts.
    paddle.device.set_device("gpu:0")


class PlatoXLExecutor(Executor):
    """Executor that generates the next utterance of a conversation.

    Every incoming Document is treated as one utterance of the dialogue
    history (in order); the response holds a single Document whose ``text``
    is the generated reply.
    """

    def __init__(self, **kwargs):
        """Load the pretrained ``plato-mini`` tokenizer and model.

        Args:
            **kwargs: Forwarded unchanged to ``Executor.__init__``.
        """
        super().__init__(**kwargs)
        self.tokenizer = UnifiedTransformerTokenizer.from_pretrained(
            'plato-mini')
        self.model = UnifiedTransformerLMHeadModel.from_pretrained(
            'plato-mini')
        # The executor only serves predictions, so keep the model in
        # evaluation mode (no dropout while generating).
        self.model.eval()

    @requests
    async def dialog_predict(
        self,
        docs: DocumentArray,
        parameters: Dict[str, Any],
        **kwargs
    ) -> DocumentArray:
        """Generate a reply for the dialogue history carried by ``docs``.

        Args:
            docs: Documents whose ``text`` fields form the dialogue history,
                oldest first.
            parameters: Request parameters, forwarded as keyword arguments to
                ``self.model.generate``.
            **kwargs: Remaining arguments supplied by Jina; unused.

        Returns:
            A DocumentArray with one Document containing the generated reply,
            or an empty DocumentArray when the request carries no documents.
        """
        # Nothing to encode for an empty request; answer with no documents
        # instead of failing inside the tokenizer.
        if not docs:
            return DocumentArray([])

        history: List[str] = [doc.text for doc in docs]
        encoded = self.tokenizer.dialogue_encode(
            history,
            add_start_token_as_response=True,
            return_tensors=True,
            is_split_into_words=False,
        )
        # ``generate`` returns (ids, scores); only the ids are needed here.
        output_ids, _ = self.model.generate(**encoded, **(parameters or {}))

        reply = self._decode_response(output_ids.numpy()[0])
        return DocumentArray([Document(text=reply)])

    def _decode_response(self, token_ids) -> str:
        """Turn the generated token ids of one sequence into reply text.

        The ids are cut at the first separator token (everything from that
        token on is dropped), converted to tokens, and sub-word pieces are
        merged before joining.
        """
        sep_id = self.tokenizer.sep_token_id
        eos_pos = next(
            (i for i, tok_id in enumerate(token_ids) if tok_id == sep_id),
            len(token_ids),
        )
        tokens = self.tokenizer.convert_ids_to_tokens(token_ids[:eos_pos])
        tokens = self.tokenizer.merge_subword(tokens)
        return ''.join(tokens)


if __name__ == "__main__":
    # Manual smoke test: serve the executor locally and send one utterance.
    with Deployment(uses=PlatoXLExecutor) as dep:
        response_docs = dep.post(
            on='/', inputs=DocumentArray([Document(text='你好!')]))
        print(f'Text: {response_docs[0].text}')