Merge pull request #251 from wangyijunlyy/new_version

update dashscope_tool: qwen_vl and wordart to new version
modelscope · Jan 9, 2024 · 812fe8d
2 parents 716aec8 + 0c966d0
commit 812fe8d
Show file tree

Hide file tree

Showing 10 changed files with 147 additions and 100 deletions.
diff --git a/modelscope_agent/tools/__init__.py b/modelscope_agent/tools/__init__.py
@@ -2,9 +2,22 @@
 from .base import TOOL_REGISTRY, BaseTool
 from .code_interpreter_jupyter import CodeInterpreterJupyter
 from .dashscope_tools.image_generation import TextToImageTool
+from .dashscope_tools.qwen_vl import QWenVL
 from .dashscope_tools.style_repaint import StyleRepaint
+from .dashscope_tools.wordart_tool import WordArtTexture
 from .langchain_tool import LangchainTool
+from .modelscope_tools.image_chat_tool import ImageChatTool
+from .modelscope_tools.pipeline_tool import ModelscopePipelineTool
+from .modelscope_tools.text_address_tool import TextAddressTool
+from .modelscope_tools.text_ie_tool import TextInfoExtractTool
+from .modelscope_tools.text_ner_tool import TextNerTool
+from .modelscope_tools.text_to_speech_tool import TexttoSpeechTool
+from .modelscope_tools.text_to_video_tool import TextToVideoTool
+from .modelscope_tools.translation_en2zh_tool import TranslationEn2ZhTool
+from .modelscope_tools.translation_zh2en_tool import TranslationZh2EnTool
 from .openapi_plugin import OpenAPIPluginTool
+from .similarity_search import SimilaritySearch
+from .storage import Storage
 
 
 def call_tool(plugin_name: str, plugin_args: str) -> str:

diff --git a/modelscope_agent/tools/dashscope_tools/qwen_vl.py b/modelscope_agent/tools/dashscope_tools/qwen_vl.py
@@ -1,53 +1,41 @@
 import os
-import time
 
-import json
-import requests
 from dashscope import MultiModalConversation
-from modelscope_agent.tools.tool import Tool, ToolSchema
-from pydantic import ValidationError
+from modelscope_agent.tools.base import BaseTool, register_tool
 from requests.exceptions import RequestException, Timeout
 
 MAX_RETRY_TIMES = 3
 WORK_DIR = os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace')
 
 
-class QWenVL(Tool):
+@register_tool('qwen_vl')
+class QWenVL(BaseTool):
     description = '调用qwen_vl api处理图片'
     name = 'qwen_vl'
     parameters: list = [{
         'name': 'image_file_path',
         'description': '用户上传的照片的相对路径',
-        'required': True
+        'required': True,
+        'type': 'string'
     }, {
         'name': 'text',
         'description': '用户针对上传图片的提问文本',
-        'required': True
+        'required': True,
+        'type': 'string'
     }]
 
-    def __init__(self, cfg={}):
-        self.cfg = cfg.get(self.name, {})
-        # remote call
-        self.token = self.cfg.get('token',
-                                  os.environ.get('DASHSCOPE_API_KEY', ''))
-        assert self.token != '', 'dashscope api token must be acquired'
-
+    def call(self, params: str, **kwargs) -> str:
+        # 检查环境变量中是否设置DASHSCOPE_API_KEY
         try:
-            all_param = {
-                'name': self.name,
-                'description': self.description,
-                'parameters': self.parameters
-            }
-            self.tool_schema = ToolSchema(**all_param)
-        except ValidationError:
-            raise ValueError(f'Error when parsing parameters of {self.name}')
-
-        self._str = self.tool_schema.model_dump_json()
-        self._function = self.parse_pydantic_model_to_openai_function(
-            all_param)
-
-    def __call__(self, *args, **kwargs):
-        remote_parsed_input = self._remote_parse_input(*args, **kwargs)
+            os.environ['DASHSCOPE_API_KEY']
+        except KeyError:
+            raise KeyError(
+                'API_KEY Error: DASHSCOPE_API_KEY environment variable is not set.'
+            )
+        params = self._verify_args(params)
+        if isinstance(params, str):
+            return 'Parameter Error'
+        remote_parsed_input = self._remote_parse_input(**params)
         """Sample of use local file.
         linux&mac file schema: file:///home/images/test.png
         windows file schema: file://D:/images/abc.png
@@ -58,7 +46,7 @@ def __call__(self, *args, **kwargs):
         while retry_times:
             retry_times -= 1
             try:
-                if local_file_path.endswith(('.jpeg', '.png', '.jpg')):
+                if local_file_path.lower().endswith(('.jpeg', '.png', '.jpg')):
                     messages = [{
                         'role':
                         'system',
@@ -73,14 +61,14 @@ def __call__(self, *args, **kwargs):
                                 'image': local_file_path
                             },
                             {
-                                'text': kwargs['text']
+                                'text': params['text']
                             },
                         ]
                     }]
                     response = MultiModalConversation.call(
                         model='qwen-vl-plus', messages=messages)
-                    final_result = self._parse_output(response)
-                    return final_result
+                    return response['output']['choices'][0]['message'][
+                        'content'][0]
                 else:
                     raise ValueError(
                         f'the file you upload: {local_file_path} is not an image file, \

diff --git a/modelscope_agent/tools/dashscope_tools/wordart_tool.py b/modelscope_agent/tools/dashscope_tools/wordart_tool.py
@@ -2,67 +2,53 @@
 import time
 
 import json
-import pandas as pd
 import requests
-from modelscope_agent.tools.tool import Tool, ToolSchema
-from pydantic import ValidationError
+from modelscope_agent.tools.base import BaseTool, register_tool
 from requests.exceptions import RequestException, Timeout
 
 MAX_RETRY_TIMES = 3
 
 
-class WordArtTexture(Tool):
+@register_tool('wordart_texture_generation')
+class WordArtTexture(BaseTool):
     description = '生成艺术字纹理图片'
     name = 'wordart_texture_generation'
     parameters: list = [{
         'name': 'input.text.text_content',
         'description': 'text that the user wants to convert to WordArt',
-        'required': True
+        'required': True,
+        'type': 'string'
     }, {
-        'name': 'input.agents',
+        'name': 'input.prompt',
         'description':
         'Users’ style requirements for word art may be requirements in terms of shape, color, entity, etc.',
-        'required': True
+        'required': True,
+        'type': 'string'
     }, {
         'name': 'input.texture_style',
         'description':
         'Type of texture style;Default is "material";If not provided by the user, \
             defaults to "material".Another value is scene.',
-        'required': True
+        'required': True,
+        'type': 'string'
     }, {
         'name': 'input.text.output_image_ratio',
         'description':
         'The aspect ratio of the text input image; the default is "1:1", \
             the available ratios are: "1:1", "16:9", "9:16";',
-        'required': True
+        'required': True,
+        'type': 'string'
     }]
 
-    def __init__(self, cfg={}):
-        self.cfg = cfg.get(self.name, {})
-        # remote call
-        self.url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture'
-        self.token = self.cfg.get('token',
-                                  os.environ.get('DASHSCOPE_API_KEY', ''))
-        assert self.token != '', 'dashscope api token must be acquired with wordart'
-
-        try:
-            all_param = {
-                'name': self.name,
-                'description': self.description,
-                'parameters': self.parameters
-            }
-            self.tool_schema = ToolSchema(**all_param)
-        except ValidationError:
-            raise ValueError(f'Error when parsing parameters of {self.name}')
-
-        self._str = self.tool_schema.model_dump_json()
-        self._function = self.parse_pydantic_model_to_openai_function(
-            all_param)
-
-    def __call__(self, *args, **kwargs):
-        remote_parsed_input = json.dumps(
-            self._remote_parse_input(*args, **kwargs))
+    def call(self, params: str, **kwargs) -> str:
+        params = self._verify_args(params)
+        if isinstance(params, str):
+            return 'Parameter Error'
+        remote_parsed_input = json.dumps(self._remote_parse_input(**params))
         origin_result = None
+        self.token = kwargs.get('token',
+                                os.environ.get('DASHSCOPE_API_KEY', ''))
+        assert self.token != '', 'dashscope api token must be acquired with wordart'
         retry_times = MAX_RETRY_TIMES
         headers = {
             'Content-Type': 'application/json',
@@ -75,16 +61,16 @@ def __call__(self, *args, **kwargs):
 
                 response = requests.request(
                     'POST',
-                    url=self.url,
+                    url=
+                    'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture',
                     headers=headers,
                     data=remote_parsed_input)
 
                 if response.status_code != requests.codes.ok:
                     response.raise_for_status()
                 origin_result = json.loads(response.content.decode('utf-8'))
 
-                self.final_result = self._parse_output(
-                    origin_result, remote=True)
+                self.final_result = origin_result
                 return self.get_wordart_result()
             except Timeout:
                 continue
@@ -108,15 +94,15 @@ def _remote_parse_input(self, *args, **kwargs):
                     temp_dict = temp_dict.setdefault(k, {})
                 temp_dict[keys[-1]] = value
             else:
-                # f the key does not contain ".", directly store the key-value pair into restored_dict
+                # if the key does not contain ".", directly store the key-value pair into restored_dict
                 restored_dict[key] = value
             kwargs = restored_dict
             kwargs['model'] = 'wordart-texture'
         print('传给tool的参数：', kwargs)
         return kwargs
 
     def get_result(self):
-        result_data = json.loads(json.dumps(self.final_result['result']))
+        result_data = json.loads(json.dumps(self.final_result))
         if 'task_id' in result_data['output']:
             task_id = result_data['output']['task_id']
         get_url = f'https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}'
@@ -131,9 +117,7 @@ def get_result(self):
                 if response.status_code != requests.codes.ok:
                     response.raise_for_status()
                 origin_result = json.loads(response.content.decode('utf-8'))
-
-                get_result = self._parse_output(origin_result, remote=True)
-                return get_result
+                return origin_result
             except Timeout:
                 continue
             except RequestException as e:
@@ -148,22 +132,23 @@ def get_result(self):
     def get_wordart_result(self):
         try:
             result = self.get_result()
-            print(result)
             while True:
-                result_data = result.get('result', {})
+                result_data = result
                 output = result_data.get('output', {})
                 task_status = output.get('task_status', '')
 
                 if task_status == 'SUCCEEDED':
                     print('任务已完成')
-                    return result
+                    # 取出result里url的部分，提高url图片展示稳定性
+                    output_url = result['output']['results'][0]['url']
+                    return output_url
 
                 elif task_status == 'FAILED':
-                    raise ('任务失败')
+                    raise Exception(output.get('message', '任务失败，请重试'))
                 else:
                     # 继续轮询，等待一段时间后再次调用
                     time.sleep(1)  # 等待 1 秒钟
                     result = self.get_result()
-
+                    print(f'Running:{result}')
         except Exception as e:
             print('get Remote Error:', str(e))
diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py
@@ -1,5 +1,5 @@
 from modelscope_agent.agent import Agent
-from modelscope_agent.tools.image_generation import TextToImageTool
+from modelscope_agent.tools import TextToImageTool
 
 from modelscope_agent.agents.role_play import RolePlay  # NOQA
 

diff --git a/tests/tools/test_local_tools.py b/tests/tools/test_local_tools.py
@@ -1,4 +1,4 @@
-from modelscope_agent.tools import ModelscopePipelineTool, Tool
+from modelscope_agent.tools import ModelscopePipelineTool
 
 
 def test_modelscope_pipline():

diff --git a/tests/tools/test_modelscope_tools.py b/tests/tools/test_modelscope_tools.py
@@ -4,7 +4,7 @@
 
 
 def test_modelscope_speech_generation():
-    from modelscope_agent.tools.text_to_speech_tool import TexttoSpeechTool
+    from modelscope_agent.tools import TexttoSpeechTool
     input = '北京今天天气怎样?'
     kwargs = {'input': input, 'gender': 'man'}
     txt2speech = TexttoSpeechTool(cfg)
@@ -17,7 +17,7 @@ def test_modelscope_speech_generation():
 
 
 def test_modelscope_text_address():
-    from modelscope_agent.tools.text_address_tool import TextAddressTool
+    from modelscope_agent.tools import TextAddressTool
     input = '北京朝阳望京东金辉大厦'
     kwargs = {'input': input}
     txt_addr = TextAddressTool(cfg)
@@ -27,7 +27,7 @@ def test_modelscope_text_address():
 
 
 def test_modelscope_text_ner():
-    from modelscope_agent.tools.text_ner_tool import TextNerTool
+    from modelscope_agent.tools import TextNerTool
     input = '北京今天天气怎样?'
     kwargs = {'input': input}
     txt_ner = TextNerTool(cfg)
@@ -37,7 +37,7 @@ def test_modelscope_text_ner():
 
 
 def test_modelscope_video_generation():
-    from modelscope_agent.tools.text_to_video_tool import TextToVideoTool
+    from modelscope_agent.tools import TextToVideoTool
     input = '一个正在打篮球的人'
     kwargs = {'text': input}
     video_gen = TextToVideoTool(cfg)
@@ -47,7 +47,7 @@ def test_modelscope_video_generation():
 
 
 def test_modelscope_zh2en():
-    from modelscope_agent.tools.translation_zh2en_tool import TranslationZh2EnTool
+    from modelscope_agent.tools import TranslationZh2EnTool
     input = '北京今天天气怎样?'
     kwargs = {'input': input}
     zh_to_en = TranslationZh2EnTool(cfg)
@@ -57,7 +57,7 @@ def test_modelscope_zh2en():
 
 
 def test_modelscope_en2zh():
-    from modelscope_agent.tools.translation_en2zh_tool import TranslationEn2ZhTool
+    from modelscope_agent.tools import TranslationEn2ZhTool
     input = 'Autonomous agents have long been a prominent research focus in both academic and industry communities.'
     kwargs = {'input': input}
     en_to_zh = TranslationEn2ZhTool(cfg)

diff --git a/tests/tools/test_pipeline_tool.py b/tests/tools/test_pipeline_tool.py
@@ -1,18 +1,20 @@
-from modelscope_agent.tools.pipeline_tool import ModelscopePipelineTool
-from modelscope.utils.config import Config
 import os
 
+from modelscope_agent.tools import ModelscopePipelineTool
+
+from modelscope.utils.config import Config
+
 cfg = Config.from_file('config/cfg_tool_template.json')
 # 请用自己的SDK令牌替换{YOUR_MODELSCOPE_SDK_TOKEN}（包括大括号）
-os.environ['MODELSCOPE_API_KEY'] = f"{YOUR_MODELSCOPE_SDK_TOKEN}"
+os.environ['MODELSCOPE_API_KEY'] = f'{YOUR_MODELSCOPE_SDK_TOKEN}'
+
 
 def test_modelscope_speech_generation():
-    from modelscope_agent.tools.text_to_speech_tool import TexttoSpeechTool
+    from modelscope_agent.tools import TexttoSpeechTool
     kwargs = """{'input': '北京今天天气怎样?', 'gender': 'man'}"""
     txt2speech = TexttoSpeechTool(cfg)
     res = txt2speech.call(kwargs)
     print(res)
 
 
 test_modelscope_speech_generation()
-