diff --git a/modelscope_agent/tools/__init__.py b/modelscope_agent/tools/__init__.py index 2ad4e1bfb..61011c540 100644 --- a/modelscope_agent/tools/__init__.py +++ b/modelscope_agent/tools/__init__.py @@ -2,9 +2,22 @@ from .base import TOOL_REGISTRY, BaseTool from .code_interpreter_jupyter import CodeInterpreterJupyter from .dashscope_tools.image_generation import TextToImageTool +from .dashscope_tools.qwen_vl import QWenVL from .dashscope_tools.style_repaint import StyleRepaint +from .dashscope_tools.wordart_tool import WordArtTexture from .langchain_tool import LangchainTool +from .modelscope_tools.image_chat_tool import ImageChatTool +from .modelscope_tools.pipeline_tool import ModelscopePipelineTool +from .modelscope_tools.text_address_tool import TextAddressTool +from .modelscope_tools.text_ie_tool import TextInfoExtractTool +from .modelscope_tools.text_ner_tool import TextNerTool +from .modelscope_tools.text_to_speech_tool import TexttoSpeechTool +from .modelscope_tools.text_to_video_tool import TextToVideoTool +from .modelscope_tools.translation_en2zh_tool import TranslationEn2ZhTool +from .modelscope_tools.translation_zh2en_tool import TranslationZh2EnTool from .openapi_plugin import OpenAPIPluginTool +from .similarity_search import SimilaritySearch +from .storage import Storage def call_tool(plugin_name: str, plugin_args: str) -> str: diff --git a/modelscope_agent/tools/dashscope_tools/qwen_vl.py b/modelscope_agent/tools/dashscope_tools/qwen_vl.py index e6a0eb937..35fdcdef4 100644 --- a/modelscope_agent/tools/dashscope_tools/qwen_vl.py +++ b/modelscope_agent/tools/dashscope_tools/qwen_vl.py @@ -1,53 +1,41 @@ import os -import time -import json -import requests from dashscope import MultiModalConversation -from modelscope_agent.tools.tool import Tool, ToolSchema -from pydantic import ValidationError +from modelscope_agent.tools.base import BaseTool, register_tool from requests.exceptions import RequestException, Timeout MAX_RETRY_TIMES = 3 WORK_DIR = 
os.getenv('CODE_INTERPRETER_WORK_DIR', '/tmp/ci_workspace') -class QWenVL(Tool): +@register_tool('qwen_vl') +class QWenVL(BaseTool): description = '调用qwen_vl api处理图片' name = 'qwen_vl' parameters: list = [{ 'name': 'image_file_path', 'description': '用户上传的照片的相对路径', - 'required': True + 'required': True, + 'type': 'string' }, { 'name': 'text', 'description': '用户针对上传图片的提问文本', - 'required': True + 'required': True, + 'type': 'string' }] - def __init__(self, cfg={}): - self.cfg = cfg.get(self.name, {}) - # remote call - self.token = self.cfg.get('token', - os.environ.get('DASHSCOPE_API_KEY', '')) - assert self.token != '', 'dashscope api token must be acquired' - + def call(self, params: str, **kwargs) -> str: + # 检查环境变量中是否设置DASHSCOPE_API_KEY try: - all_param = { - 'name': self.name, - 'description': self.description, - 'parameters': self.parameters - } - self.tool_schema = ToolSchema(**all_param) - except ValidationError: - raise ValueError(f'Error when parsing parameters of {self.name}') - - self._str = self.tool_schema.model_dump_json() - self._function = self.parse_pydantic_model_to_openai_function( - all_param) - - def __call__(self, *args, **kwargs): - remote_parsed_input = self._remote_parse_input(*args, **kwargs) + os.environ['DASHSCOPE_API_KEY'] + except KeyError: + raise KeyError( + 'API_KEY Error: DASHSCOPE_API_KEY environment variable is not set.' + ) + params = self._verify_args(params) + if isinstance(params, str): + return 'Parameter Error' + remote_parsed_input = self._remote_parse_input(**params) """Sample of use local file. 
linux&mac file schema: file:///home/images/test.png windows file schema: file://D:/images/abc.png @@ -58,7 +46,7 @@ def __call__(self, *args, **kwargs): while retry_times: retry_times -= 1 try: - if local_file_path.endswith(('.jpeg', '.png', '.jpg')): + if local_file_path.lower().endswith(('.jpeg', '.png', '.jpg')): messages = [{ 'role': 'system', @@ -73,14 +61,14 @@ def __call__(self, *args, **kwargs): 'image': local_file_path }, { - 'text': kwargs['text'] + 'text': params['text'] }, ] }] response = MultiModalConversation.call( model='qwen-vl-plus', messages=messages) - final_result = self._parse_output(response) - return final_result + return response['output']['choices'][0]['message'][ + 'content'][0] else: raise ValueError( f'the file you upload: {local_file_path} is not an image file, \ diff --git a/modelscope_agent/tools/dashscope_tools/wordart_tool.py b/modelscope_agent/tools/dashscope_tools/wordart_tool.py index abca88531..fab28dae7 100644 --- a/modelscope_agent/tools/dashscope_tools/wordart_tool.py +++ b/modelscope_agent/tools/dashscope_tools/wordart_tool.py @@ -2,67 +2,53 @@ import time import json -import pandas as pd import requests -from modelscope_agent.tools.tool import Tool, ToolSchema -from pydantic import ValidationError +from modelscope_agent.tools.base import BaseTool, register_tool from requests.exceptions import RequestException, Timeout MAX_RETRY_TIMES = 3 -class WordArtTexture(Tool): +@register_tool('wordart_texture_generation') +class WordArtTexture(BaseTool): description = '生成艺术字纹理图片' name = 'wordart_texture_generation' parameters: list = [{ 'name': 'input.text.text_content', 'description': 'text that the user wants to convert to WordArt', - 'required': True + 'required': True, + 'type': 'string' }, { - 'name': 'input.agents', + 'name': 'input.prompt', 'description': 'Users’ style requirements for word art may be requirements in terms of shape, color, entity, etc.', - 'required': True + 'required': True, + 'type': 'string' }, { 'name': 
'input.texture_style', 'description': 'Type of texture style;Default is "material";If not provided by the user, \ defaults to "material".Another value is scene.', - 'required': True + 'required': True, + 'type': 'string' }, { 'name': 'input.text.output_image_ratio', 'description': 'The aspect ratio of the text input image; the default is "1:1", \ the available ratios are: "1:1", "16:9", "9:16";', - 'required': True + 'required': True, + 'type': 'string' }] - def __init__(self, cfg={}): - self.cfg = cfg.get(self.name, {}) - # remote call - self.url = 'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture' - self.token = self.cfg.get('token', - os.environ.get('DASHSCOPE_API_KEY', '')) - assert self.token != '', 'dashscope api token must be acquired with wordart' - - try: - all_param = { - 'name': self.name, - 'description': self.description, - 'parameters': self.parameters - } - self.tool_schema = ToolSchema(**all_param) - except ValidationError: - raise ValueError(f'Error when parsing parameters of {self.name}') - - self._str = self.tool_schema.model_dump_json() - self._function = self.parse_pydantic_model_to_openai_function( - all_param) - - def __call__(self, *args, **kwargs): - remote_parsed_input = json.dumps( - self._remote_parse_input(*args, **kwargs)) + def call(self, params: str, **kwargs) -> str: + params = self._verify_args(params) + if isinstance(params, str): + return 'Parameter Error' + remote_parsed_input = json.dumps(self._remote_parse_input(**params)) origin_result = None + self.token = kwargs.get('token', + os.environ.get('DASHSCOPE_API_KEY', '')) + assert self.token != '', 'dashscope api token must be acquired with wordart' retry_times = MAX_RETRY_TIMES headers = { 'Content-Type': 'application/json', @@ -75,7 +61,8 @@ def __call__(self, *args, **kwargs): response = requests.request( 'POST', - url=self.url, + url= + 'https://dashscope.aliyuncs.com/api/v1/services/aigc/wordart/texture', headers=headers, data=remote_parsed_input) @@ 
-83,8 +70,7 @@ def __call__(self, *args, **kwargs): response.raise_for_status() origin_result = json.loads(response.content.decode('utf-8')) - self.final_result = self._parse_output( - origin_result, remote=True) + self.final_result = origin_result return self.get_wordart_result() except Timeout: continue @@ -108,7 +94,7 @@ def _remote_parse_input(self, *args, **kwargs): temp_dict = temp_dict.setdefault(k, {}) temp_dict[keys[-1]] = value else: - # f the key does not contain ".", directly store the key-value pair into restored_dict + # if the key does not contain ".", directly store the key-value pair into restored_dict restored_dict[key] = value kwargs = restored_dict kwargs['model'] = 'wordart-texture' @@ -116,7 +102,7 @@ def _remote_parse_input(self, *args, **kwargs): return kwargs def get_result(self): - result_data = json.loads(json.dumps(self.final_result['result'])) + result_data = json.loads(json.dumps(self.final_result)) if 'task_id' in result_data['output']: task_id = result_data['output']['task_id'] get_url = f'https://dashscope.aliyuncs.com/api/v1/tasks/{task_id}' @@ -131,9 +117,7 @@ def get_result(self): if response.status_code != requests.codes.ok: response.raise_for_status() origin_result = json.loads(response.content.decode('utf-8')) - - get_result = self._parse_output(origin_result, remote=True) - return get_result + return origin_result except Timeout: continue except RequestException as e: @@ -148,22 +132,23 @@ def get_result(self): def get_wordart_result(self): try: result = self.get_result() - print(result) while True: - result_data = result.get('result', {}) + result_data = result output = result_data.get('output', {}) task_status = output.get('task_status', '') if task_status == 'SUCCEEDED': print('任务已完成') - return result + # 取出result里url的部分,提高url图片展示稳定性 + output_url = result['output']['results'][0]['url'] + return output_url elif task_status == 'FAILED': - raise ('任务失败') + raise Exception(output.get('message', '任务失败,请重试')) else: # 
继续轮询,等待一段时间后再次调用 time.sleep(1) # 等待 1 秒钟 result = self.get_result() - + print(f'Running:{result}') except Exception as e: print('get Remote Error:', str(e)) diff --git a/tests/tools/test_image_gen.py b/tests/tools/test_image_gen.py index 1f9bdf730..3f343190c 100644 --- a/tests/tools/test_image_gen.py +++ b/tests/tools/test_image_gen.py @@ -1,5 +1,5 @@ from modelscope_agent.agent import Agent -from modelscope_agent.tools.image_generation import TextToImageTool +from modelscope_agent.tools import TextToImageTool from modelscope_agent.agents.role_play import RolePlay # NOQA diff --git a/tests/tools/test_local_tools.py b/tests/tools/test_local_tools.py index 372cc3e06..484b7d911 100644 --- a/tests/tools/test_local_tools.py +++ b/tests/tools/test_local_tools.py @@ -1,4 +1,4 @@ -from modelscope_agent.tools import ModelscopePipelineTool, Tool +from modelscope_agent.tools import ModelscopePipelineTool def test_modelscope_pipline(): diff --git a/tests/tools/test_modelscope_tools.py b/tests/tools/test_modelscope_tools.py index bb035661a..6afc243d1 100644 --- a/tests/tools/test_modelscope_tools.py +++ b/tests/tools/test_modelscope_tools.py @@ -4,7 +4,7 @@ def test_modelscope_speech_generation(): - from modelscope_agent.tools.text_to_speech_tool import TexttoSpeechTool + from modelscope_agent.tools import TexttoSpeechTool input = '北京今天天气怎样?' kwargs = {'input': input, 'gender': 'man'} txt2speech = TexttoSpeechTool(cfg) @@ -17,7 +17,7 @@ def test_modelscope_speech_generation(): def test_modelscope_text_address(): - from modelscope_agent.tools.text_address_tool import TextAddressTool + from modelscope_agent.tools import TextAddressTool input = '北京朝阳望京东金辉大厦' kwargs = {'input': input} txt_addr = TextAddressTool(cfg) @@ -27,7 +27,7 @@ def test_modelscope_text_address(): def test_modelscope_text_ner(): - from modelscope_agent.tools.text_ner_tool import TextNerTool + from modelscope_agent.tools import TextNerTool input = '北京今天天气怎样?' 
kwargs = {'input': input} txt_ner = TextNerTool(cfg) @@ -37,7 +37,7 @@ def test_modelscope_text_ner(): def test_modelscope_video_generation(): - from modelscope_agent.tools.text_to_video_tool import TextToVideoTool + from modelscope_agent.tools import TextToVideoTool input = '一个正在打篮球的人' kwargs = {'text': input} video_gen = TextToVideoTool(cfg) @@ -47,7 +47,7 @@ def test_modelscope_video_generation(): def test_modelscope_zh2en(): - from modelscope_agent.tools.translation_zh2en_tool import TranslationZh2EnTool + from modelscope_agent.tools import TranslationZh2EnTool input = '北京今天天气怎样?' kwargs = {'input': input} zh_to_en = TranslationZh2EnTool(cfg) @@ -57,7 +57,7 @@ def test_modelscope_zh2en(): def test_modelscope_en2zh(): - from modelscope_agent.tools.translation_en2zh_tool import TranslationEn2ZhTool + from modelscope_agent.tools import TranslationEn2ZhTool input = 'Autonomous agents have long been a prominent research focus in both academic and industry communities.' kwargs = {'input': input} en_to_zh = TranslationEn2ZhTool(cfg) diff --git a/tests/tools/test_pipeline_tool.py b/tests/tools/test_pipeline_tool.py index f51054507..01175bc23 100644 --- a/tests/tools/test_pipeline_tool.py +++ b/tests/tools/test_pipeline_tool.py @@ -1,13 +1,16 @@ -from modelscope_agent.tools.pipeline_tool import ModelscopePipelineTool -from modelscope.utils.config import Config import os +from modelscope_agent.tools import ModelscopePipelineTool + +from modelscope.utils.config import Config + cfg = Config.from_file('config/cfg_tool_template.json') # 请用自己的SDK令牌替换{YOUR_MODELSCOPE_SDK_TOKEN}(包括大括号) -os.environ['MODELSCOPE_API_KEY'] = f"{YOUR_MODELSCOPE_SDK_TOKEN}" +os.environ['MODELSCOPE_API_KEY'] = f'{YOUR_MODELSCOPE_SDK_TOKEN}' + def test_modelscope_speech_generation(): - from modelscope_agent.tools.text_to_speech_tool import TexttoSpeechTool + from modelscope_agent.tools import TexttoSpeechTool kwargs = """{'input': '北京今天天气怎样?', 'gender': 'man'}""" txt2speech = TexttoSpeechTool(cfg) res 
= txt2speech.call(kwargs) @@ -15,4 +18,3 @@ def test_modelscope_speech_generation(): test_modelscope_speech_generation() - diff --git a/tests/tools/test_qwen_vl_tool.py b/tests/tools/test_qwen_vl_tool.py new file mode 100644 index 000000000..c57c4e6f2 --- /dev/null +++ b/tests/tools/test_qwen_vl_tool.py @@ -0,0 +1,30 @@ +from modelscope_agent.tools import QWenVL + +from modelscope_agent.agents.role_play import RolePlay # NOQA + + +def test_qwen_vl(): + # 图片默认上传到ci_workspace,后端测试mock时需要在本地存图片到/tmp/ci_workspace,这里只需要图片basename。 + params = """{'image_file_path': 'WechatIMG139.jpg', 'text': '描述这张照片'}""" + qvl = QWenVL() + res = qvl.call(params) + print(res) + assert (isinstance(res, dict) and 'text' in res) + + +def test_qwen_vl_role(): + role_template = '你扮演一个美术老师,用尽可能丰富的描述调用工具讲解描述各种图画。' + + llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'} + + # input tool args + function_list = [{'name': 'qwen_vl'}] + + bot = RolePlay( + function_list=function_list, llm=llm_config, instruction=role_template) + + response = bot.run('[上传文件WechatIMG139.jpg],描述这张照片') + text = '' + for chunk in response: + text += chunk + print(text) diff --git a/tests/tools/test_style_repaint.py b/tests/tools/test_style_repaint.py index 65f7802f9..fa61e5eda 100644 --- a/tests/tools/test_style_repaint.py +++ b/tests/tools/test_style_repaint.py @@ -1,9 +1,4 @@ -import os - -from modelscope_agent.agent import Agent -from modelscope_agent.tools.style_repaint import StyleRepaint - -print(os.getcwd()) +from modelscope_agent.tools import StyleRepaint from modelscope_agent.agents.role_play import RolePlay # NOQA diff --git a/tests/tools/test_wordart_tool.py b/tests/tools/test_wordart_tool.py new file mode 100644 index 000000000..17f1a7275 --- /dev/null +++ b/tests/tools/test_wordart_tool.py @@ -0,0 +1,34 @@ +from modelscope_agent.tools import WordArtTexture + +from modelscope_agent.agents.role_play import RolePlay # NOQA + + +def test_word_art(): + params = """{ + 'input.text.text_content': 
'魔搭社区', + 'input.prompt': '一片绿色的森林里开着小花', + 'input.texture_style': 'scene', + 'input.text.output_image_ratio': '9:16' + }""" + wa = WordArtTexture() + res = wa.call(params) + print(res) + assert (res.startswith('http')) + + +def test_word_art_role(): + role_template = '你扮演一个美术老师,用尽可能丰富的描述调用工具生成艺术字图片。' + + llm_config = {'model': 'qwen-max', 'model_server': 'dashscope'} + + # input tool args + function_list = [{'name': 'wordart_texture_generation'}] + + bot = RolePlay( + function_list=function_list, llm=llm_config, instruction=role_template) + + response = bot.run('文字内容:你好新年,风格:海洋,纹理风格:默认,宽高比:16:9') + text = '' + for chunk in response: + text += chunk + print(text)