Feature/datascience assistant (#559)

modelscope · Jul 31, 2024 · 0011c64 · 0011c64
1 parent ec59259
commit 0011c64
Show file tree

Hide file tree

Showing 11 changed files with 201 additions and 6 deletions.
diff --git a/examples/agents/data_science_assistant.ipynb b/examples/agents/data_science_assistant.ipynb
@@ -0,0 +1,193 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "45d56c67-7439-4264-912a-c0b4895cac63",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-04T14:17:41.716630Z",
+     "iopub.status.busy": "2023-09-04T14:17:41.716258Z",
+     "iopub.status.idle": "2023-09-04T14:17:42.097933Z",
+     "shell.execute_reply": "2023-09-04T14:17:42.097255Z",
+     "shell.execute_reply.started": "2023-09-04T14:17:41.716610Z"
+    }
+   },
+   "source": [
+    "### clone代码"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3851d799-7162-4e73-acab-3c13cb1e43bd",
+   "metadata": {
+    "ExecutionIndicator": {
+     "show": true
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "!git clone https://github.com/modelscope/modelscope-agent.git"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f71e64d0-f967-4244-98ba-4e5bc4530883",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2023-09-04T14:17:41.716630Z",
+     "iopub.status.busy": "2023-09-04T14:17:41.716258Z",
+     "iopub.status.idle": "2023-09-04T14:17:42.097933Z",
+     "shell.execute_reply": "2023-09-04T14:17:42.097255Z",
+     "shell.execute_reply.started": "2023-09-04T14:17:41.716610Z"
+    }
+   },
+   "source": [
+    "### 安装特定依赖"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "489900d6-cc33-4ada-b2be-7e3a139cf6ed",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "! cd modelscope-agent && pip install -r requirements.txt"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9e9f3150",
+   "metadata": {},
+   "source": [
+    "### 本地配置"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a027a6e8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "os.chdir('modelscope-agent/examples/agents')\n",
+    "\n",
+    "import sys\n",
+    "sys.path.append('../../')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3de23896",
+   "metadata": {},
+   "source": [
+    "### API_KEY管理"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65e5dcc8",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "print('请输入DASHSCOPE_API_KEY')\n",
+    "os.environ['DASHSCOPE_API_KEY'] = input()\n",
+    "print('请输入ModelScope Token')\n",
+    "os.environ['MODELSCOPE_API_TOKEN'] = input()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8c8defa3",
+   "metadata": {},
+   "source": "### 构建DataScienceAssistant"
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "01e90564",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from modelscope_agent.agents.data_science_assistant import DataScienceAssistant\n",
+    "from modelscope_agent.tools.metagpt_tools.tool_recommend import TypeMatchToolRecommender\n",
+    "llm_config = {\n",
+    "    'model': 'qwen2-72b-instruct', \n",
+    "    'model_server': 'dashscope',\n",
+    "}\n",
+    "data_science_assistant = DataScienceAssistant(llm=llm_config,tool_recommender=TypeMatchToolRecommender(tools=[\"<all>\"]))\n"
+   ]
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "### 准备数据集\n",
+    "Data Science Assistant在执行时，会根据当前使用的Python解释器的工作目录来寻找数据集，所以需要将数据集放在正确的位置。如需确认当前使用的Python解释器路径，可以使用以下代码："
+   ],
+   "id": "c29311637b848243"
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": [
+    "import sys\n",
+    "\n",
+    "# 打印当前Python解释器的路径\n",
+    "print(\"Python interpreter path:\", sys.executable)"
+   ],
+   "id": "2465e82452588f4a"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": "### 运行DataScienceAssistant",
+   "id": "ecaf880ce9940581"
+  },
+  {
+   "metadata": {},
+   "cell_type": "code",
+   "outputs": [],
+   "execution_count": null,
+   "source": "data_science_assistant.run(\"This is a customers financial dataset. Your goal is to predict the value of transactions for each potential customer. The target column is target. Perform data analysis, data preprocessing, feature engineering, and modeling to predict the target. Report RMSLE on the eval data. Train data path: './dataset/08_santander-value-prediction-challenge/split_train.csv', eval data path: './dataset/08_santander-value-prediction-challenge/split_eval.csv' .\")",
+   "id": "fafed4d52f259f79"
+  },
+  {
+   "metadata": {},
+   "cell_type": "markdown",
+   "source": [
+    "### 查看结果\n",
+    "可在控制台查看DataScienceAssistant的运行过程，同时可以在 /modelscope-agent/data/ 目录下查看生成的Jupyter文件和运行过程json文件。"
+   ],
+   "id": "3189a9ed1a6f9a38"
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/modelscope_agent/agents/data_science_assistant.py b/modelscope_agent/agents/data_science_assistant.py
@@ -18,8 +18,6 @@
 from modelscope_agent.utils.logger import agent_logger as logger
 from modelscope_agent.utils.utils import parse_code
 
-DATA_SCIENTIST_TEMPLATE = """As a data scientist, you need to help user to achieve their goal step by step in a \
-continuous Jupyter notebook."""
 PLAN_TEMPLATE = """
 # Context:
 {context}

diff --git a/modelscope_agent/schemas.py b/modelscope_agent/schemas.py
@@ -43,15 +43,13 @@ class AgentAttr(BaseModel):
 class CodeCell(BaseModel):
     code: str = ''
     result: str = ''
-    is_success: bool = False
 
 
 class TaskResult(BaseModel):
     """Result of taking a task, with result required to be filled"""
 
     code: str = ''
     result: str
-    is_success: bool
 
 
 class Task(BaseModel):
@@ -61,7 +59,6 @@ class Task(BaseModel):
     task_type: str = ''
     code: str = ''
     result: str = ''
-    is_success: bool = False
     is_finished: bool = False
     code_cells: List[CodeCell] = []
 

diff --git a/modelscope_agent/tools/metagpt_tools/libs/data_preprocess.py b/modelscope_agent/tools/metagpt_tools/libs/data_preprocess.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 from __future__ import annotations
 from typing import Literal
 

diff --git a/modelscope_agent/tools/metagpt_tools/libs/feature_engineering.py b/modelscope_agent/tools/metagpt_tools/libs/feature_engineering.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 from __future__ import annotations
 import itertools
 

diff --git a/modelscope_agent/tools/metagpt_tools/task_type.py b/modelscope_agent/tools/metagpt_tools/task_type.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT and modified by modelscope-agent
 from enum import Enum
 
 from pydantic import BaseModel

diff --git a/modelscope_agent/tools/metagpt_tools/tool_convert.py b/modelscope_agent/tools/metagpt_tools/tool_convert.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 import ast
 import inspect
 import re

diff --git a/modelscope_agent/tools/metagpt_tools/tool_data_type.py b/modelscope_agent/tools/metagpt_tools/tool_data_type.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 from pydantic import BaseModel
 
 

diff --git a/modelscope_agent/tools/metagpt_tools/tool_recommend.py b/modelscope_agent/tools/metagpt_tools/tool_recommend.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 from __future__ import annotations
 from typing import Any
 

diff --git a/modelscope_agent/tools/metagpt_tools/tool_registry.py b/modelscope_agent/tools/metagpt_tools/tool_registry.py
@@ -1,3 +1,4 @@
+# this code is originally from https://github.com/geekan/MetaGPT
 from __future__ import annotations
 import inspect
 import os

diff --git a/tests/tools/test_openapi_schema.py b/tests/tools/test_openapi_schema.py
@@ -174,7 +174,7 @@
 
 @pytest.mark.skipif(IS_FORKED_PR, reason='only run modelscope-agent main repo')
 def test_openapi_schema_tool():
-    schema_openAPI['auth']['apikey'] = os.getenv('DASHSCOPE_API_KEY')
+    schema_openAPI['auth']['apikey'] = os.environ['DASHSCOPE_API_KEY']
     config_dict = openapi_schema_convert(
         schema=schema_openAPI['schema'], auth=schema_openAPI['auth'])
     plugin_cfg = Config(config_dict)
Original file line number	Diff line number	Diff line change
		@@ -1,3 +1,4 @@
		# this code is originally from https://github.com/geekan/MetaGPT
		from pydantic import BaseModel


Expand Down