Commit

Merge remote-tracking branch 'origin/master' into feat/openapi_refactor

zzhangpurdue committed Sep 30, 2024
2 parents 955f860 + 195459c commit 51a285e
Showing 8 changed files with 139 additions and 38 deletions.
2 changes: 1 addition & 1 deletion .dev_scripts/dockerci.sh
@@ -27,7 +27,7 @@ playwright install --with-deps chromium

# install package
pip install fastapi pydantic uvicorn docker sqlmodel transformers ray
pip install pymongo motor llama-index-storage-docstore-mongodb llama-index-storage-index-store-mongodb llama-index-readers-mongodb
pip install pymongo motor llama-index-storage-docstore-mongodb==0.1.3 llama-index-storage-index-store-mongodb==0.1.2 llama-index-readers-mongodb==0.1.7
pip install tensorflow pyclipper shapely tf_slim
pip install moviepy

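The CI script now pins the three llama-index MongoDB integration packages to fixed versions rather than taking whatever is latest, presumably to keep the Docker CI runs reproducible. A minimal post-install sanity check one could run is sketched below; the package names and versions are the ones pinned above, while the check script itself is illustrative and not part of this commit.

```python
# Sketch: verify the pinned llama-index MongoDB packages after `pip install`.
# Package names and versions come from the dockerci.sh change above; the
# script itself is illustrative and not part of the repository.
from importlib.metadata import PackageNotFoundError, version

EXPECTED_PINS = {
    'llama-index-storage-docstore-mongodb': '0.1.3',
    'llama-index-storage-index-store-mongodb': '0.1.2',
    'llama-index-readers-mongodb': '0.1.7',
}

for name, expected in EXPECTED_PINS.items():
    try:
        installed = version(name)
    except PackageNotFoundError:
        print(f'{name}: NOT INSTALLED (expected {expected})')
        continue
    status = 'OK' if installed == expected else f'MISMATCH (expected {expected})'
    print(f'{name}=={installed} -> {status}')
```
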
18 changes: 12 additions & 6 deletions apps/datascience_assistant/README.md
@@ -6,21 +6,27 @@ Detailed information can be found in the [documentation](../../docs/source/agent
## Quick Start
Streamlit is a Python library that makes it easy to create and share beautiful, custom web apps for machine learning and data science.

To run the DS Assistant in streamlit, you need to install the Streamlit library. You can install it using pip:
To run the DS Assistant in Streamlit, you need to install additional libraries. You can install them using pip:
```bash
pip install streamlit streamlit-jupyter
pip install streamlit mistune matplotlib nbconvert
```
Then, you need to set

Then, you can run the DS Assistant using the following command:
```bash
streamlit run app.py
cd ../../
streamlit run ./apps/datascience_assistant/app.py
```

After running the command, a new tab will open in your default web browser with the DS Assistant running.
The following are screenshots of the DS Assistant running in the browser:

You can upload your dataset and write your request.
![img_2.png](../../resources/data_science_assistant_streamlit_1.png)
You can view all of the code in Streamlit.

After submitting your request, DS Assistant will automatically generate a plan for this request.
![img_2.png](../../resources/data_science_assistant_streamlit_4.png)

After that, DS Assistant will automatically execute every task; you can view all of the code and details in Streamlit.
![img_3.png](../../resources/data_science_assistant_streamlit_2.png)

After you have finished using the DS Assistant, you can directly convert the whole run to a PDF.
![img_5.png](../../resources/data_science_assistant_streamlit_3.png)
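
For readers new to the assistant, the plan it generates is a JSON list of tasks whose fields (`task_id`, `dependent_task_ids`, `instruction`, `task_type`) follow the planning templates added later in this commit. A purely illustrative example of such a plan, written here as a Python literal with invented values:

```python
# Hypothetical plan for a tabular-prediction request. Field names follow the
# plan/decompose templates in this commit; the concrete tasks are invented.
example_plan = [
    {
        'task_id': '1',
        'dependent_task_ids': [],
        'instruction': 'Perform EDA on the uploaded dataset',
        'task_type': 'eda',
    },
    {
        'task_id': '2',
        'dependent_task_ids': ['1'],
        'instruction': 'Handle missing values and encode categorical columns',
        'task_type': 'data preprocessing',
    },
    {
        'task_id': '3',
        'dependent_task_ids': ['2'],
        'instruction': 'Train a baseline model',
        'task_type': 'model train',
    },
    {
        'task_id': '4',
        'dependent_task_ids': ['3'],
        'instruction': 'Evaluate the model and report metrics',
        'task_type': 'model evaluate',
    },
]
```
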
60 changes: 41 additions & 19 deletions apps/datascience_assistant/app.py
@@ -1,23 +1,45 @@
import os
import sys

import streamlit as st
from modelscope_agent.agents.data_science_assistant import DataScienceAssistant
from modelscope_agent.tools.metagpt_tools.tool_recommend import \
    TypeMatchToolRecommender

llm_config = {
    'model': 'qwen2-72b-instruct',
    'model_server': 'dashscope',
}
os.environ['DASHSCOPE_API_KEY'] = input(
    'Please input your dashscope api key: ')
data_science_assistant = DataScienceAssistant(
    llm=llm_config, tool_recommender=TypeMatchToolRecommender(tools=['<all>']))
st.title('Data Science Assistant')
st.write(
    'This is a data science assistant that can help you with your data science tasks.'
)
st.write('Please input your request below and click the submit button.')
user_request = st.text_input('User Request')
if st.button('submit'):
    data_science_assistant.run(user_request=user_request, streamlit=True)
os.environ['DASHSCOPE_API_KEY'] = 'YOUR_API_KEY'


def setup_project_paths():
    current_dir = os.path.dirname(os.path.abspath(__file__))  # noqa
    project_root_path = os.path.abspath(os.path.join(current_dir,
                                                     '../../'))  # noqa
    sys.path.append(project_root_path)  # noqa


if __name__ == '__main__':
    setup_project_paths()
    from modelscope_agent.agents.data_science_assistant import \
        DataScienceAssistant  # noqa
    from modelscope_agent.tools.metagpt_tools.tool_recommend import \
        TypeMatchToolRecommender  # noqa
    st.title('Data Science Assistant')
    st.write(
        'This is a data science assistant that can help you with your data science tasks.'
    )
    st.write(
        'Please input your request and upload files then click the submit button.'
    )

    files = st.file_uploader(
        'Please upload files that you need. ', accept_multiple_files=True)
    last_file_name = ''
    user_request = st.text_area('User Request')
    if st.button('submit'):
        llm_config = {
            'model': 'qwen2-72b-instruct',
            'model_server': 'dashscope',
        }
        data_science_assistant = DataScienceAssistant(
            llm=llm_config,
            tool_recommender=TypeMatchToolRecommender(tools=['<all>']))
        for file in files:
            with open(file.name, 'wb') as f:
                f.write(file.getbuffer())
        data_science_assistant.run(user_request=user_request, streamlit=True)
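
The rewritten entry point defers the `modelscope_agent` imports until `setup_project_paths()` has put the repository root on `sys.path`, which is why the README now asks you to launch Streamlit from the repository root. A rough non-Streamlit sketch of the same flow follows; it reuses the constructor and `run()` call shown above, while the repository path, the sample request, and the assumption that `streamlit=False` falls back to console output are all illustrative.

```python
# Minimal sketch mirroring app.py above, driven from a plain script.
# Assumes a valid DashScope key and that the repository root is importable;
# the path and request string are placeholders.
import os
import sys

os.environ['DASHSCOPE_API_KEY'] = 'YOUR_API_KEY'
sys.path.append('/path/to/modelscope-agent')  # what setup_project_paths() does

from modelscope_agent.agents.data_science_assistant import DataScienceAssistant
from modelscope_agent.tools.metagpt_tools.tool_recommend import \
    TypeMatchToolRecommender

llm_config = {
    'model': 'qwen2-72b-instruct',
    'model_server': 'dashscope',
}
assistant = DataScienceAssistant(
    llm=llm_config,
    tool_recommender=TypeMatchToolRecommender(tools=['<all>']))
# streamlit=False is an assumption: console output instead of the web UI.
assistant.run(
    user_request='Analyse train.csv and train a model to predict the target column',
    streamlit=False)
```
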
2 changes: 1 addition & 1 deletion examples/apps/modelscope_agentfabric.ipynb
@@ -120,7 +120,7 @@
}
],
"source": [
"! sed -i 's#demo.launch()#demo.launch(share=True)#g' app.py && export PYTHONPATH=$PYTHONPATH:/content/modelscope-agent && python app.py"
"! export PYTHONPATH=$PYTHONPATH:/content/modelscope-agent && python app.py"
]
}
],
92 changes: 82 additions & 10 deletions modelscope_agent/agents/data_science_assistant.py
@@ -1,6 +1,4 @@
# Implementation inspired by the paper "DATA INTERPRETER: AN LLM AGENT FOR DATA SCIENCE"
import asyncio
import copy
import os
import time
from datetime import datetime
@@ -39,8 +37,7 @@
- **other**: Any tasks not in the defined categories
# Task:
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal. A plan \
consists of one to four tasks.
Based on the context, write a simple plan or modify an existing plan of what you should do to achieve the goal.
Output a list of jsons following the format:
```json
@@ -55,6 +52,44 @@
]
```
"""

DECOMPOSE_TASK_TEMPLATE = """
# Context:
{context}
# Available Task Types:
- **eda**: For performing exploratory data analysis
- **data preprocessing**: For preprocessing dataset in a data analysis or machine learning task ONLY,\
general data operation doesn't fall into this type
- **feature engineering**: Only for creating new columns for input data.
- **model train**: Only for training model.
- **model evaluate**: Only for evaluating model.
- **ocr**: Only for OCR tasks.
- **other**: Any tasks not in the defined categories
# Previous Tasks
We have already generated the following tasks:
{previous_tasks}
# Task:
The current task is:
{current_task}
The current task is too complex to be executed in one step. Please decompose it into smaller tasks \
and output a list of jsons following the format:
```json
[
    {{
        "task_id": str = "unique identifier for a task in plan, can be an ordinal, \
should be unique and not conflict with previous task ids",
        "dependent_task_ids": list[str] = "ids of tasks prerequisite to this task",
        "instruction": "what you should do in this task, one short phrase or sentence",
        "task_type": "type of this task, should be one of Available Task Types",
    }},
    ...
]
```
"""

CODE_TEMPLATE = """
# Task
you are a code generator, you need to generate a code python block in jupyter notebook to achieve the \
@@ -597,8 +632,8 @@ def _judge_code(self, task, previous_code_blocks, code,
if 'incorrect' in judge_result.split('\n')[-1]:
success = False
failed_reason = (
'Though the code executes successfully, The code logic is incorrect, here is the reason: '
+ judge_result)
'Though the code executes successfully, The code logic is \
incorrect, here is the reason: ' + judge_result)
return success, failed_reason

else:
@@ -634,7 +669,7 @@ def _run(self, user_request, save: bool = True, **kwargs):
previous_code_blocks = self._get_previous_code_blocks()
success = False
code_counter = 0
max_try = kwargs.get('max_try', 10)
max_try = kwargs.get('max_try', 1)
while not success and code_counter < max_try:
code_execute_success = False
code_logic_success = False
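
Note that this hunk lowers the default per-task retry budget from 10 to 1. Since `max_try` is read from `**kwargs`, the old behaviour can presumably still be requested by the caller; the sketch below assumes that `run()` forwards keyword arguments to `_run()` unchanged and that `assistant` is a `DataScienceAssistant` constructed as in app.py above.

```python
# Sketch: restoring the pre-commit retry budget, assuming run() forwards
# kwargs to _run(). `assistant` is a DataScienceAssistant instance as
# constructed in apps/datascience_assistant/app.py.
assistant.run(
    user_request='Train and evaluate a model on the uploaded dataset',
    streamlit=True,
    max_try=10)  # retry each task up to 10 times instead of the new default of 1
```
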
@@ -726,9 +761,13 @@ def _run(self, user_request, save: bool = True, **kwargs):
encoding='utf-8') as file:
nbformat.write(self.code_interpreter.nb, file)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
decomposed_tasks = self._decompose_task(task)
if decomposed_tasks:
self.plan.replace_task(task, decomposed_tasks)
else:
self.plan = self._update_plan(
user_request=user_request, curr_plan=self.plan)
self.code_interpreter.reset()
# save the plan into json file
if save:
after_time = time.time()
@@ -769,3 +808,36 @@ def _get_total_tokens(self):
except Exception as e:
logger.error(f'get total token error: {e}')
pass

def _decompose_task(self, task):
    try:
        print(f'decompose task {task.task_id}')
        messages = [{
            'role':
            'user',
            'content':
            DECOMPOSE_TASK_TEMPLATE.format(
                context='User Request: ' + task.instruction + '\n',
                previous_tasks='\n'.join([
                    json.dumps({
                        'task_id': t.task_id,
                        'dependent_task_ids': t.dependent_task_ids,
                        'instruction': t.instruction,
                        'task_type': t.task_type
                    }) for t in self.plan.tasks
                ]),
                current_task=json.dumps(task.__dict__))
        }]
        resp = self._call_llm(prompt=None, messages=messages, stop=None)
        tasks_text = ''
        for r in resp:
            tasks_text += r
        tasks_text = parse_code(text=tasks_text, lang='json')
        logger.info(f'decomposed tasks: {tasks_text}')

        tasks = json5.loads(tasks_text)
        tasks = [Task(**task) for task in tasks]
        return tasks
    except Exception as e:
        logger.error(f'decompose task error: {e}')
        return None
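
The new failure path calls `self.plan.replace_task(task, decomposed_tasks)`, whose implementation is not part of this diff. Purely to illustrate what the decomposition fallback relies on, a hypothetical sketch of such a method is given below; the real `Plan` and `Task` classes live elsewhere in `modelscope_agent` and may well differ.

```python
# Hypothetical Plan.replace_task, NOT taken from the repository: it only
# illustrates how a too-complex task could be spliced out in favour of its
# decomposed subtasks. Field names follow the task JSON used above.
def replace_task(self, old_task, new_tasks):
    # Find the failed task in the current plan.
    idx = next(i for i, t in enumerate(self.tasks)
               if t.task_id == old_task.task_id)
    # Splice the decomposed subtasks in where the old task used to be.
    self.tasks[idx:idx + 1] = list(new_tasks)
    # Re-point anything that depended on the old task at the last subtask.
    for t in self.tasks:
        t.dependent_task_ids = [
            new_tasks[-1].task_id if dep == old_task.task_id else dep
            for dep in t.dependent_task_ids
        ]
```
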
3 changes: 2 additions & 1 deletion requirements.txt
@@ -7,7 +7,8 @@ jupyter>=1.0.0
langchain
langchain-community
langchain-experimental
llama-index
llama-index==0.10.29
llama-index-core==0.10.39.post1
llama-index-readers-json
llama-index-retrievers-bm25==0.1.5
modelscope[framework]>=1.16.0
Binary file modified resources/data_science_assistant_streamlit_1.png
Binary file added resources/data_science_assistant_streamlit_4.png
