Skip to content

Commit

Permalink
POC DataScientist ChatBot (Assistant) (#5)
Browse files Browse the repository at this point in the history
Data Scientist Assistant POC based on:

- Google Vertex AI gemini-1.5-flash-001 model
- Dask for distributed execution
- Coiled for cloud-based execution on Google Cloud
- Streamlit for web UI and visualisation
- pandas for reading CSV data files
  • Loading branch information
Sklavit authored Jul 27, 2024
1 parent 5f14612 commit c6278b6
Show file tree
Hide file tree
Showing 55 changed files with 8,095 additions and 5 deletions.
7 changes: 7 additions & 0 deletions .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions .idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 14 additions & 5 deletions .idea/pet_project.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Empty file added apps/__init__.py
Empty file.
Empty file.
62 changes: 62 additions & 0 deletions apps/ds_chat_backend/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Copyright (c) 2024. Sergii Nechuiviter
import requests


def print_help():
    """Print usage instructions for the interactive chat CLI to stdout."""
    help_lines = (
        "You can type your message and press Enter to send it.",
        "",
        "If you start your message with '`' character, you can use some commands.",
        "You can type '`h' and press Enter to see the help.",
        "You can type '`q' and press Enter to quit the application.",
        "You can try to enter '`' and your command in natural language for other system commands.",
        "",
    )
    for help_line in help_lines:
        print(help_line)


def process_user_message(message: str):
    """Return a stub assistant reply that echoes *message* back to the user."""
    reply = (f'You said: {message}\n'
             f' Ok...')
    return reply


def process_command(message: str):
    """Return a stub reply for a backtick-prefixed system command (not implemented yet)."""
    reply = (f'Command: {message}\n'
             f' Command is not supported yet.')
    return reply


if __name__ == "__main__":
# Print invitation message
print("Hello, this is a simple chat application.")
print("")
print_help()

# Main processing loop
while True:
# Get user input
user_input = input("You: ")

# Check if user wants to quit
if user_input == '`q':
break

# Check if user wants to see help
if user_input == "`h":
print_help()
continue

try:
# Check if user wants to send a command
if user_input.startswith("`"):
# Send the command to the server
response = process_command(user_input)
else:
# send message to the server
response = process_user_message(user_input)
except APIException as e:
print(e)

print(response)

continue

# Print a final message
print("Thank you for using the chat application.")
print("Goodbye!")
3 changes: 3 additions & 0 deletions apps/streamlit_ds_chat/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.streamlit/secrets.toml
env/
env12/

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions apps/streamlit_ds_chat/.idea/modules.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

11 changes: 11 additions & 0 deletions apps/streamlit_ds_chat/.idea/streamlit_ds_chat.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions apps/streamlit_ds_chat/.streamlit/secrets.toml.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# here is an example file with secrets
# the real file must not be committed to git
Empty file.
Empty file.
Empty file.
Empty file.
68 changes: 68 additions & 0 deletions apps/streamlit_ds_chat/README_04_vertex_ai_codegen_streamlit.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@


For testing manager, the following questions are used:

<q>How many unique fruits are there in the dataset?</q>
<q>How many fruits likes each person?</q>
<q>Make a chart which shows, how many fruits likes each person?</q>
<q>How many different kinds of fruit each person likes?</q>

### Options from VertexAI itself
<q>What is the average number of fruits liked? </q>
<q>Who likes the most number of fruits? </q>
<q>Show the distribution of fruits liked by people. </q>

## Example output to the last question:

### Attention! The response includes \```python phrase in the beginning!

```python
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show

def run(data):
"""
This function analyzes the provided data and returns a descriptive summary.
Args:
data (dict): A dictionary containing the data to be analyzed.
It should have the same keys as <locals> and corresponding values.
Returns:
dict: A dictionary containing the results of the analysis.
'results' key contains the descriptive summary of the dataset.
'chart' key contains the plot of the distribution of fruit likes per person.
"""

df = data['df_table'] # Access the DataFrame from the data dictionary

# Group by person and count likes
person_likes = df.groupby('person')['likes'].sum()

# Create a Bokeh figure
p = figure(title="Distribution of Fruit Likes per Person",
x_axis_label="Person", y_axis_label="Number of Likes")

# Add a bar chart to the figure
p.vbar(x=person_likes.index, top=person_likes.values, width=0.5)

# Show the chart
show(p)

# Return the results and chart
return {
'results': None, # No specific results in this case
'chart': {
'chart_type': 'vbar',
'kwargs': {
'x': person_likes.index,
'top': person_likes.values,
'width': 0.5,
'title': "Distribution of Fruit Likes per Person",
'x_axis_label': "Person",
'y_axis_label': "Number of Likes"
}
}
}
```
28 changes: 28 additions & 0 deletions apps/streamlit_ds_chat/README_05_putting_all_togather.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Step 5. Put all components together

Now I have code for:
- running Streamlit.app
- uploading sample dataset
- basic chat-like streamlit application
- generating code in response to the user request
- means for remote code execution
- preformatted params to draw charts with Bokeh
(Bokeh exists for >10 years, Streamlit is quite young.
Probably, this is the reason why Gemini may generate Bokeh, but not Streamlit.chart code)

It is time now to put all components together.

Example requests:
- Draw number of likes for people and fruits

conda create -p ./env12 -c conda-forge coiled python=3.12 "numpy<2.0.0" streamlit google-cloud-aiplatform dask


Known issues:
- Bokeh is not needed for workers but it is still provided.
- StreamlitAPIException: Streamlit only supports Bokeh version 2.4.3, but you have version 3.5.0 installed. Please run `pip install --force-reinstall --no-deps bokeh==2.4.3` to install the correct version.
- start of coiled cluster takes 1-2 minutes, but this will be the same for any cluster start on any tool
- hard to acquire good code from code generation; it usually works, but the return value is not exactly what was requested
This complicates diagram generation on the client side.
- cluster disconnects and is killed after a set period of time; no reconnection implemented
- TODO needed some way to clear all interface
Empty file.
100 changes: 100 additions & 0 deletions apps/streamlit_ds_chat/chat_with_vertex_ai.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import streamlit as st

import vertexai
from vertexai.generative_models import GenerativeModel
import vertexai.preview.generative_models as generative_models
from google.oauth2.service_account import Credentials

from datetime import datetime


st.title("Stub of DS chat with Google Vertex AI")

########################################################################
# init chat model

chat = None

if "vertex_ai_model" not in st.session_state or chat is None:
st.session_state["vertex_ai_model"] = "gemini-1.5-flash-001"

credentials = Credentials.from_service_account_info(st.secrets["gcs_connections"])
vertexai.init(
project="pivotal-cable-428219-c5",
location="us-central1",
credentials=credentials,
)
model = GenerativeModel(
st.session_state[
"vertex_ai_model"
], # by default it will be "gemini-1.5-flash-001",
system_instruction=[
"You are an expert python engineer with data scientist background."
"If the user's question needs code generation, respond with: <CODE>"
"If the user's question needs drawing a chart, respond with: <CHART>"
],
)
chat = model.start_chat()

generation_config = {
"max_output_tokens": 8192,
"temperature": 1,
"top_p": 0.95,
}

safety_settings = {
generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

#####################################


if "messages" not in st.session_state:
st.session_state.messages = []

for message in st.session_state.messages:
with st.chat_message(message["role"]):
st.markdown(message["content"])

# streaming is not working with streamlit, exceptions inside `vertexai.generative_models import GenerativeModel`
USE_STREAMING = False

if prompt := st.chat_input("What is up?"):
st.session_state.messages.append({"role": "user", "content": prompt})
with st.chat_message("user"):
st.markdown(prompt)

with st.chat_message("assistant"):
if USE_STREAMING:
api_response_stream = chat.send_message(
[prompt],
generation_config=generation_config,
safety_settings=safety_settings,
stream=True,
)

def stream_data():
for api_response in api_response_stream:
chunk = api_response.candidates[0].content.parts[0]._raw_part.text
print(f"{datetime.now()}: {chunk}")
yield chunk

response = st.write_stream(stream_data)
else:
with st.spinner(
"Wait for the whole response (streaming not working with Streamlit)..."
):
api_response = chat.send_message(
[prompt],
generation_config=generation_config,
safety_settings=safety_settings,
stream=False,
)
response = api_response.candidates[0].content.parts[0]._raw_part.text
st.write(response)

print(("response:", response))
st.session_state.messages.append({"role": "assistant", "content": response})
Loading

0 comments on commit c6278b6

Please sign in to comment.