Commit
Data Scientist Assistant POC based on:
- Google Vertex AI gemini-1.5-flash-001 model
- Dask for distributed execution
- Coiled for cloud-based execution on Google Cloud
- Streamlit for web UI and visualisation
- pandas for reading CSV data files
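As a rough sketch of how these pieces fit together (the file name, cluster size, and aggregation below are illustrative assumptions, not code from this commit):

```python
# Hypothetical wiring of the stack described above: pandas reads the CSV,
# Coiled starts a Dask cluster on Google Cloud, and work is submitted to it.
import coiled
import pandas as pd
from dask.distributed import Client

df = pd.read_csv("fruits.csv")          # placeholder sample dataset

cluster = coiled.Cluster(n_workers=1)   # cloud-backed Dask cluster
client = Client(cluster)                # Dask client connected to it

# Run a simple aggregation on a worker and fetch the result back.
future = client.submit(lambda data: data.groupby("person")["likes"].sum(), df)
print(future.result())
```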
@@ -0,0 +1,62 @@
```python
# Copyright (c) 2024. Sergii Nechuiviter
import requests


class APIException(Exception):
    """Raised when the backend call fails (defined here so the handler below works)."""


def print_help():
    print("You can type your message and press Enter to send it.")
    print("")
    print("If you start your message with the '`' character, you can use some commands.")
    print("You can type '`h' and press Enter to see the help.")
    print("You can type '`q' and press Enter to quit the application.")
    print("You can try to enter '`' and your command in natural language for other system commands.")
    print("")


def process_user_message(message: str):
    return (f'You said: {message}\n'
            f'  Ok...')


def process_command(message: str):
    return (f'Command: {message}\n'
            f'  Command is not supported yet.')


if __name__ == "__main__":
    # Print invitation message
    print("Hello, this is a simple chat application.")
    print("")
    print_help()

    # Main processing loop
    while True:
        # Get user input
        user_input = input("You: ")

        # Check if user wants to quit
        if user_input == '`q':
            break

        # Check if user wants to see help
        if user_input == "`h":
            print_help()
            continue

        try:
            # Check if user wants to send a command
            if user_input.startswith("`"):
                # Send the command to the server
                response = process_command(user_input)
            else:
                # Send the message to the server
                response = process_user_message(user_input)
        except APIException as e:
            print(e)
            continue  # skip printing a response that was never produced

        print(response)

    # Print a final message
    print("Thank you for using the chat application.")
    print("Goodbye!")
```
@@ -0,0 +1,3 @@
```
.streamlit/secrets.toml
env/
env12/
```
@@ -0,0 +1,2 @@
```toml
# here is an example file with secrets
# the real file must not be committed to git
```
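The Streamlit app further down reads `st.secrets["gcs_connections"]` and passes it to `Credentials.from_service_account_info`, so the real (uncommitted) `.streamlit/secrets.toml` presumably holds a service-account section roughly like the sketch below; every value here is a placeholder, not part of this commit:

```toml
# Hypothetical layout of .streamlit/secrets.toml -- placeholders only.
[gcs_connections]
type = "service_account"
project_id = "your-project-id"
private_key_id = "..."
private_key = "-----BEGIN PRIVATE KEY-----\n...\n-----END PRIVATE KEY-----\n"
client_email = "service-account@your-project-id.iam.gserviceaccount.com"
client_id = "..."
token_uri = "https://oauth2.googleapis.com/token"
```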
@@ -0,0 +1,68 @@
For testing the manager, the following questions are used:

<q>How many unique fruits are there in the dataset?</q>
<q>How many fruits does each person like?</q>
<q>Make a chart which shows how many fruits each person likes.</q>
<q>How many different kinds of fruit does each person like?</q>

### Options from Vertex AI itself
<q>What is the average number of fruits liked?</q>
<q>Who likes the most fruits?</q>
<q>Show the distribution of fruits liked by people.</q>

## Example output for the last question:

### Attention! The response includes the \```python fence at the beginning! (A sketch for stripping it follows the example below.)
```python
import pandas as pd
from bokeh.plotting import figure
from bokeh.io import show


def run(data):
    """
    This function analyzes the provided data and returns a descriptive summary.
    Args:
        data (dict): A dictionary containing the data to be analyzed.
            It should have the same keys as <locals> and corresponding values.
    Returns:
        dict: A dictionary containing the results of the analysis.
            'results' key contains the descriptive summary of the dataset.
            'chart' key contains the plot of the distribution of fruit likes per person.
    """

    df = data['df_table']  # Access the DataFrame from the data dictionary

    # Group by person and count likes
    person_likes = df.groupby('person')['likes'].sum()

    # Create a Bokeh figure
    p = figure(title="Distribution of Fruit Likes per Person",
               x_axis_label="Person", y_axis_label="Number of Likes")

    # Add a bar chart to the figure
    p.vbar(x=person_likes.index, top=person_likes.values, width=0.5)

    # Show the chart
    show(p)

    # Return the results and chart
    return {
        'results': None,  # No specific results in this case
        'chart': {
            'chart_type': 'vbar',
            'kwargs': {
                'x': person_likes.index,
                'top': person_likes.values,
                'width': 0.5,
                'title': "Distribution of Fruit Likes per Person",
                'x_axis_label': "Person",
                'y_axis_label': "Number of Likes"
            }
        }
    }
```
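Because the response arrives wrapped in that Markdown fence, it has to be stripped before the generated code can be executed. A minimal sketch of one way to do it (the helper name and exact handling are assumptions, not part of the commit):

```python
def extract_code(response_text: str) -> str:
    """Strip the leading and trailing Markdown code fences from a model response."""
    lines = response_text.strip().splitlines()
    if lines and lines[0].startswith("```"):
        lines = lines[1:]   # drop the opening fence line, e.g. "```python"
    if lines and lines[-1].strip() == "```":
        lines = lines[:-1]  # drop the closing fence line
    return "\n".join(lines)
```

The remaining text is then plain Python that defines `run(data)` and can be executed locally or shipped to a Dask worker.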
@@ -0,0 +1,28 @@
# Step 5. Put all components together

Now I have code for:
- running the Streamlit app
- uploading a sample dataset
- a basic chat-like Streamlit application
- generating code in response to the user request
- means for remote code execution
- preformatted params to draw charts with Bokeh
  (Bokeh has existed for more than 10 years, while Streamlit is quite young;
  this is probably why Gemini can generate Bokeh code but not Streamlit chart code)

It is now time to put all the components together.

Example requests:
- Draw the number of likes for people and fruits

Environment creation:

```
conda create -p ./env12 -c conda-forge coiled python=3.12 "numpy<2.0.0" streamlit google-cloud-aiplatform dask
```
Known issues:
- Bokeh is not needed on the workers, but it is still provided.
- StreamlitAPIException: Streamlit only supports Bokeh version 2.4.3, but you have version 3.5.0 installed. Please run `pip install --force-reinstall --no-deps bokeh==2.4.3` to install the correct version.
- Starting the Coiled cluster takes 1-2 minutes, but this would be the same for any cluster started with any tool.
- It is hard to get good code out of code generation; it usually works, but the return value is not exactly what was requested.
  This complicates chart generation on the client side (see the rendering sketch below).
- The cluster disconnects and is killed after a set period of time; no reconnection is implemented.
- TODO: some way to clear the whole interface is needed.
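As a concrete illustration of that client-side rendering, here is a minimal sketch of turning the `chart_type`/`kwargs` dict returned by the generated `run(data)` function into a Streamlit chart. `render_chart` is a hypothetical helper, not code from this commit, and `st.bokeh_chart` needs the Bokeh 2.4.3 pin mentioned above:

```python
import streamlit as st
from bokeh.plotting import figure


def render_chart(chart_spec: dict) -> None:
    """Render a spec like {'chart_type': 'vbar', 'kwargs': {...}} with Bokeh in Streamlit."""
    kwargs = dict(chart_spec["kwargs"])

    # Figure-level options go to figure(), not to the glyph call.
    p = figure(
        title=kwargs.pop("title", None),
        x_axis_label=kwargs.pop("x_axis_label", None),
        y_axis_label=kwargs.pop("y_axis_label", None),
        x_range=[str(x) for x in kwargs["x"]],  # categorical axis for person names
    )
    kwargs["x"] = [str(x) for x in kwargs["x"]]

    if chart_spec["chart_type"] == "vbar":
        p.vbar(**kwargs)

    st.bokeh_chart(p)  # requires bokeh==2.4.3 with current Streamlit
```

Splitting the figure-level options (title, axis labels) from the glyph arguments is the design choice here: the model returns them in one flat dict, but Bokeh expects them in different places.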
@@ -0,0 +1,100 @@
```python
import streamlit as st

import vertexai
from vertexai.generative_models import GenerativeModel
import vertexai.preview.generative_models as generative_models
from google.oauth2.service_account import Credentials

from datetime import datetime


st.title("Stub of DS chat with Google Vertex AI")

########################################################################
# init chat model

chat = None

if "vertex_ai_model" not in st.session_state or chat is None:
    st.session_state["vertex_ai_model"] = "gemini-1.5-flash-001"

    credentials = Credentials.from_service_account_info(st.secrets["gcs_connections"])
    vertexai.init(
        project="pivotal-cable-428219-c5",
        location="us-central1",
        credentials=credentials,
    )
    model = GenerativeModel(
        st.session_state[
            "vertex_ai_model"
        ],  # by default it will be "gemini-1.5-flash-001"
        system_instruction=[
            "You are an expert python engineer with data scientist background."
            "If the user's question needs code generation, respond with: <CODE>"
            "If the user's question needs drawing a chart, respond with: <CHART>"
        ],
    )
    chat = model.start_chat()

generation_config = {
    "max_output_tokens": 8192,
    "temperature": 1,
    "top_p": 0.95,
}

safety_settings = {
    generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
    generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}

#####################################


if "messages" not in st.session_state:
    st.session_state.messages = []

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# Streaming is not working with Streamlit: exceptions are raised inside
# `vertexai.generative_models.GenerativeModel`.
USE_STREAMING = False

if prompt := st.chat_input("What is up?"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    with st.chat_message("assistant"):
        if USE_STREAMING:
            api_response_stream = chat.send_message(
                [prompt],
                generation_config=generation_config,
                safety_settings=safety_settings,
                stream=True,
            )

            def stream_data():
                for api_response in api_response_stream:
                    chunk = api_response.candidates[0].content.parts[0]._raw_part.text
                    print(f"{datetime.now()}: {chunk}")
                    yield chunk

            response = st.write_stream(stream_data)
        else:
            with st.spinner(
                "Wait for the whole response (streaming not working with Streamlit)..."
            ):
                api_response = chat.send_message(
                    [prompt],
                    generation_config=generation_config,
                    safety_settings=safety_settings,
                    stream=False,
                )
                response = api_response.candidates[0].content.parts[0]._raw_part.text
            st.write(response)

    print(("response:", response))
    st.session_state.messages.append({"role": "assistant", "content": response})
```