diff --git a/apps/__init__.py b/apps/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/streamlit_ds_chat/chat_with_vertex_ai.py b/apps/streamlit_ds_chat/chat_with_vertex_ai.py new file mode 100644 index 0000000..af38eba --- /dev/null +++ b/apps/streamlit_ds_chat/chat_with_vertex_ai.py @@ -0,0 +1,98 @@ +import streamlit as st + +import vertexai +from vertexai.generative_models import GenerativeModel, Part +import vertexai.preview.generative_models as generative_models +from google.oauth2.service_account import Credentials + +from datetime import datetime + + +st.title("Stub of DS chat with Google Vertex AI") + +######################################################################## +# init chat model + +chat = None + +if "vertex_ai_model" not in st.session_state or chat is None: + st.session_state["vertex_ai_model"] = "gemini-1.5-flash-001" + + credentials = Credentials.from_service_account_info(st.secrets["gcs_connections"]) + vertexai.init( + project="pivotal-cable-428219-c5", + location="us-central1", + credentials=credentials, + ) + model = GenerativeModel( + st.session_state[ + "vertex_ai_model" + ], # by default it will be "gemini-1.5-flash-001", + system_instruction=[ + """You are an expert python engineer with data scientist background.""" + ], + ) + chat = model.start_chat() + +generation_config = { + "max_output_tokens": 8192, + "temperature": 1, + "top_p": 0.95, +} + +safety_settings = { + generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, +} + +##################################### + + +if 
"messages" not in st.session_state: + st.session_state.messages = [] + +for message in st.session_state.messages: + with st.chat_message(message["role"]): + st.markdown(message["content"]) + +# streaming is not working with streamlit, exceptions inside `vertexai.generative_models import GenerativeModel` +USE_STREAMING = False + +if prompt := st.chat_input("What is up?"): + st.session_state.messages.append({"role": "user", "content": prompt}) + with st.chat_message("user"): + st.markdown(prompt) + + with st.chat_message("assistant"): + if USE_STREAMING: + api_response_stream = chat.send_message( + [prompt], + generation_config=generation_config, + safety_settings=safety_settings, + stream=True, + ) + + def stream_data(): + for api_response in api_response_stream: + chunk = api_response.candidates[0].content.parts[0]._raw_part.text + print(f"{datetime.now()}: {chunk}") + yield chunk + + response = st.write_stream(stream_data) + else: + with st.spinner( + "Wait for the whole response (streaming not working with Streamlit)..." 
+ ): + api_response = chat.send_message( + [prompt], + generation_config=generation_config, + safety_settings=safety_settings, + stream=False, + ) + response = api_response.candidates[0].content.parts[0]._raw_part.text + st.write(response) + + print(("response:", response)) + st.session_state.messages.append({"role": "assistant", "content": response}) diff --git a/apps/streamlit_ds_chat/experiments_standalone/__init__.py b/apps/streamlit_ds_chat/experiments_standalone/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/streamlit_ds_chat/code_gen.py b/apps/streamlit_ds_chat/experiments_standalone/code_gen.py similarity index 90% rename from apps/streamlit_ds_chat/code_gen.py rename to apps/streamlit_ds_chat/experiments_standalone/code_gen.py index 1ceba57..d37f1cb 100644 --- a/apps/streamlit_ds_chat/code_gen.py +++ b/apps/streamlit_ds_chat/experiments_standalone/code_gen.py @@ -8,4 +8,4 @@ completion = model.generate(**tokenizer(text, return_tensors="pt")) -print(tokenizer.decode(completion[0])) \ No newline at end of file +print(tokenizer.decode(completion[0])) diff --git a/apps/streamlit_ds_chat/cohere.py b/apps/streamlit_ds_chat/experiments_standalone/cohere.py similarity index 100% rename from apps/streamlit_ds_chat/cohere.py rename to apps/streamlit_ds_chat/experiments_standalone/cohere.py diff --git a/apps/streamlit_ds_chat/experiments_standalone/conversational.py b/apps/streamlit_ds_chat/experiments_standalone/conversational.py new file mode 100644 index 0000000..ad30966 --- /dev/null +++ b/apps/streamlit_ds_chat/experiments_standalone/conversational.py @@ -0,0 +1,14 @@ +from transformers import pipeline + +generator = pipeline(model="HuggingFaceH4/zephyr-7b-beta") +# Zephyr-beta is a conversational model, so let's pass it a chat instead of a single string +result = generator( + [{"role": "user", "content": "What is the capital of France? 
Answer in one word."}], + do_sample=False, + max_new_tokens=2, +) + +# [{'generated_text': [{'role': 'user', 'content': 'What is the capital of France? Answer in one word.'}, +# {'role': 'assistant', 'content': 'Paris'}]}] + +print(result) diff --git a/apps/streamlit_ds_chat/experiments_standalone/gemini.py b/apps/streamlit_ds_chat/experiments_standalone/gemini.py new file mode 100644 index 0000000..f4b9c41 --- /dev/null +++ b/apps/streamlit_ds_chat/experiments_standalone/gemini.py @@ -0,0 +1,26 @@ +from pprint import pprint + +# Use a pipeline as a high-level helper +from transformers import pipeline + +oracle = pipeline("text2text-generation", model="describeai/gemini") +# - `"question-answering"`: will return a [`QuestionAnsweringPipeline`]. +# - `"text2text-generation"`: will return a [`Text2TextGenerationPipeline`]. + +# QuestionAnsweringPipeline + +# result = oracle(question="Write a short snippet of python code, which use pandas to read csv file into dataframe.", context="I am an expert Python engineer.") +result = oracle( + inputs=[ + "Write a short snippet of python code, which use pandas to read csv file into dataframe.", + "I am an expert Python engineer.", + ], + max_lenght=1000, + max_new_tokens=1000, +) + +pprint(result) + +# from transformers import AutoTokenizer, AutoModelForSeq2SeqLM +# tokenizer = AutoTokenizer.from_pretrained("describeai/gemini") +# model = AutoModelForSeq2SeqLM.from_pretrained("describeai/gemini") diff --git a/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py b/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py new file mode 100644 index 0000000..4d97fd8 --- /dev/null +++ b/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py @@ -0,0 +1,131 @@ +""" +pip install --upgrade google-cloud-aiplatform +gcloud auth application-default login + +Command 'gcloud' not found, but can be installed with: +sudo snap install google-cloud-cli # version 483.0.0, or +sudo snap install google-cloud-sdk # 
version 483.0.0 + +#################333 +Credentials saved to file: [/home/s-nechuiviter/.config/gcloud/application_default_credentials.json] + +These credentials will be used by any library that requests Application Default Credentials (ADC). +WARNING: +Cannot find a quota project to add to ADC. You might receive a "quota exceeded" or "API not enabled" error. Run $ gcloud auth application-default set-quota-project to add a quota project. +""" + +import base64 +import vertexai +from vertexai.generative_models import GenerativeModel, Part +import vertexai.preview.generative_models as generative_models + +# import streamlit as st + +from google.oauth2.service_account import Credentials + +creds = { + "type": "service_account", + "project_id": "pivotal-cable-428219-c5", + "private_key_id": "REDACTED-REVOKE-THIS-KEY", + "private_key": "-----BEGIN PRIVATE KEY-----\nREDACTED: a live service-account private key was committed here. Revoke/rotate this key in GCP IAM immediately (redaction does not remove it from VCS history) and load credentials from st.secrets or an environment variable instead of hard-coding them.\n-----END PRIVATE KEY-----\n", + "client_email": "ds-chat@pivotal-cable-428219-c5.iam.gserviceaccount.com", + "client_id": "112274147955673066299", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/ds-chat%40pivotal-cable-428219-c5.iam.gserviceaccount.com", + "universe_domain": "googleapis.com", +} + +credentials = Credentials.from_service_account_info( + creds +) # st.secrets["gcs_connections"]) + + +def multiturn_generate_content(): + vertexai.init( + project="pivotal-cable-428219-c5", + location="us-central1", + credentials=credentials, + ) + model = GenerativeModel( + "gemini-1.5-flash-001", + system_instruction=[ + """You are an expert python engineer with data scientist background.""" + ], + ) + chat = model.start_chat() + print( + chat.send_message( + [text1_1], + generation_config=generation_config, + safety_settings=safety_settings, + ) + ) + + +text1_1 = """Write a short snippet of python code, which use pandas to read csv file into dataframe. Return only code, nothing else. 
So I could copy the response, so I could copy it into python module.""" + +generation_config = { + "max_output_tokens": 8192, + "temperature": 1, + "top_p": 0.95, +} + +safety_settings = { + generative_models.HarmCategory.HARM_CATEGORY_HATE_SPEECH: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, + generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE, +} + +multiturn_generate_content() + +""" +Example results: + +candidates { + content { + role: "model" + parts { + text: "```python\nimport pandas as pd\ndf = pd.read_csv(\'your_file.csv\')\n```" + } + } + finish_reason: STOP + safety_ratings { + category: HARM_CATEGORY_HATE_SPEECH + probability: NEGLIGIBLE + probability_score: 0.06632687151432037 + severity: HARM_SEVERITY_NEGLIGIBLE + severity_score: 0.10017222911119461 + } + safety_ratings { + category: HARM_CATEGORY_DANGEROUS_CONTENT + probability: NEGLIGIBLE + probability_score: 0.20134170353412628 + severity: HARM_SEVERITY_NEGLIGIBLE + severity_score: 0.07599521428346634 + } + safety_ratings { + category: HARM_CATEGORY_HARASSMENT + probability: NEGLIGIBLE + probability_score: 0.193451926112175 + severity: HARM_SEVERITY_NEGLIGIBLE + severity_score: 0.09334687888622284 + } + safety_ratings { + category: HARM_CATEGORY_SEXUALLY_EXPLICIT + probability: NEGLIGIBLE + probability_score: 0.06816437095403671 + severity: HARM_SEVERITY_NEGLIGIBLE + severity_score: 0.05155818909406662 + } +} +usage_metadata { + prompt_token_count: 52 + candidates_token_count: 24 + total_token_count: 76 +} + + +""" diff --git a/apps/streamlit_ds_chat/hf_chat.py b/apps/streamlit_ds_chat/experiments_standalone/hf_chat.py similarity index 99% rename from 
apps/streamlit_ds_chat/hf_chat.py rename to apps/streamlit_ds_chat/experiments_standalone/hf_chat.py index dbca05e..f138de4 100644 --- a/apps/streamlit_ds_chat/hf_chat.py +++ b/apps/streamlit_ds_chat/experiments_standalone/hf_chat.py @@ -13,11 +13,13 @@ # Headers for authentication headers = {"Authorization": f"Bearer {API_TOKEN}"} + # Function to query the model def query(payload): response = requests.post(API_URL, headers=headers, json=payload) return response.json() + # Example input data = { # "inputs": "Can you explain the theory of relativity in simple terms?" diff --git a/apps/streamlit_ds_chat/hf_chat_ms.py b/apps/streamlit_ds_chat/experiments_standalone/hf_chat_ms.py similarity index 77% rename from apps/streamlit_ds_chat/hf_chat_ms.py rename to apps/streamlit_ds_chat/experiments_standalone/hf_chat_ms.py index 8297d0e..c9d481a 100644 --- a/apps/streamlit_ds_chat/hf_chat_ms.py +++ b/apps/streamlit_ds_chat/experiments_standalone/hf_chat_ms.py @@ -4,6 +4,7 @@ API_TOKEN = st.secrets["HF_API_KEY"] import requests + headers = {"Authorization": f"Bearer {API_TOKEN}"} API_URL = "https://api-inference.huggingface.co/models/microsoft/DialoGPT-large" # def query(payload): @@ -61,13 +62,28 @@ # Let's chat for 5 lines for step in range(5): # encode the new user input, add the eos_token and return a tensor in Pytorch - new_user_input_ids = tokenizer.encode(input(">> User:") + tokenizer.eos_token, return_tensors='pt') + new_user_input_ids = tokenizer.encode( + input(">> User:") + tokenizer.eos_token, return_tensors="pt" + ) # append the new user input tokens to the chat history - bot_input_ids = torch.cat([chat_history_ids, new_user_input_ids], dim=-1) if step > 0 else new_user_input_ids + bot_input_ids = ( + torch.cat([chat_history_ids, new_user_input_ids], dim=-1) + if step > 0 + else new_user_input_ids + ) # generated a response while limiting the total chat history to 1000 tokens, - chat_history_ids = model.generate(bot_input_ids, max_length=1000, 
pad_token_id=tokenizer.eos_token_id) + chat_history_ids = model.generate( + bot_input_ids, max_length=1000, pad_token_id=tokenizer.eos_token_id + ) # pretty print last ouput tokens from bot - print("DialoGPT: {}".format(tokenizer.decode(chat_history_ids[:, bot_input_ids.shape[-1]:][0], skip_special_tokens=True))) \ No newline at end of file + print( + "DialoGPT: {}".format( + tokenizer.decode( + chat_history_ids[:, bot_input_ids.shape[-1] :][0], + skip_special_tokens=True, + ) + ) + ) diff --git a/apps/streamlit_ds_chat/experiments_standalone/vertex_code_bison.py b/apps/streamlit_ds_chat/experiments_standalone/vertex_code_bison.py new file mode 100644 index 0000000..3d9bffb --- /dev/null +++ b/apps/streamlit_ds_chat/experiments_standalone/vertex_code_bison.py @@ -0,0 +1,12 @@ +import vertexai +from vertexai.language_models import CodeGenerationModel + +vertexai.init(project="pivotal-cable-428219-c5", location="us-central1") +parameters = {"candidate_count": 1, "max_output_tokens": 1024, "temperature": 0.9} +model = CodeGenerationModel.from_pretrained("code-bison@002") +response = model.predict( + prefix="""You are an expert python engineer with data scientist background. 
+ Write a short snippet of python code, which use pandas to read csv file into dataframe.""", + **parameters, +) +print(f"Response from Model: {response.text}") diff --git a/apps/streamlit_ds_chat/experiments_streamlit/__init__.py b/apps/streamlit_ds_chat/experiments_streamlit/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/apps/streamlit_ds_chat/chat-gpt.py b/apps/streamlit_ds_chat/experiments_streamlit/chat-gpt.py similarity index 100% rename from apps/streamlit_ds_chat/chat-gpt.py rename to apps/streamlit_ds_chat/experiments_streamlit/chat-gpt.py diff --git a/apps/streamlit_ds_chat/single_file_upload_example.py b/apps/streamlit_ds_chat/experiments_streamlit/single_file_upload_example.py similarity index 100% rename from apps/streamlit_ds_chat/single_file_upload_example.py rename to apps/streamlit_ds_chat/experiments_streamlit/single_file_upload_example.py diff --git a/apps/streamlit_ds_chat/streamlit_app.py b/apps/streamlit_ds_chat/experiments_streamlit/streamlit_app.py similarity index 51% rename from apps/streamlit_ds_chat/streamlit_app.py rename to apps/streamlit_ds_chat/experiments_streamlit/streamlit_app.py index 4a56af8..a352769 100644 --- a/apps/streamlit_ds_chat/streamlit_app.py +++ b/apps/streamlit_ds_chat/experiments_streamlit/streamlit_app.py @@ -1,22 +1,27 @@ import streamlit as st +import numpy as np +import altair as alt +import pandas as pd # the application layout and title st.set_page_config( - layout="centered", page_title="DS agent chat", page_icon=":shark:", + layout="centered", + page_title="DS agent chat", + page_icon=":shark:", initial_sidebar_state="expanded", menu_items={ - 'About': "This app should work as AI Data Scientist, how answers questions about the given data." - } + "About": "This app should work as AI Data Scientist, how answers questions about the given data." 
+ }, ) -st.write('Hello world!') +st.write("Hello world!") -st.header('st.button') +st.header("st.button") -if st.button('Say hello'): - st.write('Why hello there') +if st.button("Say hello"): + st.write("Why hello there") else: - st.write('Goodbye') + st.write("Goodbye") """ st.write allows writing text and arguments to the Streamlit app. @@ -30,32 +35,24 @@ And more (see st.write on API docs) """ -import numpy as np -import altair as alt -import pandas as pd - -df = pd.DataFrame({ - 'first column': [1, 2, 3, 4], - 'second column': [10, 20, 30, 40] -}) +df = pd.DataFrame({"first column": [1, 2, 3, 4], "second column": [10, 20, 30, 40]}) st.write(df) # Example 4 -st.write('Below is a DataFrame:', df, 'Above is a dataframe.') +st.write("Below is a DataFrame:", df, "Above is a dataframe.") # Example 5 -df2 = pd.DataFrame( - np.random.randn(200, 3), - columns=['a', 'b', 'c']) -c = alt.Chart(df2).mark_circle().encode( - x='a', y='b', size='c', color='c', tooltip=['a', 'b', 'c']) +df2 = pd.DataFrame(np.random.randn(200, 3), columns=["a", "b", "c"]) +c = ( + alt.Chart(df2) + .mark_circle() + .encode(x="a", y="b", size="c", color="c", tooltip=["a", "b", "c"]) +) st.write(c) # or -chart_data = pd.DataFrame( - np.random.randn(20, 3), - columns=['a', 'b', 'c']) +chart_data = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"]) st.line_chart(chart_data) diff --git a/apps/streamlit_ds_chat/readme.md b/apps/streamlit_ds_chat/readme.md index 900251b..22e2fb1 100644 --- a/apps/streamlit_ds_chat/readme.md +++ b/apps/streamlit_ds_chat/readme.md @@ -40,3 +40,15 @@ It's now time to install the `streamlit` library: conda install streamlit ``` + +## Other dependencies + +### For HuggingFace transformers + pip install transformers +conda install pytorch + +### For Google vertex AI +pip install --upgrade google-cloud-aiplatform +sudo snap install google-cloud-cli --classic +pip3 install black +conda install numpy"<=2.0"