diff --git a/.idea/pet_project.iml b/.idea/pet_project.iml
index adedf1c..219e4f7 100644
--- a/.idea/pet_project.iml
+++ b/.idea/pet_project.iml
@@ -1,8 +1,13 @@
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/README.md b/apps/streamlit_ds_chat/cloud_coiled_io/README.md
new file mode 100644
index 0000000..0a791e4
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/README.md
@@ -0,0 +1,114 @@
+
+https://www.coiled.io/
+
+An absolutely fascinating service and library that manages cloud infrastructure for Python computations.
+
+```
+Code locally, run at scale
+Coiled creates ephemeral VMs that match your local environment exactly, spinning up clones in about a minute, copying ...
+
+Your code
+Your software libraries
+Your working files
+Your cloud credentials (securely)
+```
+
+Works with AWS, Google Cloud, Azure, etc.
+Runs Dask and pure Python.
+
+
+# How to install Coiled
+
+Just register on `coiled.io` and follow the instructions. Everything is very simple and works great.
+
+The only pitfall: all your installs should probably come from a single distribution system.
+If you use conda, install everything with `conda install`.
+If you use `pip`, install all packages with `pip install`.
+
+
+```bash
+pip3 install coiled "dask[complete]"
+coiled login
+```
+
+> Authentication successful 🎉
+> Credentials have been saved at /home/s-nechuiviter/.config/dask/coiled.yaml
+
+Run `coiled setup gcp` once to set up the connection to GCP.
+
+Coiled requires that all modules be `conda`-compatible, at least if you start from a conda environment.
+Alternatively, use pip/venv everywhere.
+
+# pip and venv
+
+To create a new virtual environment:
+
+```bash
+python -m venv .venv
+```
+
+Activate it:
+
+```bash
+source .venv/bin/activate
+```
+
+Check which interpreter is active:
+
+```bash
+which python
+```
+
+Deactivate:
+
+```bash
+deactivate
+```
+
+Upgrade pip:
+
+```bash
+python3 -m pip install --upgrade pip
+```
+
+What do we need?
+
+```bash
+# for streamlit
+pip3 install streamlit
+
+# for Google Vertex AI
+pip3 install --upgrade google-cloud-aiplatform "numpy<=2.0"
+```
+
+For Google account access: `sudo snap install google-cloud-cli --classic`
+
+# Results
+
+https://docs.coiled.io/user_guide/functions.html
+`executor_1.py` successfully executes code that already exists on the workers, but will not reload it after local edits; re-uploading the module with `client.upload_file` (see `working_dynamic_code_upload.py`) works around this.
+
+https://docs.coiled.io/user_guide/cli-jobs.html
+A single file can be executed from the terminal with `coiled run python file_name.py`. The job's stdout is streamed to your local stdout.
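+
+As a minimal sketch (the file name `hello_coiled.py` is made up for illustration):
+
+```bash
+echo 'print("hello from a Coiled VM")' > hello_coiled.py
+coiled run python hello_coiled.py
+```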
+
+## Notebooks
+
+### Files
+
+The `--sync` flag will optionally synchronize the files you have locally to the cloud machine, and copy any changes made on the cloud machine back to your local hard drive.
+
+This gives you the experience of just working on your local file system.
+
+To do this, you will need to install mutagen, which is available via brew:
+
+```bash
+brew install mutagen-io/mutagen/mutagen
+```
+
+And then use the `--sync` flag:
+
+```bash
+coiled notebook start --sync
+```
+
+Then you will get live synchronization between the remote server (/scratch/synced) and your local machine. This allows you to edit files on either machine and have the edits quickly show up on the other.
+
+Files larger than 1 GiB are not synced.
+If you would like to work in an entire directory that isn't synced, you can create one in another directory, for example at /scratch/not_synced/.
+
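+## Dynamic code upload
+
+A condensed sketch of the pattern from `working_dynamic_code_upload.py` in this directory (a sketch, not a definitive recipe; it assumes `test_func.py` sits next to the script):
+
+```python
+import coiled
+from dask.distributed import Client
+
+cluster = coiled.Cluster(name="my-cluster", n_workers=1)
+client = Client(cluster)
+
+# Re-upload after every local edit; importlib.reload() alone does not
+# send new code to the cluster.
+client.upload_file("test_func.py")
+
+import test_func
+
+future = client.submit(test_func.estimate_pi_simple, 100_000)
+print(future.result())
+```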
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/__init__.py b/apps/streamlit_ds_chat/cloud_coiled_io/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/executor_1.py b/apps/streamlit_ds_chat/cloud_coiled_io/executor_1.py
new file mode 100644
index 0000000..c57d214
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/executor_1.py
@@ -0,0 +1,26 @@
+import coiled
+import importlib
+
+cluster = coiled.Cluster(n_workers=2)  # spin up a small cluster with 2 workers
+client = cluster.get_client()
+
+# from test_func import estimate_pi_simple
+import test_func
+
+for i in range(10):
+    input("go?")
+    print("go")
+
+    # importlib.reload(test_func)  # reload doesn't send new code to the cluster
+    client.upload_file("test_func.py")
+
+    futures = []
+    for _ in range(100):  # submit 100 independent tasks
+        future = client.submit(test_func.estimate_pi_simple, 100_000)
+        futures.append(future)
+
+    results = client.gather(futures)
+
+    best = max(results)
+    avg = sum(results) / len(results)
+    print(f"{min(results)} : {avg} : {best}")
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/func_df.py b/apps/streamlit_ds_chat/cloud_coiled_io/func_df.py
new file mode 100644
index 0000000..d57efc8
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/func_df.py
@@ -0,0 +1,2 @@
+def run(df):
+    return df.mean().compute()
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/serverless_example.py b/apps/streamlit_ds_chat/cloud_coiled_io/serverless_example.py
new file mode 100644
index 0000000..e037574
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/serverless_example.py
@@ -0,0 +1,16 @@
+import coiled, random
+
+
+@coiled.function()
+def estimate_pi(n: int) -> float:
+    total = 0
+    for _ in range(n):
+        x = random.random()
+        y = random.random()
+        if x**2 + y**2 < 1:
+            total += 1
+    return total / n * 4
+
+
+pi = estimate_pi(100_000)
+print(pi)
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/test_code.py b/apps/streamlit_ds_chat/cloud_coiled_io/test_code.py
new file mode 100644
index 0000000..477d2ee
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/test_code.py
@@ -0,0 +1,11 @@
+import random
+
+
+def estimate_pi_simple(n: int) -> float:
+    total = 0
+    for _ in range(n):
+        x = random.random()
+        y = random.random()
+        if x**2 + y**2 < 1:
+            total += 1
+    return total / n * 4
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/test_func.py b/apps/streamlit_ds_chat/cloud_coiled_io/test_func.py
new file mode 100644
index 0000000..3ae4847
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/test_func.py
@@ -0,0 +1,16 @@
+import random
+
+
+def estimate_pi_simple(n: int) -> float:
+    total = 0
+    # return -1  # uncomment to verify that re-uploaded code reaches the workers
+    for _ in range(n):
+        x = random.random()
+        y = random.random()
+        if x**2 + y**2 < 1:
+            total += 1
+    return total / n * 4
+
+
+if __name__ == "__main__":
+    print(estimate_pi_simple(100_000))
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload.py b/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload.py
new file mode 100644
index 0000000..95d8035
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload.py
@@ -0,0 +1,35 @@
+import coiled
+from dask.distributed import Client
+
+# Step 1: Create or connect to a Coiled cluster
+cluster = coiled.Cluster(name="my-cluster", n_workers=1)
+# n_workers – Number of workers in this cluster.
+# Can either be an integer for a static number of workers,
+# or a list specifying the lower and upper bounds for adaptively scaling up/down workers
+# depending on the amount of work submitted.
+# Defaults to n_workers=[4, 20], which adaptively scales between 4 and 20 workers.
+client = Client(cluster)
+
+for i in range(10):
+    input("go?")
+    print("go")
+
+    # Step 2: Upload the module file to all workers
+    client.upload_file("test_func.py")
+
+    # Step 3: Verify the file upload
+    def check_file(filename):
+        import os
+
+        return os.path.exists(filename)
+
+    print(client.run(check_file, "test_func.py"))  # Should print True on all workers
+
+    # Step 4: Use the uploaded module in a distributed task
+    def use_uploaded_module():
+        import test_func
+
+        return test_func.estimate_pi_simple(100_000)
+
+    result = client.run(use_uploaded_module)
+    print(result)  # Should print a pi estimate from each worker
diff --git a/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload_dataframe.py b/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload_dataframe.py
new file mode 100644
index 0000000..345dd89
--- /dev/null
+++ b/apps/streamlit_ds_chat/cloud_coiled_io/working_dynamic_code_upload_dataframe.py
@@ -0,0 +1,54 @@
+import coiled
+from dask.distributed import Client
+
+
+import numpy as np
+import dask.dataframe as dd
+import pandas as pd
+
+pdf = pd.DataFrame(
+    {
+        "A": np.random.rand(10000),
+        "B": np.random.rand(10000),
+        "C": np.random.rand(10000),
+        "D": np.random.rand(10000),
+    }
+)
+df = dd.from_pandas(pdf, npartitions=4)
+
+print(f"Local result: {df.mean().compute()}")
+
+
+# Step 1: Create or connect to a Coiled cluster
+cluster = coiled.Cluster(name="my-cluster", n_workers=1)
+# n_workers – Number of workers in this cluster.
+# Can either be an integer for a static number of workers,
+# or a list specifying the lower and upper bounds for adaptively scaling up/down workers
+# depending on the amount of work submitted.
+# Defaults to n_workers=[4, 20], which adaptively scales between 4 and 20 workers.
+client = Client(cluster)
+
+for i in range(10):
+    input("go?")
+    print("go")
+
+    # Step 2: Upload the module file to all workers
+    client.upload_file("func_df.py")
+
+    # Step 3: Verify the file upload
+    def check_file(filename):
+        import os
+
+        return os.path.exists(filename)
+
+    print(client.run(check_file, "func_df.py"))  # Should print True on all workers
+
+    # Step 4: Use the uploaded module in a distributed task
+    def use_uploaded_module():
+        import func_df
+
+        return func_df.run(df)  # df is captured by the closure and shipped to each worker
+
+    result = client.run(use_uploaded_module)
+    print(type(result))
+    print(result)
diff --git a/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py b/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py
index d69e284..9b053bc 100644
--- a/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py
+++ b/apps/streamlit_ds_chat/experiments_standalone/google_vertex_ai.py
@@ -23,9 +23,7 @@
 from google.oauth2.service_account import Credentials
 
 
-creds = {
-
-}
+creds = {}
 
 credentials = Credentials.from_service_account_info(
     creds