diff --git a/.github/workflows/docs-workflow.yml b/.github/workflows/docs-workflow.yml index 38b57f7a..0227aa7b 100644 --- a/.github/workflows/docs-workflow.yml +++ b/.github/workflows/docs-workflow.yml @@ -66,4 +66,4 @@ jobs: steps: - name: Deploy to GitHub Pages id: deployment - uses: actions/deploy-pages@v4 + uses: actions/deploy-pages@v4 \ No newline at end of file diff --git a/.github/workflows/prepare-docs-for-release.yaml b/.github/workflows/prepare-docs-for-release.yaml new file mode 100644 index 00000000..ddea752c --- /dev/null +++ b/.github/workflows/prepare-docs-for-release.yaml @@ -0,0 +1,62 @@ +name: Prepare Docs for next release + +on: + workflow_dispatch: + inputs: + version: + description: "The next version we're going to release (must be in the form vX.Y.Z)" + required: true + default: 'v1.0.0' + release-branch: + description: "The name of the release branch where we'll create the new Docusaurus documentation versioned" + required: true + default: 'release-please--branches--main' + +jobs: + run-versioning-command: + runs-on: ubuntu-22.04 + defaults: + run: + working-directory: ./docs + steps: + - name: Checkout repository + uses: actions/checkout@v3 + with: + ref: '${{ github.event.inputs.release-branch }}' + + - name: Validate version input + id: validate_version + run: | + echo "VERSION: ${{ github.event.inputs.version }}" + if [[ ! "${{ github.event.inputs.version }}" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then + echo "Invalid version format. Must be 'vX.Y.Z'" + exit 1 + fi + + - name: Validate release-branch input + id: validate_release-branch + run: | + echo "RELEASE_BRANCH: ${{ github.event.inputs.release-branch }}" + if [[ "${{ github.event.inputs.release-branch }}" != "release-please--branches--main" ]]; then + echo "Invalid release branch. 
Must be 'release-please--branches--main'" + exit 1 + fi + + - uses: actions/setup-node@v4 + with: + node-version: 18 + cache: yarn + cache-dependency-path: docs/yarn.lock + + - name: Install dependencies + run: yarn install --frozen-lockfile + + - name: Run Docusaurus version command + run: npm run docusaurus docs:version ${{ github.event.inputs.version }} + + - name: Commit versioned docs change + uses: stefanzweifel/git-auto-commit-action@v5 + with: + branch: ${{ github.event.inputs.release-branch }} + commit_message: "docs: added ${{ github.event.inputs.version }} version to docs" \ No newline at end of file diff --git a/RELEASE.md b/RELEASE.md new file mode 100644 index 00000000..4a6d9731 --- /dev/null +++ b/RELEASE.md @@ -0,0 +1,39 @@ +# Release Process + +This document outlines the steps to release the project. Follow these guidelines to ensure a smooth and consistent release process. + +# Steps to Release + +## Create the release PR + +We use the [release-please](https://github.com/googleapis/release-please-action) GitHub Action to perform the release. Below you can find the configuration that we use in this repository: + +* [workflow manifest](.github/workflows/release-please.yaml) +* [action configuration](.github/release-config.json) +
+Manually invoke the release-please [workflow](https://github.com/radicalbit/radicalbit-ai-monitoring/actions/workflows/release-please.yaml). The outcome will be the creation of a release PR, that will include the updates to be released, along with the updated [CHANGELOG.md](./CHANGELOG.md) file. + +![manually-invoke-release-please](https://github.com/user-attachments/assets/d0245757-c9fc-44b0-bb59-8d91ec23ec1b) + +The new release will follow the [semver](https://semver.org) specification. Release-please will determine the upcoming version bump by parsing the git history of the `main` branch, looking for [Conventional Commits](https://www.conventionalcommits.org/) messages. 
+ +## (Optional) Update docs version +We use [Docusaurus](https://docusaurus.io) for our documentation. If needed, we must add a new [documentation version](https://docusaurus.io/docs/versioning) created after the same semver tag that will be used for the release. + +For example, if we were going to release the `v1.2.3` version of the application along updates to its documentation, we would have to release a `v1.2.3` documentation version that will be included into the docs site "version dropdown" + +![image](https://github.com/user-attachments/assets/e60a4108-8b7d-424a-b11e-a8e44437b258) + +To create a new documentation version, please manually invoke the corresponding [workflow](https://github.com/radicalbit/radicalbit-ai-monitoring/actions/workflows/prepare-docs-for-release.yaml) passing two params: + +* `version`: the documentation version that will be created. Must be the same version of the upcoming application release (`v1.2.3` if we were following the previous example) +* `release-branch`: must be the string `release-please--branches--main` as it's a conventional/default branch name that is used by release-please + +Please be aware that both inputs will be validated, so you could get errors depending on input passed. + +## Perform the release +To perform the release, you must again invoke the release-please [workflow](https://github.com/radicalbit/radicalbit-ai-monitoring/actions/workflows/release-please.yaml). Please carefully check that everything is in order before doing that, as this action will: + +* merge the release PR on `main` branch +* create/push a Git tag that follows the determined semver version i.e. `v1.2.3` +* invoke all github actions that will publish the newly tagged artifacts where necessary i.e. Docker Hub, PyPi, etc. etc. 
diff --git a/docs/docs/architecture.md b/docs/docs/architecture.md index 20a5485f..7697d222 100644 --- a/docs/docs/architecture.md +++ b/docs/docs/architecture.md @@ -23,5 +23,5 @@ It covers all the implemented APIs, starting from model creation and ending with ## SDK -To interact with API programmatically, a [_Python SDK_](/python-sdk) is provided. +To interact with API programmatically, a [_Python SDK_](python-sdk.md) is provided. The SDK implements all functionalities exposed via REST API. \ No newline at end of file diff --git a/docs/docusaurus.config.js b/docs/docusaurus.config.js index 8e46d530..36198b2d 100644 --- a/docs/docusaurus.config.js +++ b/docs/docusaurus.config.js @@ -6,9 +6,21 @@ import {themes as prismThemes} from 'prism-react-renderer'; -// x-release-please-start-version -const VERSION = '0.9.0'; -// x-release-please-end +import versions from './versions.json'; + +function getNextVersionName() { + return 'Develop'; +} + +function getLastVersion() { + return versions[0]; +} + +// By customizing this function it is possible to restrict +// the number of versions included +function getIncludedVersions() { + return ['current', ...versions]; +} /** @type {import('@docusaurus/types').Config} */ const config = { @@ -47,13 +59,13 @@ const config = { docs: { routeBasePath: '/', sidebarPath: './sidebars.js', - lastVersion: 'current', versions: { current: { - label: VERSION, - path: '/', + label: `${getNextVersionName()} 🚧`, }, }, + lastVersion: getLastVersion(), + onlyIncludeVersions: getIncludedVersions(), // Please change this to your repo. // Remove this to remove the "edit this page" links. 
// editUrl: '', diff --git a/docs/versioned_docs/version-v0.8.0/index.md b/docs/versioned_docs/version-v0.8.0/index.md new file mode 100644 index 00000000..092f254f --- /dev/null +++ b/docs/versioned_docs/version-v0.8.0/index.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +--- + +# Introduction +Let's discover the **Radicalbit AI Monitoring Platform** in less than 5 minutes. + +## Welcome! +This platform provides a comprehensive solution for monitoring and observing your Artificial Intelligence (AI) models in production. + +### Why Monitor AI Models? +While models often perform well during development and validation, their effectiveness can degrade over time in production due to various factors like data shifts or concept drift. The Radicalbit AI Monitor platform helps you proactively identify and address potential performance issues. + +### Key Functionalities +The platform provides comprehensive monitoring capabilities to ensure optimal performance of your AI models in production. It analyzes both your reference dataset (used for pre-production validation) and the current datasets in use, allowing you to put under control: +* **Data Quality:** evaluate the quality of your data, as high-quality data is crucial for maintaining optimal model performance. The platform analyzes both numerical and categorical features in your dataset to provide insights into + * *data distribution* + * *missing values* + * *target variable distribution* (for supervised learning). + +* **Model Quality Monitoring:** the platform provides a comprehensive suite of metrics specifically designed at the moment for binary classification models. These metrics include: + * *Accuracy, Precision, Recall, and F1:* These metrics provide different perspectives on how well your model is classifying positive and negative cases. 
+ * *False/True Negative/Positive Rates and Confusion Matrix:* These offer a detailed breakdown of your model's classification performance, including the number of correctly and incorrectly classified instances. + * *AUC-ROC and PR AUC:* These are performance curves that help visualize your model's ability to discriminate between positive and negative classes. +* **Model Drift Detection:** analyze model drift, which occurs when the underlying data distribution changes over time and can affect model accuracy. + +### Current Scope and Future Plans +This initial version focuses on binary classification models. Support for additional model types is planned for future releases. diff --git a/docs/versioned_docs/version-v0.8.0/user-guide/_category_.json b/docs/versioned_docs/version-v0.8.0/user-guide/_category_.json new file mode 100644 index 00000000..2294fcee --- /dev/null +++ b/docs/versioned_docs/version-v0.8.0/user-guide/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "User Guide", + "position": 1, + "link": { + "type": "generated-index", + "description": "Learn how to install and use the AI Monitoring Platform." + } +} diff --git a/docs/versioned_docs/version-v0.8.0/user-guide/installation.md b/docs/versioned_docs/version-v0.8.0/user-guide/installation.md new file mode 100644 index 00000000..8a6e7f7b --- /dev/null +++ b/docs/versioned_docs/version-v0.8.0/user-guide/installation.md @@ -0,0 +1,74 @@ +--- +sidebar_position: 1 +--- + +# Installation +The platform is composed of different modules +* **UI:** the front-end application +* **API:** the back-end application +* **Processing:** the Spark jobs +* **SDK:** the Python SDK + +## Development & Testing with Docker Compose +You can easily run the platform locally using Docker and the provided Docker Compose file. + +**Important:** This setup is intended for development and testing only, not for production environments. 
+ +### Prerequisites +To run the platform successfully, you'll need to have both Docker and Docker Compose installed on your machine. + +### Procedure +Once you've installed Docker and Docker Compose, clone the repository to your local machine: + +```bash +git clone git@github.com:radicalbit/radicalbit-ai-monitoring.git +``` + +This repository provides a Docker Compose file to set up the platform locally alongside a Rancher Kubernetes cluster. This allows you to deploy Spark jobs within the cluster. + +For streamlined development and testing, you can execute these steps to run the platform locally without the graphical user interface: + +```bash +docker compose up +``` + +If you want to access the platform's user interface (UI): + +```bash +docker compose --profile ui up +``` + +After all containers are up and running, you can access the platform at [http://localhost:5173](http://localhost:5173) to start using it. + +#### Accessing the Kubernetes Cluster +The platform creates a Kubernetes cluster for managing deployments. You can connect and interact with this cluster from your local machine using tools like Lens or `kubectl`. + +In the compose file is present a [k9s](https://k9scli.io/) container that can be used to monitor the k3s cluster. + +```bash +docker compose up k9s -d && docker attach radicalbit-ai-monitoring-k9s-1 +``` + +##### Using the kubeconfig File +A file named `kubeconfig.yaml` is automatically generated within the directory `./docker/k3s_data/kubeconfig/` when the platform starts. This file contains sensitive information used to authenticate with the Kubernetes cluster. + +##### Here's how to connect to the cluster: +1. Copy the `kubeconfig.yaml` file to a desired location on your local machine. +1. Edit the copied file and replace the server address `https://k3s:6443` with `https://127.0.0.1:6443`. This points the kubeconfig file to the local Kubernetes cluster running on your machine. +1. 
Use the modified `kubeconfig.yaml` file with tools like Lens or `kubectl` to interact with the cluster. + +#### Using Real AWS Credentials +In order to use a real AWS instead of MinIO is necessary to modify the environment variables of the api container, putting real `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` and `S3_BUCKET_NAME` and removing `S3_ENDPOINT_URL`. + +#### Teardown +To completely clean up the environment we can use [docker compose](https://docs.docker.com/reference/cli/docker/compose/down/) + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans +``` + +To remove everything including container images: + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans --rmi all +``` diff --git a/docs/versioned_docs/version-v0.8.0/user-guide/quickstart.md b/docs/versioned_docs/version-v0.8.0/user-guide/quickstart.md new file mode 100644 index 00000000..d1f54d0f --- /dev/null +++ b/docs/versioned_docs/version-v0.8.0/user-guide/quickstart.md @@ -0,0 +1,119 @@ +--- +sidebar_position: 2 +--- + +# Quickstart +This guide provides instructions on how to monitor an AI solution through the Radicalbit AI Platform. + +## Monitor an LLM for a Binary Classification +The use case we present here involves the usage of an LLM (powered with RAG) capable of generating an answer to the user's questions in a chatbot for banking services. + +### Introduction + +The model returns two different outputs: + +1. `model_answer`: the answer generated by retrieving similar information +1. `prediction`: a boolean value which indicates if the user's question is pertinent to banking topics. + +The reason for this information lies in the fact that by discriminating the textual data into categories, the bank will be able to use only the information related to banking services, to fine-tune the model in a second moment and improve its performance. 
+ +### Model Creation +To use the radicalbit-ai-monitoring platform, you need first to prepare your data, which should include the following information: + +1. **Features:** The list of variables used by the model to produce the inference. They may include also meta-data (timestamp, log) +2. **Outputs:** The fields returned by the model after the inference. Usually, they are probabilities, a predicted class or number in the case of the classic ML and a generated text in the case of LLMs. +3. **Target**: the ground truth used to validate predictions and evaluate the model quality + +This tutorial involves batch monitoring, including the situation where you have some historical data that you want to compare over time. + +The **reference dataset** is the name we use to indicate the batch that contains the information we desire to have constantly (or we expect to have) over time. It could be the training set or a chunck of production data where the model has had good performances. + +The **current dataset** is the name we use to indicate the batch that contains fresh information, for example, the most recent production data, predictions or ground truths. We expect that it has the same characteristics (statistical properties) as the reference, which indicates that the model has the performance we expect and there is no drift in the data. 
+ +What follows is an example of data we will use in this tutorial: + +| timestamp | user_id | question | model_answer | ground_truth | prediction | gender | age | device | days_as_customer | +|-----------|--------:|:---------|:-------------|-------------:|-----------:|:-------|----:|:-------|-----------------:| +|2024-01-11 08:08:00|user_24|What documents do I need to open a business account?|You need a valid ID, proof of address, and business registration documents.|1|1|M|44|smartphone|194| +|2024-01-10 03:08:00|user_27|What are the benefits of a premium account?|The benefits of a premium account include higher interest rates and exclusive customer support.|1|1|F|29|tablet|258| +2024-01-11 12:22:00|user_56|How can I check my credit score?|You can check your credit score for free through our mobile app.|1|1|F|44|smartphone|51| +2024-01-10 04:57:00|user_58|Are there any fees for using ATMs?|ATM usage is free of charge at all locations.|1|1|M|50|smartphone|197| + +* **timestamp:** it is the time in which the user asks the question +* **user_id:** it is the user identification +* **question:** it is the question asked by the user to the chatbot +* **model_answer:** it is the answer generated by the model +* **ground_truth:** it is the real label where 1 stands for an answer related to banking services and 0 stands for a different topic +* **prediction:** it is the judgment produced by the model about the topic of the answer +* **gender:** it is the user gender +* **age:** it is the user age +* **device:** it is the device used in the current session +* **days_as_customer:** it indicates how many days the user is a customer + +### Create the Model +To create a new model, navigate to the *Models* section and click the plus (+) icon. + +![Alt text](/img/quickstart/empty-models-list.png "Empty Models List") + +The platform should open a modal to allow users to create a new model. 
+ +![Alt text](/img/quickstart/new-model-modal-s1.png "New Model") + +This modal prompts you to enter the following details: +* **Name:** the name of the model +* **Model type:** the type of the model, in the current platform version there is only available `Binary Classification` +* **Data type:** it explains the data type used by the model +* **Granularity:** the window used to calculate aggregated metrics +* **Framework:** an optional field to describe the frameworks used by the model +* **Algorithm:** an optional field to explain the algorithm used by the model + +Please enter the following details and click on the *Next* button: +* **Name:** `LLM-binary-classification` +* **Model type:** `Binary Classification` +* **Data type:** `Tabular` +* **Granularity:** `Hour` + +To infer the model schema you've to upload a sample dataset. Please download and use [this reference Comma-Separated Values file](https://github.com/radicalbit/radicalbit-ai-monitoring/blob/9f21c19e97a9dfa51c1bf17002fcdd76d5a5f304/examples/data/df_reference.csv) and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s2.png "Upload CSV file") + +Once you've defined the model schema, select the output fields from the variables. Choose `model_answer` and `prediction`, move them to the right, and click on the *Next* button. 
+ +![Alt text](/img/quickstart/new-model-modal-s3.png "Output fields selection") + +Finally, you need to select and associate the following fields: +* **Target:** the target field or ground truth +* **Timestamp:** the field containing the timestamp value +* **Prediction:** the actual prediction +* **Probability:** the probability score associated with the prediction + +Match the following values to their corresponding fields: +* **Target:** `ground_truth` +* **Timestamp:** `timestamp` +* **Prediction:** `prediction` +* **Probability:** leave empty + +![Alt text](/img/quickstart/new-model-modal-s4.png "Identify ground truth (target), timestamp, prediction, and probability fields") + +Click the *Save Model* button to finalize model creation. + +### Model details +Entering into the model details, we can see three different main section: + +* **Overview:** this section provides information about the dataset and its schema. You can view a summary, explore the variables (features and ground truth) and the output fields for your model. +* **Reference:** the Reference section displays performance metrics calculated on the imported reference data. +* **Current:** the Current section displays metrics for any user-uploaded data sets you've added in addition to the reference dataset. + +#### Import Reference Dataset +To calculate metrics for your reference dataset, import a CSV file. + +![Alt text](/img/quickstart/import-reference.png "Import Reference") + +Once you initiate the process, the platform will run background jobs to calculate the metrics. + +#### Import Current Dataset +To calculate metrics for your current dataset, import a CSV file. + +![Alt text](/img/quickstart/import-current.png "Import Current") + +Once you initiate the process, the platform will run background jobs to calculate the metrics. 
diff --git a/docs/versioned_docs/version-v0.8.1/index.md b/docs/versioned_docs/version-v0.8.1/index.md new file mode 100644 index 00000000..092f254f --- /dev/null +++ b/docs/versioned_docs/version-v0.8.1/index.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +--- + +# Introduction +Let's discover the **Radicalbit AI Monitoring Platform** in less than 5 minutes. + +## Welcome! +This platform provides a comprehensive solution for monitoring and observing your Artificial Intelligence (AI) models in production. + +### Why Monitor AI Models? +While models often perform well during development and validation, their effectiveness can degrade over time in production due to various factors like data shifts or concept drift. The Radicalbit AI Monitor platform helps you proactively identify and address potential performance issues. + +### Key Functionalities +The platform provides comprehensive monitoring capabilities to ensure optimal performance of your AI models in production. It analyzes both your reference dataset (used for pre-production validation) and the current datasets in use, allowing you to put under control: +* **Data Quality:** evaluate the quality of your data, as high-quality data is crucial for maintaining optimal model performance. The platform analyzes both numerical and categorical features in your dataset to provide insights into + * *data distribution* + * *missing values* + * *target variable distribution* (for supervised learning). + +* **Model Quality Monitoring:** the platform provides a comprehensive suite of metrics specifically designed at the moment for binary classification models. These metrics include: + * *Accuracy, Precision, Recall, and F1:* These metrics provide different perspectives on how well your model is classifying positive and negative cases. 
+ * *False/True Negative/Positive Rates and Confusion Matrix:* These offer a detailed breakdown of your model's classification performance, including the number of correctly and incorrectly classified instances. + * *AUC-ROC and PR AUC:* These are performance curves that help visualize your model's ability to discriminate between positive and negative classes. +* **Model Drift Detection:** analyze model drift, which occurs when the underlying data distribution changes over time and can affect model accuracy. + +### Current Scope and Future Plans +This initial version focuses on binary classification models. Support for additional model types is planned for future releases. diff --git a/docs/versioned_docs/version-v0.8.1/user-guide/_category_.json b/docs/versioned_docs/version-v0.8.1/user-guide/_category_.json new file mode 100644 index 00000000..2294fcee --- /dev/null +++ b/docs/versioned_docs/version-v0.8.1/user-guide/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "User Guide", + "position": 1, + "link": { + "type": "generated-index", + "description": "Learn how to install and use the AI Monitoring Platform." + } +} diff --git a/docs/versioned_docs/version-v0.8.1/user-guide/installation.md b/docs/versioned_docs/version-v0.8.1/user-guide/installation.md new file mode 100644 index 00000000..27e55eb4 --- /dev/null +++ b/docs/versioned_docs/version-v0.8.1/user-guide/installation.md @@ -0,0 +1,74 @@ +--- +sidebar_position: 1 +--- + +# Installation +The platform is composed of different modules +* **UI:** the front-end application +* **API:** the back-end application +* **Processing:** the Spark jobs +* **SDK:** the Python SDK + +## Development & Testing with Docker Compose +You can easily run the platform locally using Docker and the provided Docker Compose file. + +**Important:** This setup is intended for development and testing only, not for production environments. 
+ +### Prerequisites +To run the platform successfully, you'll need to have both Docker and Docker Compose installed on your machine. + +### Procedure +Once you've installed Docker and Docker Compose, clone the repository to your local machine: + +```bash +git clone git@github.com:radicalbit/radicalbit-ai-monitoring.git +``` + +This repository provides a Docker Compose file to set up the platform locally alongside a K3s Kubernetes cluster. This allows you to deploy Spark jobs within the cluster. + +For streamlined development and testing, you can execute these steps to run the platform locally without the graphical user interface: + +```bash +docker compose up +``` + +If you want to access the platform's user interface (UI): + +```bash +docker compose --profile ui up +``` + +After all containers are up and running, you can access the platform at [http://localhost:5173](http://localhost:5173) to start using it. + +#### Accessing the Kubernetes Cluster +The platform creates a Kubernetes cluster for managing deployments. You can connect and interact with this cluster from your local machine using tools like Lens or `kubectl`. + +In the compose file is present a [k9s](https://k9scli.io/) container that can be used to monitor the K3s cluster. + +```bash +docker compose up k9s -d && docker attach radicalbit-ai-monitoring-k9s-1 +``` + +##### Using the kubeconfig File +A file named `kubeconfig.yaml` is automatically generated within the directory `./docker/k3s_data/kubeconfig/` when the platform starts. This file contains sensitive information used to authenticate with the Kubernetes cluster. + +##### Here's how to connect to the cluster: +1. Copy the `kubeconfig.yaml` file to a desired location on your local machine. +1. Edit the copied file and replace the server address `https://k3s:6443` with `https://127.0.0.1:6443`. This points the kubeconfig file to the local Kubernetes cluster running on your machine. +1. 
Use the modified `kubeconfig.yaml` file with tools like Lens or `kubectl` to interact with the cluster. + +#### Using Real AWS Credentials +In order to use a real AWS instead of MinIO is necessary to modify the environment variables of the api container, putting real `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` and `S3_BUCKET_NAME` and removing `S3_ENDPOINT_URL`. + +#### Teardown +To completely clean up the environment we can use [docker compose](https://docs.docker.com/reference/cli/docker/compose/down/) + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans +``` + +To remove everything including container images: + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans --rmi all +``` diff --git a/docs/versioned_docs/version-v0.8.1/user-guide/quickstart.md b/docs/versioned_docs/version-v0.8.1/user-guide/quickstart.md new file mode 100644 index 00000000..beda0112 --- /dev/null +++ b/docs/versioned_docs/version-v0.8.1/user-guide/quickstart.md @@ -0,0 +1,168 @@ +--- +sidebar_position: 2 +--- + +# Quickstart +This guide provides instructions on how to monitor an AI solution through the Radicalbit AI Platform. + +## Monitor an LLM for a Binary Classification +The use case we present here involves the usage of an LLM (powered with RAG) capable of generating an answer to the user's questions in a chatbot for banking services. + +### Introduction + +The model returns two different outputs: + +1. `model_answer`: the answer generated by retrieving similar information +1. `prediction`: a boolean value which indicates if the user's question is pertinent to banking topics. + +The reason for this information lies in the fact that by discriminating the textual data into categories, the bank will be able to use only the information related to banking services, to fine-tune the model in a second moment and improve its performance. 
+ +### Model Creation +To use the radicalbit-ai-monitoring platform, you need first to prepare your data, which should include the following information: + +1. **Features:** The list of variables used by the model to produce the inference. They may include also meta-data (timestamp, log) +2. **Outputs:** The fields returned by the model after the inference. Usually, they are probabilities, a predicted class or number in the case of the classic ML and a generated text in the case of LLMs. +3. **Target**: the ground truth used to validate predictions and evaluate the model quality + +This tutorial involves batch monitoring, including the situation where you have some historical data that you want to compare over time. + +The **reference dataset** is the name we use to indicate the batch that contains the information we desire to have constant (or we expect to have) over time. It could be the training set or a chunk of production data where the model has had good performances. + +The **current dataset** is the name we use to indicate the batch that contains fresh information, for example, the most recent production data, predictions or ground truths. We expect that it has the same characteristics (statistical properties) as the reference, which indicates that the model has the performance we expect and there is no drift in the data. 
+ +What follows is an example of data we will use in this tutorial: + +| timestamp | user_id | question | model_answer | ground_truth | prediction | gender | age | device | days_as_customer | +|-----------|--------:|:---------|:-------------|-------------:|-----------:|:-------|----:|:-------|-----------------:| +|2024-01-11 08:08:00|user_24|What documents do I need to open a business account?|You need a valid ID, proof of address, and business registration documents.|1|1|M|44|smartphone|194| +|2024-01-10 03:08:00|user_27|What are the benefits of a premium account?|The benefits of a premium account include higher interest rates and exclusive customer support.|1|1|F|29|tablet|258| +2024-01-11 12:22:00|user_56|How can I check my credit score?|You can check your credit score for free through our mobile app.|1|1|F|44|smartphone|51| +2024-01-10 04:57:00|user_58|Are there any fees for using ATMs?|ATM usage is free of charge at all locations.|1|1|M|50|smartphone|197| + +* **timestamp:** it is the time in which the user asks the question +* **user_id:** it is the user identification +* **question:** it is the question asked by the user to the chatbot +* **model_answer:** it is the answer generated by the model +* **ground_truth:** it is the real label where 1 stands for an answer related to banking services and 0 stands for a different topic +* **prediction:** it is the judgment produced by the model about the topic of the answer +* **gender:** it is the user gender +* **age:** it is the user age +* **device:** it is the device used in the current session +* **days_as_customer:** it indicates how many days the user is a customer + +### Create the Model +To create a new model, navigate to the *Models* section and click the plus (+) icon. + +![Alt text](/img/quickstart/empty-models-list.png "Empty Models List") + +The platform should open a modal to allow users to create a new model. 
+ +![Alt text](/img/quickstart/new-model-modal-s1.png "New Model") + +This modal prompts you to enter the following details: +* **Name:** the name of the model +* **Model type:** the type of the model, in the current platform version there is only available `Binary Classification` +* **Data type:** it explains the data type used by the model +* **Granularity:** the window used to calculate aggregated metrics +* **Framework:** an optional field to describe the frameworks used by the model +* **Algorithm:** an optional field to explain the algorithm used by the model + +Please enter the following details and click on the *Next* button: +* **Name:** `LLM-binary-classification` +* **Model type:** `Binary Classification` +* **Data type:** `Tabular` +* **Granularity:** `Hour` + +To infer the model schema you've to upload a sample dataset. Please download and use [this reference Comma-Separated Values file](/datasets/df_10lines.csv) and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s2.png "Upload CSV file") + +Once you've defined the model schema, select the output fields from the variables. Choose `model_answer` and `prediction`, move them to the right, and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s3.png "Output fields selection") + +Finally, you need to select and associate the following fields: +* **Target:** the target field or ground truth +* **Timestamp:** the field containing the timestamp value +* **Prediction:** the actual prediction +* **Probability:** the probability score associated with the prediction + +Match the following values to their corresponding fields: +* **Target:** `ground_truth` +* **Timestamp:** `timestamp` +* **Prediction:** `prediction` +* **Probability:** leave empty + +![Alt text](/img/quickstart/new-model-modal-s4.png "Identify ground truth (target), timestamp, prediction, and probability fields") + +Click the *Save Model* button to finalize model creation. 
+
+### Model details
+Entering into the model details, we can see three different main sections:
+
+* **Overview:** this section provides information about the dataset and its schema. You can view a summary, explore the variables (features and ground truth) and the output fields for your model.
+* **Reference:** the Reference section displays performance metrics calculated on the imported reference data.
+* **Current:** the Current section displays metrics for any user-uploaded data sets you've added in addition to the reference dataset.
+
+### Import Reference Dataset
+To calculate metrics for your reference dataset, [import this CSV file, containing the reference](/datasets/df_reference.csv).
+
+![Alt text](/img/quickstart/import-reference.png "Import Reference")
+
+Once you initiate the process, the platform will run background jobs to calculate the metrics.
+
+After processing, you will be able to see the following information:
+* in the **Overview** section a summary of the column names and types will appear.
+* in the **Reference** section a statistical summary of your data will be computed.
+
+Within the **Reference** section, you can browse between 3 different tabs:
+* **Data Quality:** This tab contains statistical information and charts of your reference dataset, including the
+number of rows and your data distribution through bar plots (for categorical fields) and histograms (for numerical
+fields). Additionally, to make comparisons and analysis easier, you can choose the order in which to arrange your charts.
+
+![Alt text](/img/quickstart/reference_data_quality.png "Import Reference")
+
+* **Model Quality:** This tab provides detailed information about model performance, which we can compute since you
+provide both predictions and ground truths. These metrics (in this tutorial related to a binary classification task)
+are computed by aggregating the whole reference dataset, offering an overall expression of your model quality for this
+specific reference. 
+ +![Alt text](/img/quickstart/reference_model_quality.png "Import Reference") +* **Import:** This tab displays all the useful information about the storage of the reference dataset. + +![Alt text](/img/quickstart/reference_import.png "Import Reference") + +### Import Current Dataset +Once your reference data has been imported and all the metrics and information about it are available, you can move to +the **Current** section, in which you can import [the CSV file containing your current dataset](/datasets/df_current1.csv). + + +![Alt text](/img/quickstart/import-current.png "Import Current") + +This action will unlock all the tools you need to compare metrics between the reference and current files. + +In details, you can browse between 4 tabs: + +* **Data Quality:** Here, the same metrics you have in the Reference section will also be computed for the current +dataset. All the information will be presented side by side so that you can compare and analyze any differences. +Throughout the platform, the blue color stands for the current dataset while the gray stands for the reference dataset, +allowing you to easily identify which dataset a specific metric belongs to. + +![Alt text](/img/quickstart/current_data_quality.png "Import Reference") + +* **Model Quality:** In this tab, you can compare the model performance between the reference and current datasets. +In addition to what you see in the reference model quality, here you can track the metric values over time by +aggregating them with a specific granularity (the same you've defined in the Model Creation). + +![Alt text](/img/quickstart/current_model_quality.png "Import Reference") + +* **Model Drift:** This tab provides information about potential changes in the data distributions, known as drift, +which can lead to model degradation. The drift is detected according to the field type: Chi-square test for categorical +variables and Two-Samples Kolmogorov-Smirnov test for numerical ones. 
+ +![Alt text](/img/quickstart/current_model_drift.png "Import Reference") + +* **Import:** Here you can list all the current dataset imported over time and switch among them. By default, the last +current dataset will be shown. + +![Alt text](/img/quickstart/current_import.png "Import Reference") diff --git a/docs/versioned_docs/version-v0.8.2/index.md b/docs/versioned_docs/version-v0.8.2/index.md new file mode 100644 index 00000000..092f254f --- /dev/null +++ b/docs/versioned_docs/version-v0.8.2/index.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +--- + +# Introduction +Let's discover the **Radicalbit AI Monitoring Platform** in less than 5 minutes. + +## Welcome! +This platform provides a comprehensive solution for monitoring and observing your Artificial Intelligence (AI) models in production. + +### Why Monitor AI Models? +While models often perform well during development and validation, their effectiveness can degrade over time in production due to various factors like data shifts or concept drift. The Radicalbit AI Monitor platform helps you proactively identify and address potential performance issues. + +### Key Functionalities +The platform provides comprehensive monitoring capabilities to ensure optimal performance of your AI models in production. It analyzes both your reference dataset (used for pre-production validation) and the current datasets in use, allowing you to put under control: +* **Data Quality:** evaluate the quality of your data, as high-quality data is crucial for maintaining optimal model performance. The platform analyzes both numerical and categorical features in your dataset to provide insights into + * *data distribution* + * *missing values* + * *target variable distribution* (for supervised learning). + +* **Model Quality Monitoring:** the platform provides a comprehensive suite of metrics specifically designed at the moment for binary classification models. 
These metrics include: + * *Accuracy, Precision, Recall, and F1:* These metrics provide different perspectives on how well your model is classifying positive and negative cases. + * *False/True Negative/Positive Rates and Confusion Matrix:* These offer a detailed breakdown of your model's classification performance, including the number of correctly and incorrectly classified instances. + * *AUC-ROC and PR AUC:* These are performance curves that help visualize your model's ability to discriminate between positive and negative classes. +* **Model Drift Detection:** analyze model drift, which occurs when the underlying data distribution changes over time and can affect model accuracy. + +### Current Scope and Future Plans +This initial version focuses on binary classification models. Support for additional model types is planned for future releases. diff --git a/docs/versioned_docs/version-v0.8.2/user-guide/_category_.json b/docs/versioned_docs/version-v0.8.2/user-guide/_category_.json new file mode 100644 index 00000000..2294fcee --- /dev/null +++ b/docs/versioned_docs/version-v0.8.2/user-guide/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "User Guide", + "position": 1, + "link": { + "type": "generated-index", + "description": "Learn how to install and use the AI Monitoring Platform." + } +} diff --git a/docs/versioned_docs/version-v0.8.2/user-guide/installation.md b/docs/versioned_docs/version-v0.8.2/user-guide/installation.md new file mode 100644 index 00000000..27e55eb4 --- /dev/null +++ b/docs/versioned_docs/version-v0.8.2/user-guide/installation.md @@ -0,0 +1,74 @@ +--- +sidebar_position: 1 +--- + +# Installation +The platform is composed of different modules +* **UI:** the front-end application +* **API:** the back-end application +* **Processing:** the Spark jobs +* **SDK:** the Python SDK + +## Development & Testing with Docker Compose +You can easily run the platform locally using Docker and the provided Docker Compose file. 
+ +**Important:** This setup is intended for development and testing only, not for production environments. + +### Prerequisites +To run the platform successfully, you'll need to have both Docker and Docker Compose installed on your machine. + +### Procedure +Once you've installed Docker and Docker Compose, clone the repository to your local machine: + +```bash +git clone git@github.com:radicalbit/radicalbit-ai-monitoring.git +``` + +This repository provides a Docker Compose file to set up the platform locally alongside a K3s Kubernetes cluster. This allows you to deploy Spark jobs within the cluster. + +For streamlined development and testing, you can execute these steps to run the platform locally without the graphical user interface: + +```bash +docker compose up +``` + +If you want to access the platform's user interface (UI): + +```bash +docker compose --profile ui up +``` + +After all containers are up and running, you can access the platform at [http://localhost:5173](http://localhost:5173) to start using it. + +#### Accessing the Kubernetes Cluster +The platform creates a Kubernetes cluster for managing deployments. You can connect and interact with this cluster from your local machine using tools like Lens or `kubectl`. + +In the compose file is present a [k9s](https://k9scli.io/) container that can be used to monitor the K3s cluster. + +```bash +docker compose up k9s -d && docker attach radicalbit-ai-monitoring-k9s-1 +``` + +##### Using the kubeconfig File +A file named `kubeconfig.yaml` is automatically generated within the directory `./docker/k3s_data/kubeconfig/` when the platform starts. This file contains sensitive information used to authenticate with the Kubernetes cluster. + +##### Here's how to connect to the cluster: +1. Copy the `kubeconfig.yaml` file to a desired location on your local machine. +1. Edit the copied file and replace the server address `https://k3s:6443` with `https://127.0.0.1:6443`. 
This points the kubeconfig file to the local Kubernetes cluster running on your machine. +1. Use the modified `kubeconfig.yaml` file with tools like Lens or `kubectl` to interact with the cluster. + +#### Using Real AWS Credentials +In order to use a real AWS instead of MinIO is necessary to modify the environment variables of the api container, putting real `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` and `S3_BUCKET_NAME` and removing `S3_ENDPOINT_URL`. + +#### Teardown +To completely clean up the environment we can use [docker compose](https://docs.docker.com/reference/cli/docker/compose/down/) + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans +``` + +To remove everything including container images: + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans --rmi all +``` diff --git a/docs/versioned_docs/version-v0.8.2/user-guide/quickstart.md b/docs/versioned_docs/version-v0.8.2/user-guide/quickstart.md new file mode 100644 index 00000000..beda0112 --- /dev/null +++ b/docs/versioned_docs/version-v0.8.2/user-guide/quickstart.md @@ -0,0 +1,168 @@ +--- +sidebar_position: 2 +--- + +# Quickstart +This guide provides instructions on how to monitor an AI solution through the Radicalbit AI Platform. + +## Monitor an LLM for a Binary Classification +The use case we present here involves the usage of an LLM (powered with RAG) capable of generating an answer to the user's questions in a chatbot for banking services. + +### Introduction + +The model returns two different outputs: + +1. `model_answer`: the answer generated by retrieving similar information +1. `prediction`: a boolean value which indicates if the user's question is pertinent to banking topics. + +The reason for this information lies in the fact that by discriminating the textual data into categories, the bank will be able to use only the information related to banking services, to fine-tune the model in a second moment and improve its performance. 
+ +### Model Creation +To use the radicalbit-ai-monitoring platform, you need first to prepare your data, which should include the following information: + +1. **Features:** The list of variables used by the model to produce the inference. They may include also meta-data (timestamp, log) +2. **Outputs:** The fields returned by the model after the inference. Usually, they are probabilities, a predicted class or number in the case of the classic ML and a generated text in the case of LLMs. +3. **Target**: the ground truth used to validate predictions and evaluate the model quality + +This tutorial involves batch monitoring, including the situation where you have some historical data that you want to compare over time. + +The **reference dataset** is the name we use to indicate the batch that contains the information we desire to have constant (or we expect to have) over time. It could be the training set or a chunk of production data where the model has had good performances. + +The **current dataset** is the name we use to indicate the batch that contains fresh information, for example, the most recent production data, predictions or ground truths. We expect that it has the same characteristics (statistical properties) as the reference, which indicates that the model has the performance we expect and there is no drift in the data. 
+
+What follows is an example of data we will use in this tutorial:
+
+| timestamp | user_id | question | model_answer | ground_truth | prediction | gender | age | device | days_as_customer |
+|-----------|--------:|:---------|:-------------|-------------:|-----------:|:-------|----:|:-------|-----------------:|
+|2024-01-11 08:08:00|user_24|What documents do I need to open a business account?|You need a valid ID, proof of address, and business registration documents.|1|1|M|44|smartphone|194|
+|2024-01-10 03:08:00|user_27|What are the benefits of a premium account?|The benefits of a premium account include higher interest rates and exclusive customer support.|1|1|F|29|tablet|258|
+|2024-01-11 12:22:00|user_56|How can I check my credit score?|You can check your credit score for free through our mobile app.|1|1|F|44|smartphone|51|
+|2024-01-10 04:57:00|user_58|Are there any fees for using ATMs?|ATM usage is free of charge at all locations.|1|1|M|50|smartphone|197|
+
+* **timestamp:** it is the time in which the user asks the question
+* **user_id:** it is the user identification
+* **question:** it is the question asked by the user to the chatbot
+* **model_answer:** it is the answer generated by the model
+* **ground_truth:** it is the real label where 1 stands for an answer related to banking services and 0 stands for a different topic
+* **prediction:** it is the judgment produced by the model about the topic of the answer
+* **gender:** it is the user gender
+* **age:** it is the user age
+* **device:** it is the device used in the current session
+* **days_as_customer:** it indicates how many days the user is a customer
+
+### Create the Model
+To create a new model, navigate to the *Models* section and click the plus (+) icon.
+
+![Alt text](/img/quickstart/empty-models-list.png "Empty Models List")
+
+The platform should open a modal to allow users to create a new model. 
+ +![Alt text](/img/quickstart/new-model-modal-s1.png "New Model") + +This modal prompts you to enter the following details: +* **Name:** the name of the model +* **Model type:** the type of the model, in the current platform version there is only available `Binary Classification` +* **Data type:** it explains the data type used by the model +* **Granularity:** the window used to calculate aggregated metrics +* **Framework:** an optional field to describe the frameworks used by the model +* **Algorithm:** an optional field to explain the algorithm used by the model + +Please enter the following details and click on the *Next* button: +* **Name:** `LLM-binary-classification` +* **Model type:** `Binary Classification` +* **Data type:** `Tabular` +* **Granularity:** `Hour` + +To infer the model schema you've to upload a sample dataset. Please download and use [this reference Comma-Separated Values file](/datasets/df_10lines.csv) and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s2.png "Upload CSV file") + +Once you've defined the model schema, select the output fields from the variables. Choose `model_answer` and `prediction`, move them to the right, and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s3.png "Output fields selection") + +Finally, you need to select and associate the following fields: +* **Target:** the target field or ground truth +* **Timestamp:** the field containing the timestamp value +* **Prediction:** the actual prediction +* **Probability:** the probability score associated with the prediction + +Match the following values to their corresponding fields: +* **Target:** `ground_truth` +* **Timestamp:** `timestamp` +* **Prediction:** `prediction` +* **Probability:** leave empty + +![Alt text](/img/quickstart/new-model-modal-s4.png "Identify ground truth (target), timestamp, prediction, and probability fields") + +Click the *Save Model* button to finalize model creation. 
+
+### Model details
+Entering into the model details, we can see three different main sections:
+
+* **Overview:** this section provides information about the dataset and its schema. You can view a summary, explore the variables (features and ground truth) and the output fields for your model.
+* **Reference:** the Reference section displays performance metrics calculated on the imported reference data.
+* **Current:** the Current section displays metrics for any user-uploaded data sets you've added in addition to the reference dataset.
+
+### Import Reference Dataset
+To calculate metrics for your reference dataset, [import this CSV file, containing the reference](/datasets/df_reference.csv).
+
+![Alt text](/img/quickstart/import-reference.png "Import Reference")
+
+Once you initiate the process, the platform will run background jobs to calculate the metrics.
+
+After processing, you will be able to see the following information:
+* in the **Overview** section a summary of the column names and types will appear.
+* in the **Reference** section a statistical summary of your data will be computed.
+
+Within the **Reference** section, you can browse between 3 different tabs:
+* **Data Quality:** This tab contains statistical information and charts of your reference dataset, including the
+number of rows and your data distribution through bar plots (for categorical fields) and histograms (for numerical
+fields). Additionally, to make comparisons and analysis easier, you can choose the order in which to arrange your charts.
+
+![Alt text](/img/quickstart/reference_data_quality.png "Import Reference")
+
+* **Model Quality:** This tab provides detailed information about model performance, which we can compute since you
+provide both predictions and ground truths. These metrics (in this tutorial related to a binary classification task)
+are computed by aggregating the whole reference dataset, offering an overall expression of your model quality for this
+specific reference. 
+ +![Alt text](/img/quickstart/reference_model_quality.png "Import Reference") +* **Import:** This tab displays all the useful information about the storage of the reference dataset. + +![Alt text](/img/quickstart/reference_import.png "Import Reference") + +### Import Current Dataset +Once your reference data has been imported and all the metrics and information about it are available, you can move to +the **Current** section, in which you can import [the CSV file containing your current dataset](/datasets/df_current1.csv). + + +![Alt text](/img/quickstart/import-current.png "Import Current") + +This action will unlock all the tools you need to compare metrics between the reference and current files. + +In details, you can browse between 4 tabs: + +* **Data Quality:** Here, the same metrics you have in the Reference section will also be computed for the current +dataset. All the information will be presented side by side so that you can compare and analyze any differences. +Throughout the platform, the blue color stands for the current dataset while the gray stands for the reference dataset, +allowing you to easily identify which dataset a specific metric belongs to. + +![Alt text](/img/quickstart/current_data_quality.png "Import Reference") + +* **Model Quality:** In this tab, you can compare the model performance between the reference and current datasets. +In addition to what you see in the reference model quality, here you can track the metric values over time by +aggregating them with a specific granularity (the same you've defined in the Model Creation). + +![Alt text](/img/quickstart/current_model_quality.png "Import Reference") + +* **Model Drift:** This tab provides information about potential changes in the data distributions, known as drift, +which can lead to model degradation. The drift is detected according to the field type: Chi-square test for categorical +variables and Two-Samples Kolmogorov-Smirnov test for numerical ones. 
+ +![Alt text](/img/quickstart/current_model_drift.png "Import Reference") + +* **Import:** Here you can list all the current dataset imported over time and switch among them. By default, the last +current dataset will be shown. + +![Alt text](/img/quickstart/current_import.png "Import Reference") diff --git a/docs/versioned_docs/version-v0.9.0/index.md b/docs/versioned_docs/version-v0.9.0/index.md new file mode 100644 index 00000000..092f254f --- /dev/null +++ b/docs/versioned_docs/version-v0.9.0/index.md @@ -0,0 +1,28 @@ +--- +sidebar_position: 1 +--- + +# Introduction +Let's discover the **Radicalbit AI Monitoring Platform** in less than 5 minutes. + +## Welcome! +This platform provides a comprehensive solution for monitoring and observing your Artificial Intelligence (AI) models in production. + +### Why Monitor AI Models? +While models often perform well during development and validation, their effectiveness can degrade over time in production due to various factors like data shifts or concept drift. The Radicalbit AI Monitor platform helps you proactively identify and address potential performance issues. + +### Key Functionalities +The platform provides comprehensive monitoring capabilities to ensure optimal performance of your AI models in production. It analyzes both your reference dataset (used for pre-production validation) and the current datasets in use, allowing you to put under control: +* **Data Quality:** evaluate the quality of your data, as high-quality data is crucial for maintaining optimal model performance. The platform analyzes both numerical and categorical features in your dataset to provide insights into + * *data distribution* + * *missing values* + * *target variable distribution* (for supervised learning). + +* **Model Quality Monitoring:** the platform provides a comprehensive suite of metrics specifically designed at the moment for binary classification models. 
These metrics include: + * *Accuracy, Precision, Recall, and F1:* These metrics provide different perspectives on how well your model is classifying positive and negative cases. + * *False/True Negative/Positive Rates and Confusion Matrix:* These offer a detailed breakdown of your model's classification performance, including the number of correctly and incorrectly classified instances. + * *AUC-ROC and PR AUC:* These are performance curves that help visualize your model's ability to discriminate between positive and negative classes. +* **Model Drift Detection:** analyze model drift, which occurs when the underlying data distribution changes over time and can affect model accuracy. + +### Current Scope and Future Plans +This initial version focuses on binary classification models. Support for additional model types is planned for future releases. diff --git a/docs/versioned_docs/version-v0.9.0/user-guide/_category_.json b/docs/versioned_docs/version-v0.9.0/user-guide/_category_.json new file mode 100644 index 00000000..2294fcee --- /dev/null +++ b/docs/versioned_docs/version-v0.9.0/user-guide/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "User Guide", + "position": 1, + "link": { + "type": "generated-index", + "description": "Learn how to install and use the AI Monitoring Platform." + } +} diff --git a/docs/versioned_docs/version-v0.9.0/user-guide/installation.md b/docs/versioned_docs/version-v0.9.0/user-guide/installation.md new file mode 100644 index 00000000..27e55eb4 --- /dev/null +++ b/docs/versioned_docs/version-v0.9.0/user-guide/installation.md @@ -0,0 +1,74 @@ +--- +sidebar_position: 1 +--- + +# Installation +The platform is composed of different modules +* **UI:** the front-end application +* **API:** the back-end application +* **Processing:** the Spark jobs +* **SDK:** the Python SDK + +## Development & Testing with Docker Compose +You can easily run the platform locally using Docker and the provided Docker Compose file. 
+ +**Important:** This setup is intended for development and testing only, not for production environments. + +### Prerequisites +To run the platform successfully, you'll need to have both Docker and Docker Compose installed on your machine. + +### Procedure +Once you've installed Docker and Docker Compose, clone the repository to your local machine: + +```bash +git clone git@github.com:radicalbit/radicalbit-ai-monitoring.git +``` + +This repository provides a Docker Compose file to set up the platform locally alongside a K3s Kubernetes cluster. This allows you to deploy Spark jobs within the cluster. + +For streamlined development and testing, you can execute these steps to run the platform locally without the graphical user interface: + +```bash +docker compose up +``` + +If you want to access the platform's user interface (UI): + +```bash +docker compose --profile ui up +``` + +After all containers are up and running, you can access the platform at [http://localhost:5173](http://localhost:5173) to start using it. + +#### Accessing the Kubernetes Cluster +The platform creates a Kubernetes cluster for managing deployments. You can connect and interact with this cluster from your local machine using tools like Lens or `kubectl`. + +In the compose file is present a [k9s](https://k9scli.io/) container that can be used to monitor the K3s cluster. + +```bash +docker compose up k9s -d && docker attach radicalbit-ai-monitoring-k9s-1 +``` + +##### Using the kubeconfig File +A file named `kubeconfig.yaml` is automatically generated within the directory `./docker/k3s_data/kubeconfig/` when the platform starts. This file contains sensitive information used to authenticate with the Kubernetes cluster. + +##### Here's how to connect to the cluster: +1. Copy the `kubeconfig.yaml` file to a desired location on your local machine. +1. Edit the copied file and replace the server address `https://k3s:6443` with `https://127.0.0.1:6443`. 
This points the kubeconfig file to the local Kubernetes cluster running on your machine. +1. Use the modified `kubeconfig.yaml` file with tools like Lens or `kubectl` to interact with the cluster. + +#### Using Real AWS Credentials +In order to use a real AWS instead of MinIO is necessary to modify the environment variables of the api container, putting real `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, `AWS_REGION` and `S3_BUCKET_NAME` and removing `S3_ENDPOINT_URL`. + +#### Teardown +To completely clean up the environment we can use [docker compose](https://docs.docker.com/reference/cli/docker/compose/down/) + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans +``` + +To remove everything including container images: + +```bash +docker compose --profile ui --profile k9s down -v --remove-orphans --rmi all +``` diff --git a/docs/versioned_docs/version-v0.9.0/user-guide/quickstart.md b/docs/versioned_docs/version-v0.9.0/user-guide/quickstart.md new file mode 100644 index 00000000..beda0112 --- /dev/null +++ b/docs/versioned_docs/version-v0.9.0/user-guide/quickstart.md @@ -0,0 +1,168 @@ +--- +sidebar_position: 2 +--- + +# Quickstart +This guide provides instructions on how to monitor an AI solution through the Radicalbit AI Platform. + +## Monitor an LLM for a Binary Classification +The use case we present here involves the usage of an LLM (powered with RAG) capable of generating an answer to the user's questions in a chatbot for banking services. + +### Introduction + +The model returns two different outputs: + +1. `model_answer`: the answer generated by retrieving similar information +1. `prediction`: a boolean value which indicates if the user's question is pertinent to banking topics. + +The reason for this information lies in the fact that by discriminating the textual data into categories, the bank will be able to use only the information related to banking services, to fine-tune the model in a second moment and improve its performance. 
+ +### Model Creation +To use the radicalbit-ai-monitoring platform, you need first to prepare your data, which should include the following information: + +1. **Features:** The list of variables used by the model to produce the inference. They may include also meta-data (timestamp, log) +2. **Outputs:** The fields returned by the model after the inference. Usually, they are probabilities, a predicted class or number in the case of the classic ML and a generated text in the case of LLMs. +3. **Target**: the ground truth used to validate predictions and evaluate the model quality + +This tutorial involves batch monitoring, including the situation where you have some historical data that you want to compare over time. + +The **reference dataset** is the name we use to indicate the batch that contains the information we desire to have constant (or we expect to have) over time. It could be the training set or a chunk of production data where the model has had good performances. + +The **current dataset** is the name we use to indicate the batch that contains fresh information, for example, the most recent production data, predictions or ground truths. We expect that it has the same characteristics (statistical properties) as the reference, which indicates that the model has the performance we expect and there is no drift in the data. 
+
+What follows is an example of data we will use in this tutorial:
+
+| timestamp | user_id | question | model_answer | ground_truth | prediction | gender | age | device | days_as_customer |
+|-----------|--------:|:---------|:-------------|-------------:|-----------:|:-------|----:|:-------|-----------------:|
+|2024-01-11 08:08:00|user_24|What documents do I need to open a business account?|You need a valid ID, proof of address, and business registration documents.|1|1|M|44|smartphone|194|
+|2024-01-10 03:08:00|user_27|What are the benefits of a premium account?|The benefits of a premium account include higher interest rates and exclusive customer support.|1|1|F|29|tablet|258|
+|2024-01-11 12:22:00|user_56|How can I check my credit score?|You can check your credit score for free through our mobile app.|1|1|F|44|smartphone|51|
+|2024-01-10 04:57:00|user_58|Are there any fees for using ATMs?|ATM usage is free of charge at all locations.|1|1|M|50|smartphone|197|
+
+* **timestamp:** it is the time in which the user asks the question
+* **user_id:** it is the user identification
+* **question:** it is the question asked by the user to the chatbot
+* **model_answer:** it is the answer generated by the model
+* **ground_truth:** it is the real label where 1 stands for an answer related to banking services and 0 stands for a different topic
+* **prediction:** it is the judgment produced by the model about the topic of the answer
+* **gender:** it is the user gender
+* **age:** it is the user age
+* **device:** it is the device used in the current session
+* **days_as_customer:** it indicates how many days the user is a customer
+
+### Create the Model
+To create a new model, navigate to the *Models* section and click the plus (+) icon.
+
+![Alt text](/img/quickstart/empty-models-list.png "Empty Models List")
+
+The platform should open a modal to allow users to create a new model. 
+
+![Alt text](/img/quickstart/new-model-modal-s1.png "New Model")
+
+This modal prompts you to enter the following details:
+* **Name:** the name of the model
+* **Model type:** the type of the model; in the current platform version only `Binary Classification` is available
+* **Data type:** it explains the data type used by the model
+* **Granularity:** the window used to calculate aggregated metrics
+* **Framework:** an optional field to describe the frameworks used by the model
+* **Algorithm:** an optional field to explain the algorithm used by the model
+
+Please enter the following details and click on the *Next* button:
+* **Name:** `LLM-binary-classification`
+* **Model type:** `Binary Classification`
+* **Data type:** `Tabular`
+* **Granularity:** `Hour`
+
+To infer the model schema you have to upload a sample dataset. Please download and use [this reference Comma-Separated Values file](/datasets/df_10lines.csv) and click on the *Next* button.
+
+![Alt text](/img/quickstart/new-model-modal-s2.png "Upload CSV file")
+
+Once you've defined the model schema, select the output fields from the variables. Choose `model_answer` and `prediction`, move them to the right, and click on the *Next* button.
+
+![Alt text](/img/quickstart/new-model-modal-s3.png "Output fields selection")
+
+Finally, you need to select and associate the following fields:
+* **Target:** the target field or ground truth
+* **Timestamp:** the field containing the timestamp value
+* **Prediction:** the actual prediction
+* **Probability:** the probability score associated with the prediction
+
+Match the following values to their corresponding fields:
+* **Target:** `ground_truth`
+* **Timestamp:** `timestamp`
+* **Prediction:** `prediction`
+* **Probability:** leave empty
+
+![Alt text](/img/quickstart/new-model-modal-s4.png "Identify ground truth (target), timestamp, prediction, and probability fields")
+
+Click the *Save Model* button to finalize model creation.
+
+### Model details
+Entering the model details, we can see three main sections:
+
+* **Overview:** this section provides information about the dataset and its schema. You can view a summary, explore the variables (features and ground truth) and the output fields for your model.
+* **Reference:** the Reference section displays performance metrics calculated on the imported reference data.
+* **Current:** the Current section displays metrics for any user-uploaded data sets you've added in addition to the reference dataset.
+
+### Import Reference Dataset
+To calculate metrics for your reference dataset, [import this CSV file, containing the reference](/datasets/df_reference.csv).
+
+![Alt text](/img/quickstart/import-reference.png "Import Reference")
+
+Once you initiate the process, the platform will run background jobs to calculate the metrics.
+
+After processing, you will be able to see the following information:
+* in the **Overview** section a summary of column names and types will appear.
+* in the **Reference** section a statistical summary of your data will be computed.
+
+Within the **Reference** section, you can browse among 3 different tabs:
+* **Data Quality:** This tab contains statistical information and charts of your reference dataset, including the
+number of rows and your data distribution through bar plots (for categorical fields) and histograms (for numerical
+fields). Additionally, to make comparisons and analysis easier, you can choose the order in which to arrange your charts.
+
+![Alt text](/img/quickstart/reference_data_quality.png "Import Reference")
+
+* **Model Quality:** This tab provides detailed information about model performance, which we can compute since you
+provide both predictions and ground truths. These metrics (in this tutorial related to a binary classification task)
+are computed by aggregating the whole reference dataset, offering an overall expression of your model quality for this
+specific reference.
+
+![Alt text](/img/quickstart/reference_model_quality.png "Import Reference")
+* **Import:** This tab displays all the useful information about the storage of the reference dataset.
+
+![Alt text](/img/quickstart/reference_import.png "Import Reference")
+
+### Import Current Dataset
+Once your reference data has been imported and all the metrics and information about it are available, you can move to
+the **Current** section, in which you can import [the CSV file containing your current dataset](/datasets/df_current1.csv).
+
+
+![Alt text](/img/quickstart/import-current.png "Import Current")
+
+This action will unlock all the tools you need to compare metrics between the reference and current files.
+
+In detail, you can browse among 4 tabs:
+
+* **Data Quality:** Here, the same metrics you have in the Reference section will also be computed for the current
+dataset. All the information will be presented side by side so that you can compare and analyze any differences.
+Throughout the platform, the blue color stands for the current dataset while the gray stands for the reference dataset,
+allowing you to easily identify which dataset a specific metric belongs to.
+
+![Alt text](/img/quickstart/current_data_quality.png "Import Reference")
+
+* **Model Quality:** In this tab, you can compare the model performance between the reference and current datasets.
+In addition to what you see in the reference model quality, here you can track the metric values over time by
+aggregating them with a specific granularity (the same you've defined in the Model Creation).
+
+![Alt text](/img/quickstart/current_model_quality.png "Import Reference")
+
+* **Model Drift:** This tab provides information about potential changes in the data distributions, known as drift,
+which can lead to model degradation. The drift is detected according to the field type: Chi-square test for categorical
+variables and Two-Samples Kolmogorov-Smirnov test for numerical ones.
+ +![Alt text](/img/quickstart/current_model_drift.png "Import Reference") + +* **Import:** Here you can list all the current dataset imported over time and switch among them. By default, the last +current dataset will be shown. + +![Alt text](/img/quickstart/current_import.png "Import Reference") diff --git a/docs/versioned_sidebars/version-v0.8.0-sidebars.json b/docs/versioned_sidebars/version-v0.8.0-sidebars.json new file mode 100644 index 00000000..39332bfe --- /dev/null +++ b/docs/versioned_sidebars/version-v0.8.0-sidebars.json @@ -0,0 +1,8 @@ +{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +} diff --git a/docs/versioned_sidebars/version-v0.8.1-sidebars.json b/docs/versioned_sidebars/version-v0.8.1-sidebars.json new file mode 100644 index 00000000..39332bfe --- /dev/null +++ b/docs/versioned_sidebars/version-v0.8.1-sidebars.json @@ -0,0 +1,8 @@ +{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +} diff --git a/docs/versioned_sidebars/version-v0.8.2-sidebars.json b/docs/versioned_sidebars/version-v0.8.2-sidebars.json new file mode 100644 index 00000000..39332bfe --- /dev/null +++ b/docs/versioned_sidebars/version-v0.8.2-sidebars.json @@ -0,0 +1,8 @@ +{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +} diff --git a/docs/versioned_sidebars/version-v0.9.0-sidebars.json b/docs/versioned_sidebars/version-v0.9.0-sidebars.json new file mode 100644 index 00000000..39332bfe --- /dev/null +++ b/docs/versioned_sidebars/version-v0.9.0-sidebars.json @@ -0,0 +1,8 @@ +{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." + } + ] +} diff --git a/docs/versions.json b/docs/versions.json index 0d4f101c..2d1339cb 100644 --- a/docs/versions.json +++ b/docs/versions.json @@ -1,2 +1,6 @@ [ + "v0.9.0", + "v0.8.2", + "v0.8.1", + "v0.8.0" ]