diff --git a/.github/release-manifest.json b/.github/release-manifest.json index 2b1fcc79..d0a2c8d2 100644 --- a/.github/release-manifest.json +++ b/.github/release-manifest.json @@ -1,3 +1,3 @@ { - ".": "1.1.0" + ".": "1.2.0" } \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index a4161152..73887ab6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,24 @@ # Changelog +## [1.2.0](https://github.com/radicalbit/radicalbit-ai-monitoring/compare/v1.1.0...v1.2.0) (2024-12-10) + + +### Features + +* change banner introducing "Book a demo" ([#197](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/197)) ([7842eb1](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/7842eb148459fed7b6af9768dc41a4ac9012e2d1)) +* create a custom component and custom hooks to manage dark mode ([#199](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/199)) ([05b93fb](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/05b93fb3587900091cc1dcea37bb432e1e4abace)) +* improve layout and accessibility ([#201](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/201)) ([7016c0f](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/7016c0fa9923740ea1f773440dfff14761be30be)) +* ugrade design system to 1.4.0 ([#196](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/196)) ([9358296](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/9358296c0c8d78abe7b88229ff9fc2ec1f770254)) +* **ui:** add dark mode ([#195](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/195)) ([1c3bc31](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/1c3bc316a9e75598b11ff731b19392e2de5f7ccd)) +* **ui:** improve accessibility ([#198](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/198)) ([8ea1e23](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/8ea1e23d82ab5c3c00e4e01768d4dd87f9de6d4c)) +* upgrade design-system 
([#200](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/200)) ([6f582e6](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/6f582e6680c26e5806f19c64ee827684026ab57c)) + + +### Bug Fixes + +* percentage fix ([#206](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/206)) ([523d197](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/523d1974dbd513bb85107f50de2d5d6f3e5e5304)) +* **ui:** improve charts header legend ([#203](https://github.com/radicalbit/radicalbit-ai-monitoring/issues/203)) ([5c3ca24](https://github.com/radicalbit/radicalbit-ai-monitoring/commit/5c3ca24fc539ab434a36ec3dc3492aace6ca2d0b)) + ## [1.1.0](https://github.com/radicalbit/radicalbit-ai-monitoring/compare/v1.0.1...v1.1.0) (2024-10-31) diff --git a/api/pyproject.toml b/api/pyproject.toml index 1f3e1ce1..59c259ba 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "radicalbit-ai-monitoring" # x-release-please-start-version -version = "1.1.0" +version = "1.2.0" # x-release-please-end description = "radicalbit platform" authors = ["Radicalbit"] diff --git a/docs/versioned_docs/version-v1.2.0/all-metrics.md b/docs/versioned_docs/version-v1.2.0/all-metrics.md new file mode 100644 index 00000000..60633910 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/all-metrics.md @@ -0,0 +1,82 @@ +--- +sidebar_position: 5 +--- + +# All metrics +List of all available Metrics and Charts. + +## CSV summary + +* Number of variables +* Number of observations +* Number of missing values +* Percentage of missing values +* Number of duplicated rows +* Percentage of duplicated rows +* Number of **numerical** variables +* Number of **categorical** variables +* Number of **datetime** variables + +Summary with all variable name and type (float, int, string, datetime). 
+ +## Data quality + +* **Numerical** variables + * Average + * Standard deviation + * Minimum + * Maximum + * Percentile 25% + * Median + * Percentile 75% + * Number of missing values + * Histogram with 10 bins +* **Categorical** variables + * Number of missing values + * Percentage of missing values + * Number of distinct values + * For each distinct value: + * count of observations + * percentage of observations +* **Ground truth** + * if categorical i.e. for a classification model: bar plot *(for both reference and current for an easy comparison)* + * if numerical, i.e. for a regression model: histogram with 10 bins *(for both reference and current for an easy comparison)* + +## Model quality + +* Classification model + * Number of classes + * Accuracy *(for both reference and current for an easy comparison)* + * Line chart of accuracy over time + * Confusion matrix + * Log loss, *only for binary classification at the moment* + * Line chart of log loss over time, *only for binary classification at the moment* + * For each class: + * Precision *(for both reference and current for an easy comparison)* + * Recall *(for both reference and current for an easy comparison)* + * F1 score *(for both reference and current for an easy comparison)* + * True Positive Rate *(for both reference and current for an easy comparison)* + * False Positive Rate *(for both reference and current for an easy comparison)* + * Support *(for both reference and current for an easy comparison)* +* Regression model + * Mean squared error *(for both reference and current for an easy comparison)* + * Root mean squared error *(for both reference and current for an easy comparison)* + * Mean absolute error *(for both reference and current for an easy comparison)* + * Mean absolute percentage error *(for both reference and current for an easy comparison)* + * R-squared *(for both reference and current for an easy comparison)* + * Adjusted R-squared *(for both reference and current for an easy 
comparison)* + * Variance *(for both reference and current for an easy comparison)* + * Line charts for all of the above over time + * Residual analysis: + * Correlation prediction/ground_truth + * Residuals plot, i.e, scatter plot for standardised residuals and predictions + * Scatter plot for predictions vs ground truth and linear regression line + * Histogram of the residuals + * Kolmogorov-Smirnov test of normality for residuals + +## Data Drift + +Data drift for all features using different algorithms depending on the data type: float, int, categorical. We use the following algorithms (but others will be added in the future): +* [Chi-Square Test](https://en.wikipedia.org/wiki/Pearson%27s_chi-squared_test) +* [Two-Sample Kolmogorov-Smirnov](https://en.wikipedia.org/wiki/Kolmogorov%E2%80%93Smirnov_test#Two-sample_Kolmogorov%E2%80%93Smirnov_test) +* [Population Stability Index](https://scholarworks.wmich.edu/dissertations/3208/) diff --git a/docs/versioned_docs/version-v1.2.0/architecture.md b/docs/versioned_docs/version-v1.2.0/architecture.md new file mode 100644 index 00000000..7697d222 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/architecture.md @@ -0,0 +1,27 @@ +--- +sidebar_position: 6 +--- + +# Architecture + +In this section we explore the architecture of the Radicalbit AI platform. +The image below shows all the components of the platform: + +![Alt text](/img/architecture/architecture.png "Architecture") + +## API + +API is the core of the platform, it exposes all the functionalities via REST APIs. +It requires a PostgreSQL database to store data and a Kubernetes cluster to run Spark jobs for metrics evaluations. +To store all dataset files a distributed storage is used. +REST APIs could be used via user interface or using the provided Python SDK. + +## UI + +To use REST APIs with a human friendly interface, a UI is provided. +It covers all the implemented APIs, starting from model creation and ending with all metrics visualization. 
+ +## SDK + +To interact with API programmatically, a [_Python SDK_](python-sdk.md) is provided. +The SDK implements all functionalities exposed via REST API. \ No newline at end of file diff --git a/docs/versioned_docs/version-v1.2.0/index.md b/docs/versioned_docs/version-v1.2.0/index.md new file mode 100644 index 00000000..58385ffd --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/index.md @@ -0,0 +1,33 @@ +--- +sidebar_position: 1 +--- + +# Introduction +Let's discover the **Radicalbit AI Monitoring Platform** in less than 5 minutes. + +## Welcome! +This platform provides a comprehensive solution for monitoring and observing your Artificial Intelligence (AI) models in production. + +### Why Monitor AI Models? +While models often perform well during development and validation, their effectiveness can degrade over time in production due to various factors like data shifts or concept drift. The Radicalbit AI Monitor platform helps you proactively identify and address potential performance issues. + +### Key Functionalities +The platform provides comprehensive monitoring capabilities to ensure optimal performance of your AI models in production. It analyses both your reference dataset (used for pre-production validation) and the current datasets in use, allowing you to put under control: +* **Data Quality:** evaluate the quality of your data, as high-quality data is crucial for maintaining optimal model performance. The platform analyses both numerical and categorical features in your dataset to provide insights into + * *data distribution* + * *missing values* + * *target variable distribution* (for supervised learning). + +* **Model Quality Monitoring:** the platform provides a comprehensive suite of metrics specifically designed at the moment for classification and regression models. 
\ +For classification these metrics include: + * *Accuracy, Precision, Recall, and F1:* These metrics provide different perspectives on how well your model is classifying positive and negative cases. + * *False/True Negative/Positive Rates and Confusion Matrix:* These offer a detailed breakdown of your model's classification performance, including the number of correctly and incorrectly classified instances. + * *AUC-ROC and PR AUC:* These are performance curves that help visualize your model's ability to discriminate between positive and negative classes. + + For regression these metrics include: + * *Mean Absolute Error, Mean Squared Error, Root Mean Squared Error, R²:* These metrics provide different perspectives on how well your model is predicting a numerical value. + * *Residual Analysis:* This offers a detailed breakdown of your model's performance, comparing predictions with ground truth and predictions with residuals, i.e. the difference between predictions and ground truth. +* **Model Drift Detection:** analyse model drift, which occurs when the underlying data distribution changes over time and can affect model performance. + +### Current Scope and Future Plans +This version focuses on classification, both binary and multiclass, and regression models. Support for additional model types is planned for future releases. diff --git a/docs/versioned_docs/version-v1.2.0/model-sections/_category_.json b/docs/versioned_docs/version-v1.2.0/model-sections/_category_.json new file mode 100644 index 00000000..7276512a --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/model-sections/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "Model sections", + "position": 4, + "link": { + "type": "generated-index", + "description": "Each created model includes three main sections — Overview, Reference, and Current — as well as a summary section called the Launchpad. This document provides an in-depth explanation of each section." 
+ } +} diff --git a/docs/versioned_docs/version-v1.2.0/model-sections/current.md b/docs/versioned_docs/version-v1.2.0/model-sections/current.md new file mode 100644 index 00000000..71c42fe3 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/model-sections/current.md @@ -0,0 +1,54 @@ +--- +sidebar_position: 4 +--- + +# Current +The Current section stores all the information (statistics, model metrics and charts) related to the current dataset, placed side-by-side to the reference ones. The objective is to streamline and highlight every difference between the data over time. Throughout the platform, all the current information is coloured blue or in different shades. + +> NOTE: in this section, you will always see the last uploaded current dataset. In case you need previous current analysis, you can browse among them in the `Import` section. + + +## Data Quality +The **Data Quality** dashboard contains a descriptive analysis of the current variables (blue) placed side-by-side with the reference ones (grey). It adapts itself accordingly to the `Model Type` and shows information such as: + +- Number of observations +- Number of classes (not in regression task) +- Ground Truth Distribution +- Histograms for Numerical Features +- Descriptive Statistics for Numerical Features (average, standard deviation, ranges, percentiles, missing values) +- Bar Charts for Categorical Features +- Descriptive Statistics for Categorical Features(missing values, distinct values, frequencies) + +![Alt text](/img/current/current-data-quality.png "Current Data Quality") + + +## Model Quality + +The **Model Quality** dashboard contains all the metrics used to evaluate the model performance in the current dataset and compare these values to the reference. Many of them are computed through the `prediction`/`probability` compared to the `ground truth`. Naturally, the platform computes the proper metrics according to the chosen `Model Type`. 
\ +Unlike in the reference section, here the metrics are computed over time thanks to the flagged `timestamp` columns and the `granularity` parameter chosen during the model creation. + +![Alt text](/img/current/current-model-quality.png "Current Model Quality") + + +## Data Drift + +The **Data Drift** section contains the outcome of the drift detectors executed for each variable. +According to the field type (categorical or numerical), a specific drift test is applied: + +- Categorical: **Chi-Square Test** +- Numerical: **2-Samples-KS Test** (for `float` variables), **PSI** (for `int` variables) + +If the dot placed at the side of the variable name is red, it means that drift has been detected and the related chart (and statistical description) can be seen in the `Current/Data Quality` section. + +![Alt text](/img/current/current-data-drift.png "Current Data Drift") + + +## Import + +The **Import** section lists the path where your current CSVs are stored. If you have a private AWS, the files will be saved in a dedicated S3 bucket; otherwise, they will be saved locally with Minio (which shares the same syntax as S3). +To see your current datasets stored in Minio, visit the address [http://localhost:9091](http://localhost:9091). + +Here, you can browse among all the current datasets you have uploaded over time. + +![Alt text](/img/current/current-import.png "Current Import") + diff --git a/docs/versioned_docs/version-v1.2.0/model-sections/launchpad.md b/docs/versioned_docs/version-v1.2.0/model-sections/launchpad.md new file mode 100644 index 00000000..45247777 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/model-sections/launchpad.md @@ -0,0 +1,25 @@ +--- +sidebar_position: 1 +--- + +# Launchpad +The launchpad provides a dedicated space for summarising existing models. 
+ +![Alt text](/img/launchpad/launchpad.png "Launchpad") + +It offers a quick overview of key aspects: + +- **Data Quality Percentage:** This metric reflects the proportion of columns without anomalies across the current datasets. Anomalies are identified using the Interquartile Range (IQR) method, and the final percentage displayed is the average of each Current’s anomaly-free ratio. +- **Model Quality Percentage:** This metric is calculated using a Bootstrap Test, based on historical metrics (the same grouped by Timestamp) from the Model Quality page for the current dataset. By grouping metrics over time (e.g., Accuracy), we generate multiple instances of the same metric, forming a statistical population. The Bootstrap Test then compares this population with the metric calculated for the Reference dataset, checking if it falls outside the 95% confidence interval. If so, the metric is flagged as “significantly different” between Reference and Current datasets. This process is repeated for each model metric, and the percentage of metrics that pass the test is returned. +- **Drift Detection Percentage:** This percentage represents the ratio of features without drift over the total number of features. + +> NOTE: if a metric cannot be computed, the placeholder `--` will be used. + +The general **pie chart** represents the averages of each computed percentage across all models. + + +Additional information appears on the right side: +- **Work in Progress:** This section provides real-time updates on model activities, including ongoing and failed jobs. +- **Alerts:** Here, you’ll find any alerts triggered by the percentages above. When an issue lowers a metric from its ideal 100%, the alert identifies the affected model and component. Clicking the alert takes you to the relevant page for more details. 
+ + diff --git a/docs/versioned_docs/version-v1.2.0/model-sections/overview.md b/docs/versioned_docs/version-v1.2.0/model-sections/overview.md new file mode 100644 index 00000000..6e8e0289 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/model-sections/overview.md @@ -0,0 +1,40 @@ +--- +sidebar_position: 2 +--- + +# Overview + +The Overview is the section dedicated to the information recap of your reference dataset and your last current dataset, and it helps users to quickly assess the differences and monitor the data shapes. + + +## Summary + +The **Summary** table provides a side-by-side comparison of key metrics between the current and reference datasets: + +- Number of variables +- Number of observations +- Missing Values +- Missing Values (%) +- Duplicated rows +- Duplicated rows (%) +- Number of numerical columns +- Number of categorical columns +- Number of Datetime columns + +![Alt text](/img/overview/overview-summary.png "Overview Summary") + + +## Variables + +The **Variables** table lists all the columns flagged as `feature` or `ground truth`. That's the reason why we have chosen this name. Each field presents with its own type while the `ground truth` is flagged properly. +For the meaning of the column `Field Type` see the *Hands-On Guide*. + +![Alt text](/img/overview/overview-variables.png "Overview Variables") + + +## Output + +The **Output** table lists all the columns flagged as `probability` or `prediction` and it has to include all the fields produced by your model. Each field presents with its own type while the `probability` and the `prediction` are flagged properly. 
+ +![Alt text](/img/overview/overview-output.png "Overview Output") + diff --git a/docs/versioned_docs/version-v1.2.0/model-sections/reference.md b/docs/versioned_docs/version-v1.2.0/model-sections/reference.md new file mode 100644 index 00000000..a5131d6e --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/model-sections/reference.md @@ -0,0 +1,36 @@ +--- +sidebar_position: 3 +--- + +# Reference +The Reference section stores all the information (statistics, model metrics and charts) related to the reference dataset. Throughout the platform, all the reference information is gray-coloured. + + +## Data Quality +The **Data Quality** dashboard contains a descriptive analysis of the reference variables. It adapts itself according to the model type and shows information such as: + +- Number of observations +- Number of classes (not in regression task) +- Ground Truth Distribution +- Histograms for Numerical Features +- Descriptive Statistics for Numerical Features (average, standard deviation, ranges, percentiles, missing values) +- Bar Charts for Categorical Features +- Descriptive Statistics for Categorical Features (missing values, distinct values, frequencies) + +![Alt text](/img/reference/reference-data-quality.png "Reference Data Quality") + + +## Model Quality + +The **Model Quality** dashboard contains all the metrics used to evaluate the model performance. Many of them are computed through the `prediction`/`probability` compared to the `ground truth`. Naturally, the platform computes the proper metrics according to the chosen `Model Type`. + +![Alt text](/img/reference/reference-model-quality.png "Reference Model Quality") + + +## Import + +The **Import** section lists the path where your reference CSV is stored. If you have a private AWS, the file will be saved in a dedicated S3 bucket; otherwise, it will be saved locally with Minio (which shares the same syntax as S3). 
+To see your reference dataset stored in Minio, visit the address [http://localhost:9091](http://localhost:9091). + +![Alt text](/img/reference/reference-import.png "Reference Import") + diff --git a/docs/versioned_docs/version-v1.2.0/python-sdk.md b/docs/versioned_docs/version-v1.2.0/python-sdk.md new file mode 100644 index 00000000..7a42b9cd --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/python-sdk.md @@ -0,0 +1,298 @@ +--- +sidebar_position: 4 +--- + +# Python SDK +This document describes all the classes implemented in the [Python SDK](https://pypi.org/project/radicalbit-platform-sdk/). + + +### Client + +To interact with the Radicalbit AI platform via the SDK, the first thing that must be done is to create the client. +The only required parameter is the `base_url`, which is the URL of the running platform. + +```python +from radicalbit_platform_sdk.client import Client + +base_url = "http://localhost:9000/" +client = Client(base_url) +``` + +Once you have a client instance, you can interact with models inside the platform. + +The available methods of a client instance are: + +* **`create_model(model: CreateModel)`**: It is used to create a brand new model inside the platform. + + It requires a [CreateModel](#createmodel) instance and returns the created [Model](#model). 
+ + ```python + from radicalbit_platform_sdk.models import ( + CreateModel, + DataType, + FieldType, + ModelType, + ColumnDefinition, + OutputType, + Granularity, + SupportedTypes, + ) + + model_definition = CreateModel( + name="My model", + modelType=ModelType.BINARY, + dataType=DataType.TABULAR, + granularity=Granularity.HOUR, + features=[ + ColumnDefinition( + name="first_name", + type=SupportedTypes.string, + field_type=FieldType.categorical + ), + ColumnDefinition( + name="last_name", + type=SupportedTypes.string, + field_type=FieldType.categorical + ), + ColumnDefinition( + name="age", + type=SupportedTypes.int, + field_type=FieldType.numerical + ), + ], + outputs=OutputType( + prediction=ColumnDefinition( + name="prediction", + type=SupportedTypes.float, + field_type=FieldType.numerical + ), + output=[ + ColumnDefinition( + name="adult", + type=SupportedTypes.string, + field_type=FieldType.categorical + ) + ], + ), + target=ColumnDefinition( + name="prediction", + type=SupportedTypes.float, + field_type=FieldType.numerical + ), + timestamp=ColumnDefinition( + name="prediction_timestamp", + type=SupportedTypes.datetime, + field_type=FieldType.datetime + ), + ) + + model = client.create_model(model_definition) + ``` + +* **`get_model()`**: It gets a specific and existing model by its identifier. It requires the id of an existing model and returns the [Model](#model) instance. + + ```python + model = client.get_model(model_uuid) + ``` + +* **`search_models()`**: It gets a list of models. It returns a list of [Model](#model). + + ```python + models = client.search_models() + ``` + + +### Model + +It represents an instance of a monitored model. 
+ +The available methods of a model instance are: + +* **`uuid()`**: It returns the UUID identifier of the model +* **`name()`**: It returns the name of the model +* **`description()`**: It returns the model’s description, if provided +* **`model_type()`**: It returns the [ModelType](#modeltype) +* **`data_type()`**: It returns the [DataType](#datatype) +* **`granularity()`**: It returns the [Granularity](#granularity) used by metrics aggregation +* **`features()`**: It returns a list of [ColumnDefinition](#columndefinition) representing all the feature definitions +* **`target()`**: It returns a [ColumnDefinition](#columndefinition) representing the ground truth +* **`timestamp()`**: It returns a [ColumnDefinition](#columndefinition) representing the prediction timestamp. This field is used as reconciliation between reference and current datasets +* **`outputs()`**: It returns an [OutputType](#outputtype) representing the model outputs, including prediction and possibly prediction probability fields +* **`frameworks()`**: It returns the used frameworks, if defined +* **`algorithm()`**: It returns the used algorithm, if defined +* **`delete()`**: It deletes the model from the platform +* **`update_features(features: List[ColumnDefinition])`**: It updates the model's feature definitions, if a reference dataset has not been provided yet. +* **`load_reference_dataset(file_name: str, bucket: str, object_name: Optional[str] = None, aws_credentials: Optional[AwsCredentials] = None, separator: str = ',')`**: It uploads a reference dataset file to an S3 bucket and then binds it to the model. It returns a [ModelReferenceDataset](#modelreferencedataset). + + Method properties are: + * **`file_name`**: The name of the reference file + * **`bucket`**: The name of the S3 bucket. + * **`object_name`**: The optional name of the object uploaded to S3. Default value is None. + * **`aws_credentials`**: [AwsCredentials](#awscredentials) used to connect to S3 bucket. Default value is None. 
+ * **`separator`**: Optional value to define separator used inside CSV file. Default value is "," + + ```python + reference_dataset = model.load_reference_dataset( + file_name="reference.csv", bucket="my-bucket" + ) + ``` + +* **`bind_reference_dataset(dataset_url: str, aws_credentials: Optional[AwsCredentials] = None, separator: str = ',')`**: It binds an existing reference dataset file already uploaded to S3 to the model. It returns a [ModelReferenceDataset](#modelreferencedataset). + + Method properties are: + + * **`dataset_url`**: The URL of the file already uploaded inside S3 + * **`aws_credentials`**: [AwsCredentials](#awscredentials) used to connect to S3 bucket. Default value is None. + * **`separator`**: Optional value to define separator used inside CSV file. Default value is "," + + ```python + reference_dataset = model.bind_reference_dataset( + dataset_url="s3://my-bucket/reference.csv" + ) + ``` + +* **`load_current_dataset(file_name: str, bucket: str, correlation_id_column: Optional[str] = None, object_name: Optional[str] = None, aws_credentials: Optional[AwsCredentials] = None, separator: str = ',')`**: It uploads a current dataset file to an S3 bucket and then binds it to the model. +It returns a [ModelCurrentDataset](#modelcurrentdataset). + + Method properties are: + * **`file_name`**: The name of the current file + * **`bucket`**: The name of the S3 bucket. + * **`correlation_id_column`**: The name of the column used for correlation id + * **`object_name`**: The optional name of the object uploaded to S3. Default value is None. + * **`aws_credentials`**: [AwsCredentials](#awscredentials) used to connect to S3 bucket. Default value is None. + * **`separator`**: Optional value to define separator used inside CSV file. 
Default value is "," + + ```python + current_dataset = model.load_current_dataset( + file_name="reference.csv", + bucket="my-bucket", + correlation_id_column="prediction_identifier" + ) + ``` + +* **`bind_current_dataset(dataset_url: str, correlation_id_column: str, aws_credentials: Optional[AwsCredentials] = None, separator: str = ',')`**: It binds an existing current dataset file already uploaded to S3 to the model. It returns a [ModelCurrentDataset](#modelcurrentdataset). + + Method properties are: + * **`dataset_url`**: The URL of the file already uploaded inside S3 + * **`correlation_id_column`**: The name of the column used for correlation id + * **`aws_credentials`**: [AwsCredentials](#awscredentials) used to connect to S3 bucket. Default value is None. + * **`separator`**: Optional value to define separator used inside CSV file. Default value is "," + + ```python + current_dataset = model.bind_current_dataset( + dataset_url="s3://my-bucket/reference.csv", + correlation_id_column="prediction_identifier" + ) + ``` + +* **`get_reference_datasets()`**: It returns a list of [ModelReferenceDataset](#modelreferencedataset) representing all the reference datasets and related metrics +* **`get_current_datasets()`**: It returns a list of [ModelCurrentDataset](#modelcurrentdataset) representing all the current datasets and related metrics + + +### ModelReferenceDataset +It represents an instance of an uploaded reference dataset. The available methods are: +* **`uuid()`**: The UUID identifier of the uploaded dataset +* **`path()`**: The URL of the dataset in the object storage +* **`date()`**: When dataset was uploaded +* **`status()`**: The status of the job while it is calculating metrics +* **`statistics()`**: If job status is `SUCCEEDED` then returns the dataset statistics +* **`data_quality()`**: If job status is `SUCCEEDED` then returns the data quality metrics of the reference dataset +* **`model_quality()`**: If job status is `SUCCEEDED` then returns the model quality 
metrics of the reference dataset + + +### ModelCurrentDataset +It represents an instance of an uploaded current dataset. + +The available methods are: + +* **`uuid()`**: The UUID identifier of the uploaded dataset +* **`path()`**: The URL of the dataset in the object storage +* **`date()`**: When dataset was uploaded +* **`status()`**: The status of the job while it is calculating metrics +* **`statistics()`**: If job status is `SUCCEEDED` then returns the dataset statistics +* **`data_quality()`**: If job status is `SUCCEEDED` then returns the data quality metrics of the current dataset +* **`model_quality()`**: If job status is `SUCCEEDED` then returns the model quality metrics of the current dataset +* **`drift()`**: If job status is `SUCCEEDED` then returns the drift metrics of the current dataset + + +### CreateModel + +It contains the definition of a model to be created. + +Its properties are: + +* **`name`**: The name of the model. +* **`description`**: An optional description to explain something about the model. +* **`model_type`**: The [ModelType](#modeltype) of the model +* **`data_type`**: It explains the [DataType](#datatype) used by the model +* **`granularity`**: The [Granularity](#granularity) of window used to calculate aggregated metrics +* **`features`**: A list of [ColumnDefinition](#columndefinition) representing the features set +* **`outputs`**: An [OutputType](#outputtype) definition to explain the output of the model +* **`target`**: The [ColumnDefinition](#columndefinition) used to represent model’s target +* **`timestamp`**: The [ColumnDefinition](#columndefinition) used to store when prediction was done +* **`frameworks`**: An optional field to describe the frameworks used by the model +* **`algorithm`**: An optional field to explain the algorithm used by the model + + +### ModelType + +Enumeration used to define the type of the model and to calculate the right metrics. +Available values are: `REGRESSION`, `BINARY` and `MULTI_CLASS`. 
+ + +### DataType + +Enumeration used to define the type of data managed by the model. +Available values are: `TABULAR`, `TEXT` and `IMAGE`. + + +### Granularity + +Enumeration used to define the granularity used by aggregations inside metrics calculation. +Available values are: `HOUR`, `DAY`, `WEEK` and `MONTH`. + + +### ColumnDefinition + +It contains the definition of a single column inside a dataset. + +Its properties are: + +* **`name`**: The name of the column +* **`type`**: The [SupportedTypes](#supportedtypes) of the data represented inside this column +* **`field_type`**: The [FieldType](#fieldtype) of the field + + +### SupportedTypes + +Enumeration used to define the available types that a column definition could have. +Available values are: `int`, `float`, `string`, `bool` and `datetime`. + + +### FieldType + +Enumeration used to define the categorical type of the field. +Available values are: `categorical`, `numerical` and `datetime`. + + +### OutputType + +It defines the output of the model. + +Its properties are: + +* **`output`**: A list of [ColumnDefinition](#columndefinition) representing the outputs set +* **`prediction`**: The [ColumnDefinition](#columndefinition) used to represent the prediction +* **`prediction_proba`**: An optional [ColumnDefinition](#columndefinition) used to represent the prediction probability + + +### AwsCredentials + +It defines credentials needed to authenticate to an S3 compatible API service. + +Its properties are: + +* **`access_key_id`**: Access key ID needed to authenticate to APIs +* **`secret_access_key`**: Secret access key needed to authenticate to APIs +* **`default_region`**: Region to be used +* **`endpoint_url`**: Optional value to define an S3 compatible API endpoint, if different from AWS. 
By default it is `None` diff --git a/docs/versioned_docs/version-v1.2.0/quickstart.md b/docs/versioned_docs/version-v1.2.0/quickstart.md new file mode 100644 index 00000000..b72611bb --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/quickstart.md @@ -0,0 +1,161 @@ +--- +sidebar_position: 2 +--- + +# Quickstart +This guide provides instructions on monitoring an AI solution through the Radicalbit AI Platform. + +Inside the platform, three models are already included: one binary classifier, one multiclass classifier, and one regressor. If you would like to see how to create them from scratch, see [Radicalbit Platform Python SDK Examples](https://github.com/radicalbit/radicalbit-ai-monitoring/blob/main/docs/quickstarts/README.md): the corresponding notebooks can be found [here](https://github.com/radicalbit/radicalbit-ai-monitoring/tree/main/docs/quickstarts/notebooks). + +In this quickstart, we are going to use the GUI to create a binary classifier for monitoring an LLM. + +## Monitor an LLM for a Binary Classification +The use case we present here involves the usage of an LLM (powered with RAG) capable of generating an answer to the user's questions in a chatbot for banking services. + +### Introduction + +The model returns two different outputs: + +1. `model_answer`: the answer generated by retrieving similar information +1. `prediction`: a boolean value that indicates if the user's question is pertinent to banking topics. + +The reason for this information lies in the fact that by discriminating the textual data into categories, the bank will be able to use only the information related to banking services, to fine-tune the model at a later stage and improve its performance. + +### Model Creation +To use the radicalbit-ai-monitoring platform, you need first to prepare your data, which should include the following information: + +1. **Features:** The list of variables used by the model to produce the inference.
They may include also meta-data (timestamp, log) +2. **Outputs:** The fields returned by the model after the inference. Usually, they are probabilities, a predicted class or number in the case of the classic ML, and a generated text in the case of LLMs. +3. **Target**: the ground truth used to validate predictions and evaluate the model quality + +This tutorial involves batch monitoring, including the situation where you have some historical data that you want to compare over time. + +The **reference dataset** is the name we use to indicate the batch that contains the information we desire to have constant (or we expect to have) over time. It could be the training set or a chunk of production data where the model has had good performances. + +The **current dataset** is the name we use to indicate the batch that contains fresh information, for example, the most recent production data, predictions, or ground truths. We expect that it has the same characteristics (statistical properties) as the reference, which indicates that the model has the performance we expect and there is no drift in the data. 
+ +What follows is an example of data we will use in this tutorial: + +| timestamp | user_id | question | model_answer | ground_truth | prediction | gender | age | device | days_as_customer | +|-----------|--------:|:---------|:-------------|-------------:|-----------:|:-------|----:|:-------|-----------------:| +|2024-01-11 08:08:00|user_24|What documents do I need to open a business account?|You need a valid ID, proof of address, and business registration documents.|1|1|M|44|smartphone|194| +|2024-01-10 03:08:00|user_27|What are the benefits of a premium account?|The benefits of a premium account include higher interest rates and exclusive customer support.|1|1|F|29|tablet|258| +|2024-01-11 12:22:00|user_56|How can I check my credit score?|You can check your credit score for free through our mobile app.|1|1|F|44|smartphone|51| +|2024-01-10 04:57:00|user_58|Are there any fees for using ATMs?|ATM usage is free of charge at all locations.|1|1|M|50|smartphone|197| + +* **timestamp:** it is the time at which the user asks the question; +* **user_id:** it is the user identification; +* **question:** it is the question asked by the user to the chatbot; +* **model_answer:** it is the answer generated by the model; +* **ground_truth:** it is the real label where 1 stands for an answer related to banking services and 0 stands for a different topic; +* **prediction:** it is the judgment produced by the model about the topic of the answer; +* **gender:** it is the user's gender; +* **age:** it is the user's age; +* **device:** it is the device used in the current session; +* **days_as_customer:** it indicates how many days the user has been a customer. + +### Create the Model +To create a new model, navigate to the *Models* section and click the plus (+) icon in the top right corner. + +![Alt text](/img/quickstart/empty-models-list.png "Empty Models List") + +The platform should open a modal to allow users to create a new model.
+ +![Alt text](/img/quickstart/new-model-modal-s1.png "New Model") + +This modal prompts you to enter the following details: +* **Name:** the name of the model; +* **Model type:** the type of the model; +* **Data type:** it explains the data type used by the model; +* **Granularity:** the window used to calculate aggregated metrics; +* **Framework:** an optional field to describe the frameworks used by the model; +* **Algorithm:** an optional field to explain the algorithm used by the model. + +Please enter the following details and click on the *Next* button: +* **Name:** `LLM-binary-classification`; +* **Model type:** `Binary Classification`; +* **Data type:** `Tabular`; +* **Granularity:** `Hour`. + +To infer the model schema you have to upload a sample dataset. Please download and use [this reference Comma-Separated Values file](/datasets/df_10lines.csv) and click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s2.png "Upload CSV file") + +Since in the CSV file there might be useless fields, e.g. some uuid which would be pointless to analyse, choose which fields you want to carry over: in this case, select all of them, click on the arrow to transfer them, and then click on the *Next* button. + +![Alt text](/img/quickstart/new-model-modal-s2_extra.png "Fields transfer") + +Finally, you need to select and associate the following fields: +* **Target:** the target field or ground truth; +* **Timestamp:** the field containing the timestamp value; +* **Prediction:** the actual prediction; +* **Probability:** the probability score associated with the prediction. + +Match the following values to their corresponding fields: +* **Target:** `ground_truth`; +* **Timestamp:** `timestamp`; +* **Prediction:** `prediction`; +* **Probability:** leave it empty. + +![Alt text](/img/quickstart/new-model-modal-s4.png "Identify ground truth (target), timestamp, prediction, and probability fields") + +Click the *Save Model* button to finalize model creation.
+ +### Model details +Entering into the model details, we can see three different main sections: + +* **Overview**: this section provides information about the dataset and its schema. You can view a summary, and explore the variables (features and ground truth) and the output fields for your model. +* **Reference**: the Reference section displays performance metrics calculated on the imported reference data. +* **Current**: the Current section displays metrics for any user-uploaded data sets you have added in addition to the reference dataset. + +### Import Reference Dataset +To calculate metrics for your reference dataset, [import this CSV file, containing the reference](/datasets/df_reference.csv). + +![Alt text](/img/quickstart/import-reference.png "Import Reference") + +Once you initiate the process, the platform will run background jobs to calculate the metrics. + +After processing, you will be able to see the following information: +* in the **Overview** section a column names and types summary will appear. +* in the **Reference** section a statistical summary of your data will be computed. + +Within the **Reference** section, you can browse between 3 different tabs: +* **Data Quality:** This tab contains statistical information and charts of your reference dataset, including the +number of rows and your data distribution through bar plots (for categorical fields) and histograms (for numerical fields). Additionally, to make comparisons and analysis easier, you can choose the order in which to arrange your charts. + +![Alt text](/img/quickstart/reference_data_quality.png "Import Reference") + +* **Model Quality:** This tab provides detailed information about model performance, which we can compute since you provide both predictions and ground truths. These metrics (in this tutorial related to a binary classification task) are computed by aggregating the whole reference dataset, offering an overall expression of your model quality for this specific reference. 
+ +![Alt text](/img/quickstart/reference_model_quality.png "Import Reference") +* **Import:** This tab displays all the useful information about the storage of the reference dataset. + +![Alt text](/img/quickstart/reference_import.png "Import Reference") + +### Import Current Dataset +Once your reference data has been imported and all the metrics and information about it are available, you can move to the **Current** section, in which you can import [the CSV file containing your current dataset](/datasets/df_current1.csv). + + +![Alt text](/img/quickstart/import-current.png "Import Current") + +This action will unlock all the tools you need to compare metrics between the reference and current files. + +In detail, you can browse between 4 tabs: + +* **Data Quality:** Here, the same metrics you have in the Reference section will also be computed for the current dataset. All the information will be presented side by side so that you can compare and analyze any differences. Throughout the platform, the blue color stands for the current dataset while the gray stands for the reference dataset, allowing you to easily identify which dataset a specific metric belongs to. + +![Alt text](/img/quickstart/current_data_quality.png "Import Reference") + +* **Model Quality:** In this tab, you can compare the model performance between the reference and current datasets. In addition to what you see in the reference model quality, here you can track the metric values over time by aggregating them with a specific granularity (the same you have defined in the Model Creation). + +![Alt text](/img/quickstart/current_model_quality.png "Import Reference") + +* **Model Drift:** This tab provides information about potential changes in the data distributions, known as drift, which can lead to model degradation. The drift is detected according to the field type: Chi-square test for categorical variables and Two-Samples Kolmogorov-Smirnov test for numerical ones. 
+ +![Alt text](/img/quickstart/current_model_drift.png "Current Model Drift") + +* **Import:** Here you can list all the current datasets imported over time and switch among them. By default, the last current dataset will be shown. + +![Alt text](/img/quickstart/current_import.png "Current Import") + + diff --git a/docs/versioned_docs/version-v1.2.0/support.md b/docs/versioned_docs/version-v1.2.0/support.md new file mode 100644 index 00000000..43bc9a60 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/support.md @@ -0,0 +1,26 @@ +--- +sidebar_position: 7 +--- + +# Community Support + +Welcome to the **Community Support** page! Here, you can find various ways to connect with us and get the help you need. Click on the icons below to reach our pages on different platforms. + +## Social Media + +| Platform | Link | +|:---------:|:-----| +| [![YouTube](https://img.shields.io/badge/YouTube-FF0000?style=for-the-badge&logo=youtube&logoColor=white)](https://www.youtube.com/@radicalbit5002) | Subscribe to our YouTube channel for the latest tutorials and updates. | +| [![LinkedIn](https://img.shields.io/badge/LinkedIn-0A66C2?style=for-the-badge&logo=linkedin&logoColor=white)](https://www.linkedin.com/company/radicalbit/) | Follow us on LinkedIn for professional updates and networking. | +| [![GitHub](https://img.shields.io/badge/GitHub-181717?style=for-the-badge&logo=github&logoColor=white)](https://github.com/radicalbit/radicalbit-ai-monitoring) | Check out our GitHub for project repositories and collaboration. | +| [![Discord](https://img.shields.io/badge/Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white)](https://discord.com/invite/x2Ze8TMRsD) | Join our Discord server for community support and discussions. | + + + +We are here to help! Feel free to reach out to us on any of these platforms for support, collaboration, or just to say hi. + +## Website +Would you like to see something more? Visit the [radicalbit website](https://radicalbit.ai/)!
+ +## Glossary +Keep yourself updated with the most trending keywords in the AI landscape! Let's take a look at our [glossary](https://radicalbit.ai/resources/glossary/). \ No newline at end of file diff --git a/docs/versioned_docs/version-v1.2.0/user-guide/_category_.json b/docs/versioned_docs/version-v1.2.0/user-guide/_category_.json new file mode 100644 index 00000000..65f7e7af --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/user-guide/_category_.json @@ -0,0 +1,8 @@ +{ + "label": "User Guide", + "position": 3, + "link": { + "type": "generated-index", + "description": "Welcome to the «radicalbit-ai-monitoring» user guide. This document is designed to help you get started with our platform, understand its core concepts and make the most out of its features. Whether you are a new user or an experienced professional, this guide will provide you with the necessary information to effectively monitor and manage your AI systems." + } +} diff --git a/docs/versioned_docs/version-v1.2.0/user-guide/how-to.md b/docs/versioned_docs/version-v1.2.0/user-guide/how-to.md new file mode 100644 index 00000000..6ebc4d56 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/user-guide/how-to.md @@ -0,0 +1,43 @@ +--- +sidebar_position: 3 +--- + +# Hands-On Guide + +In this guide we are focusing on how to use the GUI. + +If you prefer to do everything using our SDK please refer to our [SDK Quickstarts](https://github.com/radicalbit/radicalbit-ai-monitoring/tree/main/docs/quickstarts). + +## How to create a model + +* From the Model main page ![Alt text](/img/how_to/new_model_step1.png "New model step 1") +click on the plus sign in the top right corner. + +* Fill in the name of the model, the model type (at the moment only `Binary Classification`, `Multiclass Classification` and `Regression` are available), the time granularity on which aggregations are computed (`Hour`, `Day`, `Week` or `Month`), and optionally Framework (e.g. scikit-learn) and Algorithm (e.g.
KNeighborsClassifier), and then click `Next` +![Alt text](/img/how_to/new_model_step2.png "New model step 2") + +* Upload a CSV file containing *all features*, *prediction*, optionally *prediction probability*, *ground truth* (i.e. the correct value for the set of features that the model should predict), and a *timestamp* (we use it as a UUID for each row). At this stage a very small CSV file, about 10 rows, suffices, since we use it just to create a schema, input and output signatures for the model. If you prefer to upload your whole reference dataset this is perfectly fine, of course. + +* Since your CSV file can contain fields you are not interested in monitoring, e.g. some custom UUID, you can choose the fields to carry forward ![Alt text](/img/how_to/new_model_step3.png "New model step 3") + +* Next choose your `Target`, `Prediction`, `Timestamp` and, if present, `Prediction Probability` fields. + +## Change field type + +In your CSV file there might be some numerical variables which are actually categorical: for instance you might have the feature `Gender` which has values `{0,1}`: so we automatically infer it as an integer variable but clearly it makes no sense to compute numerical statistics on it, since it is clearly the representation of a categorical feature. \ +Hence, **as long as no reference dataset has been loaded yet**, in the `Overview` section, `Variables` tab, you are allowed to change the field type of any numerical feature to categorical. +![Alt text](/img/how_to/change_field_type.png "Change field type") + +Please note that as soon as a reference dataset is loaded into the platform **this option is no longer available** because the platform immediately starts computing statistics and metrics on the variables according to their type. + +## Load a reference dataset + +Go to the `Reference` entry of the vertical navigation bar ![Alt text](/img/how_to/reference.png "Import Reference") click `Import Reference` and choose the right CSV file.
+ +## Load a Current Dataset + +* If no Current dataset has been published yet, just go to the `Current` entry of the vertical navigation bar ![Alt text](/img/how_to/first_current.png "Import First Current") click `Import Current` and choose the right CSV file. + +* If some current datasets have already been imported and you want to add an extra one, go to the `Current` entry of the vertical navigation bar, then on the `Import` tab ![Alt text](/img/how_to/more_current.png "Import More Current") +click `Import Current` and choose the right CSV file. + diff --git a/docs/versioned_docs/version-v1.2.0/user-guide/installation.md b/docs/versioned_docs/version-v1.2.0/user-guide/installation.md new file mode 100644 index 00000000..6be8a499 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/user-guide/installation.md @@ -0,0 +1,35 @@ +--- +sidebar_position: 1 +--- + +# Installation + +To install the platform you can choose from two different approaches. + +* **Using the main repository:** Clone [the main repository](https://github.com/radicalbit/radicalbit-ai-monitoring) and activate the Docker daemon. Finally, run the following command: + ```bash + docker compose --profile ui --profile init-data up + ``` +See [README file](https://github.com/radicalbit/radicalbit-ai-monitoring/blob/main/README.md) for further information and details. + +* **Using the Python Installer:** Install the [Python installer](https://pypi.org/project/radicalbit-ai-monitoring/) via `poetry` or `pip`. + + * With poetry: + 1. Clone the repository using `git clone https://github.com/radicalbit/radicalbit-ai-monitoring-installer.git`. + 2. Move inside the repository using `cd radicalbit-ai-monitoring-installer`. + 3. Install the project and its dependencies using `poetry install`. + + * With pip: Just run `pip install radicalbit-ai-monitoring`.
+ + Once you have installed the Python package, activate the Docker daemon and run the following commands: + + ``` + rbit-ai-monitoring platform install + rbit-ai-monitoring platform up + ``` + +After all the containers are up & running, you can go to [http://localhost:5173](http://127.0.0.1:5173/) and play with the platform. + +## Spark tuning + +We use Spark jobs to calculate metrics: if you need to tune Spark configuration in order to optimize performance for large files or accelerate computations, please refer to the corresponding section of this [README file](https://github.com/radicalbit/radicalbit-ai-monitoring/blob/main/api/README.md). diff --git a/docs/versioned_docs/version-v1.2.0/user-guide/key-concepts.md b/docs/versioned_docs/version-v1.2.0/user-guide/key-concepts.md new file mode 100644 index 00000000..39a90194 --- /dev/null +++ b/docs/versioned_docs/version-v1.2.0/user-guide/key-concepts.md @@ -0,0 +1,106 @@ +--- +sidebar_position: 2 +--- + +# Key Concepts + +This section introduces the fundamental concepts and terminologies used within the `radicalbit-ai-monitoring` Platform. Understanding these concepts is crucial for the effective utilization of the platform. + +## Model Type + +The radicalbit-ai-monitoring platform supports various types of models, each suited for different types of tasks: + +- **Binary Classification**: Models that categorize data into one of two possible classes (e.g., spam or not spam). +- **Multiclass Classification**: Models that categorize data into one of three or more possible classes (e.g., type of fruit: apple, orange, pear). +- **Regression**: Models that predict a continuous value (e.g., predicting house prices based on various features). + +According to the `Model Type`, the platform will compute specific metrics to evaluate the performance over time.
+ +## Data Type + +The platform can handle different types of data, which are crucial for the kind of analysis achieved to evaluate the consistency of the information: + +* **Tabular**: Data is organized into a table and saved in CSV format. +* **Text**: Not available yet. +* **Image**: Not available yet. + +## Reference Dataset + +The reference dataset is a static dataset used as a benchmark for comparison. It represents the ideal or expected data distribution and quality, against which the current dataset's performance and quality are evaluated. This dataset is typically: + +- **Historical Data**: Derived from historical data that the model was trained on or validated against. It serves as a baseline to compare new incoming data. +- **Preprocessed**: Cleaned and preprocessed to ensure it represents the best possible version of the data, free from anomalies or errors. +- **Comprehensive**: Should cover all possible scenarios and variations that the model is expected to handle, ensuring it is robust and reliable. +- **Static**: Unlike the current dataset, the reference dataset remains unchanged over time to provide a consistent benchmark for monitoring purposes. + +> **_TIP:_** A good example of a reference dataset is the training set. + +Using the reference dataset, the platform can: + +* **Detect Data Drift**: By comparing the current dataset to the reference dataset, the platform can identify significant deviations in data patterns. +* **Evaluate Model Performance**: The reference dataset provides a baseline for assessing whether the model's performance on new data aligns with its performance on known, reliable data. +* **Ensure Data Quality**: Regularly comparing the current dataset to the reference dataset helps maintain high data quality standards by highlighting inconsistencies and anomalies. 
+ +By maintaining a high-quality reference dataset, the `radicalbit-ai-monitoring` platform ensures that any changes in data or model performance can be promptly identified and addressed. + +## Current Dataset + +The current dataset is the most recent data being fed into the model for predictions. It should be continuously monitored to ensure consistency with the reference dataset and to detect any anomalies or changes. To achieve this, the current dataset must have the same schema as the reference. + +Using the current dataset, the platform can: + +- **Monitor Performance Metrics**: Continuously assess how well the model is performing on new data by tracking key metrics such as accuracy, precision, recall and others. +- **Detect Drifts**: Identify unusual patterns or anomalies in the data that may indicate issues with data collection processes or changes in underlying data distributions. +- **Adapt to Changes**: Provide insights into when the model may need retraining or adjustment due to shifts in the data, known as data drift. +- **Ensure Timeliness**: By constantly updating and analyzing the current dataset, the platform ensures that the model's predictions are based on the most up-to-date information available. + +By effectively managing and monitoring the current dataset, the `radicalbit-ai-monitoring` platform helps maintain the reliability and accuracy of models in a changing environment. + +## Data Quality + +Data quality refers to the accuracy, completeness, and reliability of the data used by the models. High data quality is essential for the performance and reliability of the models. The platform monitors various data quality indicators and charts to ensure its integrity: + +- **Descriptive Statistics**: Metrics such as mean, median, standard deviation and range are computed to summarize the central tendency and dispersion of the data. +- **Histograms**: Visual representations to identify distributions, outliers, and any potential anomalies in the data. 
+- **Frequency Distribution**: Charts such as bar plots are used to display the distribution of categories and highlight any imbalances or anomalies. +- **Detect Data Quality Issues**: Identify inaccuracies, inconsistencies, missing values, and outliers in the data. +- **Monitor Changes Over Time**: Track data quality metrics over time to detect any degradation or improvement in data quality. + +By incorporating detailed charts and statistics for both numerical and categorical data, the `radicalbit-ai-monitoring` platform ensures comprehensive monitoring and maintenance of data quality, crucial for the robust performance of the models. + +## Model Quality + +Model quality is a measure of how well a model performs its task. It includes classic metrics such as accuracy, precision, recall and F1 score. The platform evaluates these metrics to ensure the model maintains high performance over time. Naturally, to compute them, the user has to include the ground truth in the dataset. + +The model quality metrics depend on the chosen Model Type and include the following: + +- **Binary Classification**: Accuracy, Precision, Recall, F1 score, Confusion Matrix +- **Multiclass Classification**: Accuracy, Precision, Recall, F1 score, Confusion Matrix +- **Regression**: Mean Absolute Error, Mean Squared Error, Root Mean Squared Error, R², Residual Analysis + +The platform provides detailed visualizations and reports for these metrics, allowing users to: + +- **Monitor Performance Trends**: Track changes in model performance over time to ensure the model remains effective. +- **Identify Weaknesses**: Pinpoint specific areas where the model may be underperforming, such as particular classes in a classification model or high-error regions in a regression model. +- **Compare Models**: Evaluate and compare the performance of different models or model versions, aiding in model selection and improvement.
+ +By highlighting the differences in evaluation criteria and metrics for various model types, the `radicalbit-ai-monitoring` platform ensures that users can effectively assess and maintain the quality of their models. + +## Data Drift + +Data drift occurs when the statistical properties of the current dataset differ significantly from the reference dataset. This can affect model performance. The platform monitors for data drift to alert users of potential issues that may require model retraining or adjustment. + +To detect data drift, the platform uses several statistical tests and metrics tailored to the type of data and model: + +- **Chi-square Test**: Used primarily for categorical data, this test evaluates whether the distribution of categories in the current dataset significantly differs from the reference dataset. It compares the observed frequencies of categories in the current dataset against the expected frequencies derived from the reference dataset. A significant Chi-square test result indicates that the categorical distribution has changed, signalling potential data drift. +- **2-sample Kolmogorov-Smirnov (KS) Test**: This non-parametric test is used for numerical data to compare the distributions of the reference and current datasets. It evaluates the maximum difference between the cumulative distributions of the two datasets. A significant KS test result indicates that the distributions are different, suggesting data drift. The KS test is sensitive to changes in both the central tendency and the shape of the distribution. +- **Population Stability Index (PSI)**: This metric is used for both categorical and numerical data to quantify the shift in the distribution between the reference and current datasets. PSI measures the divergence between the two distributions, with higher values indicating greater drift. It is particularly useful for identifying gradual changes over time. 
PSI is calculated by dividing the data into bins and comparing the relative frequencies of each bin between the reference and current datasets. + +Using these tests and metrics, the platform can: + +- **Detect Significant Changes**: Identify when the current data distribution has shifted enough to potentially impact model performance. +- **Trigger Alerts**: Notify users when significant data drift is detected, allowing for timely intervention. +- **Guide Retraining**: Provide insights into which specific features or aspects of the data have drifted, helping to guide model retraining efforts. +- **Visualize Drift**: Offer visual representations of the drift, such as distribution plots and bar charts, to help users understand the nature and extent of the drift. + +By employing these methods, the `radicalbit-ai-monitoring` platform ensures comprehensive monitoring for data drift, helping maintain the reliability and accuracy of the models in a changing data environment. \ No newline at end of file diff --git a/docs/versioned_sidebars/version-v1.2.0-sidebars.json b/docs/versioned_sidebars/version-v1.2.0-sidebars.json new file mode 100644 index 00000000..39332bfe --- /dev/null +++ b/docs/versioned_sidebars/version-v1.2.0-sidebars.json @@ -0,0 +1,8 @@ +{ + "docsSidebar": [ + { + "type": "autogenerated", + "dirName": "." 
+ } + ] +} diff --git a/docs/versions.json b/docs/versions.json index 280d4474..3fd3713d 100644 --- a/docs/versions.json +++ b/docs/versions.json @@ -1,4 +1,5 @@ [ + "v1.2.0", "v1.1.0", "v1.0.1", "v1.0.0", diff --git a/sdk/pyproject.toml b/sdk/pyproject.toml index 99837c5d..ac0dbd25 100644 --- a/sdk/pyproject.toml +++ b/sdk/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "radicalbit-platform-sdk" # x-release-please-start-version -version = "1.1.0" +version = "1.2.0" # x-release-please-end description = "" authors = ["dev.team "] diff --git a/spark/pyproject.toml b/spark/pyproject.toml index c4573fa7..d1e2723a 100644 --- a/spark/pyproject.toml +++ b/spark/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "spark" # x-release-please-start-version -version = "1.1.0" +version = "1.2.0" # x-release-please-end description = "Spark jobs collection for Radicalbit AI Monitoring Platform" authors = ["Radicalbit "] diff --git a/ui/package.json b/ui/package.json index 70059a67..c71de8de 100644 --- a/ui/package.json +++ b/ui/package.json @@ -1,7 +1,7 @@ { "name": "radicalbit-ai-monitoring-ui", "private": true, - "version": "1.1.0", + "version": "1.2.0", "type": "module", "scripts": { "start:local": "NODE_ENV=local && vite --host 0.0.0.0", diff --git a/version.txt b/version.txt index 9084fa2f..26aaba0e 100644 --- a/version.txt +++ b/version.txt @@ -1 +1 @@ -1.1.0 +1.2.0