Skip to content

Commit

Permalink
Added examples of snapshots usage and tests criticality customization (
Browse files Browse the repository at this point in the history
…#730)

Co-authored-by: Emeli Dral
  • Loading branch information
emeli-dral authored Aug 22, 2023
1 parent 26fad6e commit 6cf02b3
Show file tree
Hide file tree
Showing 2 changed files with 348 additions and 0 deletions.
151 changes: 151 additions & 0 deletions examples/how_to_questions/how_to_specify_test_critycality.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "3eab4051-27cc-4d53-b9d5-bc4c4b69ba8c",
"metadata": {},
"source": [
"# How to specify test criticality?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ef729d6-5eae-4189-9f12-529ea76817a7",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from sklearn import datasets\n",
"\n",
"from evidently.test_suite import TestSuite\n",
"from evidently.tests import *"
]
},
{
"cell_type": "markdown",
"id": "4333330d-c83c-44dc-9107-9fc2c933f7b0",
"metadata": {},
"source": [
"## Toy Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75d19dbf-ba6c-4c90-b25c-f14772878da1",
"metadata": {},
"outputs": [],
"source": [
"adult_data = datasets.fetch_openml(name='adult', version=2, as_frame='auto')\n",
"adult = adult_data.frame\n",
"\n",
"# Current data: rows in the three education groups below; reference data: all other rows.\n",
"current_groups = ['Some-college', 'HS-grad', 'Bachelors']\n",
"in_current = adult.education.isin(current_groups)\n",
"\n",
"# .copy() gives each split its own data, so the assignment below does not write\n",
"# into a slice of `adult` (which would raise SettingWithCopyWarning).\n",
"adult_ref = adult[~in_current].copy()\n",
"adult_cur = adult[in_current].copy()\n",
"\n",
"# Inject missing values: columns at positions 3 and 4, first 2000 rows of the current data.\n",
"adult_cur.iloc[:2000, 3:5] = np.nan"
]
},
{
"cell_type": "markdown",
"id": "5c488a2b-2113-40cc-8212-e0db0afbce2e",
"metadata": {},
"source": [
"## Test suite with default criticality"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "27447dec-f355-40cc-829f-136eae5baabc",
"metadata": {},
"outputs": [],
"source": [
"# No test below is given an is_critical argument, so each one keeps its default criticality.\n",
"# NOTE(review): the pattern '^[0..9]' matches a leading '0', '.' or '9' only;\n",
"# if any leading digit is intended, it should be '^[0-9]'.\n",
"data_integrity_column_tests = TestSuite(tests=[\n",
"    TestColumnNumberOfMissingValues(column_name='education'),\n",
"    TestColumnShareOfMissingValues(column_name='education'),\n",
"    TestColumnNumberOfDifferentMissingValues(column_name='education'),\n",
"    TestColumnAllConstantValues(column_name='education'),\n",
"    TestColumnAllUniqueValues(column_name='education'),\n",
"    TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n",
"    TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n",
"    TestCategoryShare(column_name='age', category=27., lt=0.5)\n",
"])\n",
"\n",
"data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "517c9f2a-2d5c-4b59-abeb-624bdfbbb119",
"metadata": {},
"outputs": [],
"source": [
"# Display the results of the suite that was just run, using mode='inline'.\n",
"data_integrity_column_tests.show(mode='inline')"
]
},
{
"cell_type": "markdown",
"id": "664803a0-14d4-4c9a-b8d0-1a1932b0f3e5",
"metadata": {},
"source": [
"## Test suite with custom criticality"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "42c679b3-c514-429d-85f7-e51f5091e5b8",
"metadata": {},
"outputs": [],
"source": [
"# Same list of tests as the default-criticality suite, except that the constant-values\n",
"# and unique-values checks are passed is_critical=False.\n",
"# NOTE(review): the pattern '^[0..9]' matches a leading '0', '.' or '9' only;\n",
"# if any leading digit is intended, it should be '^[0-9]'.\n",
"data_integrity_column_tests = TestSuite(tests=[\n",
"    TestColumnNumberOfMissingValues(column_name='education'),\n",
"    TestColumnShareOfMissingValues(column_name='education'),\n",
"    TestColumnNumberOfDifferentMissingValues(column_name='education'),\n",
"    TestColumnAllConstantValues(column_name='education', is_critical=False),\n",
"    TestColumnAllUniqueValues(column_name='education', is_critical=False),\n",
"    TestColumnRegExp(column_name='education',reg_exp='^[0..9]'),\n",
"    TestCategoryShare(column_name='education', category='Some-college', lt=0.5),\n",
"    TestCategoryShare(column_name='age', category=27., lt=0.5)\n",
"])\n",
"\n",
"data_integrity_column_tests.run(reference_data=adult_ref, current_data=adult_cur)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "deffbb70-c7b9-411c-9798-7273545b6b4f",
"metadata": {},
"outputs": [],
"source": [
"# Display the results of the custom-criticality suite, using mode='inline'.\n",
"data_integrity_column_tests.show(mode='inline')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
197 changes: 197 additions & 0 deletions examples/how_to_questions/how_to_use_snapshots.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "c2dddbfb-e26f-47d2-bdad-8c5083e85ff6",
"metadata": {},
"source": [
"# How to use Snapshots?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b18ee6f-4602-4c37-ad3b-965efbf91c4e",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"from sklearn import datasets\n",
"\n",
"from evidently.report import Report\n",
"from evidently.metric_preset import DataDriftPreset"
]
},
{
"cell_type": "markdown",
"id": "6d078542-9c09-48e7-b749-e3ec99e27fc4",
"metadata": {},
"source": [
"## Toy Dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66256f1e-f713-47c3-b06f-f62487c4e01c",
"metadata": {},
"outputs": [],
"source": [
"bcancer_data = datasets.load_breast_cancer(as_frame=True)\n",
"bcancer = bcancer_data.frame\n",
"\n",
"# Fixed (and different) seeds keep both samples, and therefore the drift report,\n",
"# identical across Restart & Run All. The two samples are drawn independently.\n",
"bcancer_ref = bcancer.sample(n=300, replace=False, random_state=0)\n",
"bcancer_cur = bcancer.sample(n=200, replace=False, random_state=1)"
]
},
{
"cell_type": "markdown",
"id": "53421e97-f2a6-4e8d-9bb6-e6e1e1a83f91",
"metadata": {},
"source": [
"## Data Drift Report"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c5899d62-cf2a-4942-abe0-3ff54bdabda3",
"metadata": {},
"outputs": [],
"source": [
"# Build a report from the data drift preset, then compute it on the two samples.\n",
"data_drift_report = Report(metrics=[DataDriftPreset()])\n",
"\n",
"data_drift_report.run(\n",
"    reference_data=bcancer_ref,\n",
"    current_data=bcancer_cur,\n",
")"
]
},
{
"cell_type": "markdown",
"id": "601555e3-3b06-436f-a331-889cbc51a2e5",
"metadata": {},
"source": [
"## Report Formats"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "72f47a4a-0de3-4c6f-b286-41d4652f25af",
"metadata": {},
"outputs": [],
"source": [
"# Display the report in the notebook output, using mode='inline'.\n",
"data_drift_report.show(mode='inline')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f341f7bd-f95e-4d48-aa0f-d2241fe23d72",
"metadata": {},
"outputs": [],
"source": [
"# json() is the last expression in the cell, so its return value is shown as the cell output.\n",
"data_drift_report.json()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6e93d1da-a0f2-4ee3-b38d-98c009d19782",
"metadata": {},
"outputs": [],
"source": [
"# as_dict() is the last expression in the cell, so its return value is shown as the cell output.\n",
"data_drift_report.as_dict()"
]
},
{
"cell_type": "markdown",
"id": "05ae3843-37ff-4de5-a1f5-b34be2639d2c",
"metadata": {},
"source": [
"## Report saving options"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77fbc338-a992-4973-9f09-578605afab09",
"metadata": {},
"outputs": [],
"source": [
"# Export through save_html(); the relative path resolves against the current working directory.\n",
"data_drift_report.save_html('data_drift_report.html')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6ad9643c-7ef2-47b6-97d9-6d83da8f3d61",
"metadata": {},
"outputs": [],
"source": [
"# Export through save_json(); the relative path resolves against the current working directory.\n",
"data_drift_report.save_json('data_drift_report.json')"
]
},
{
"cell_type": "markdown",
"id": "f74ebd6d-a0cf-486d-a1a1-d0bbd41a43b6",
"metadata": {},
"source": [
"## Snapshot save and load"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0ad8db16-c235-44e0-b201-5469e394c753",
"metadata": {},
"outputs": [],
"source": [
"# save() writes the snapshot file that Report.load() reads in the next cell.\n",
"data_drift_report.save('snapshot.json')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dcb8c8ab-b106-4579-a4b4-f6b2641c5f88",
"metadata": {},
"outputs": [],
"source": [
"# Read the snapshot file written above and bind the result to loaded_report.\n",
"loaded_report = Report.load('snapshot.json')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2052c76e-4b89-461f-901b-c7dc3ca1dde0",
"metadata": {},
"outputs": [],
"source": [
"# Display the object loaded from the snapshot, using mode='inline'.\n",
"loaded_report.show(mode='inline')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

0 comments on commit 6cf02b3

Please sign in to comment.