diff --git a/docs/_static/images/binary_metrics.gif b/docs/_static/images/binary_metrics.gif new file mode 100644 index 0000000..2188284 Binary files /dev/null and b/docs/_static/images/binary_metrics.gif differ diff --git a/docs/conf.py b/docs/conf.py index c9f67f1..b13207b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,9 +44,9 @@ author = 'Neptune Dev Team' # The short X.Y version -version = '0.11' +version = '0.12' # The full version, including alpha/beta/rc tags -release = '0.11.0' +release = '0.12.0' # -- General configuration --------------------------------------------------- diff --git a/docs/examples/examples_index.rst b/docs/examples/examples_index.rst index c5d4140..a6a6ef4 100644 --- a/docs/examples/examples_index.rst +++ b/docs/examples/examples_index.rst @@ -4,7 +4,7 @@ Code snapshoting Image directory snapshoting Hyper parameter comparison - Log model diagnostics + Log binary classification metrics Integrate with Sacred Monitor lightGBM training Monitor fast.ai training diff --git a/docs/examples/log_binary_metrics.ipynb b/docs/examples/log_binary_metrics.ipynb new file mode 100644 index 0000000..d6c594f --- /dev/null +++ b/docs/examples/log_binary_metrics.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Log binary classification metrics to Neptune\n", + "## Train your model and run predictions\n", + "Let's train a model on a synthetic problem and predict on the test data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_classification\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import classification_report\n", + "\n", + "X, y = make_classification(n_samples=2000)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", + "\n", + "model = RandomForestClassifier()\n", + "model.fit(X_train, y_train)\n", + "\n", + "y_test_pred = model.predict_proba(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Neptune" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import neptune\n", + "\n", + "\n", + "neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')\n", + "neptune.create_experiment()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Send all binary classification metrics to Neptune\n", + "\n", + "With just one function call you can log a lot of information.\n", + "\n", + "### Class-based metrics:\n", + "\n", + "- accuracy\n", + "- precision, recall\n", + "- f1_score, f2_score\n", + "- matthews_corrcoef\n", + "- cohen_kappa\n", + "- true_positive_rate, true_negative_rate\n", + "- false_positive_rate, false_negative_rate\n", + "- positive_predictive_value, negative_predictive_value, false_discovery_rate\n", + " \n", + "### Threshold-based charts for all class-based metrics\n", + "\n", + "### Performance charts:\n", + "\n", + "- Confusion Matrix\n", + "- Classification Report Table\n", + "- ROC AUC\n", + "- Precision Recall curve\n", + "- Lift curve\n", + "- Cumulative gain chart\n", + "- Kolmogorov-Smirnov statistic chart\n", + " \n", + "### Losses:\n", + "\n", + "- log loss\n", + "- brier loss\n", + " \n", + "### Other metrics:\n", + "\n", + "- ROC AUC score\n", + "- Average precision \n", + "- KS-statistic score" + ] + }, + { + "cell_type": 
"code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from neptunecontrib.monitoring.metrics import log_binary_classification_metrics\n", + "\n", + "log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is now safely logged in Neptune.\n", + "Check out [this experiment](https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs). \n", + "\n", + "![binary classification metrics](../_static/images/binary_metrics.gif)\n", + "\n", + "## Log things separately\n", + "\n", + "You can also choose what to log and do it separately." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from neptunecontrib.monitoring.metrics import *\n", + "\n", + "log_confusion_matrix(y_test, y_test_pred[:, 1] > threshold)\n", + "log_classification_report(y_test, y_test_pred[:, 1] > threshold)\n", + "log_class_metrics(y_test, y_test_pred[:, 1] > threshold)\n", + "log_class_metrics_by_threshold(y_test, y_test_pred[:, 1])\n", + "log_roc_auc(y_test, y_test_pred)\n", + "log_precision_recall_auc(y_test, y_test_pred)\n", + "log_brier_loss(y_test, y_test_pred[:, 1])\n", + "log_log_loss(y_test, y_test_pred)\n", + "log_ks_statistic(y_test, y_test_pred)\n", + "log_cumulative_gain(y_test, y_test_pred)\n", + "log_lift_curve(y_test, y_test_pred)\n", + "log_prediction_distribution(y_test, y_test_pred[:, 1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "blog_metrics", + "language": "python", + "name": "blog_metrics" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/log_model_diagnostics.ipynb b/docs/examples/log_model_diagnostics.ipynb deleted file mode 100644 index e179c87..0000000 --- a/docs/examples/log_model_diagnostics.ipynb +++ /dev/null @@ -1,223 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Log model diagnostics to Neptune\n", - "## Train your model and run predictions\n", - "Let's train a model on a synthetic problem predict on test data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.datasets import make_classification\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import classification_report\n", - "\n", - "X, y = make_classification(n_samples=2000)\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", - "\n", - "model = RandomForestClassifier()\n", - "model.fit(X_train, y_train)\n", - "\n", - "y_test_pred = model.predict_proba(X_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Instantiate Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import neptune\n", - "\n", - "neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')\n", - "neptune.create_experiment()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Send classification report to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_binary_classification_report\n", - "\n", - "send_binary_classification_report(y_test, y_test_pred, threshold=0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image1](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report1.png)\n", - "\n", - "## Send confusion matrix to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_confusion_matrix\n", - "\n", - "send_confusion_matrix(y_test, y_test_pred[:, 1] > 0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image2](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report4.png)\n", - "\n", - "## Send ROC AUC curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_roc_auc_curve\n", - "\n", - "send_roc_auc_curve(ctx, y_test, y_test_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image3](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report3.png)\n", - "\n", - "## Send Precision-Recall curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_precision_recall\n", - "\n", - "send_precision_recall(y_test, y_test_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image4](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report5.png)\n", - "\n", - "## Send Precision-Recall curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from 
neptunecontrib.monitoring.reporting import send_prediction_distribution\n", - "\n", - "send_prediction_distribution(y_test, y_test_pred[:, 1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stop Neptune experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "neptune.stop()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also put everything in the `with` block.\n", - "For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with neptune.create_experiment():\n", - " send_prediction_distribution(y_test, y_test_pred[:, 1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image5](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report2.png)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "santander", - "language": "python", - "name": "santander" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/index.rst b/docs/index.rst index f358631..14e01de 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,21 +8,21 @@ and gives you option to do things like: * running hyper parameter sweeps in scikit-optimize, hyperopt or any other tool you like * monitor training of the lightGBM or fastai models with a single callback * much more - + Enjoy the following integrations: - + .. image:: _static/images/fastai_neptuneML.png :target: _static/images/fastai_neptuneML.png :alt: fastai neptune.ml integration - + .. image:: _static/images/sacred_neptuneML.png :target: _static/images/sacred_neptuneML.png :alt: Sacred neptune.ml integration - + .. image:: _static/images/LightGBM_neptuneML.png :target: _static/images/LightGBM_neptuneML.png :alt: lightGBM neptune.ml integration - + .. image:: _static/images/matplotlib_neptuneML.png :target: _static/images/matplotlib_neptuneML.png :alt: matplotlib neptune.ml integration @@ -30,7 +30,7 @@ Enjoy the following integrations: .. image:: _static/images/Telegram_neptuneML.png :target: _static/images/Telegram_neptuneML.png :alt: Telegram neptune.ml integration - + And the best thing is you can extend it yourself or... tell us to do it for you :). .. toctree:: @@ -55,14 +55,14 @@ And the best thing is you can extend it yourself or... tell us to do it for you bots.telegram_bot monitoring.lightgbm monitoring.fastai - monitoring.reporting + monitoring.metrics monitoring.skopt monitoring.utils sync.with_json versioning.data viz.experiments viz.projects - + Bug Reports and Questions ----------------------- diff --git a/docs/user_guide/monitoring/metrics.rst b/docs/user_guide/monitoring/metrics.rst new file mode 100644 index 0000000..247af89 --- /dev/null +++ b/docs/user_guide/monitoring/metrics.rst @@ -0,0 +1,6 @@ +Metrics +=========== + +.. 
automodule:: neptunecontrib.monitoring.metrics + :members: + :show-inheritance: diff --git a/docs/user_guide/monitoring/reporting.rst b/docs/user_guide/monitoring/reporting.rst deleted file mode 100644 index 9059988..0000000 --- a/docs/user_guide/monitoring/reporting.rst +++ /dev/null @@ -1,6 +0,0 @@ -Reporting -=========== - -.. automodule:: neptunecontrib.monitoring.reporting - :members: - :show-inheritance: diff --git a/neptunecontrib/api/utils.py b/neptunecontrib/api/utils.py index ae188c8..c3faad5 100644 --- a/neptunecontrib/api/utils.py +++ b/neptunecontrib/api/utils.py @@ -15,7 +15,9 @@ # import os import warnings +import tempfile +import joblib import pandas as pd warnings.filterwarnings('ignore') @@ -229,6 +231,41 @@ def get_filepaths(dirpath='.', extensions=None): return files +def get_pickled_artifact(experiment, filename): + """Downloads pickled artifact object from Neptune and returns a Python object. + + Downloads the pickled object from artifacts of the given experiment, + loads it and returns a Python object. + + Args: + experiment(`neptune.experiments.Experiment`): Neptune experiment. + filename(str): filename under which object was saved in Neptune. + + Examples: + Initialize Neptune:: + + import neptune + + session = neptune.sessions.Session() + project = session.get_project('USER_NAME/PROJECT_NAME') + + Choose Neptune experiment:: + + experiment = project.get_experiments(id=['PRO-101'])[0] + + Get your pickled object from experiment artifacts:: + + from neptunecontrib.api.utils import get_pickled_artifact + + results = get_pickled_artifact(experiment, 'results.pkl') + """ + with tempfile.TemporaryDirectory() as d: + experiment.download_artifact(filename, d) + full_path = os.path.join(d, filename) + artifact = joblib.load(full_path) + return artifact + + def _prep_time_column(progress_df): progress_df['timestamp'] = pd.to_datetime(progress_df['timestamp']) progress_df.sort_values('timestamp', inplace=True) diff --git a/neptunecontrib/monitoring/metrics.py b/neptunecontrib/monitoring/metrics.py new file mode 100644 index 0000000..60e6cfd --- /dev/null +++ b/neptunecontrib/monitoring/metrics.py @@ -0,0 +1,783 @@ +# +# Copyright (c) 2019, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import matplotlib.pyplot as plt +import neptune +from neptunecontrib.monitoring.utils import send_figure +import numpy as np +import pandas as pd +import scikitplot.metrics as plt_metrics +from scikitplot.helpers import binary_ks_curve +import seaborn as sns +import sklearn.metrics as sk_metrics + + +def log_binary_classification_metrics(y_true, y_pred, threshold=0.5, experiment=None): + """Creates metric charts, calculates classification metrics and logs them to Neptune.
+ + Class-based metrics that are logged: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', + 'matthews_corrcoef', 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + For each class-based metric, a curve with metric/threshold is logged to 'metrics_by_threshold' channel. + Losses that are logged: 'brier_loss', 'log_loss' + Other metrics that are logged: 'roc_auc', 'ks_statistic', 'avg_precision' + Curves that are logged: 'roc_auc', 'precision_recall_curve', 'ks_statistic_curve', 'cumulative_gain_curve', + 'lift_curve', + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + threshold (float): Threshold that calculates a class for class-based metrics. Default is 0.5. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log metrics and performance curves to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_binary_classification_metrics + + neptune.init() + with neptune.create_experiment(): + log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + log_confusion_matrix(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_classification_report(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_class_metrics(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_class_metrics_by_threshold(y_true, y_pred[:, 1], experiment=_exp) + log_roc_auc(y_true, y_pred, experiment=_exp) + log_precision_recall_auc(y_true, y_pred, experiment=_exp) + log_brier_loss(y_true, y_pred[:, 1], experiment=_exp) + log_log_loss(y_true, y_pred, experiment=_exp) + log_ks_statistic(y_true, y_pred, experiment=_exp) + log_cumulative_gain(y_true, y_pred, experiment=_exp) + log_lift_curve(y_true, y_pred, experiment=_exp) + log_prediction_distribution(y_true, y_pred[:, 1], experiment=_exp) + + +def log_confusion_matrix(y_true, y_pred_class, experiment=None, channel_name='metric_charts'): + """Creates a confusion matrix figure and logs it in Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. 
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log confusion matrix to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_confusion_matrix + + neptune.init() + with neptune.create_experiment(): + log_confusion_matrix(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + _plot_confusion_matrix(y_true, y_pred_class, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_classification_report(y_true, y_pred_class, experiment=None, channel_name='metric_charts'): + """Creates a figure with a classification report table and logs it in Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log classification report to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_classification_report + + neptune.init() + with neptune.create_experiment(): + log_classification_report(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + fig = _plot_classification_report(y_true, y_pred_class) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_class_metrics(y_true, y_pred_class, experiment=None): + """Calculates and logs all class-based metrics to Neptune. + + Metrics that are logged: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', 'matthews_corrcoef', + 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. 
Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log class metrics to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_class_metrics + + neptune.init() + with neptune.create_experiment(): + log_class_metrics(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + scores = _class_metrics(y_true, y_pred_class) + for metric_name, score in scores.items(): + _exp.log_metric(metric_name, score) + + +def log_class_metrics_by_threshold(y_true, y_pred_pos, experiment=None): + """Creates metric/threshold charts for each metric and logs them to Neptune. + + Metrics for which charts are created and logged are: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', + 'matthews_corrcoef', 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs metric/threshold charts to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_class_metrics_by_threshold + + neptune.init() + with neptune.create_experiment(): + log_class_metrics_by_threshold(y_test, y_test_pred[:,1]) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + figs = _plot_class_metrics_by_threshold(y_true, y_pred_pos) + + for fig in figs: + send_figure(fig, channel_name='metrics_by_threshold', experiment=_exp) + plt.close() + + +def log_roc_auc(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs ROC AUC curve and ROCAUC score to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
+ channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs ROCAUC curve and ROCAUC score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_roc_auc + + neptune.init() + with neptune.create_experiment(): + log_roc_auc(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + roc_auc = sk_metrics.roc_auc_score(y_true, y_pred[:, 1]) + _exp.log_metric('roc_auc', roc_auc) + + fig, ax = plt.subplots() + plt_metrics.plot_roc(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_precision_recall_auc(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs Precision Recall curve and Average precision score to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs Precision Recall curve and Average precision score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_precision_recall_auc + + neptune.init() + with neptune.create_experiment(): + log_precision_recall_auc(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + avg_precision = sk_metrics.average_precision_score(y_true, y_pred[:, 1]) + _exp.log_metric('avg_precision', avg_precision) + + fig, ax = plt.subplots() + plt_metrics.plot_precision_recall(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_brier_loss(y_true, y_pred_pos, experiment=None): + """Calculates and logs brier loss to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. 
+ experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs Brier score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_brier_loss + + neptune.init() + with neptune.create_experiment(): + log_brier_loss(y_test, y_test_pred[:,1]) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + brier = sk_metrics.brier_score_loss(y_true, y_pred_pos) + _exp.log_metric('brier_loss', brier) + + +def log_log_loss(y_true, y_pred, experiment=None): + """Calculates and logs log loss to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs log-loss to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_log_loss + + neptune.init() + with neptune.create_experiment(): + log_log_loss(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + log_loss = sk_metrics.log_loss(y_true, y_pred) + _exp.log_metric('log_loss', log_loss) + + +def log_ks_statistic(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs KS statistics curve and KS statistics score to Neptune. + + Kolmogorov-Smirnov statistics chart can be calculated for true positive rates (TPR) and true negative rates (TNR) + for each threshold and plotted on a chart. + The maximum distance from TPR to TNR can be treated as a performance metric. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'.
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log KS statistics curve and KS statistics score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_ks_statistic + + neptune.init() + with neptune.create_experiment(): + log_ks_statistic(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + res = binary_ks_curve(y_true, y_pred[:, 1]) + ks_stat = res[3] + _exp.log_metric('ks_statistic', ks_stat) + + fig, ax = plt.subplots() + plt_metrics.plot_ks_statistic(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_cumulative_gain(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates cumulative gain chart and logs it to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log cumulative gain chart to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_cumulative_gain + + neptune.init() + with neptune.create_experiment(): + log_cumulative_gain(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + plt_metrics.plot_cumulative_gain(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_lift_curve(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates lift curve chart and logs it to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'.
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log lift curve chart to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_lift_curve + + neptune.init() + with neptune.create_experiment(): + log_lift_curve(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + plt_metrics.plot_lift_curve(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_prediction_distribution(y_true, y_pred_pos, experiment=None, channel_name='metric_charts'): + """Generates prediction distribution plot from predictions and true labels. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Plot prediction distribution:: + + from neptunecontrib.monitoring.metrics import log_prediction_distribution + + log_prediction_distribution(y_test, y_test_pred[:, 1]) + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + _plot_prediction_distribution(y_true, y_pred_pos, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def expand_prediction(prediction): + """Expands 1D binary prediction for positive class. + + Args: + prediction (array-like, shape (n_samples)): + Estimated targets as returned by a classifier. + + Returns: + prediction (array-like, shape (n_samples, 2)): + Estimated targets for both negative and positive class. 
+ """ + assert prediction.shape[1] == 2, 'You can only expand 1D prediction for positive classes' + + prediction_reshaped = prediction.reshape(-1, 1) + return np.clip(np.concatenate((1.0 - prediction_reshaped, prediction_reshaped), axis=1), 0.0, 1.0) + + +def _plot_confusion_matrix(y_true, y_pred_class, ax=None): + cmap = plt.get_cmap('Blues') + cm = sk_metrics.confusion_matrix(y_true, y_pred_class) + sns.heatmap(cm, cmap=cmap, annot=True, fmt='g', ax=ax) + + +def _plot_class_metrics_by_threshold(y_true, y_pred_positive): + scores_by_thres = _class_metrics_by_threshold(y_true, y_pred_positive) + figs = [] + for name in scores_by_thres.columns: + if name == 'threshold': + continue + else: + best_thres, best_score = _get_best_thres(scores_by_thres, name) + fig, ax = plt.subplots() + ax.plot(scores_by_thres['threshold'], scores_by_thres[name]) + ax.set_title('{} by threshold'.format(name)) + ax.axvline(x=best_thres, color='red') + ax.text(x=best_thres + 0.01, y=0.98 * best_score, + s='thres={:.4f}\nscore={:.4f}'.format(best_thres, best_score), + color='red') + figs.append(fig) + return figs + + +def _plot_classification_report(y_true, y_pred_class): + report = sk_metrics.classification_report(y_true, y_pred_class, output_dict=True) + report_df = pd.DataFrame(report).transpose().round(4) + + fig, ax = plt.subplots() + ax.axis('off') + ax.axis('tight') + ax.table(cellText=report_df.values, + colLabels=report_df.columns, + rowLabels=report_df.index, + loc='center', + bbox=[0.2, 0.2, 0.8, 0.8]) + fig.tight_layout() + + return fig + + +def _plot_prediction_distribution(y_true, y_pred_pos, ax=None): + if ax is None: + _, ax = plt.subplots() + + ax.set_title('Prediction Distribution', fontsize='large') + + df = pd.DataFrame({'Prediction': y_pred_pos, + 'True label': y_true}) + + sns.distplot(df[df['True label'] == 0]['Prediction'], label='negative', ax=ax) + sns.distplot(df[df['True label'] == 1]['Prediction'], label='positive', ax=ax) + + ax.legend(prop={'size': 16}, title='Labels') + ax.set_xlim([0.0, 1.0]) + + +def _class_metrics(y_true, y_pred_class): + tn, fp, fn, tp = sk_metrics.confusion_matrix(y_true, y_pred_class).ravel() + + true_positive_rate = tp / (tp + fn) + true_negative_rate = tn / (tn + fp) + positive_predictive_value = tp / (tp + fp) + negative_predictive_value = tn / (tn + fn) + false_positive_rate = fp / (fp + tn) + false_negative_rate = fn / (tp + fn) + false_discovery_rate = fp / (tp + fp) + + scores = {'accuracy': sk_metrics.accuracy_score(y_true, y_pred_class), + 'precision': sk_metrics.precision_score(y_true, y_pred_class), + 'recall': sk_metrics.recall_score(y_true, y_pred_class), + 'f1_score': sk_metrics.fbeta_score(y_true, y_pred_class, beta=1), + 'f2_score': sk_metrics.fbeta_score(y_true, y_pred_class, beta=2), + 'matthews_corrcoef': sk_metrics.matthews_corrcoef(y_true, y_pred_class), + 'cohen_kappa': sk_metrics.cohen_kappa_score(y_true, y_pred_class), + 'true_positive_rate': true_positive_rate, + 'true_negative_rate': true_negative_rate, + 'positive_predictive_value': positive_predictive_value, + 'negative_predictive_value': negative_predictive_value, + 'false_positive_rate': false_positive_rate, + 'false_negative_rate': false_negative_rate, + 'false_discovery_rate': false_discovery_rate} + + return scores + + +def _class_metrics_by_threshold(y_true, y_pred_pos, thres_nr=100): + thresholds = [i / thres_nr for i in range(1, thres_nr, 1)] + + scores_per_thres = [] + for thres in thresholds: + y_pred_class = y_pred_pos > thres + scores = _class_metrics(y_true, 
y_pred_class) + scores['threshold'] = thres + scores_per_thres.append(pd.Series(scores)) + + return pd.DataFrame(scores_per_thres) + + +def _get_best_thres(scores_by_thres, name): + best_res = scores_by_thres[scores_by_thres[name] == scores_by_thres[name].max()][['threshold', name]] + position = len(best_res) // 2 + result = best_res.iloc[position].to_dict() + return result['threshold'], result[name] diff --git a/neptunecontrib/monitoring/reporting.py b/neptunecontrib/monitoring/reporting.py deleted file mode 100644 index b529d21..0000000 --- a/neptunecontrib/monitoring/reporting.py +++ /dev/null @@ -1,388 +0,0 @@ -# -# Copyright (c) 2019, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import tempfile - - -import matplotlib.pyplot as plt -import neptune -import pandas as pd -import seaborn as sns -from scikitplot.metrics import plot_roc, plot_precision_recall, plot_confusion_matrix - - -def send_binary_classification_report(y_true, y_pred, - experiment=None, - threshold=0.5, - figsize=(16, 12), - channel_name='classification report'): - """Creates binary classification report and logs it in Neptune. - - This function creates ROC AUC curve, confusion matrix, precision recall curve and - prediction distribution charts and logs it to the 'classification report' channel in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - threshold(float): threshold to be applied for the class asignment. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'classification report'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_binary_classification_report - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - with neptune.create_experiment(): - send_binary_classification_report(y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig = plot_binary_classification_report(y_true, y_pred, threshold=threshold, figsize=figsize) - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_prediction_distribution(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='prediction distribution'): - """Creates prediction distribution chart and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): Predictions for the positive class in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'prediction distribution'. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log prediction distribution to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_prediction_distribution - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_prediction_distribution(ctx, y_test, y_test_pred[:, 1]) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_prediction_distribution(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_roc_auc_curve(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='ROC AUC curve'): - """Creates ROC AUC curve and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_roc_auc_curve - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_roc_auc_curve(ctx, y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_roc(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_confusion_matrix(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='confusion_matrix'): - """Creates ROC AUC curve and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): Positive class predictions in the binary format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_confusion_matrix - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_confusion_matrix(ctx, y_test, y_test_pred[:, 1] > 0.5) - - """ - fig, ax = plt.subplots(figsize=figsize) - plot_confusion_matrix(y_true, y_pred, ax=ax) - - _exp = experiment if experiment else neptune - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_precision_recall(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='precision_recall_curve'): - """Creates precision recall curve and logs it in Neptune. - - Args: - ctx(`neptune.Context`): Neptune context. - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_precision_recall - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_precision_recall(ctx, y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_precision_recall(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def plot_binary_classification_report(y_true, y_pred, threshold=0.5, figsize=(16, 12)): - """Creates binary classification report. - - This function creates ROC AUC curve, confusion matrix, precision recall curve and - prediction distribution charts and logs it to the 'classification report' channel in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - threshold(float): threshold to be applied for the class asignment. - figsize(tuple): size of the matplotlib.pyplot figure object - - Returns: - (`matplotlib.figure`): Figure object with binary classification report. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Plot binary classification report:: - - from neptunecontrib.monitoring.reporting import plot_binary_classification_report - - plot_binary_classification_report(y_test, y_test_pred) - - """ - fig, axs = plt.subplots(2, 2, figsize=figsize) - plot_roc(y_true, y_pred, ax=axs[0, 0]) - plot_precision_recall(y_true, y_pred, ax=axs[0, 1]) - plot_prediction_distribution(y_true, y_pred[:, 1], ax=axs[1, 0]) - plot_confusion_matrix(y_true, y_pred[:, 1] > threshold, ax=axs[1, 1]) - fig.tight_layout() - return fig - - -def plot_prediction_distribution(y_true, y_pred, ax=None, figsize=None): - """Generates prediction distribution plot from predictions and true labels. - - Args: - y_true (array-like, shape (n_samples)): - Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): - Estimated targets as returned by a classifier. - ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to - plot the curve. If None, the plot is drawn on a new set of axes. - figsize (2-tuple, optional): Tuple denoting figure size of the plot - e.g. (6, 6). Defaults to ``None``. - - Returns: - ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was - drawn. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Plot prediction distribution:: - - from neptunecontrib.monitoring.reporting import plot_prediction_distribution - - plot_prediction_distribution(y_test, y_test_pred[:, 1]) - """ - - if ax is None: - _, ax = plt.subplots(1, 1, figsize=figsize) - - ax.set_title('Prediction Distribution', fontsize='large') - - df = pd.DataFrame({'Prediction': y_pred, - 'True label': y_true}) - - sns.distplot(df[df['True label'] == 0]['Prediction'], label='negative', ax=ax) - sns.distplot(df[df['True label'] == 1]['Prediction'], label='positive', ax=ax) - - ax.legend(prop={'size': 16}, title='Labels') - - return ax diff --git a/setup.py b/setup.py index 4ad87b5..a3060ec 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ def main(): extras = { 'bots': ['python-telegram-bot'], 'hpo': ['scikit-optimize==0.5.2', 'scipy'], - 'monitoring': ['scikit-optimize==0.5.2', 'sacred==0.7.5', 'scikit-plot==0.3.7', 'seaborn'], + 'monitoring': ['scikit-optimize==0.5.2', 'sacred==0.7.5', 'scikit-plot==0.3.7', 'seaborn==0.8.1'], 'versioning': ['boto3', 'numpy'], 'viz': ['altair==2.3.0'], } @@ -19,7 +19,7 @@ def main(): setup( name='neptune-contrib', - version='0.11.0', + version='0.12.0', description='Neptune Python library contributions', author='neptune.ml', author_email='contact@neptune.ml',
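
A minimal usage sketch of the new neptunecontrib.monitoring.metrics API from the diff above, assuming a Neptune project named 'USER_NAME/PROJECT_NAME' exists and the monitoring extras (scikit-plot, seaborn) are installed:

import neptune
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from neptunecontrib.monitoring.metrics import (log_binary_classification_metrics,
                                               log_class_metrics_by_threshold)

# train a model on a synthetic problem and score the held-out test set
X, y = make_classification(n_samples=2000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestClassifier().fit(X_train, y_train)
y_test_pred = model.predict_proba(X_test)  # shape (n_samples, 2), as the asserts above require

neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')  # assumes this project exists
with neptune.create_experiment():
    # one call logs the class-based metrics, losses, scores and performance charts
    log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5)
    # the individual log_* helpers can also be called on their own, e.g. metric/threshold charts
    log_class_metrics_by_threshold(y_test, y_test_pred[:, 1])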