diff --git a/docs/_static/images/binary_metrics.gif b/docs/_static/images/binary_metrics.gif new file mode 100644 index 0000000..2188284 Binary files /dev/null and b/docs/_static/images/binary_metrics.gif differ diff --git a/docs/conf.py b/docs/conf.py index c9f67f1..b13207b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,9 +44,9 @@ author = 'Neptune Dev Team' # The short X.Y version -version = '0.11' +version = '0.12' # The full version, including alpha/beta/rc tags -release = '0.11.0' +release = '0.12.0' # -- General configuration --------------------------------------------------- diff --git a/docs/examples/examples_index.rst b/docs/examples/examples_index.rst index c5d4140..a6a6ef4 100644 --- a/docs/examples/examples_index.rst +++ b/docs/examples/examples_index.rst @@ -4,7 +4,7 @@ Code snapshoting Image directory snapshoting Hyper parameter comparison - Log model diagnostics + Log binary classification metrics Integrate with Sacred Monitor lightGBM training Monitor fast.ai training diff --git a/docs/examples/log_binary_metrics.ipynb b/docs/examples/log_binary_metrics.ipynb new file mode 100644 index 0000000..d6c594f --- /dev/null +++ b/docs/examples/log_binary_metrics.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Log binary classification metrics to Neptune\n", + "## Train your model and run predictions\n", + "Let's train a model on a synthetic problem and predict on the test data." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.datasets import make_classification\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import classification_report\n", + "\n", + "X, y = make_classification(n_samples=2000)\n", + "\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", + "\n", + "model = RandomForestClassifier()\n", + "model.fit(X_train, y_train)\n", + "\n", + "y_test_pred = model.predict_proba(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Instantiate Neptune" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import neptune\n", + "\n", + "\n", + "neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')\n", + "neptune.create_experiment()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Send all binary classification metrics to Neptune\n", + "\n", + "With just one function call you can log a lot of information.\n", + "\n", + "### Class-based metrics:\n", + "\n", + "- accuracy\n", + "- precision, recall\n", + "- f1_score, f2_score\n", + "- matthews_corrcoef\n", + "- cohen_kappa\n", + "- true_positive_rate, true_negative_rate\n", + "- false_positive_rate, false_negative_rate\n", + "- positive_predictive_value, negative_predictive_value, false_discovery_rate\n", + " \n", + "### Threshold-based charts for all class-based metrics\n", + "\n", + "### Performance charts:\n", + "\n", + "- Confusion Matrix\n", + "- Classification Report Table\n", + "- ROC AUC\n", + "- Precision Recall curve\n", + "- Lift curve\n", + "- Cumulative gain chart\n", + "- Kolmogorov-Smirnov statistic chart\n", + " \n", + "### Losses:\n", + "\n", + "- log loss\n", + "- brier loss\n", + " \n", + "### Other metrics:\n", + "\n", + "- ROC AUC score\n", + "- Average precision \n", + "- KS-statistic score" + ] + }, + { + "cell_type": 
"code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from neptunecontrib.monitoring.metrics import log_binary_classification_metrics\n", + "\n", + "log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "It is now safely logged in Neptune.\n", + "Check out [this experiment](https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs). \n", + "\n", + "![binary classification metrics](../_static/images/binary_metrics.gif)\n", + "\n", + "## Log things separately\n", + "\n", + "You can also choose what to log and do it separately." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from neptunecontrib.monitoring.metrics import *\n", + "\n", + "log_confusion_matrix(y_test, y_test_pred[:, 1] > threshold)\n", + "log_classification_report(y_test, y_test_pred[:, 1] > threshold)\n", + "log_class_metrics(y_test, y_test_pred[:, 1] > threshold)\n", + "log_class_metrics_by_threshold(y_test, y_test_pred[:, 1])\n", + "log_roc_auc(y_test, y_test_pred)\n", + "log_precision_recall_auc(y_test, y_test_pred)\n", + "log_brier_loss(y_test, y_test_pred[:, 1])\n", + "log_log_loss(y_test, y_test_pred)\n", + "log_ks_statistic(y_test, y_test_pred)\n", + "log_cumulative_gain(y_test, y_test_pred)\n", + "log_lift_curve(y_test, y_test_pred)\n", + "log_prediction_distribution(y_test, y_test_pred[:, 1])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "blog_metrics", + "language": "python", + "name": "blog_metrics" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/examples/log_model_diagnostics.ipynb b/docs/examples/log_model_diagnostics.ipynb deleted file mode 100644 index e179c87..0000000 --- a/docs/examples/log_model_diagnostics.ipynb +++ /dev/null @@ -1,223 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Log model diagnostics to Neptune\n", - "## Train your model and run predictions\n", - "Let's train a model on a synthetic problem predict on test data." 
- ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from sklearn.datasets import make_classification\n", - "from sklearn.ensemble import RandomForestClassifier\n", - "from sklearn.model_selection import train_test_split\n", - "from sklearn.metrics import classification_report\n", - "\n", - "X, y = make_classification(n_samples=2000)\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)\n", - "\n", - "model = RandomForestClassifier()\n", - "model.fit(X_train, y_train)\n", - "\n", - "y_test_pred = model.predict_proba(X_test)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Instantiate Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import neptune\n", - "\n", - "neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')\n", - "neptune.create_experiment()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Send classification report to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_binary_classification_report\n", - "\n", - "send_binary_classification_report(y_test, y_test_pred, threshold=0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image1](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report1.png)\n", - "\n", - "## Send confusion matrix to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_confusion_matrix\n", - "\n", - "send_confusion_matrix(y_test, y_test_pred[:, 1] > 0.5)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image2](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report4.png)\n", - "\n", - "## Send ROC AUC curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_roc_auc_curve\n", - "\n", - "send_roc_auc_curve(ctx, y_test, y_test_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image3](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report3.png)\n", - "\n", - "## Send Precision-Recall curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from neptunecontrib.monitoring.reporting import send_precision_recall\n", - "\n", - "send_precision_recall(y_test, y_test_pred)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image4](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report5.png)\n", - "\n", - "## Send Precision-Recall curve to Neptune" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from 
neptunecontrib.monitoring.reporting import send_prediction_distribution\n", - "\n", - "send_prediction_distribution(y_test, y_test_pred[:, 1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Stop Neptune experiment" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "neptune.stop()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "You can also put everything in the `with` block.\n", - "For example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "with neptune.create_experiment():\n", - " send_prediction_distribution(y_test, y_test_pred[:, 1])" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "It is now safely logged in Neptune\n", - "\n", - "![image5](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/a1386b3a5edddc0eecb478a81d497336156b5b19/clf_report2.png)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "santander", - "language": "python", - "name": "santander" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/index.rst b/docs/index.rst index f358631..14e01de 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,21 +8,21 @@ and gives you option to do things like: * running hyper parameter sweeps in scikit-optimize, hyperopt or any other tool you like * monitor training of the lightGBM or fastai models with a single callback * much more - + Enjoy the following integrations: - + .. image:: _static/images/fastai_neptuneML.png :target: _static/images/fastai_neptuneML.png :alt: fastai neptune.ml integration - + .. image:: _static/images/sacred_neptuneML.png :target: _static/images/sacred_neptuneML.png :alt: Sacred neptune.ml integration - + .. image:: _static/images/LightGBM_neptuneML.png :target: _static/images/LightGBM_neptuneML.png :alt: lightGBM neptune.ml integration - + .. image:: _static/images/matplotlib_neptuneML.png :target: _static/images/matplotlib_neptuneML.png :alt: matplotlib neptune.ml integration @@ -30,7 +30,7 @@ Enjoy the following integrations: .. image:: _static/images/Telegram_neptuneML.png :target: _static/images/Telegram_neptuneML.png :alt: Telegram neptune.ml integration - + And the best thing is you can extend it yourself or... tell us to do it for you :). .. toctree:: @@ -55,14 +55,14 @@ And the best thing is you can extend it yourself or... tell us to do it for you bots.telegram_bot monitoring.lightgbm monitoring.fastai - monitoring.reporting + monitoring.metrics monitoring.skopt monitoring.utils sync.with_json versioning.data viz.experiments viz.projects - + Bug Reports and Questions ----------------------- diff --git a/docs/user_guide/monitoring/metrics.rst b/docs/user_guide/monitoring/metrics.rst new file mode 100644 index 0000000..247af89 --- /dev/null +++ b/docs/user_guide/monitoring/metrics.rst @@ -0,0 +1,6 @@ +Metrics +=========== + +.. 
automodule:: neptunecontrib.monitoring.metrics + :members: + :show-inheritance: diff --git a/docs/user_guide/monitoring/reporting.rst b/docs/user_guide/monitoring/reporting.rst deleted file mode 100644 index 9059988..0000000 --- a/docs/user_guide/monitoring/reporting.rst +++ /dev/null @@ -1,6 +0,0 @@ -Reporting -=========== - -.. automodule:: neptunecontrib.monitoring.reporting - :members: - :show-inheritance: diff --git a/neptunecontrib/api/utils.py b/neptunecontrib/api/utils.py index ae188c8..c3faad5 100644 --- a/neptunecontrib/api/utils.py +++ b/neptunecontrib/api/utils.py @@ -15,7 +15,9 @@ # import os import warnings +import tempfile +import joblib import pandas as pd warnings.filterwarnings('ignore') @@ -229,6 +231,41 @@ def get_filepaths(dirpath='.', extensions=None): return files +def get_pickled_artifact(experiment, filename): + """Downloads pickled artifact object from Neptune and returns a Python object. + + Downloads the pickled object from artifacts of the given experiment, + loads it and returns a Python object. + + Args: + experiment(`neptune.experiments.Experiment`): Neptune experiment. + filename(str): filename under which object was saved in Neptune. + + Examples: + Initialize Neptune:: + + import neptune + + session = neptune.sessions.Session() + project = session.get_project('USER_NAME/PROJECT_NAME') + + Choose Neptune experiment:: + + experiment = project.get_experiments(id=['PRO-101'])[0] + + Get your pickled object from experiment artifacts:: + + from neptunecontrib.api.utils import get_pickled_artifact + + results = get_pickled_artifact(experiment, 'results.pkl') + """ + with tempfile.TemporaryDirectory() as d: + experiment.download_artifact(filename, d) + full_path = os.path.join(d, filename) + artifact = joblib.load(full_path) + return artifact + + def _prep_time_column(progress_df): progress_df['timestamp'] = pd.to_datetime(progress_df['timestamp']) progress_df.sort_values('timestamp', inplace=True) diff --git a/neptunecontrib/monitoring/metrics.py b/neptunecontrib/monitoring/metrics.py new file mode 100644 index 0000000..60e6cfd --- /dev/null +++ b/neptunecontrib/monitoring/metrics.py @@ -0,0 +1,783 @@ +# +# Copyright (c) 2019, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import matplotlib.pyplot as plt +import neptune +from neptunecontrib.monitoring.utils import send_figure +import numpy as np +import pandas as pd +import scikitplot.metrics as plt_metrics +from scikitplot.helpers import binary_ks_curve +import seaborn as sns +import sklearn.metrics as sk_metrics + + +def log_binary_classification_metrics(y_true, y_pred, threshold=0.5, experiment=None): + """Creates metric charts, calculates classification metrics and logs them to Neptune.
+ + Class-based metrics that are logged: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', + 'matthews_corrcoef', 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + For each class-based metric, a curve with metric/threshold is logged to 'metrics_by_threshold' channel. + Losses that are logged: 'brier_loss', 'log_loss' + Other metrics that are logged: 'roc_auc', 'ks_statistic', 'avg_precision' + Curves that are logged: 'roc_auc', 'precision_recall_curve', 'ks_statistic_curve', 'cumulative_gain_curve', + 'lift_curve', + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + threshold (float): Threshold that calculates a class for class-based metrics. Default is 0.5. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log metrics and performance curves to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_binary_classification_metrics + + neptune.init() + with neptune.create_experiment(): + log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + log_confusion_matrix(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_classification_report(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_class_metrics(y_true, y_pred[:, 1] > threshold, experiment=_exp) + log_class_metrics_by_threshold(y_true, y_pred[:, 1], experiment=_exp) + log_roc_auc(y_true, y_pred, experiment=_exp) + log_precision_recall_auc(y_true, y_pred, experiment=_exp) + log_brier_loss(y_true, y_pred[:, 1], experiment=_exp) + log_log_loss(y_true, y_pred, experiment=_exp) + log_ks_statistic(y_true, y_pred, experiment=_exp) + log_cumulative_gain(y_true, y_pred, experiment=_exp) + log_lift_curve(y_true, y_pred, experiment=_exp) + log_prediction_distribution(y_true, y_pred[:, 1], experiment=_exp) + + +def log_confusion_matrix(y_true, y_pred_class, experiment=None, channel_name='metric_charts'): + """Creates a confusion matrix figure and logs it in Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. 
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log confusion matrix to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_confusion_matrix + + neptune.init() + with neptune.create_experiment(): + log_confusion_matrix(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + _plot_confusion_matrix(y_true, y_pred_class, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_classification_report(y_true, y_pred_class, experiment=None, channel_name='metric_charts'): + """Creates a figure with a classification report table and logs it in Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log classification report to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_classification_report + + neptune.init() + with neptune.create_experiment(): + log_classification_report(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + fig = _plot_classification_report(y_true, y_pred_class) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_class_metrics(y_true, y_pred_class, experiment=None): + """Calculates and logs all class-based metrics to Neptune. + + Metrics that are logged: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', 'matthews_corrcoef', + 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_class (array-like, shape (n_samples)): Class predictions with values 0 or 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. 
Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Log class metrics to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_class_metrics + + neptune.init() + with neptune.create_experiment(): + log_class_metrics(y_test, y_test_pred[:,1]>0.5) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_class.shape) == 1, 'y_pred_class needs to be 1D class prediction with values 0, 1' + + _exp = experiment if experiment else neptune + + scores = _class_metrics(y_true, y_pred_class) + for metric_name, score in scores.items(): + _exp.log_metric(metric_name, score) + + +def log_class_metrics_by_threshold(y_true, y_pred_pos, experiment=None): + """Creates metric/threshold charts for each metric and logs them to Neptune. + + Metrics for which charts are created and logged are: 'accuracy', 'precision', 'recall', 'f1_score', 'f2_score', + 'matthews_corrcoef', 'cohen_kappa', 'true_positive_rate', 'true_negative_rate', 'positive_predictive_value', + 'negative_predictive_value', 'false_positive_rate', 'false_negative_rate', 'false_discovery_rate' + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs metric/threshold charts to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_class_metrics_by_threshold + + neptune.init() + with neptune.create_experiment(): + log_class_metrics_by_threshold(y_test, y_test_pred[:,1]) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + figs = _plot_class_metrics_by_threshold(y_true, y_pred_pos) + + for fig in figs: + send_figure(fig, channel_name='metrics_by_threshold', experiment=_exp) + plt.close() + + +def log_roc_auc(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs ROC AUC curve and ROCAUC score to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None.
+ channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs ROCAUC curve and ROCAUC score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_roc_auc + + neptune.init() + with neptune.create_experiment(): + log_roc_auc(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + roc_auc = sk_metrics.roc_auc_score(y_true, y_pred[:, 1]) + _exp.log_metric('roc_auc', roc_auc) + + fig, ax = plt.subplots() + plt_metrics.plot_roc(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_precision_recall_auc(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs Precision Recall curve and Average precision score to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs Precision Recall curve and Average precision score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_precision_recall_auc + + neptune.init() + with neptune.create_experiment(): + log_precision_recall_auc(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + avg_precision = sk_metrics.average_precision_score(y_true, y_pred[:, 1]) + _exp.log_metric('avg_precision', avg_precision) + + fig, ax = plt.subplots() + plt_metrics.plot_precision_recall(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_brier_loss(y_true, y_pred_pos, experiment=None): + """Calculates and logs brier loss to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. 
+ experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs Brier score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_brier_loss + + neptune.init() + with neptune.create_experiment(): + log_brier_loss(y_test, y_test_pred[:,1]) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + brier = sk_metrics.brier_score_loss(y_true, y_pred_pos) + _exp.log_metric('brier_loss', brier) + + +def log_log_loss(y_true, y_pred, experiment=None): + """Calculates and logs log loss to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Logs log-loss to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_log_loss + + neptune.init() + with neptune.create_experiment(): + log_log_loss(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + log_loss = sk_metrics.log_loss(y_true, y_pred) + _exp.log_metric('log_loss', log_loss) + + +def log_ks_statistic(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates and logs KS statistics curve and KS statistics score to Neptune. + + Kolmogorov-Smirnov statistics chart can be calculated for true positive rates (TPR) and true negative rates (TNR) + for each threshold and plotted on a chart. + The maximum distance from TPR to TNR can be treated as a performance metric. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'.
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log KS statistics curve and KS statistics score to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_ks_statistic + + neptune.init() + with neptune.create_experiment(): + log_ks_statistic(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + res = binary_ks_curve(y_true, y_pred[:, 1]) + ks_stat = res[3] + _exp.log_metric('ks_statistic', ks_stat) + + fig, ax = plt.subplots() + plt_metrics.plot_ks_statistic(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_cumulative_gain(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates cumulative gain chart and logs it to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log cumulative gain chart to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_cumulative_gain + + neptune.init() + with neptune.create_experiment(): + log_cumulative_gain(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + plt_metrics.plot_cumulative_gain(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_lift_curve(y_true, y_pred, experiment=None, channel_name='metric_charts'): + """Creates lift curve chart and logs it to Neptune. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred (array-like, shape (n_samples, 2)): Predictions for classes 0 and 1 with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'.
+ + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Create and log lift curve chart to Neptune:: + + import neptune + from neptunecontrib.monitoring.metrics import log_lift_curve + + neptune.init() + with neptune.create_experiment(): + log_lift_curve(y_test, y_test_pred) + + Check out this experiment https://ui.neptune.ml/o/neptune-ml/org/binary-classification-metrics/e/BIN-101/logs. + + """ + assert len(y_pred.shape) == 2, 'y_pred needs to be (n_samples, 2), use expand_prediction helper to format it' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + plt_metrics.plot_lift_curve(y_true, y_pred, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def log_prediction_distribution(y_true, y_pred_pos, experiment=None, channel_name='metric_charts'): + """Generates prediction distribution plot from predictions and true labels. + + Args: + y_true (array-like, shape (n_samples)): Ground truth (correct) target values. + y_pred_pos (array-like, shape (n_samples)): Score predictions with values from 0 to 1. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + channel_name(str): name of the neptune channel. Default is 'metric_charts'. + + Examples: + Train the model and make predictions on test:: + + from sklearn.datasets import make_classification + from sklearn.ensemble import RandomForestClassifier + from sklearn.model_selection import train_test_split + from sklearn.metrics import classification_report + + X, y = make_classification(n_samples=2000) + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) + + model = RandomForestClassifier() + model.fit(X_train, y_train) + + y_test_pred = model.predict_proba(X_test) + + Plot prediction distribution:: + + from neptunecontrib.monitoring.metrics import log_prediction_distribution + + log_prediction_distribution(y_test, y_test_pred[:, 1]) + """ + assert len(y_pred_pos.shape) == 1, 'y_pred_pos needs to be 1D prediction for positive class' + + _exp = experiment if experiment else neptune + + fig, ax = plt.subplots() + _plot_prediction_distribution(y_true, y_pred_pos, ax=ax) + send_figure(fig, channel_name=channel_name, experiment=_exp) + plt.close() + + +def expand_prediction(prediction): + """Expands 1D binary prediction for positive class. + + Args: + prediction (array-like, shape (n_samples)): + Estimated targets as returned by a classifier. + + Returns: + prediction (array-like, shape (n_samples, 2)): + Estimated targets for both negative and positive class. 
+ """ + assert prediction.shape[1] == 2, 'You can only expand 1D prediction for positive classes' + + prediction_reshaped = prediction.reshape(-1, 1) + return np.clip(np.concatenate((1.0 - prediction_reshaped, prediction_reshaped), axis=1), 0.0, 1.0) + + +def _plot_confusion_matrix(y_true, y_pred_class, ax=None): + cmap = plt.get_cmap('Blues') + cm = sk_metrics.confusion_matrix(y_true, y_pred_class) + sns.heatmap(cm, cmap=cmap, annot=True, fmt='g', ax=ax) + + +def _plot_class_metrics_by_threshold(y_true, y_pred_positive): + scores_by_thres = _class_metrics_by_threshold(y_true, y_pred_positive) + figs = [] + for name in scores_by_thres.columns: + if name == 'threshold': + continue + else: + best_thres, best_score = _get_best_thres(scores_by_thres, name) + fig, ax = plt.subplots() + ax.plot(scores_by_thres['threshold'], scores_by_thres[name]) + ax.set_title('{} by threshold'.format(name)) + ax.axvline(x=best_thres, color='red') + ax.text(x=best_thres + 0.01, y=0.98 * best_score, + s='thres={:.4f}\nscore={:.4f}'.format(best_thres, best_score), + color='red') + figs.append(fig) + return figs + + +def _plot_classification_report(y_true, y_pred_class): + report = sk_metrics.classification_report(y_true, y_pred_class, output_dict=True) + report_df = pd.DataFrame(report).transpose().round(4) + + fig, ax = plt.subplots() + ax.axis('off') + ax.axis('tight') + ax.table(cellText=report_df.values, + colLabels=report_df.columns, + rowLabels=report_df.index, + loc='center', + bbox=[0.2, 0.2, 0.8, 0.8]) + fig.tight_layout() + + return fig + + +def _plot_prediction_distribution(y_true, y_pred_pos, ax=None): + if ax is None: + _, ax = plt.subplots() + + ax.set_title('Prediction Distribution', fontsize='large') + + df = pd.DataFrame({'Prediction': y_pred_pos, + 'True label': y_true}) + + sns.distplot(df[df['True label'] == 0]['Prediction'], label='negative', ax=ax) + sns.distplot(df[df['True label'] == 1]['Prediction'], label='positive', ax=ax) + + ax.legend(prop={'size': 16}, title='Labels') + ax.set_xlim([0.0, 1.0]) + + +def _class_metrics(y_true, y_pred_class): + tn, fp, fn, tp = sk_metrics.confusion_matrix(y_true, y_pred_class).ravel() + + true_positive_rate = tp / (tp + fn) + true_negative_rate = tn / (tn + fp) + positive_predictive_value = tp / (tp + fp) + negative_predictive_value = tn / (tn + fn) + false_positive_rate = fp / (fp + tn) + false_negative_rate = fn / (tp + fn) + false_discovery_rate = fp / (tp + fp) + + scores = {'accuracy': sk_metrics.accuracy_score(y_true, y_pred_class), + 'precision': sk_metrics.precision_score(y_true, y_pred_class), + 'recall': sk_metrics.recall_score(y_true, y_pred_class), + 'f1_score': sk_metrics.fbeta_score(y_true, y_pred_class, beta=1), + 'f2_score': sk_metrics.fbeta_score(y_true, y_pred_class, beta=2), + 'matthews_corrcoef': sk_metrics.matthews_corrcoef(y_true, y_pred_class), + 'cohen_kappa': sk_metrics.cohen_kappa_score(y_true, y_pred_class), + 'true_positive_rate': true_positive_rate, + 'true_negative_rate': true_negative_rate, + 'positive_predictive_value': positive_predictive_value, + 'negative_predictive_value': negative_predictive_value, + 'false_positive_rate': false_positive_rate, + 'false_negative_rate': false_negative_rate, + 'false_discovery_rate': false_discovery_rate} + + return scores + + +def _class_metrics_by_threshold(y_true, y_pred_pos, thres_nr=100): + thresholds = [i / thres_nr for i in range(1, thres_nr, 1)] + + scores_per_thres = [] + for thres in thresholds: + y_pred_class = y_pred_pos > thres + scores = _class_metrics(y_true, 
y_pred_class) + scores['threshold'] = thres + scores_per_thres.append(pd.Series(scores)) + + return pd.DataFrame(scores_per_thres) + + +def _get_best_thres(scores_by_thres, name): + best_res = scores_by_thres[scores_by_thres[name] == scores_by_thres[name].max()][['threshold', name]] + position = len(best_res) // 2 + result = best_res.iloc[position].to_dict() + return result['threshold'], result[name] diff --git a/neptunecontrib/monitoring/reporting.py b/neptunecontrib/monitoring/reporting.py deleted file mode 100644 index b529d21..0000000 --- a/neptunecontrib/monitoring/reporting.py +++ /dev/null @@ -1,388 +0,0 @@ -# -# Copyright (c) 2019, Neptune Labs Sp. z o.o. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import tempfile - - -import matplotlib.pyplot as plt -import neptune -import pandas as pd -import seaborn as sns -from scikitplot.metrics import plot_roc, plot_precision_recall, plot_confusion_matrix - - -def send_binary_classification_report(y_true, y_pred, - experiment=None, - threshold=0.5, - figsize=(16, 12), - channel_name='classification report'): - """Creates binary classification report and logs it in Neptune. - - This function creates ROC AUC curve, confusion matrix, precision recall curve and - prediction distribution charts and logs it to the 'classification report' channel in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - threshold(float): threshold to be applied for the class asignment. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'classification report'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_binary_classification_report - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - with neptune.create_experiment(): - send_binary_classification_report(y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig = plot_binary_classification_report(y_true, y_pred, threshold=threshold, figsize=figsize) - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_prediction_distribution(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='prediction distribution'): - """Creates prediction distribution chart and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): Predictions for the positive class in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'prediction distribution'. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log prediction distribution to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_prediction_distribution - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_prediction_distribution(ctx, y_test, y_test_pred[:, 1]) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_prediction_distribution(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_roc_auc_curve(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='ROC AUC curve'): - """Creates ROC AUC curve and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_roc_auc_curve - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_roc_auc_curve(ctx, y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_roc(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_confusion_matrix(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='confusion_matrix'): - """Creates ROC AUC curve and logs it in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): Positive class predictions in the binary format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_confusion_matrix - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_confusion_matrix(ctx, y_test, y_test_pred[:, 1] > 0.5) - - """ - fig, ax = plt.subplots(figsize=figsize) - plot_confusion_matrix(y_true, y_pred, ax=ax) - - _exp = experiment if experiment else neptune - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def send_precision_recall(y_true, y_pred, - experiment=None, - figsize=(16, 12), - channel_name='precision_recall_curve'): - """Creates precision recall curve and logs it in Neptune. - - Args: - ctx(`neptune.Context`): Neptune context. - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - figsize(tuple): size of the matplotlib.pyplot figure object - channel_name(str): name of the neptune channel. Default is 'ROC AUC curve'. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Log classification report to Neptune:: - - import neptune - from neptunecontrib.monitoring.reporting import send_precision_recall - - neptune.init(qualified_project_name='USER_NAME/PROJECT_NAME') - - with neptune.create_experiment(): - send_precision_recall(ctx, y_test, y_test_pred) - - """ - - _exp = experiment if experiment else neptune - - fig, ax = plt.subplots(figsize=figsize) - plot_precision_recall(y_true, y_pred, ax=ax) - - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) - - -def plot_binary_classification_report(y_true, y_pred, threshold=0.5, figsize=(16, 12)): - """Creates binary classification report. - - This function creates ROC AUC curve, confusion matrix, precision recall curve and - prediction distribution charts and logs it to the 'classification report' channel in Neptune. - - Args: - y_true (array-like, shape (n_samples)): Ground truth (correct) target values. - y_pred (array-like, shape (n_samples, 2)): Predictions both for negative and positive class - in the float format. - threshold(float): threshold to be applied for the class asignment. - figsize(tuple): size of the matplotlib.pyplot figure object - - Returns: - (`matplotlib.figure`): Figure object with binary classification report. - - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Plot binary classification report:: - - from neptunecontrib.monitoring.reporting import plot_binary_classification_report - - plot_binary_classification_report(y_test, y_test_pred) - - """ - fig, axs = plt.subplots(2, 2, figsize=figsize) - plot_roc(y_true, y_pred, ax=axs[0, 0]) - plot_precision_recall(y_true, y_pred, ax=axs[0, 1]) - plot_prediction_distribution(y_true, y_pred[:, 1], ax=axs[1, 0]) - plot_confusion_matrix(y_true, y_pred[:, 1] > threshold, ax=axs[1, 1]) - fig.tight_layout() - return fig - - -def plot_prediction_distribution(y_true, y_pred, ax=None, figsize=None): - """Generates prediction distribution plot from predictions and true labels. - - Args: - y_true (array-like, shape (n_samples)): - Ground truth (correct) target values. - y_pred (array-like, shape (n_samples)): - Estimated targets as returned by a classifier. - ax (:class:`matplotlib.axes.Axes`, optional): The axes upon which to - plot the curve. If None, the plot is drawn on a new set of axes. - figsize (2-tuple, optional): Tuple denoting figure size of the plot - e.g. (6, 6). Defaults to ``None``. - - Returns: - ax (:class:`matplotlib.axes.Axes`): The axes on which the plot was - drawn. 
- - Examples: - Train the model and make predictions on test:: - - from sklearn.datasets import make_classification - from sklearn.ensemble import RandomForestClassifier - from sklearn.model_selection import train_test_split - from sklearn.metrics import classification_report - - X, y = make_classification(n_samples=2000) - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2) - - model = RandomForestClassifier() - model.fit(X_train, y_train) - - y_test_pred = model.predict_proba(X_test) - - Plot prediction distribution:: - - from neptunecontrib.monitoring.reporting import plot_prediction_distribution - - plot_prediction_distribution(y_test, y_test_pred[:, 1]) - """ - - if ax is None: - _, ax = plt.subplots(1, 1, figsize=figsize) - - ax.set_title('Prediction Distribution', fontsize='large') - - df = pd.DataFrame({'Prediction': y_pred, - 'True label': y_true}) - - sns.distplot(df[df['True label'] == 0]['Prediction'], label='negative', ax=ax) - sns.distplot(df[df['True label'] == 1]['Prediction'], label='positive', ax=ax) - - ax.legend(prop={'size': 16}, title='Labels') - - return ax diff --git a/setup.py b/setup.py index 4ad87b5..a3060ec 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ def main(): extras = { 'bots': ['python-telegram-bot'], 'hpo': ['scikit-optimize==0.5.2', 'scipy'], - 'monitoring': ['scikit-optimize==0.5.2', 'sacred==0.7.5', 'scikit-plot==0.3.7', 'seaborn'], + 'monitoring': ['scikit-optimize==0.5.2', 'sacred==0.7.5', 'scikit-plot==0.3.7', 'seaborn==0.8.1'], 'versioning': ['boto3', 'numpy'], 'viz': ['altair==2.3.0'], } @@ -19,7 +19,7 @@ def main(): setup( name='neptune-contrib', - version='0.11.0', + version='0.12.0', description='Neptune Python library contributions', author='neptune.ml', author_email='contact@neptune.ml',
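
A minimal usage sketch of the new neptunecontrib.monitoring.metrics API from the diff above, assuming a Neptune project named 'USER_NAME/PROJECT_NAME' exists and the monitoring extras (scikit-plot, seaborn) are installed:

import neptune
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from neptunecontrib.monitoring.metrics import (log_binary_classification_metrics,
                                               log_class_metrics_by_threshold)

# train a model on a synthetic problem and score the held-out test set
X, y = make_classification(n_samples=2000)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
model = RandomForestClassifier().fit(X_train, y_train)
y_test_pred = model.predict_proba(X_test)  # shape (n_samples, 2), as the asserts above require

neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME')  # assumes this project exists
with neptune.create_experiment():
    # one call logs the class-based metrics, losses, scores and performance charts
    log_binary_classification_metrics(y_test, y_test_pred, threshold=0.5)
    # the individual log_* helpers can also be called on their own, e.g. metric/threshold charts
    log_class_metrics_by_threshold(y_test, y_test_pred[:, 1])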