Sacred (#44)

* added Sacred integration via observer * update docs in `get_filepaths`
neptune-ai · Aug 12, 2019 · 1d4e8de · 1d4e8de
1 parent c83dec6
commit 1d4e8de
Show file tree

Hide file tree

Showing 11 changed files with 324 additions and 8 deletions.
diff --git a/docs/_static/images/sacred_neptuneML.png b/docs/_static/images/sacred_neptuneML.png
diff --git a/docs/conf.py b/docs/conf.py
@@ -28,6 +28,9 @@
                         'pandas',
                         'neptune',
                         'matplotlib',
+                        'sacred',
+                        'sacred.dependencies',
+                        'sacred.observers',
                         'scipy',
                         'seaborn',
                         'skopt',
@@ -41,9 +44,9 @@
 author = 'Neptune Dev Team'
 
 # The short X.Y version
-version = '0.9'
+version = '0.10'
 # The full version, including alpha/beta/rc tags
-release = '0.9.2'
+release = '0.10.1'
 
 # -- General configuration ---------------------------------------------------
 

diff --git a/docs/examples/examples_index.rst b/docs/examples/examples_index.rst
@@ -5,6 +5,7 @@
    Image directory snapshoting <image_dir_snapshots>
    Hyper parameter comparison <explore_hyperparams_skopt>
    Log model diagnostics <log_model_diagnostics>
+   Integrate with Sacred <observer_sacred>
    Monitor lightGBM training <monitor_lgbm>
    Monitor fast.ai training <monitor_fastai>
    Log matplotlib charts to neptune <log_matplotlib>

diff --git a/docs/examples/observer_sacred.ipynb b/docs/examples/observer_sacred.ipynb
@@ -0,0 +1,150 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Log Sacred experiments to neptune\n",
+    "\n",
+    "![sacred neptune.ml integration](../_static/images/sacred_neptuneML.png)\n",
+    "\n",
+    "## Create Sacred experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from numpy.random import permutation\n",
+    "from sklearn import svm, datasets\n",
+    "from sacred import Experiment\n",
+    "\n",
+    "ex = Experiment('iris_rbf_svm')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Add Neptune observer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from neptunecontrib.monitoring.sacred import NeptuneObserver\n",
+    "ex.observers.append(NeptuneObserver(project_name='jakub-czakon/examples'))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup config and run for your experiment"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@ex.config\n",
+    "def cfg():\n",
+    "    C = 1.0\n",
+    "    gamma = 0.7\n",
+    "\n",
+    "@ex.automain\n",
+    "def run(C, gamma, _run):\n",
+    "    iris = datasets.load_iris()\n",
+    "    per = permutation(iris.target.size)\n",
+    "    iris.data = iris.data[per]\n",
+    "    iris.target = iris.target[per]\n",
+    "    clf = svm.SVC(C, 'rbf', gamma=gamma)\n",
+    "    clf.fit(iris.data[:90],\n",
+    "            iris.target[:90])\n",
+    "    return clf.score(iris.data[90:],\n",
+    "                     iris.target[90:])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Go to Neptune app and observe your experiment\n",
+    "Now you can watch your Sacred model training in neptune!\n",
+    "\n",
+    "For example, you can check [this experiment](https://ui.neptune.ml/jakub-czakon/examples/e/EX-263)\n",
+    "        \n",
+    "![image](https://gist.githubusercontent.com/jakubczakon/f754769a39ea6b8fa9728ede49b9165c/raw/ae86f7321113327602be89c6ed3ac9d618ffdb4c/sacred_observer.png)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Full script"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from numpy.random import permutation\n",
+    "from sklearn import svm, datasets\n",
+    "from sacred import Experiment\n",
+    "\n",
+    "from neptunecontrib.monitoring.sacred import NeptuneObserver\n",
+    "\n",
+    "ex = Experiment('iris_rbf_svm')\n",
+    "ex.observers.append(NeptuneObserver(project_name='jakub-czakon/examples'))\n",
+    "\n",
+    "@ex.config\n",
+    "def cfg():\n",
+    "    C = 1.0\n",
+    "    gamma = 0.7\n",
+    "\n",
+    "@ex.automain\n",
+    "def run(C, gamma, _run):\n",
+    "\n",
+    "    iris = datasets.load_iris()\n",
+    "    per = permutation(iris.target.size)\n",
+    "    iris.data = iris.data[per]\n",
+    "    iris.target = iris.target[per]\n",
+    "    clf = svm.SVC(C, 'rbf', gamma=gamma)\n",
+    "    clf.fit(iris.data[:90],\n",
+    "            iris.target[:90])\n",
+    "    return clf.score(iris.data[90:],\n",
+    "                     iris.target[90:])"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/index.rst b/docs/index.rst
@@ -2,7 +2,7 @@ neptune-contrib: open-source contributions to Neptune.ml
 ===========================================
 
 This library is a collection of helpers and extensions that make working
-with `Neptune website`_ more effective and better. It is build on top of neptune-client
+with `Neptune app`_ more effective and better. It is built on top of neptune-client
 and gives you option to do things like:
  * interactive visualizations of experiment runs or hyperparameters
  * running hyper parameter sweeps in scikit-optimize, hyperopt or any other tool you like
@@ -14,6 +14,10 @@ Enjoy the following integrations:
 .. image:: _static/images/fastai_neptuneML.png
    :target: _static/images/fastai_neptuneML.png
    :alt: fastai neptune.ml integration
+
+.. image:: _static/images/sacred_neptuneML.png
+   :target: _static/images/sacred_neptuneML.png
+   :alt: Sacred neptune.ml integration
 
 .. image:: _static/images/LightGBM_neptuneML.png
    :target: _static/images/LightGBM_neptuneML.png
@@ -86,7 +90,7 @@ Indices and tables
 .. _GitHub: https://github.com/neptune-ml/neptune-contrib
 .. _Git Issues: https://github.com/neptune-ml/neptune-contrib/issues
 .. _Git Feature Request: https://github.com/neptune-ml/neptune-contrib/issues
-.. _Neptune website: https://neptune.ml/
+.. _Neptune app: https://neptune.ml/
 .. _Neptune community forum: https://community.neptune.ml/
 .. _Github projects: https://github.com/neptune-ml/neptune-contrib/projects
 .. _Neptune community spectrum: https://spectrum.chat/neptune-community?tab=posts
diff --git a/neptunecontrib/api/utils.py b/neptunecontrib/api/utils.py
@@ -197,7 +197,7 @@ def strip_prefices(columns, prefices):
 
 
 def get_filepaths(dirpath='.', extensions=None):
-    """Filters leaderboard columns to get the system column names.
+    """Creates a list of all the files with selected extensions.
 
     Args:
         dirpath(str): Folder from which all files with given extensions should be added to list.

diff --git a/neptunecontrib/monitoring/lightgbm.py b/neptunecontrib/monitoring/lightgbm.py
@@ -26,7 +26,7 @@ def neptune_monitor(experiment=None, prefix=''):
     `train_multiclass_logloss` and `valid_multiclass_logloss`.
 
     Args:
-        ctx(`neptune.Context`): Neptune context.
+        experiment(`neptune.experiments.Experiment`): Neptune experiment.
         prefix(str): Prefix that should be added before the `metric_name`
             and `valid_name` before logging to the appropriate channel.
 

diff --git a/neptunecontrib/monitoring/sacred.py b/neptunecontrib/monitoring/sacred.py
@@ -0,0 +1,157 @@
+#
+# Copyright (c) 2019, Neptune Labs Sp. z o.o.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+import collections
+import collections.abc
+import os
+
+import neptune
+from sacred.dependencies import get_digest
+from sacred.observers import RunObserver
+
+
+class NeptuneObserver(RunObserver):
+    """Logs sacred experiment data to Neptune.
+
+    Sacred observer that logs experiment metadata to neptune.ml app.
+    The experiment data can be accessed and shared via web UI or experiment API.
+    Check Neptune docs for more information https://docs.neptune.ml.
+
+    Args:
+        project_name(str): project name in Neptune app
+        api_token(str): Neptune API token. If it is kept in the NEPTUNE_API_TOKEN environment
+           variable leave None here.
+        base_dir(str): base directory from which you run your code. It is walked recursively
+           to collect the source files that are uploaded with the experiment.
+        source_extensions(list(str)): file name endings that mark a file under `base_dir` as
+           a source file to send to Neptune. When None or empty it defaults to
+           ['.py', '.R', '.cpp', '.yaml', '.yml'].
+
+    Examples:
+        Create sacred experiment::
+
+            from numpy.random import permutation
+            from sklearn import svm, datasets
+            from sacred import Experiment
+
+            ex = Experiment('iris_rbf_svm')
+
+        Add Neptune observer::
+
+            from neptunecontrib.monitoring.sacred import NeptuneObserver
+            ex.observers.append(NeptuneObserver(api_token='YOUR_LONG_API_TOKEN',
+                                                project_name='USER_NAME/PROJECT_NAME'))
+
+        Run experiment::
+
+            @ex.config
+            def cfg():
+                C = 1.0
+                gamma = 0.7
+
+            @ex.automain
+            def run(C, gamma, _run):
+                iris = datasets.load_iris()
+                per = permutation(iris.target.size)
+                iris.data = iris.data[per]
+                iris.target = iris.target[per]
+                clf = svm.SVC(C, 'rbf', gamma=gamma)
+                clf.fit(iris.data[:90],
+                        iris.target[:90])
+                return clf.score(iris.data[90:],
+                                 iris.target[90:])
+
+        Go to the app and see the experiment. For example, https://ui.neptune.ml/jakub-czakon/examples/e/EX-263
+    """
+
+    def __init__(self, project_name, api_token=None, base_dir='.', source_extensions=None):
+        neptune.init(project_qualified_name=project_name, api_token=api_token)
+        # Maps every resource filename already reported to the property prefix assigned to it.
+        self.resources = {}
+        self.base_dir = base_dir
+        self.source_extensions = source_extensions or ['.py', '.R', '.cpp', '.yaml', '.yml']
+
+    def started_event(self, ex_info, command, host_info, start_time, config, meta_info, _id):
+        """Create the Neptune experiment for a run that has just started.
+
+        The flattened `config` becomes the experiment parameters, the files found under
+        `base_dir` with one of `source_extensions` are passed as source files, and the
+        main file, dependencies, sacred id, host info and flattened meta info are stored
+        as (stringified) properties.
+        """
+        neptune.create_experiment(name=ex_info['name'],
+                                  params=_flatten_dict(config),
+                                  upload_source_files=_get_filepaths(dirpath=self.base_dir,
+                                                                     extensions=self.source_extensions),
+                                  properties={'mainfile': ex_info['mainfile'],
+                                              'dependencies': str(ex_info['dependencies']),
+                                              'sacred_id': str(_id),
+                                              **_str_dict_values(host_info),
+                                              **_str_dict_values(_flatten_dict(meta_info))})
+
+    def completed_event(self, stop_time, result):
+        """Log the run result as the `result` metric (when there is one) and stop Neptune."""
+        # Compare against None explicitly: a falsy result such as 0 or 0.0 is still
+        # a valid value and must not be dropped.
+        if result is not None:
+            neptune.log_metric('result', result)
+        neptune.stop()
+
+    def interrupted_event(self, interrupt_time, status):
+        """Stop Neptune when the run is interrupted."""
+        neptune.stop()
+
+    def failed_event(self, fail_time, fail_trace):
+        """Stop Neptune when the run fails."""
+        neptune.stop()
+
+    def artifact_event(self, name, filename, metadata=None, content_type=None):
+        """Log the file at `filename` as a Neptune artifact; the other arguments are not used."""
+        neptune.log_artifact(filename)
+
+    def resource_event(self, filename):
+        """Record the path and digest of a resource as Neptune properties.
+
+        Each new `filename` gets its own `resourceN` prefix and is stored as
+        `resourceNdata_path` / `resourceNdata_version`. A filename that was already
+        reported is ignored.
+        """
+        if filename not in self.resources:
+            new_prefix = self._create_new_prefix()
+            self.resources[filename] = new_prefix
+            md5 = get_digest(filename)
+
+            neptune.set_property('{}data_path'.format(new_prefix), filename)
+            neptune.set_property('{}data_version'.format(new_prefix), md5)
+
+    def log_metrics(self, metrics_by_name, info):
+        """Send every (step, value) pair of every metric to Neptune.
+
+        `metrics_by_name` maps a metric name to a mapping holding parallel
+        "steps" and "values" sequences.
+        """
+        for metric_name, metric_ptr in metrics_by_name.items():
+            for step, value in zip(metric_ptr["steps"], metric_ptr["values"]):
+                neptune.log_metric(metric_name, x=step, y=value)
+
+    def _create_new_prefix(self):
+        """Return the next unused `resourceN` prefix, starting from `resource0`."""
+        existing_prefixes = self.resources.values()
+        if existing_prefixes:
+            prefix_ids = [int(prefix.replace('resource', '')) for prefix in existing_prefixes]
+            new_prefix = 'resource{}'.format(max(prefix_ids) + 1)
+        else:
+            new_prefix = 'resource0'
+        return new_prefix
+
+
+def _get_filepaths(dirpath, extensions):
+    """Collect paths of all files under `dirpath` whose name ends with one of `extensions`.
+
+    Args:
+        dirpath(str): directory that is walked recursively.
+        extensions(list(str)): file name endings to keep, e.g. ['.py', '.yaml'].
+
+    Returns:
+        list(str): matching file paths, each joined with the directory it was found in.
+    """
+    matched = []
+    for root, _, filenames in os.walk(dirpath):
+        matched.extend(os.path.join(root, name)
+                       for name in filenames
+                       if any(name.endswith(ext) for ext in extensions))
+    return matched
+
+
+def _flatten_dict(d, parent_key='', sep='_'):
+    """Flatten a nested mapping into a single-level dict.
+
+    Keys of nested mappings are joined to their parent key with `sep`, so
+    ``{'a': {'b': 1}}`` becomes ``{'a_b': 1}``. An empty nested mapping
+    contributes no entries.
+
+    Args:
+        d(dict): mapping to flatten; nested keys are concatenated as strings.
+        parent_key(str): prefix put in front of every key; used by the recursion.
+        sep(str): separator placed between a parent key and a child key.
+
+    Returns:
+        dict: flattened copy of `d`.
+    """
+    flat = {}
+    for k, v in d.items():
+        new_key = parent_key + sep + k if parent_key else k
+        # The `collections.MutableMapping` alias was removed in Python 3.10;
+        # the ABC has to be taken from `collections.abc`.
+        if isinstance(v, collections.abc.MutableMapping):
+            flat.update(_flatten_dict(v, new_key, sep=sep))
+        else:
+            flat[new_key] = v
+    return flat
+
+
+def _str_dict_values(d):
+    """Return a new dict with the keys of `d` and every value converted with `str`."""
+    return dict((key, str(value)) for key, value in d.items())
diff --git a/pylintrc b/pylintrc
@@ -41,7 +41,7 @@ load-plugins=pylintfileheader
 # W0511 Allow TODO/FIXME comments.
 # W0703 Allow too broad except clause (Exception).
 # I0011 Do not show Locally disabled warnings in report
-disable=R,C0103,C0111,W0401,W0511,W0614,W0703,I0011
+# W0613 Allow unused arguments.
+disable=R,C0103,C0111,W0401,W0511,W0614,W0703,I0011,W0613
 
 # Enable the message, report, category or checker with the given id(s). You can
 # either give multiple identifier separated by comma (,) or put this option