From 0a9e3a9469371e1d0fddcbcd0692d930ade5d41a Mon Sep 17 00:00:00 2001 From: Jakub Date: Mon, 11 May 2020 13:17:39 +0200 Subject: [PATCH] improvements to log chart (#83) * moved log chart to api submodule, added support for bokeh and altair, added log_table * fixed formatting errors * added log_html and reinstated the old xgboost_monitor file with a deprecation warning --- neptunecontrib/api/__init__.py | 32 +++ neptunecontrib/api/chart.py | 202 +++++++++++++++ neptunecontrib/api/html.py | 74 ++++++ neptunecontrib/api/table.py | 74 ++++++ neptunecontrib/api/utils.py | 47 ++++ neptunecontrib/hpo/__init__.py | 16 ++ neptunecontrib/hpo/utils.py | 8 + neptunecontrib/logging/chart.py | 104 +------- neptunecontrib/monitoring/utils.py | 81 ++---- neptunecontrib/monitoring/xgboost.py | 238 ++++++++++++++++++ neptunecontrib/monitoring/xgboost_monitor.py | 227 +---------------- neptunecontrib/versioning/__init__.py | 12 + neptunecontrib/versioning/data.py | 5 + neptunecontrib/viz/__init__.py | 10 + neptunecontrib/viz/experiments.py | 3 + .../viz/parallel_coordinates_plot.py | 3 + neptunecontrib/viz/projects.py | 4 + 17 files changed, 755 insertions(+), 385 deletions(-) create mode 100644 neptunecontrib/api/chart.py create mode 100644 neptunecontrib/api/html.py create mode 100644 neptunecontrib/api/table.py create mode 100644 neptunecontrib/monitoring/xgboost.py diff --git a/neptunecontrib/api/__init__.py b/neptunecontrib/api/__init__.py index 62a86a5..5f24abc 100644 --- a/neptunecontrib/api/__init__.py +++ b/neptunecontrib/api/__init__.py @@ -13,3 +13,35 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + +from neptunecontrib.api.chart import log_chart +from neptunecontrib.api.html import log_html +from neptunecontrib.api.table import log_table +from neptunecontrib.api.utils import ( + concat_experiments_on_channel, + extract_project_progress_info, + get_channel_columns, + get_parameter_columns, + get_property_columns, + get_system_columns, + strip_prefices, + pickle_and_log_artifact, + get_pickled_artifact, + get_filepaths +) + +__all__ = [ + 'log_table', + 'log_html', + 'log_chart', + 'concat_experiments_on_channel', + 'extract_project_progress_info', + 'get_channel_columns', + 'get_parameter_columns', + 'get_property_columns', + 'get_system_columns', + 'strip_prefices', + 'pickle_and_log_artifact', + 'get_pickled_artifact', + 'get_filepaths' +] diff --git a/neptunecontrib/api/chart.py b/neptunecontrib/api/chart.py new file mode 100644 index 0000000..e9bfdd7 --- /dev/null +++ b/neptunecontrib/api/chart.py @@ -0,0 +1,202 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import neptune + +__all__ = [ + 'log_chart', +] + + +def log_chart(name, chart, experiment=None): + """Logs charts from matplotlib, plotly, bokeh, and altair to neptune. + + Plotly, Bokeh, and Altair charts are converted to interactive HTML objects and then uploaded to Neptune + as an artifact with path charts/{name}.html. + + Matplotlib figures are converted optionally. 
If plotly is installed, matplotlib figures are converted + to plotly figures and then converted to interactive HTML and uploaded to Neptune as an artifact with + path charts/{name}.html. If plotly is not installed, matplotlib figures are converted to PNG images + and uploaded to Neptune as an artifact with path charts/{name}.png + + Args: + name (:obj:`str`): + | Name of the chart (without extension) that will be used as a part of artifact's destination. + chart (:obj:`matplotlib`, :obj:`plotly` or :obj:`bokeh` Figure, or :obj:`altair` Chart): + | Figure from `matplotlib`, `plotly` or `bokeh`, or chart from `altair`. If you want to use global figure from `matplotlib`, you + can also pass reference to `matplotlib.pyplot` module. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | For advanced users only. Pass Neptune + `Experiment `_ + object if you want to control to which experiment data is logged. + | If ``None``, log to currently active, and most recent experiment. + + Examples: + Start an experiment:: + + import neptune + + neptune.init(api_token='ANONYMOUS', + project_qualified_name='shared/showroom') + neptune.create_experiment(name='experiment_with_charts') + + Create matplotlib figure and log it to Neptune:: + + import matplotlib.pyplot as plt + + fig = plt.figure() + x = [21,22,23,4,5,6,77,8,9,10,31,32,33,34,35,36,37,18,49,50,100] + plt.hist(x, bins=5) + plt.show() + + from neptunecontrib.api import log_chart + + log_chart('matplotlib_figure', fig) + + Create Plotly chart and log it to Neptune:: + + import plotly.express as px + + df = px.data.tips() + fig = px.histogram(df, x="total_bill", y="tip", color="sex", marginal="rug", + hover_data=df.columns) + fig.show() + + from neptunecontrib.api import log_chart + + log_chart('plotly_figure', fig) + + Create Altair chart and log it to Neptune:: + + import altair as alt + from vega_datasets import data + + source = data.cars() + + chart = alt.Chart(source).mark_circle(size=60).encode( + x='Horsepower', 
y='Miles_per_Gallon', + color='Origin', + tooltip=['Name', 'Origin', 'Horsepower', 'Miles_per_Gallon'] + ).interactive() + + from neptunecontrib.api import log_chart + + log_chart('altair_chart', chart) + + Create Bokeh figure and log it to Neptune:: + + from bokeh.plotting import figure + + p = figure(plot_width=400, plot_height=400) + + # add a circle renderer with a size, color, and alpha + p.circle([1, 2, 3, 4, 5], [6, 7, 2, 4, 5], size=20, color="navy", alpha=0.5) + + from neptunecontrib.api import log_chart + + log_chart('bokeh_figure', p) + + Check out how the logged charts look in Neptune: + https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-973/artifacts?path=charts%2F&file=bokeh_figure.html + """ + _exp = experiment if experiment else neptune + + if is_matplotlib_pyplot(chart) or is_matplotlib_figure(chart): + if is_matplotlib_pyplot(chart): + chart = chart.gcf() + + try: + from plotly import tools + chart = tools.mpl_to_plotly(chart) + + _exp.log_artifact(export_plotly_figure(chart), "charts/" + name + '.html') + except ImportError: + _exp.log_artifact(export_matplotlib_figure(chart), "charts/" + name + '.png') + + elif is_plotly_figure(chart): + _exp.log_artifact(export_plotly_figure(chart), "charts/" + name + '.html') + + elif is_bokeh_figure(chart): + _exp.log_artifact(export_bokeh_figure(chart), "charts/" + name + '.html') + + elif is_altair_chart(chart): + _exp.log_artifact(export_altair_chart(chart), "charts/" + name + '.html') + + else: + raise ValueError("Currently supported are matplotlib, plotly, altair, and bokeh figures") + + +def is_matplotlib_pyplot(chart): + return hasattr(chart, '__name__') and chart.__name__.startswith('matplotlib.') + + +def is_matplotlib_figure(chart): + return chart.__class__.__module__.startswith('matplotlib.') and chart.__class__.__name__ == 'Figure' + + +def is_plotly_figure(chart): + return chart.__class__.__module__.startswith('plotly.') and chart.__class__.__name__ == 'Figure' + + +def 
is_altair_chart(chart): + return chart.__class__.__module__.startswith('altair.') and 'Chart' in chart.__class__.__name__ + + +def is_bokeh_figure(chart): + return chart.__class__.__module__.startswith('bokeh.') and chart.__class__.__name__ == 'Figure' + + +def export_plotly_figure(chart): + from io import StringIO + + buffer = StringIO() + chart.write_html(buffer) + buffer.seek(0) + + return buffer + + +def export_matplotlib_figure(chart): + from io import BytesIO + + buffer = BytesIO() + chart.savefig(buffer, format='png') + buffer.seek(0) + + return buffer + + +def export_altair_chart(chart): + from io import StringIO + + buffer = StringIO() + chart.save(buffer, format='html') + buffer.seek(0) + + return buffer + + +def export_bokeh_figure(chart): + from io import StringIO + from bokeh.resources import CDN + from bokeh.embed import file_html + + html = file_html(chart, CDN) + buffer = StringIO(html) + buffer.seek(0) + + return buffer diff --git a/neptunecontrib/api/html.py b/neptunecontrib/api/html.py new file mode 100644 index 0000000..d550480 --- /dev/null +++ b/neptunecontrib/api/html.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import neptune + +__all__ = [ + 'log_html', +] + + +def log_html(name, html, experiment=None): + """Logs html to neptune. 
+ + HTML is logged to Neptune as an artifact with path htmls/{name}.html + + Args: + name (:obj:`str`): + | Name of the HTML artifact (without extension) that will be used as a part of artifact's destination. + html (:obj:`str`): + | HTML string that is logged and rendered as HTML. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | For advanced users only. Pass Neptune + `Experiment `_ + object if you want to control to which experiment data is logged. + | If ``None``, log to currently active, and most recent experiment. + + Examples: + Start an experiment:: + + import neptune + + neptune.init(api_token='ANONYMOUS', + project_qualified_name='shared/showroom') + neptune.create_experiment(name='experiment_with_html') + + Create an HTML string:: + + html = "" + + Log it to Neptune:: + + from neptunecontrib.api import log_html + + log_html('go_to_docs_button', html) + + Check out how the logged HTML looks in Neptune: + https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-988/artifacts?path=html%2F&file=button_example.html + """ + + _exp = experiment if experiment else neptune + + _exp.log_artifact(export_html(html), "htmls/" + name + '.html') + + +def export_html(html): + from io import StringIO + buffer = StringIO(html) + buffer.seek(0) + + return buffer diff --git a/neptunecontrib/api/table.py b/neptunecontrib/api/table.py new file mode 100644 index 0000000..15cfe81 --- /dev/null +++ b/neptunecontrib/api/table.py @@ -0,0 +1,74 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import neptune + +__all__ = [ + 'log_table', +] + +def log_table(name, table, experiment=None): + """Logs pandas dataframe to neptune. + + Pandas dataframe is converted to an HTML table and logged to Neptune as an artifact with path tables/{name}.html + + Args: + name (:obj:`str`): + | Name of the table (without extension) that will be used as a part of artifact's destination. + table (:obj:`pandas.DataFrame`): + | DataFrame table + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | For advanced users only. Pass Neptune + `Experiment `_ + object if you want to control to which experiment data is logged. + | If ``None``, log to currently active, and most recent experiment. + + Examples: + Start an experiment:: + + import neptune + + neptune.init(api_token='ANONYMOUS', + project_qualified_name='shared/showroom') + neptune.create_experiment(name='experiment_with_tables') + + Create or load dataframe:: + + import pandas as pd + + iris_df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv', nrows=100) + + Log it to Neptune:: + + from neptunecontrib.api import log_table + + log_table('pandas_df', iris_df) + + Check out how the logged table looks in Neptune: + https://ui.neptune.ai/o/shared/org/showroom/e/SHOW-977/artifacts?path=tables%2F&file=pandas_df.html + """ + _exp = experiment if experiment else neptune + + _exp.log_artifact(export_pandas_dataframe(table), "tables/" + name + '.html') + + +def export_pandas_dataframe(table): + from io import StringIO + + buffer = StringIO(table.to_html()) + buffer.seek(0) + + return buffer diff --git a/neptunecontrib/api/utils.py b/neptunecontrib/api/utils.py index 5f94381..e1d3f36 100644 --- a/neptunecontrib/api/utils.py +++ b/neptunecontrib/api/utils.py @@ -18,10 +18,23 @@ import tempfile import joblib +import neptune import pandas as pd 
warnings.filterwarnings('ignore') +__all__ = [ + 'concat_experiments_on_channel', + 'extract_project_progress_info', + 'get_channel_columns', + 'get_parameter_columns', + 'get_property_columns', + 'get_system_columns', + 'strip_prefices', + 'pickle_and_log_artifact', + 'get_pickled_artifact' +] + def concat_experiments_on_channel(experiments, channel_name): """Combines channel values from experiments into one dataframe. @@ -226,6 +239,7 @@ def get_filepaths(dirpath='.', extensions=None): Starting from neptune-client==4.9 you can pass ['**/*.py*', '**/*.yaml*', '**/*.yml*'] to upload_source_files argument to upload all files with given extensions recursively. Read more https://docs.neptune.ai/neptune-client/docs/project.html + get_filepaths() will be removed in future releases. """ warnings.warn(msg, DeprecationWarning) @@ -239,6 +253,39 @@ def get_filepaths(dirpath='.', extensions=None): return files +def pickle_and_log_artifact(obj, filename, experiment=None): + """Logs picklable object to Neptune. + + Pickles and logs your object to Neptune under specified filename. + + Args: + obj: Picklable object. + filename(str): filename under which object will be saved. + experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. + + Examples: + Initialize Neptune:: + + import neptune + neptune.init('USER_NAME/PROJECT_NAME') + + Create RandomForest object and log to Neptune:: + + from sklearn.ensemble import RandomForestClassifier + from neptunecontrib.api import pickle_and_log_artifact + + with neptune.create_experiment(): + rf = RandomForestClassifier() + pickle_and_log_artifact(rf, 'rf') + """ + _exp = experiment if experiment else neptune + + with tempfile.TemporaryDirectory() as d: + filename = os.path.join(d, filename) + joblib.dump(obj, filename) + _exp.send_artifact(filename) + + def get_pickled_artifact(experiment, filename): """Downloads pickled artifact object from Neptune and returns a Python object. 
diff --git a/neptunecontrib/hpo/__init__.py b/neptunecontrib/hpo/__init__.py index 62a86a5..7147f62 100644 --- a/neptunecontrib/hpo/__init__.py +++ b/neptunecontrib/hpo/__init__.py @@ -13,3 +13,19 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +from neptunecontrib.hpo.utils import ( + df2result, + hyperopt2skopt, + optuna2skopt, + bayes2skopt, + hpbandster2skopt, +) + +__all__ = [ + 'hyperopt2skopt', + 'df2result', + 'optuna2skopt', + 'bayes2skopt', + 'hpbandster2skopt' +] diff --git a/neptunecontrib/hpo/utils.py b/neptunecontrib/hpo/utils.py index 706f08e..205de98 100644 --- a/neptunecontrib/hpo/utils.py +++ b/neptunecontrib/hpo/utils.py @@ -19,6 +19,14 @@ from scipy.optimize import OptimizeResult import skopt +__all__ = [ + 'hyperopt2skopt', + 'df2result', + 'optuna2skopt', + 'bayes2skopt', + 'hpbandster2skopt' +] + def hyperopt2skopt(trials, space): """Converts hyperopt trials to scipy OptimizeResult. diff --git a/neptunecontrib/logging/chart.py b/neptunecontrib/logging/chart.py index 3d0f660..2a393a3 100644 --- a/neptunecontrib/logging/chart.py +++ b/neptunecontrib/logging/chart.py @@ -13,102 +13,12 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import warnings -import neptune +message = """neptunecontrib.logging.chart was moved to neptunecontrib.api. +You should use ``from neptunecontrib.api import log_chart`` +neptunecontrib.logging.log_chart will be removed in future releases. +""" +warnings.warn(message) - -def log_chart(name, chart, experiment=None): - """Logs charts from matplotlib, plotly to neptune. - - Plotly figures are converted to interactive HTML and then uploaded to Neptune as an artifact with path - charts/{name}.html. - - Matplotlib figures are converted optionally. 
If plotly is installed, matplotlib figures are converted - to plotly figures and then converted to interactive HTML and uploaded to Neptune as an artifact with - path charts/{name}.html. If plotly is not installed, matplotlib figures are converted to PNG images - and uploaded to Neptune as an artifact with path charts/{name}.png - - Args: - name (:obj:`str`): - | Name of the chart (without extension) that will be used as a part of artifact's destination. - chart (:obj:`matplotlib` or :obj:`plotly` Figure): - | Figure from `matplotlib` or `plotly`. If you want to use global figure from `matplotlib`, you - can also pass reference to `matplotlib.pyplot` module. - experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): - | For advanced users only. Pass Neptune - `Experiment `_ - object if you want to control to which experiment data is logged. - | If ``None``, log to currently active, and most recent experiment. - - Examples: - Start an experiment:: - - import neptune - - neptune.init(project_qualified_name='USER_NAME/PROJECT_NAME') - neptune.create_experiment(name='experiment_with_chart') - - Create some figure:: - - import matplotlib.pyplot as plt - - plt.plot([1, 2, 3, 4]) - plt.ylabel('some numbers') - - Log the figure to Neptune:: - - from neptunecontrib.logging.chart import log_chart - - log_chart('matplotlib_chart', plt) - """ - _exp = experiment if experiment else neptune - - if is_matplotlib_pyplot(chart) or is_matplotlib_figure(chart): - if is_matplotlib_pyplot(chart): - chart = chart.gcf() - - try: - from plotly import tools - chart = tools.mpl_to_plotly(chart) - - _exp.log_artifact(export_plotly_figure(chart), "charts/" + name + '.html') - except ImportError: - _exp.log_artifact(export_matplotlib_figure(chart), "charts/" + name + '.png') - - elif is_plotly_figure(chart): - _exp.log_artifact(export_plotly_figure(chart), "charts/" + name + '.html') - - else: - raise ValueError("Currently supported are matplotlib and plotly figures") 
- - -def is_matplotlib_pyplot(chart): - return hasattr(chart, '__name__') and chart.__name__.startswith('matplotlib.') - - -def is_matplotlib_figure(chart): - return chart.__class__.__module__.startswith('matplotlib.') and chart.__class__.__name__ == 'Figure' - - -def is_plotly_figure(chart): - return chart.__class__.__module__.startswith('plotly.') and chart.__class__.__name__ == 'Figure' - - -def export_plotly_figure(chart): - from io import StringIO - - buffer = StringIO() - chart.write_html(buffer) - buffer.seek(0) - - return buffer - - -def export_matplotlib_figure(chart): - from io import BytesIO - - buffer = BytesIO() - chart.savefig(buffer, format='png') - buffer.seek(0) - - return buffer +from neptunecontrib.api import log_chart # pylint: disable=C0413, W0611 diff --git a/neptunecontrib/monitoring/utils.py b/neptunecontrib/monitoring/utils.py index dea76b1..dd3ae1d 100644 --- a/neptunecontrib/monitoring/utils.py +++ b/neptunecontrib/monitoring/utils.py @@ -15,13 +15,13 @@ # from itertools import product -import os -import tempfile +import warnings -import joblib import matplotlib.pyplot as plt import neptune +from neptunecontrib.api import pickle_and_log_artifact + def axes2fig(axes, fig=None): """Converts ndarray of matplotlib object to matplotlib figure. @@ -65,74 +65,21 @@ def axes2fig(axes, fig=None): def send_figure(fig, channel_name='figures', experiment=None): - """Logs matplotlib figure to Neptune. - - Logs any figure from matplotlib to specified image channel. - By default it logs to 'figures' and you can log multiple images to the same channel. - - Args: - channel_name(str): name of the neptune channel. Default is 'figures'. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. 
- fig(`matplotlib.figure`): Matplotlib figure object - - Examples: - Initialize Neptune:: - - import neptune - neptune.init('USER_NAME/PROJECT_NAME') - - Create random data::: - - import numpy as np - table = np.random.random((10,10)) - - Plot and log to Neptune:: - - import matplotlib.pyplot as plt - import seaborn as sns - from neptunecontrib.monitoring.utils import send_figure - - with neptune.create_experiment(): - fig, ax = plt.subplots() - sns.heatmap(table,ax=ax) - send_figure(fig) - + message = """neptunecontrib.monitoring.utils send_figure functionality is now available in neptune-client. + You should simply use neptune.log_image('channel_name', fig) where you used send_figure('channel_name', fig) before. + send_figure will be removed in future releases. """ - _exp = experiment if experiment else neptune + warnings.warn(message) - with tempfile.NamedTemporaryFile(suffix='.png') as f: - fig.savefig(f.name) - _exp.send_image(channel_name, f.name) + _exp = experiment if experiment else neptune + _exp.log_image(channel_name, fig) def pickle_and_send_artifact(obj, filename, experiment=None): - """Logs picklable object to Neptune. - - Pickles and logs your object to Neptune under specified filename. - - Args: - obj: Picklable object. - filename(str): filename under which object will be saved. - experiment(`neptune.experiments.Experiment`): Neptune experiment. Default is None. - - Examples: - Initialize Neptune:: - - import neptune - neptune.init('USER_NAME/PROJECT_NAME') - - Create RandomForest object and log to Neptune:: - - from sklearn.ensemble import RandomForestClassifier - from neptunecontrib.monitoring.utils import pickle_and_send_artifact - - with neptune.create_experiment(): - rf = RandomForestClassifier() - pickle_and_send_artifact(rf, 'rf') + message = """neptunecontrib.monitoring.utils pickle_and_send_artifact was moved to neptunecontrib.api + and renamed to pickle_and_log_artifact. 
You should use ``from neptunecontrib.api import pickle_and_log_artifact`` + neptunecontrib.logging.log_chart will be removed in future releases. """ - _exp = experiment if experiment else neptune + warnings.warn(message) - with tempfile.TemporaryDirectory() as d: - filename = os.path.join(d, filename) - joblib.dump(obj, filename) - _exp.send_artifact(filename) + pickle_and_log_artifact(obj, filename, experiment) diff --git a/neptunecontrib/monitoring/xgboost.py b/neptunecontrib/monitoring/xgboost.py new file mode 100644 index 0000000..e164f96 --- /dev/null +++ b/neptunecontrib/monitoring/xgboost.py @@ -0,0 +1,238 @@ +# +# Copyright (c) 2020, Neptune Labs Sp. z o.o. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +import os +import tempfile + +import neptune +import xgboost as xgb + + +def neptune_callback(log_model=True, + log_importance=True, + max_num_features=None, + log_tree=(0,), + experiment=None, + **kwargs): + """XGBoost callback for Neptune experiments. + + This is XGBoost callback that automatically logs training and evaluation metrics, feature importance chart, + visualized trees and trained Booster to Neptune. + + Check Neptune documentation for the `full example `_. + + Make sure you created an experiment before you start XGBoost training using ``neptune.create_experiment()`` + (`check our docs `_). + + Integration works with ``xgboost>=0.82``. + + Tip: + Use this `Google Colab `_ to try it without further ado. 
+ + Args: + log_model (:obj:`bool`, optional, default is ``True``): + | Log booster to Neptune after last boosting iteration. + | If you run xgb.cv, log booster for all folds. + log_importance (:obj:`bool`, optional, default is ``True``): + | Log feature importance to Neptune as image after last boosting iteration. + | Specify number of features using ``max_num_features`` parameter below. + | If you run xgb.cv, log feature importance for each folds' booster. + max_num_features (:obj:`int`, optional, default is ``None``): + | Plot top ``max_num_features`` features on the importance plot. + | If ``None``, plot all features. + log_tree (:obj:`list` of :obj:`int`, optional, default is ``(0,)``): + | Log specified trees to Neptune as images after last boosting iteration. + | If you run xgb.cv, log specified trees for each folds' booster. + | Default is to log first tree. + | If ``None``, do not log any tree. + experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): + | For advanced users only. Pass Neptune + `Experiment `_ + object if you want to control to which experiment data is logged. + | If ``None``, log to currently active, and most recent experiment. + kwargs: + Parametrize XGBoost functions used in this callback: + `xgboost.plot_importance `_ + and `xgboost.to_graphviz `_. + + Returns: + :obj:`callback`, function that you can pass directly to the XGBoost callbacks list, for example to the + ``xgboost.cv()`` + (`see docs `_) + or ``XGBClassifier.fit()`` + (`check docs `_). + + Note: + If you use early stopping, make sure to log model, feature importance and trees on your own. + Neptune logs these artifacts only after last iteration, which you may not reach because of early stop. + + Examples: + ``xgb.train`` examples + + .. 
code:: python3 + + # basic usage + xgb.train(param, dtrain, num_round, watchlist, + callbacks=[neptune_callback()]) + + # do not log model + xgb.train(param, dtrain, num_round, watchlist, + callbacks=[neptune_callback(log_model=False)]) + + # log top 5 features' importance chart + xgb.train(param, dtrain, num_round, watchlist, + callbacks=[neptune_callback(max_num_features=5)]) + + ``xgb.cv`` examples + + .. code:: python3 + + # log 5 trees per each folds' booster + xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, + callbacks=[neptune_callback(log_tree=[0,1,2,3,4])]) + + # log only metrics + xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, + callbacks=[neptune_callback(log_model=False, + log_importance=False, + max_num_features=None, + log_tree=None)]) + + # log top 3 features' importance chart per each folds' booster + xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, + callbacks=[neptune_callback(log_model=False, + max_num_features=3, + log_tree=None)]) + + ``sklearn`` API examples + + .. code:: python3 + + # basic usage with early stopping + xgb.XGBRegressor().fit(X_train, y_train, + early_stopping_rounds=10, + eval_metric=['mae', 'rmse', 'rmsle'], + eval_set=[(X_test, y_test)], + callbacks=[neptune_callback()]) + + # do not log model + clf = xgb.XGBRegressor() + clf.fit(X_train, y_train, + eval_metric=['mae', 'rmse', 'rmsle'], + eval_set=[(X_test, y_test)], + callbacks=[neptune_callback(log_model=False)]) + y_pred = clf.predict(X_test) + + # log 8 trees + reg = xgb.XGBRegressor(**params) + reg.fit(X_train, y_train, + eval_metric=['mae', 'rmse', 'rmsle'], + eval_set=[(X_test, y_test)], + callbacks=[neptune_callback(log_tree=[0,1,2,3,4,5,6,7])]) + """ + if experiment: + _exp = experiment + else: + try: + neptune.get_experiment() + _exp = neptune + except neptune.exceptions.NoExperimentContext: + msg = 'No currently running Neptune experiment. \n'\ + 'To start logging to Neptune create experiment by using: `neptune.create_experiment()`. 
\n'\ + 'More info in the documentation: '\ + '.' + raise neptune.exceptions.NeptuneException(msg) + + assert isinstance(log_model, bool),\ + 'log_model must be bool, got {} instead. Check log_model parameter.'.format(type(log_model)) + assert isinstance(log_importance, bool),\ + 'log_importance must be bool, got {} instead. Check log_importance parameter.'.format(type(log_importance)) + if max_num_features is not None: + assert isinstance(max_num_features, int),\ + 'max_num_features must be int, got {} instead. ' \ + 'Check max_num_features parameter.'.format(type(max_num_features)) + if log_tree is not None: + if isinstance(log_tree, tuple): + log_tree = list(log_tree) + assert isinstance(log_tree, list),\ + 'log_tree must be list of int, got {} instead. Check log_tree parameter.'.format(type(log_tree)) + + def callback(env): + # Log metrics after iteration + for item in env.evaluation_result_list: + if len(item) == 2: # train case + _exp.log_metric(item[0], item[1]) + if len(item) == 3: # cv case + _exp.log_metric('{}-mean'.format(item[0]), item[1]) + _exp.log_metric('{}-std'.format(item[0]), item[2]) + + # Log booster, end of training + if env.iteration + 1 == env.end_iteration and log_model: + if env.cvfolds: # cv case + for i, cvpack in enumerate(env.cvfolds): + _log_model(cvpack.bst, 'cv-fold-{}-bst.model'.format(i), _exp) + else: # train case + _log_model(env.model, 'bst.model', _exp) + + # Log feature importance, end of training + if env.iteration + 1 == env.end_iteration and log_importance: + if env.cvfolds: # cv case + for i, cvpack in enumerate(env.cvfolds): + _log_importance(cvpack.bst, max_num_features, _exp, title='cv-fold-{}'.format(i), **kwargs) + else: # train case + _log_importance(env.model, max_num_features, _exp, **kwargs) + + # Log trees, end of training + if env.iteration + 1 == env.end_iteration and log_tree: + if env.cvfolds: + for j, cvpack in enumerate(env.cvfolds): + _log_trees(cvpack.bst, log_tree, 'trees-cv-fold-{}'.format(j), _exp, 
**kwargs) + else: + _log_trees(env.model, log_tree, 'trees', _exp, **kwargs) + return callback + + +def _log_model(booster, name, npt): + with tempfile.TemporaryDirectory(dir='.') as d: + path = os.path.join(d, name) + booster.save_model(path) + npt.log_artifact(path) + + +def _log_importance(booster, max_num_features, npt, **kwargs): + try: + import matplotlib.pyplot as plt + except ImportError: + raise ImportError('Please install matplotlib to log importance') + importance = xgb.plot_importance(booster, max_num_features=max_num_features, **kwargs) # pylint: disable=E1101 + npt.log_image('feature_importance', importance.figure) + plt.close('all') + + +def _log_trees(booster, tree_list, img_name, npt, **kwargs): + with tempfile.TemporaryDirectory(dir='.') as d: + for i in tree_list: + file_name = 'tree_{}'.format(i) + tree = xgb.to_graphviz(booster=booster, num_trees=i, **kwargs) # pylint: disable=E1101 + tree.render(filename=file_name, directory=d, view=False, format='png') + npt.log_image(img_name, + os.path.join(d, '{}.png'.format(file_name)), + image_name=file_name) diff --git a/neptunecontrib/monitoring/xgboost_monitor.py b/neptunecontrib/monitoring/xgboost_monitor.py index 2105c42..eada0b1 100644 --- a/neptunecontrib/monitoring/xgboost_monitor.py +++ b/neptunecontrib/monitoring/xgboost_monitor.py @@ -13,226 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import os -import tempfile +import warnings -import neptune -import xgboost as xgb +message = """neptunecontrib.monitoring.xgboost_monitor was moved to neptunecontrib.monitoring.xgboost +neptunecontrib.monitoring.xgboost_monitor will be removed in future releases. +""" +warnings.warn(message) - -def neptune_callback(log_model=True, - log_importance=True, - max_num_features=None, - log_tree=(0,), - experiment=None, - **kwargs): - """XGBoost callback for Neptune experiments. 
- - This is XGBoost callback that automatically logs training and evaluation metrics, feature importance chart, - visualized trees and trained Booster to Neptune. - - Check Neptune documentation for the `full example `_. - - Make sure you created an experiment before you start XGBoost training using ``neptune.create_experiment()`` - (`check our docs `_). - - Integration works with ``xgboost>=0.82``. - - Tip: - Use this `Google Colab `_ to try it without further ado. - - Args: - log_model (:obj:`bool`, optional, default is ``True``): - | Log booster to Neptune after last boosting iteration. - | If you run xgb.cv, log booster for all folds. - log_importance (:obj:`bool`, optional, default is ``True``): - | Log feature importance to Neptune as image after last boosting iteration. - | Specify number of features using ``max_num_features`` parameter below. - | If you run xgb.cv, log feature importance for each folds' booster. - max_num_features (:obj:`int`, optional, default is ``None``): - | Plot top ``max_num_features`` features on the importance plot. - | If ``None``, plot all features. - log_tree (:obj:`list` of :obj:`int`, optional, default is ``[1,]``): - | Log specified trees to Neptune as images after last boosting iteration. - | If you run xgb.cv, log specified trees for each folds' booster. - | Default is to log first tree. - | If ``None``, do not log any tree. - experiment (:obj:`neptune.experiments.Experiment`, optional, default is ``None``): - | For advanced users only. Pass Neptune - `Experiment `_ - object if you want to control to which experiment data is logged. - | If ``None``, log to currently active, and most recent experiment. - kwargs: - Parametrize XGBoost functions used in this callback: - `xgboost.plot_importance `_ - and `xgboost.to_graphviz `_. - - Returns: - :obj:`callback`, function that you can pass directly to the XGBoost callbacks list, for example to the - ``xgboost.cv()`` - (`see docs `_) - or ``XGBClassifier.fit()`` - (`check docs `_). 
- - Note: - If you use early stopping, make sure to log model, feature importance and trees on your own. - Neptune logs these artifacts only after last iteration, which you may not reach because of early stop. - - Examples: - ``xgb.train`` examples - - .. code:: python3 - - # basic usage - xgb.train(param, dtrain, num_round, watchlist, - callbacks=[neptune_callback()]) - - # do not log model - xgb.train(param, dtrain, num_round, watchlist, - callbacks=[neptune_callback(log_model=False)]) - - # log top 5 features' importance chart - xgb.train(param, dtrain, num_round, watchlist, - callbacks=[neptune_callback(max_num_features=5)]) - - ``xgb.cv`` examples - - .. code:: python3 - - # log 5 trees per each folds' booster - xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, - callbacks=neptune_callback(log_tree=[0,1,2,3,4])) - - # log only metrics - xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, - callbacks=[neptune_callback(log_model=False, - log_importance=False, - max_num_features=None, - log_tree=None)]) - - # log top 5 features per each folds' booster - xgb.cv(param, dtrain, num_boost_round=num_round, nfold=7, - callbacks=[neptune_callback(log_model=False, - max_num_features=3, - log_tree=None)]) - - ``sklearn`` API examples - - .. 
code:: python3 - - # basic usage with early stopping - xgb.XGBRegressor().fit(X_train, y_train, - early_stopping_rounds=10, - eval_metric=['mae', 'rmse', 'rmsle'], - eval_set=[(X_test, y_test)], - callbacks=[neptune_callback()]) - - # do not log model - clf = xgb.XGBRegressor() - clf.fit(X_train, y_train, - eval_metric=['mae', 'rmse', 'rmsle'], - eval_set=[(X_test, y_test)], - callbacks=[neptune_callback(log_model=False)]) - y_pred = clf.predict(X_test) - - # log 8 trees - reg = xgb.XGBRegressor(**params) - reg.fit(X_train, y_train, - eval_metric=['mae', 'rmse', 'rmsle'], - eval_set=[(X_test, y_test)], - callbacks=[neptune_callback(log_tree=[0,1,2,3,4,5,6,7])]) - """ - if experiment: - _exp = experiment - else: - try: - neptune.get_experiment() - _exp = neptune - except neptune.exceptions.NoExperimentContext: - msg = 'No currently running Neptune experiment. \n'\ - 'To start logging to Neptune create experiment by using: `neptune.create_experiment()`. \n'\ - 'More info in the documentation: '\ - '.' - raise neptune.exceptions.NeptuneException(msg) - - assert isinstance(log_model, bool),\ - 'log_model must be bool, got {} instead. Check log_model parameter.'.format(type(log_model)) - assert isinstance(log_importance, bool),\ - 'log_importance must be bool, got {} instead. Check log_importance parameter.'.format(type(log_importance)) - if max_num_features is not None: - assert isinstance(max_num_features, int),\ - 'max_num_features must be int, got {} instead. ' \ - 'Check max_num_features parameter.'.format(type(max_num_features)) - if log_tree is not None: - if isinstance(log_tree, tuple): - log_tree = list(log_tree) - assert isinstance(log_tree, list),\ - 'log_tree must be list of int, got {} instead. 
Check log_tree parameter.'.format(type(log_tree)) - - def callback(env): - # Log metrics after iteration - for item in env.evaluation_result_list: - if len(item) == 2: # train case - _exp.log_metric(item[0], item[1]) - if len(item) == 3: # cv case - _exp.log_metric('{}-mean'.format(item[0]), item[1]) - _exp.log_metric('{}-std'.format(item[0]), item[2]) - - # Log booster, end of training - if env.iteration + 1 == env.end_iteration and log_model: - if env.cvfolds: # cv case - for i, cvpack in enumerate(env.cvfolds): - _log_model(cvpack.bst, 'cv-fold-{}-bst.model'.format(i), _exp) - else: # train case - _log_model(env.model, 'bst.model', _exp) - - # Log feature importance, end of training - if env.iteration + 1 == env.end_iteration and log_importance: - if env.cvfolds: # cv case - for i, cvpack in enumerate(env.cvfolds): - _log_importance(cvpack.bst, max_num_features, _exp, title='cv-fold-{}'.format(i), **kwargs) - else: # train case - _log_importance(env.model, max_num_features, _exp, **kwargs) - - # Log trees, end of training - if env.iteration + 1 == env.end_iteration and log_tree: - if env.cvfolds: - for j, cvpack in enumerate(env.cvfolds): - _log_trees(cvpack.bst, log_tree, 'trees-cv-fold-{}'.format(j), _exp, **kwargs) - else: - _log_trees(env.model, log_tree, 'trees', _exp, **kwargs) - return callback - - -def _log_model(booster, name, npt): - with tempfile.TemporaryDirectory(dir='.') as d: - path = os.path.join(d, name) - booster.save_model(path) - npt.log_artifact(path) - - -def _log_importance(booster, max_num_features, npt, **kwargs): - try: - import matplotlib.pyplot as plt - except ImportError: - raise ImportError('Please install matplotlib to log importance') - importance = xgb.plot_importance(booster, max_num_features=max_num_features, **kwargs) - npt.log_image('feature_importance', importance.figure) - plt.close('all') - - -def _log_trees(booster, tree_list, img_name, npt, **kwargs): - with tempfile.TemporaryDirectory(dir='.') as d: - for i in 
tree_list: - file_name = 'tree_{}'.format(i) - tree = xgb.to_graphviz(booster=booster, num_trees=i, **kwargs) - tree.render(filename=file_name, directory=d, view=False, format='png') - npt.log_image(img_name, - os.path.join(d, '{}.png'.format(file_name)), - image_name=file_name) +from neptunecontrib.monitoring.xgboost import * # pylint: disable=C0413, W0611 diff --git a/neptunecontrib/versioning/__init__.py b/neptunecontrib/versioning/__init__.py index 62a86a5..2b2cdde 100644 --- a/neptunecontrib/versioning/__init__.py +++ b/neptunecontrib/versioning/__init__.py @@ -13,3 +13,15 @@ # See the License for the specific language governing permissions and # limitations under the License. # + +from neptunecontrib.versioning.data import ( + log_data_version, + log_s3_data_version, + log_image_dir_snapshots +) + +__all__ = [ + 'log_data_version', + 'log_s3_data_version', + 'log_image_dir_snapshots', +] diff --git a/neptunecontrib/versioning/data.py b/neptunecontrib/versioning/data.py index 4c4a37c..038f773 100644 --- a/neptunecontrib/versioning/data.py +++ b/neptunecontrib/versioning/data.py @@ -23,6 +23,11 @@ from neptunecontrib.monitoring.utils import send_figure +__all__ = [ + 'log_data_version', + 'log_s3_data_version', + 'log_image_dir_snapshots', +] def log_data_version(path, prefix='', experiment=None): """Logs data version of file or folder to Neptune diff --git a/neptunecontrib/viz/__init__.py b/neptunecontrib/viz/__init__.py index 62a86a5..d1c6002 100644 --- a/neptunecontrib/viz/__init__.py +++ b/neptunecontrib/viz/__init__.py @@ -13,3 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# + +from neptunecontrib.viz.experiments import channel_curve_compare +from neptunecontrib.viz.parallel_coordinates_plot import make_parallel_coordinates_plot +from neptunecontrib.viz.projects import project_progress + +__all__ = [ + 'channel_curve_compare', + 'make_parallel_coordinates_plot', + 'project_progress', +] diff --git a/neptunecontrib/viz/experiments.py b/neptunecontrib/viz/experiments.py index 49f8555..098876b 100644 --- a/neptunecontrib/viz/experiments.py +++ b/neptunecontrib/viz/experiments.py @@ -16,6 +16,9 @@ import altair as alt +__all__ = [ + 'channel_curve_compare', +] def channel_curve_compare(experiment_df, width=800, diff --git a/neptunecontrib/viz/parallel_coordinates_plot.py b/neptunecontrib/viz/parallel_coordinates_plot.py index 065bf1b..7b3f87a 100644 --- a/neptunecontrib/viz/parallel_coordinates_plot.py +++ b/neptunecontrib/viz/parallel_coordinates_plot.py @@ -19,6 +19,9 @@ import hiplot as hip import neptune +__all__ = [ + 'make_parallel_coordinates_plot', +] def make_parallel_coordinates_plot(html_file_path=None, metrics=False, diff --git a/neptunecontrib/viz/projects.py b/neptunecontrib/viz/projects.py index 734bde8..ddffc60 100644 --- a/neptunecontrib/viz/projects.py +++ b/neptunecontrib/viz/projects.py @@ -20,6 +20,10 @@ warnings.filterwarnings('ignore') +__all__ = [ + 'project_progress', +] + def project_progress(progress_df, width=800,