Skip to content

Commit

Permalink
[MRG] Update joblib to 0.11 (scikit-learn#8492)
Browse files Browse the repository at this point in the history
Use pip rather than easy_install in copy_joblib.sh. Also need to remove joblib/testing.py to avoid pytest dependency.
  • Loading branch information
lesteve authored Mar 7, 2017
1 parent 5210f81 commit cee5a38
Show file tree
Hide file tree
Showing 17 changed files with 511 additions and 400 deletions.
22 changes: 8 additions & 14 deletions sklearn/externals/copy_joblib.sh
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
#!/bin/sh
# Script to do a local install of joblib
export LC_ALL=C
rm -rf tmp joblib
PYTHON_VERSION=$(python -c 'import sys; print("{0[0]}.{0[1]}".format(sys.version_info))')
SITE_PACKAGES="$PWD/tmp/lib/python$PYTHON_VERSION/site-packages"
INSTALL_FOLDER=tmp/joblib_install
rm -rf joblib $INSTALL_FOLDER
pip install joblib --target $INSTALL_FOLDER
cp -r $INSTALL_FOLDER/joblib .
rm -rf $INSTALL_FOLDER

mkdir -p $SITE_PACKAGES
mkdir -p tmp/bin
export PYTHONPATH="$SITE_PACKAGES"
easy_install -Zeab tmp joblib

cd tmp/joblib/
python setup.py install --prefix $OLDPWD/tmp
cd $OLDPWD
cp -r $SITE_PACKAGES/joblib-*.egg/joblib .
rm -rf tmp
# Needed to rewrite the doctests
# Note: BSD sed -i needs an argument under OSX
# so first renaming to .bak and then deleting backup files
Expand All @@ -25,4 +17,6 @@ find joblib -name "*.bak" | xargs rm
# joblib is already tested on its own CI infrastructure upstream.
rm -r joblib/test

chmod -x joblib/*.py
# Remove joblib/testing.py which is only used in tests and has a
# pytest dependency (needed until we drop nose)
rm joblib/testing.py
25 changes: 12 additions & 13 deletions sklearn/externals/joblib/__init__.py
Original file line number Diff line number Diff line change
@@ -1,27 +1,27 @@
""" Joblib is a set of tools to provide **lightweight pipelining in
"""Joblib is a set of tools to provide **lightweight pipelining in
Python**. In particular, joblib offers:
1. transparent disk-caching of the output values and lazy re-evaluation
(memoize pattern)
1. transparent disk-caching of the output values and lazy re-evaluation
(memoize pattern)
2. easy simple parallel computing
2. easy simple parallel computing
3. logging and tracing of the execution
3. logging and tracing of the execution
Joblib is optimized to be **fast** and **robust** in particular on large
data and has specific optimizations for `numpy` arrays. It is
**BSD-licensed**.
============================== ============================================
**User documentation**: http://pythonhosted.org/joblib
========================= ================================================
**User documentation:** http://pythonhosted.org/joblib
**Download packages**: http://pypi.python.org/pypi/joblib#downloads
**Download packages:** http://pypi.python.org/pypi/joblib#downloads
**Source code**: http://github.com/joblib/joblib
**Source code:** http://github.com/joblib/joblib
**Report issues**: http://github.com/joblib/joblib/issues
============================== ============================================
**Report issues:** http://github.com/joblib/joblib/issues
========================= ================================================
Vision
Expand Down Expand Up @@ -115,8 +115,7 @@
# Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
# 'X.Y.dev0' is the canonical version of 'X.Y.dev'
#

__version__ = '0.10.3'
__version__ = '0.11'


from .memory import Memory, MemorizedResult
Expand Down
1 change: 0 additions & 1 deletion sklearn/externals/joblib/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
import sys

PY3_OR_LATER = sys.version_info[0] >= 3
PY26 = sys.version_info[:2] == (2, 6)
PY27 = sys.version_info[:2] == (2, 7)

try:
Expand Down
2 changes: 1 addition & 1 deletion sklearn/externals/joblib/_memory_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,4 +102,4 @@ def open_py_source(filename):
buffer.seek(0)
text = TextIOWrapper(buffer, encoding, line_buffering=True)
text.mode = 'r'
return text
return text
10 changes: 8 additions & 2 deletions sklearn/externals/joblib/_parallel_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
class ParallelBackendBase(with_metaclass(ABCMeta)):
"""Helper abc which defines all methods a ParallelBackend must implement"""

supports_timeout = False

@abstractmethod
def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs that can actually run in parallel
Expand Down Expand Up @@ -236,6 +238,8 @@ class ThreadingBackend(PoolManagerMixin, ParallelBackendBase):
"with nogil" block or an expensive call to a library such as NumPy).
"""

supports_timeout = True

def configure(self, n_jobs=1, parallel=None, **backend_args):
"""Build a process or thread pool and return the number of workers"""
n_jobs = self.effective_n_jobs(n_jobs)
Expand All @@ -259,6 +263,8 @@ class MultiprocessingBackend(PoolManagerMixin, AutoBatchingMixin,
# Environment variables to protect against bad situations when nesting
JOBLIB_SPAWNED_PROCESS = "__JOBLIB_SPAWNED_PARALLEL__"

supports_timeout = True

def effective_n_jobs(self, n_jobs):
"""Determine the number of jobs which are going to run in parallel.
Expand All @@ -277,10 +283,10 @@ def effective_n_jobs(self, n_jobs):
stacklevel=3)
return 1

elif threading.current_thread().name != 'MainThread':
if not isinstance(threading.current_thread(), threading._MainThread):
# Prevent posix fork inside non-main posix threads
warnings.warn(
'Multiprocessing backed parallel loops cannot be nested'
'Multiprocessing-backed parallel loops cannot be nested'
' below threads, setting n_jobs=1',
stacklevel=3)
return 1
Expand Down
80 changes: 80 additions & 0 deletions sklearn/externals/joblib/backports.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""
Backports of fixes for joblib dependencies
"""
import os
import time
import ctypes
import sys

from distutils.version import LooseVersion

try:
    import numpy as np

    def _numpy_release(version_string):
        """Return the leading ``(major, minor)`` integers of a version string.

        Non-digit suffixes such as ``rc1`` or ``.dev0`` are ignored, so
        ``'1.13.0rc1'`` gives ``(1, 13)``.
        """
        numbers = []
        for part in version_string.split('.')[:2]:
            digits = ''
            for char in part:
                if not char.isdigit():
                    break
                digits += char
            numbers.append(int(digits or 0))
        return tuple(numbers)

    # Evaluated once at import time instead of on every call.  The version
    # is parsed locally rather than with distutils' LooseVersion because
    # distutils is deprecated (PEP 632) and removed in Python 3.12.
    _NUMPY_NEEDS_OFFSET_FIX = _numpy_release(np.__version__) < (1, 13)

    def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
                    shape=None, order='C'):
        """Backport of numpy memmap offset fix.

        See https://github.com/numpy/numpy/pull/8443 for more details.

        The numpy fix will be available in numpy 1.13.

        All arguments are forwarded unchanged to ``numpy.memmap``.  On
        numpy older than 1.13 the ``offset`` attribute of the returned
        memmap is overwritten with the requested ``offset``.
        """
        mm = np.memmap(filename, dtype=dtype, mode=mode, offset=offset,
                       shape=shape, order=order)
        if _NUMPY_NEEDS_OFFSET_FIX:
            mm.offset = offset
        return mm
except ImportError:
    def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
                    shape=None, order='C'):
        """Placeholder used when numpy is not installed; always raises."""
        raise NotImplementedError(
            "'joblib.backports.make_memmap' should not be used "
            'if numpy is not installed.')


def _rename_with_retry(rename, src, dst, max_sleep_time=1,
                       retry_winerror=5):
    """Call ``rename(src, dst)``, retrying while access is denied.

    Parameters
    ----------
    rename: callable
        Function performing the actual rename, called as
        ``rename(src, dst)``.
    src, dst:
        Arguments passed through to ``rename``.
    max_sleep_time: float
        Once the accumulated sleep time reaches this value, the next
        failure is propagated instead of being retried.
    retry_winerror: int
        Value of the exception's ``winerror`` attribute that triggers a
        retry.  Any other exception is re-raised immediately.

    The wait between attempts starts at 1ms and doubles after each
    failure.
    """
    total_sleep_time = 0
    sleep_time = 0.001
    while True:
        try:
            rename(src, dst)
            return
        except Exception as exc:
            if getattr(exc, 'winerror', None) != retry_winerror:
                raise
            if total_sleep_time >= max_sleep_time:
                # Re-raise from inside the handler: a bare ``raise`` placed
                # after the loop has no active exception on Python 3 and
                # fails with RuntimeError instead of the real error.
                raise
            time.sleep(sleep_time)
            total_sleep_time += sleep_time
            sleep_time *= 2


if os.name == 'nt':
    error_access_denied = 5
    try:
        from os import replace
    except ImportError:
        # Python 2.7
        def replace(src, dst):
            """Rename ``src`` to ``dst`` through the MoveFileExW Win32 call.

            Byte-string paths are decoded with the filesystem encoding
            first.  Raises ``ctypes.WinError()`` when the call returns 0.
            """
            if not isinstance(src, unicode):  # noqa
                src = unicode(src, sys.getfilesystemencoding())  # noqa
            if not isinstance(dst, unicode):  # noqa
                dst = unicode(dst, sys.getfilesystemencoding())  # noqa

            movefile_replace_existing = 0x1
            return_value = ctypes.windll.kernel32.MoveFileExW(
                src, dst, movefile_replace_existing)
            if return_value == 0:
                raise ctypes.WinError()

    def concurrency_safe_rename(src, dst):
        """Renames ``src`` into ``dst`` overwriting ``dst`` if it exists.

        On Windows os.replace (or for Python 2.7 its implementation
        through MoveFileExW) can yield permission errors if executed by
        two different processes.

        Access-denied errors are retried with exponentially growing
        pauses; when they persist, the last access-denied error itself is
        raised.
        """
        _rename_with_retry(replace, src, dst,
                           retry_winerror=error_access_denied)
else:
    try:
        from os import replace as concurrency_safe_rename
    except ImportError:
        from os import rename as concurrency_safe_rename  # noqa
30 changes: 7 additions & 23 deletions sklearn/externals/joblib/format_stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,15 +135,10 @@ def _fixed_getframes(etb, context=1, tb_offset=0):
aux = traceback.extract_tb(etb)
assert len(records) == len(aux)
for i, (file, lnum, _, _) in enumerate(aux):
maybeStart = lnum - 1 - context // 2
start = max(maybeStart, 0)
maybe_start = lnum - 1 - context // 2
start = max(maybe_start, 0)
end = start + context
lines = linecache.getlines(file)[start:end]
# pad with empty lines if necessary
if maybeStart < 0:
lines = (['\n'] * -maybeStart) + lines
if len(lines) < context:
lines += ['\n'] * (context - len(lines))
buf = list(records[i])
buf[LNUM_POS] = lnum
buf[INDEX_POS] = lnum - 1 - start
Expand Down Expand Up @@ -355,13 +350,7 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0):
pyver)

# Drop topmost frames if requested
try:
records = _fixed_getframes(etb, context, tb_offset)
except:
raise
print('\nUnfortunately, your original traceback can not be '
'constructed.\n')
return ''
records = _fixed_getframes(etb, context, tb_offset)

# Get (safely) a string form of the exception info
try:
Expand Down Expand Up @@ -397,18 +386,13 @@ def format_outer_frames(context=5, stack_start=None, stack_end=None,
filename = filename[:-4] + '.py'
if ignore_ipython:
# Hack to avoid printing the internals of IPython
if (os.path.basename(filename) == 'iplib.py'
and func_name in ('safe_execfile', 'runcode')):
if (os.path.basename(filename) in ('iplib.py', 'py3compat.py')
and func_name in ('execfile', 'safe_execfile', 'runcode')):
break
maybeStart = line_no - 1 - context // 2
start = max(maybeStart, 0)
maybe_start = line_no - 1 - context // 2
start = max(maybe_start, 0)
end = start + context
lines = linecache.getlines(filename)[start:end]
# pad with empty lines if necessary
if maybeStart < 0:
lines = (['\n'] * -maybeStart) + lines
if len(lines) < context:
lines += ['\n'] * (context - len(lines))
buf = list(records[i])
buf[LNUM_POS] = line_no
buf[INDEX_POS] = line_no - 1 - start
Expand Down
21 changes: 13 additions & 8 deletions sklearn/externals/joblib/func_inspect.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def _signature_str(function_name, arg_spec):
arg_spec_for_format = arg_spec[:7 if PY3_OR_LATER else 4]

arg_spec_str = inspect.formatargspec(*arg_spec_for_format)
return '{0}{1}'.format(function_name, arg_spec_str)
return '{}{}'.format(function_name, arg_spec_str)


def _function_called_str(function_name, args, kwargs):
Expand Down Expand Up @@ -316,6 +316,13 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
return arg_dict


def _format_arg(arg):
    """Return a pretty-printed, length-capped representation of ``arg``.

    The value is rendered with ``pformat(arg, indent=2)``.  When the
    result is longer than 1500 characters, only its first 700 characters
    are kept and ``'...'`` is appended.
    """
    formatted_arg = pformat(arg, indent=2)
    if len(formatted_arg) > 1500:
        formatted_arg = '%s...' % formatted_arg[:700]
    return formatted_arg


def format_signature(func, *args, **kwargs):
# XXX: Should this use inspect.formatargvalues/formatargspec?
module, name = get_func_name(func)
Expand All @@ -328,14 +335,12 @@ def format_signature(func, *args, **kwargs):
arg_str = list()
previous_length = 0
for arg in args:
arg = pformat(arg, indent=2)
if len(arg) > 1500:
arg = '%s...' % arg[:700]
formatted_arg = _format_arg(arg)
if previous_length > 80:
arg = '\n%s' % arg
previous_length = len(arg)
arg_str.append(arg)
arg_str.extend(['%s=%s' % (v, pformat(i)) for v, i in kwargs.items()])
formatted_arg = '\n%s' % formatted_arg
previous_length = len(formatted_arg)
arg_str.append(formatted_arg)
arg_str.extend(['%s=%s' % (v, _format_arg(i)) for v, i in kwargs.items()])
arg_str = ', '.join(arg_str)

signature = '%s(%s)' % (name, arg_str)
Expand Down
3 changes: 2 additions & 1 deletion sklearn/externals/joblib/hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import types
import struct
import io
import decimal

from ._compat import _bytes_or_unicode, PY3_OR_LATER

Expand All @@ -35,7 +36,7 @@ def __init__(self, set_sequence):
# This fails on python 3 when elements are unorderable
# but we keep it in a try as it's faster.
self._sequence = sorted(set_sequence)
except TypeError:
except (TypeError, decimal.InvalidOperation):
# If elements are unorderable, sorting them using their hash.
# This is slower but works in any case.
self._sequence = sorted((hash(e) for e in set_sequence))
Expand Down
2 changes: 1 addition & 1 deletion sklearn/externals/joblib/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def __init__(self, depth=3):
self.depth = depth

def warn(self, msg):
logging.warn("[%s]: %s" % (self, msg))
logging.warning("[%s]: %s" % (self, msg))

def debug(self, msg):
# XXX: This conflicts with the debug flag used in children class
Expand Down
Loading

0 comments on commit cee5a38

Please sign in to comment.