[MRG] Update joblib to 0.11 (scikit-learn#8492)

Use pip rather than easy_install in copy_joblib.sh. Also need to remove joblib/testing.py to avoid pytest dependency.
sergiogaiotto · Mar 7, 2017 · cee5a38 · cee5a38
1 parent 5210f81
commit cee5a38
Show file tree

Hide file tree

Showing 17 changed files with 511 additions and 400 deletions.
diff --git a/sklearn/externals/copy_joblib.sh b/sklearn/externals/copy_joblib.sh
@@ -1,20 +1,12 @@
 #!/bin/sh
 # Script to do a local install of joblib
 export LC_ALL=C
-rm -rf tmp joblib
-PYTHON_VERSION=$(python -c 'import sys; print("{0[0]}.{0[1]}".format(sys.version_info))')
-SITE_PACKAGES="$PWD/tmp/lib/python$PYTHON_VERSION/site-packages"
+INSTALL_FOLDER=tmp/joblib_install
+rm -rf joblib $INSTALL_FOLDER
+pip install joblib --target $INSTALL_FOLDER
+cp -r $INSTALL_FOLDER/joblib .
+rm -rf $INSTALL_FOLDER
 
-mkdir -p $SITE_PACKAGES
-mkdir -p tmp/bin
-export PYTHONPATH="$SITE_PACKAGES"
-easy_install -Zeab tmp joblib
-
-cd tmp/joblib/
-python setup.py install --prefix $OLDPWD/tmp
-cd $OLDPWD
-cp -r $SITE_PACKAGES/joblib-*.egg/joblib .
-rm -rf tmp
 # Needed to rewrite the doctests
 # Note: BSD sed -i needs an argument under OSX
 # so first renaming to .bak and then deleting backup files
@@ -25,4 +17,6 @@ find joblib -name "*.bak" | xargs rm
 # joblib is already tested on its own CI infrastructure upstream.
 rm -r joblib/test
 
-chmod -x joblib/*.py
+# Remove joblib/testing.py which is only used in tests and has a
+# pytest dependency (needed until we drop nose)
+rm joblib/testing.py
diff --git a/sklearn/externals/joblib/__init__.py b/sklearn/externals/joblib/__init__.py
@@ -1,27 +1,27 @@
-""" Joblib is a set of tools to provide **lightweight pipelining in
+"""Joblib is a set of tools to provide **lightweight pipelining in
 Python**. In particular, joblib offers:
 
-  1. transparent disk-caching of the output values and lazy re-evaluation
-     (memoize pattern)
+1. transparent disk-caching of the output values and lazy re-evaluation
+   (memoize pattern)
 
-  2. easy simple parallel computing
+2. easy simple parallel computing
 
-  3. logging and tracing of the execution
+3. logging and tracing of the execution
 
 Joblib is optimized to be **fast** and **robust** in particular on large
 data and has specific optimizations for `numpy` arrays. It is
 **BSD-licensed**.
 
 
-    ============================== ============================================
-    **User documentation**:        http://pythonhosted.org/joblib
+    ========================= ================================================
+    **User documentation:**        http://pythonhosted.org/joblib
 
-    **Download packages**:         http://pypi.python.org/pypi/joblib#downloads
+    **Download packages:**         http://pypi.python.org/pypi/joblib#downloads
 
-    **Source code**:               http://github.com/joblib/joblib
+    **Source code:**               http://github.com/joblib/joblib
 
-    **Report issues**:             http://github.com/joblib/joblib/issues
-    ============================== ============================================
+    **Report issues:**             http://github.com/joblib/joblib/issues
+    ========================= ================================================
 
 
 Vision
@@ -115,8 +115,7 @@
 # Dev branch marker is: 'X.Y.dev' or 'X.Y.devN' where N is an integer.
 # 'X.Y.dev0' is the canonical version of 'X.Y.dev'
 #
-
-__version__ = '0.10.3'
+__version__ = '0.11'
 
 
 from .memory import Memory, MemorizedResult

diff --git a/sklearn/externals/joblib/_compat.py b/sklearn/externals/joblib/_compat.py
@@ -4,7 +4,6 @@
 import sys
 
 PY3_OR_LATER = sys.version_info[0] >= 3
-PY26 = sys.version_info[:2] == (2, 6)
 PY27 = sys.version_info[:2] == (2, 7)
 
 try:

diff --git a/sklearn/externals/joblib/_memory_helpers.py b/sklearn/externals/joblib/_memory_helpers.py
@@ -102,4 +102,4 @@ def open_py_source(filename):
         buffer.seek(0)
         text = TextIOWrapper(buffer, encoding, line_buffering=True)
         text.mode = 'r'
-        return text
+        return text
diff --git a/sklearn/externals/joblib/_parallel_backends.py b/sklearn/externals/joblib/_parallel_backends.py
@@ -21,6 +21,8 @@
 class ParallelBackendBase(with_metaclass(ABCMeta)):
     """Helper abc which defines all methods a ParallelBackend must implement"""
 
+    supports_timeout = False
+
     @abstractmethod
     def effective_n_jobs(self, n_jobs):
         """Determine the number of jobs that can actually run in parallel
@@ -236,6 +238,8 @@ class ThreadingBackend(PoolManagerMixin, ParallelBackendBase):
     "with nogil" block or an expensive call to a library such as NumPy).
     """
 
+    supports_timeout = True
+
     def configure(self, n_jobs=1, parallel=None, **backend_args):
         """Build a process or thread pool and return the number of workers"""
         n_jobs = self.effective_n_jobs(n_jobs)
@@ -259,6 +263,8 @@ class MultiprocessingBackend(PoolManagerMixin, AutoBatchingMixin,
     # Environment variables to protect against bad situations when nesting
     JOBLIB_SPAWNED_PROCESS = "__JOBLIB_SPAWNED_PARALLEL__"
 
+    supports_timeout = True
+
     def effective_n_jobs(self, n_jobs):
         """Determine the number of jobs which are going to run in parallel.
 
@@ -277,10 +283,10 @@ def effective_n_jobs(self, n_jobs):
                     stacklevel=3)
             return 1
 
-        elif threading.current_thread().name != 'MainThread':
+        if not isinstance(threading.current_thread(), threading._MainThread):
             # Prevent posix fork inside non-main posix threads
             warnings.warn(
-                'Multiprocessing backed parallel loops cannot be nested'
+                'Multiprocessing-backed parallel loops cannot be nested'
                 ' below threads, setting n_jobs=1',
                 stacklevel=3)
             return 1

diff --git a/sklearn/externals/joblib/backports.py b/sklearn/externals/joblib/backports.py
@@ -0,0 +1,80 @@
+"""
+Backports of fixes for joblib dependencies
+"""
+import os
+import time
+import ctypes
+import sys
+
+from distutils.version import LooseVersion
+
+try:
+    import numpy as np
+
+    def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
+                    shape=None, order='C'):
+        """Backport of numpy memmap offset fix.
+
+        See https://github.com/numpy/numpy/pull/8443 for more details.
+
+        The numpy fix will be available in numpy 1.13.
+        """
+        # All arguments are forwarded unchanged to np.memmap.
+        mm = np.memmap(filename, dtype=dtype, mode=mode, offset=offset,
+                       shape=shape, order=order)
+        # On numpy older than 1.13, overwrite the ``offset`` attribute with
+        # the offset that was requested by the caller.
+        if LooseVersion(np.__version__) < '1.13':
+            mm.offset = offset
+        return mm
+except ImportError:
+    # numpy could not be imported: expose a stub with the same signature
+    # that raises as soon as it is called.
+    def make_memmap(filename, dtype='uint8', mode='r+', offset=0,
+                    shape=None, order='C'):
+        raise NotImplementedError(
+            "'joblib.backports.make_memmap' should not be used "
+            'if numpy is not installed.')
+
+
+if os.name == 'nt':
+    error_access_denied = 5
+    try:
+        from os import replace
+    except ImportError:
+        # Python 2.7
+        def replace(src, dst):
+            # Fallback used when ``os.replace`` cannot be imported: move
+            # ``src`` onto ``dst`` through the Win32 ``MoveFileExW`` call.
+            # Byte strings are first decoded to unicode using the
+            # filesystem encoding.
+            if not isinstance(src, unicode):  # noqa
+                src = unicode(src, sys.getfilesystemencoding())  # noqa
+            if not isinstance(dst, unicode):  # noqa
+                dst = unicode(dst, sys.getfilesystemencoding())  # noqa
+
+            # Flag value passed as the third argument of MoveFileExW.
+            movefile_replace_existing = 0x1
+            return_value = ctypes.windll.kernel32.MoveFileExW(
+                src, dst, movefile_replace_existing)
+            # A zero return value is treated as a failure and reported
+            # through the exception built by ctypes.WinError().
+            if return_value == 0:
+                raise ctypes.WinError()
+
+    def concurrency_safe_rename(src, dst):
+        """Renames ``src`` into ``dst`` overwriting ``dst`` if it exists.
+
+        On Windows os.replace (or for Python 2.7 its implementation
+        through MoveFileExW) can yield permission errors if executed by
+        two different processes.
+
+        The rename is retried while it fails with an access-denied error
+        (``winerror == 5``), sleeping between attempts with a delay that
+        starts at 1 ms and doubles each time, until the accumulated sleep
+        time reaches one second. Any other error is re-raised immediately.
+        Once the retry budget is exhausted, the last access-denied error
+        is raised.
+        """
+        max_sleep_time = 1
+        total_sleep_time = 0
+        sleep_time = 0.001
+        last_exc = None
+        while total_sleep_time < max_sleep_time:
+            try:
+                replace(src, dst)
+                break
+            except Exception as exc:
+                if getattr(exc, 'winerror', None) == error_access_denied:
+                    # Python 3 unbinds ``exc`` when the except block ends,
+                    # so keep a reference for the final raise below.
+                    last_exc = exc
+                    time.sleep(sleep_time)
+                    total_sleep_time += sleep_time
+                    sleep_time *= 2
+                else:
+                    raise
+        else:
+            # Retry budget exhausted without a successful rename. A bare
+            # ``raise`` here would have no active exception on Python 3
+            # and fail with a RuntimeError instead of the real error.
+            raise last_exc
+else:
+    try:
+        from os import replace as concurrency_safe_rename
+    except ImportError:
+        from os import rename as concurrency_safe_rename  # noqa
diff --git a/sklearn/externals/joblib/format_stack.py b/sklearn/externals/joblib/format_stack.py
@@ -135,15 +135,10 @@ def _fixed_getframes(etb, context=1, tb_offset=0):
     aux = traceback.extract_tb(etb)
     assert len(records) == len(aux)
     for i, (file, lnum, _, _) in enumerate(aux):
-        maybeStart = lnum - 1 - context // 2
-        start = max(maybeStart, 0)
+        maybe_start = lnum - 1 - context // 2
+        start = max(maybe_start, 0)
         end = start + context
         lines = linecache.getlines(file)[start:end]
-        # pad with empty lines if necessary
-        if maybeStart < 0:
-            lines = (['\n'] * -maybeStart) + lines
-        if len(lines) < context:
-            lines += ['\n'] * (context - len(lines))
         buf = list(records[i])
         buf[LNUM_POS] = lnum
         buf[INDEX_POS] = lnum - 1 - start
@@ -355,13 +350,7 @@ def format_exc(etype, evalue, etb, context=5, tb_offset=0):
         pyver)
 
     # Drop topmost frames if requested
-    try:
-        records = _fixed_getframes(etb, context, tb_offset)
-    except:
-        raise
-        print('\nUnfortunately, your original traceback can not be '
-              'constructed.\n')
-        return ''
+    records = _fixed_getframes(etb, context, tb_offset)
 
     # Get (safely) a string form of the exception info
     try:
@@ -397,18 +386,13 @@ def format_outer_frames(context=5, stack_start=None, stack_end=None,
                 filename = filename[:-4] + '.py'
         if ignore_ipython:
             # Hack to avoid printing the internals of IPython
-            if (os.path.basename(filename) == 'iplib.py'
-                        and func_name in ('safe_execfile', 'runcode')):
+            if (os.path.basename(filename) in ('iplib.py', 'py3compat.py')
+                        and func_name in ('execfile', 'safe_execfile', 'runcode')):
                 break
-        maybeStart = line_no - 1 - context // 2
-        start = max(maybeStart, 0)
+        maybe_start = line_no - 1 - context // 2
+        start = max(maybe_start, 0)
         end = start + context
         lines = linecache.getlines(filename)[start:end]
-        # pad with empty lines if necessary
-        if maybeStart < 0:
-            lines = (['\n'] * -maybeStart) + lines
-        if len(lines) < context:
-            lines += ['\n'] * (context - len(lines))
         buf = list(records[i])
         buf[LNUM_POS] = line_no
         buf[INDEX_POS] = line_no - 1 - start

diff --git a/sklearn/externals/joblib/func_inspect.py b/sklearn/externals/joblib/func_inspect.py
@@ -190,7 +190,7 @@ def _signature_str(function_name, arg_spec):
     arg_spec_for_format = arg_spec[:7 if PY3_OR_LATER else 4]
 
     arg_spec_str = inspect.formatargspec(*arg_spec_for_format)
-    return '{0}{1}'.format(function_name, arg_spec_str)
+    return '{}{}'.format(function_name, arg_spec_str)
 
 
 def _function_called_str(function_name, args, kwargs):
@@ -316,6 +316,13 @@ def filter_args(func, ignore_lst, args=(), kwargs=dict()):
     return arg_dict
 
 
+def _format_arg(arg):
+    # Pretty-print ``arg`` with pformat (indent=2) and return the text.
+    formatted_arg = pformat(arg, indent=2)
+    # Representations longer than 1500 characters are cut down to their
+    # first 700 characters followed by '...'.
+    if len(formatted_arg) > 1500:
+        formatted_arg = '%s...' % formatted_arg[:700]
+    return formatted_arg
+
+
 def format_signature(func, *args, **kwargs):
     # XXX: Should this use inspect.formatargvalues/formatargspec?
     module, name = get_func_name(func)
@@ -328,14 +335,12 @@ def format_signature(func, *args, **kwargs):
     arg_str = list()
     previous_length = 0
     for arg in args:
-        arg = pformat(arg, indent=2)
-        if len(arg) > 1500:
-            arg = '%s...' % arg[:700]
+        formatted_arg = _format_arg(arg)
         if previous_length > 80:
-            arg = '\n%s' % arg
-        previous_length = len(arg)
-        arg_str.append(arg)
-    arg_str.extend(['%s=%s' % (v, pformat(i)) for v, i in kwargs.items()])
+            formatted_arg = '\n%s' % formatted_arg
+        previous_length = len(formatted_arg)
+        arg_str.append(formatted_arg)
+    arg_str.extend(['%s=%s' % (v, _format_arg(i)) for v, i in kwargs.items()])
     arg_str = ', '.join(arg_str)
 
     signature = '%s(%s)' % (name, arg_str)

diff --git a/sklearn/externals/joblib/hashing.py b/sklearn/externals/joblib/hashing.py
@@ -13,6 +13,7 @@
 import types
 import struct
 import io
+import decimal
 
 from ._compat import _bytes_or_unicode, PY3_OR_LATER
 
@@ -35,7 +36,7 @@ def __init__(self, set_sequence):
             # This fails on python 3 when elements are unorderable
             # but we keep it in a try as it's faster.
             self._sequence = sorted(set_sequence)
-        except TypeError:
+        except (TypeError, decimal.InvalidOperation):
             # If elements are unorderable, sorting them using their hash.
             # This is slower but works in any case.
             self._sequence = sorted((hash(e) for e in set_sequence))

diff --git a/sklearn/externals/joblib/logger.py b/sklearn/externals/joblib/logger.py
@@ -74,7 +74,7 @@ def __init__(self, depth=3):
         self.depth = depth
 
     def warn(self, msg):
-        logging.warn("[%s]: %s" % (self, msg))
+        logging.warning("[%s]: %s" % (self, msg))
 
     def debug(self, msg):
         # XXX: This conflicts with the debug flag used in children class