Skip to content

Commit

Permalink
C file removal + cython build + Travis build cache
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurmensch authored and Arthur Mensch committed Nov 4, 2015
1 parent b4adafe commit 986fb60
Show file tree
Hide file tree
Showing 52 changed files with 322 additions and 543,795 deletions.
7 changes: 7 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,10 @@ benchmarks/bench_covertype_data/
*.prefs
.pydevproject
.idea

cythonize.dat
*.c
*.cpp

!*/src/*.c
!*/src/*.cpp
22 changes: 15 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,16 @@ sudo: false
language: python

# Pre-install packages for the ubuntu distribution
cache:
apt: true
# We use three different cache directories
# to work around a Travis bug with multi-platform cache
directories:
- $HOME/sklearn_build_ubuntu
- $HOME/sklearn_build_oldest
- $HOME/sklearn_build_latest
- $HOME/.cache/pip
- $HOME/download
addons:
apt:
packages:
Expand All @@ -16,12 +26,15 @@ env:
# This environment tests that scikit-learn can be built against
# versions of numpy, scipy with ATLAS that comes with Ubuntu Precise 12.04
- DISTRIB="ubuntu" PYTHON_VERSION="2.7" COVERAGE="true"
CYTHON_VERSION="0.23.4" NAME="ubuntu"
# This environment tests the oldest supported anaconda env
- DISTRIB="conda" PYTHON_VERSION="2.6" INSTALL_MKL="false"
NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0"
NUMPY_VERSION="1.6.2" SCIPY_VERSION="0.11.0" CYTHON_VERSION="0.21"
NAME="oldest"
# This environment tests the newest supported anaconda env
- DISTRIB="conda" PYTHON_VERSION="3.5" INSTALL_MKL="true"
NUMPY_VERSION="1.10.1" SCIPY_VERSION="0.16.0"
NUMPY_VERSION="1.10.1" SCIPY_VERSION="0.16.0" CYTHON_VERSION="0.23.4"
NAME="latest"
install: source continuous_integration/install.sh
script: bash continuous_integration/test_script.sh
after_success:
Expand All @@ -36,8 +49,3 @@ notifications:
on_success: change # options: [always|never|change] default: always
on_failure: always # options: [always|never|change] default: always
on_start: never # options: [always|never|change] default: always
cache:
apt: true
directories:
- $HOME/.cache/pip
- $HOME/download
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ trailing-spaces:
find sklearn -name "*.py" -exec perl -pi -e 's/[ \t]*$$//' {} \;

cython:
find sklearn -name "*.pyx" -exec $(CYTHON) {} \;
python sklearn/_build_utils/cythonize.py sklearn

ctags:
# make tags for symbol based navigation in emacs and vim
Expand Down
1 change: 1 addition & 0 deletions circle.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ dependencies:
- sudo apt-get install python-numpy python-scipy python-dev python-matplotlib
- sudo apt-get install python-nose python-coverage
- sudo apt-get install python-sphinx
- pip install cython
# The --user is needed to let sphinx see the source and the binaries
# The pipefail is requested to propagate exit code
override:
Expand Down
24 changes: 21 additions & 3 deletions continuous_integration/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,19 @@

# License: 3-clause BSD


# Travis clones the scikit-learn/scikit-learn repository into a local directory.
# We use a cached directory holding three copies of scikit-learn (one for each
# matrix entry), each synchronized from the local Travis clone. This allows
# us to keep the gcc + cython build artifacts and save time.

set -e

# Fix the compilers to avoid having the Python 3.4 build
# look up g++44 unexpectedly.
export CC=gcc
export CXX=g++


echo 'List files from cached directories'
echo 'pip:'
ls $HOME/.cache/pip
Expand Down Expand Up @@ -49,7 +54,7 @@ if [[ "$DISTRIB" == "conda" ]]; then
# Configure the conda environment and put it in the path using the
# provided versions
conda create -n testenv --yes python=$PYTHON_VERSION pip nose \
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION
numpy=$NUMPY_VERSION scipy=$SCIPY_VERSION cython=$CYTHON_VERSION
source activate testenv

# Resolve MKL usage
Expand All @@ -68,15 +73,28 @@ elif [[ "$DISTRIB" == "ubuntu" ]]; then
virtualenv --system-site-packages testvenv
source testvenv/bin/activate
pip install nose
pip install cython
fi

if [[ "$COVERAGE" == "true" ]]; then
pip install coverage coveralls
fi

GIT_TRAVIS_REPO=$(pwd)
echo $GIT_TRAVIS_REPO

cd $HOME
if [ ! -d "sklearn_build_$NAME" ]; then
mkdir sklearn_build_$NAME
fi

rsync -av --exclude='.git/' --exclude='testvenv/' $GIT_TRAVIS_REPO \
sklearn_build_${NAME}
cd sklearn_build_${NAME}/scikit-learn

# Build scikit-learn in the install.sh script to collapse the verbose
# build output in the travis output when it succeeds.
python --version
python -c "import numpy; print('numpy %s' % numpy.__version__)"
python -c "import scipy; print('scipy %s' % scipy.__version__)"
python setup.py build_ext --inplace
python setup.py develop
12 changes: 10 additions & 2 deletions continuous_integration/test_script.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@

set -e

# Move to a temporary directory to run the tests against the installed
# scikit-learn and check that we do not leave artifacts behind
mkdir -p /tmp/sklearn_tmp
cd /tmp/sklearn_tmp

python --version
python -c "import numpy; print('numpy %s' % numpy.__version__)"
python -c "import scipy; print('scipy %s' % scipy.__version__)"
Expand All @@ -17,12 +22,15 @@ python -c "import scipy; print('scipy %s' % scipy.__version__)"
# disk caching does not work.
export SKLEARN_SKIP_NETWORK_TESTS=1

# Do not use "make test" or "make test-coverage" as they enable verbose mode
# which renders travis output too slow to display in a browser.
if [[ "$COVERAGE" == "true" ]]; then
nosetests -s --with-coverage sklearn
else
nosetests -s sklearn
fi

# Is directory still empty ?
ls

# Test doc
cd $HOME/sklearn_build_$NAME/scikit-learn
make test-doc test-sphinxext
14 changes: 11 additions & 3 deletions doc/developers/performance.rst
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,17 @@ following:
parallelism** that is amenable to **multi-processing** by using the
``joblib.Parallel`` class.

When using Cython, include the generated C source code alongside with
the Cython source code. The goal is to make it possible to install the
scikit on any machine with Python, Numpy, Scipy and C/C++ compiler.
When using Cython, use either

$ python setup.py build_ext -i
$ python setup.py install

to generate C files. You are responsible for adding .c/.cpp extensions along
with build parameters in each submodule ``setup.py``.

Generated C/C++ files are embedded in distributed stable packages. The goal is
to make it possible to install the stable version of scikit-learn
on any machine with Python, Numpy, Scipy and a C/C++ compiler.

Fast matrix multiplications
===========================
Expand Down
4 changes: 4 additions & 0 deletions doc/whats_new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,10 @@ Enhancements
provided as a percentage of the training samples. By
`yelite`_ and `Arnaud Joly`_

- The codebase no longer contains Cython-generated C/C++ files: they are
generated during the build. Distribution packages will still contain the generated
C/C++ files. By `Arthur Mensch`_

Bug fixes
.........

Expand Down
70 changes: 58 additions & 12 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (C) 2007-2009 Cournapeau David <[email protected]>
# 2010 Fabian Pedregosa <[email protected]>
# License: 3-clause BSD
import subprocess

descr = """A set of python modules for machine learning and data mining"""

Expand All @@ -12,7 +13,6 @@
from distutils.command.clean import clean as Clean
from pkg_resources import parse_version


if sys.version_info[0] < 3:
import __builtin__ as builtins
else:
Expand All @@ -39,8 +39,10 @@
# We can actually import a restricted version of sklearn that
# does not need the compiled code
import sklearn

VERSION = sklearn.__version__

from sklearn._build_utils import cythonize

# Optional setuptools features
# We need to import setuptools early, if we want setuptools features,
Expand All @@ -54,6 +56,7 @@
])
if SETUPTOOLS_COMMANDS.intersection(sys.argv):
import setuptools

extra_setuptools_args = dict(
zip_safe=False, # the package can run out of an .egg file
include_package_data=True,
Expand All @@ -69,20 +72,33 @@ class CleanCommand(Clean):

def run(self):
Clean.run(self)
# Remove c files if we are not within a sdist package
cwd = os.path.abspath(os.path.dirname(__file__))
remove_c_files = not os.path.exists(os.path.join(cwd, 'PKG-INFO'))
if remove_c_files:
cython_hash_file = os.path.join(cwd, 'cythonize.dat')
if os.path.exists(cython_hash_file):
os.unlink(cython_hash_file)
print('Will remove generated .c files')
if os.path.exists('build'):
shutil.rmtree('build')
for dirpath, dirnames, filenames in os.walk('sklearn'):
for filename in filenames:
if (filename.endswith('.so') or filename.endswith('.pyd')
or filename.endswith('.dll')
or filename.endswith('.pyc')):
if any(filename.endswith(suffix) for suffix in
(".so", ".pyd", ".dll", ".pyc")):
os.unlink(os.path.join(dirpath, filename))
continue
extension = os.path.splitext(filename)[1]
if remove_c_files and extension in ['.c', '.cpp']:
pyx_file = str.replace(filename, extension, '.pyx')
if os.path.exists(os.path.join(dirpath, pyx_file)):
os.unlink(os.path.join(dirpath, filename))
for dirname in dirnames:
if dirname == '__pycache__':
shutil.rmtree(os.path.join(dirpath, dirname))

cmdclass = {'clean': CleanCommand}

cmdclass = {'clean': CleanCommand}

# Optional wheelhouse-uploader features
# To automate release of binary packages for scikit-learn we need a tool
Expand All @@ -94,6 +110,7 @@ def run(self):
WHEELHOUSE_UPLOADER_COMMANDS = set(['fetch_artifacts', 'upload_all'])
if WHEELHOUSE_UPLOADER_COMMANDS.intersection(sys.argv):
import wheelhouse_uploader.cmd

cmdclass.update(vars(wheelhouse_uploader.cmd))


Expand All @@ -115,6 +132,7 @@ def configuration(parent_package='', top_path=None):

return config


scipy_min_version = '0.9'
numpy_min_version = '1.6.1'

Expand Down Expand Up @@ -157,6 +175,12 @@ def get_numpy_status():
return numpy_status


def generate_cython():
    """Run the ``cythonize`` build helper on the directory holding this file.

    The directory is resolved to an absolute path from ``__file__`` and
    handed to ``cythonize.main``.
    """
    project_root = os.path.dirname(__file__)
    print("Cythonizing sources")
    cythonize.main(os.path.abspath(project_root))


def setup_package():
metadata = dict(name=DISTNAME,
maintainer=MAINTAINER,
Expand Down Expand Up @@ -188,11 +212,13 @@ def setup_package():
cmdclass=cmdclass,
**extra_setuptools_args)

if (len(sys.argv) >= 2
and ('--help' in sys.argv[1:] or sys.argv[1]
in ('--help-commands', 'egg_info', '--version', 'clean'))):

# For these actions, NumPy is not required.
if len(sys.argv) == 1 or (
len(sys.argv) >= 2 and ('--help' in sys.argv[1:] or
sys.argv[1] in ('--help-commands',
'egg_info',
'--version',
'clean'))):
# For these actions, NumPy is not required, nor Cythonization
#
# They are required to succeed without Numpy for example when
# pip is used to install Scikit-learn when Numpy is not yet present in
Expand All @@ -206,10 +232,10 @@ def setup_package():
else:
numpy_status = get_numpy_status()
numpy_req_str = "scikit-learn requires NumPy >= {0}.\n".format(
numpy_min_version)
numpy_min_version)
scipy_status = get_scipy_status()
scipy_req_str = "scikit-learn requires SciPy >= {0}.\n".format(
scipy_min_version)
scipy_min_version)

instructions = ("Installation instructions are available on the "
"scikit-learn website: "
Expand Down Expand Up @@ -240,6 +266,26 @@ def setup_package():

metadata['configuration'] = configuration

if len(sys.argv) >= 2 and sys.argv[1] != 'config':
    # Cythonize if needed: every command except ``config`` takes this branch.
    # The command is compared with ``!=`` on purpose; ``not in 'config'``
    # would be a substring test and also match values such as 'con' or ''.

    print('Generating cython files')
    cwd = os.path.abspath(os.path.dirname(__file__))
    if not os.path.exists(os.path.join(cwd, 'PKG-INFO')):
        # Generate Cython sources, unless building from source release
        # (detected by a PKG-INFO file sitting next to this script).
        generate_cython()

    # Clean left-over compiled extensions whose .pyx source no longer exists
    for dirpath, dirnames, filenames in os.walk(
            os.path.join(cwd, 'sklearn')):
        for filename in filenames:
            if os.path.splitext(filename)[1] not in (".so", ".pyd", ".dll"):
                continue
            # Keep only the module name: compiled extensions may carry an
            # ABI tag (e.g. ``foo.cpython-35m-x86_64-linux-gnu.so``) that
            # must not leak into the name of the .pyx file we look for,
            # otherwise every freshly built extension would be deleted.
            pyx_file = filename.split('.', 1)[0] + '.pyx'
            if not os.path.exists(os.path.join(dirpath, pyx_file)):
                os.unlink(os.path.join(dirpath, filename))

setup(**metadata)


Expand Down
Loading

0 comments on commit 986fb60

Please sign in to comment.