Skip to content

Commit

Permalink
Merge pull request #2 from pelucid/BN-1175-sample-weights-feature-sel…
Browse files Browse the repository at this point in the history
…ection-updated

Sample weights feature selection
  • Loading branch information
harjeetkalsi authored Mar 24, 2020
2 parents 91ac0cd + 7883e55 commit 62baea6
Show file tree
Hide file tree
Showing 8 changed files with 107 additions and 176 deletions.
34 changes: 0 additions & 34 deletions .appveyor.yml

This file was deleted.

60 changes: 14 additions & 46 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,51 +1,19 @@
language: generic

cache:
apt: true
directories:
- $HOME/.cache/pip
- $HOME/.ccache

dist: trusty

env:
global:
# Directory where tests are run from
- TEST_DIR=/tmp/sklearn
- OMP_NUM_THREADS=4
- OPENBLAS_NUM_THREADS=4

matrix:
include:
- os: linux
sudo: required
python: 3.8
env: LATEST="false" IMAGE="true" COVERAGE="false" NUMPY_VERSION="1.18.1" SCIPY_VERSION="1.4.1" SKLEARN_VERSION="0.22.0" JOBLIB_VERSION=0.13.2 PANDAS_VERSION="1.0.1" IMAGEIO_VERSION="2.5.0" SKIMAGE_VERSION="0.15.0" DLIB_VERSION="19.17.0" MINICONDA_PYTHON_VERSION=3.7
- os: linux
python: 3.8
env: LATEST="true" IMAGE="true" COVERAGE="true" NOTEBOOKS="true" MINICONDA_PYTHON_VERSION=3.7
- os: linux
sudo: required
python: 3.8
env: LATEST="true" IMAGE="false" COVERAGE="false" NOTEBOOKS="false" MINICONDA_PYTHON_VERSION=3.7


before_install:
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew update; fi

install:
- if [[ "$TRAVIS_OS_NAME" != "osx" ]]; then sudo apt-get update; fi
- source ci/.travis_install.sh
language: python
python:
- "3.6"

script:
- bash ci/.travis_test.sh
- make deps

after_success:
- if [[ "$COVERAGE" == "true" ]]; then coveralls || echo "failed"; fi
dist: trusty

notifications:
email:
recipients:
- [email protected]
on_success: always
on_failure: always
email:
recipients:
- [email protected]
on_success: never
on_failure: change

slack:
rooms:
secure: "O/kZr/L3H3R7ndC9aR8ScpL0lN893g8TAsiPdTxVQK7fJ9t88gAonR+yQIq/ZwD4Xwb5NDc09xMBpikhCrquBCqpSxf/EC9tDzePbjlqYCjsZ8wuOmo4byFrH32bvcxr3SIWw8zSY88Z4Ac5msTgUBW2aovOgn9wIM2Prs9kP9y/ftPnlkAs9IJHLJ5DmwEw9KEUTq1eKbWh7+nlguDkBIwpqeuU4gOUAZGFuCy4Cqs53K87PQRX0VjTxnkqODKoF8cuIN9TU0D0u75kqny902rXfzgaSyy8mFFxu+HkPhDbdFICV3H8P82QSpbmUTzcgvfHOBDxZHFJ2cxAScKoLtSzaaHvHA6H3WrHP6r5jei0UgG4v5dNRtyA5uO+DtVu0thr/cpBI97Hm9Ob9sTOCzIRtTuSPyk+cAYsksDDzL67diJhkaaCcIQBL6Z5qSEZzJS+Ggdzo5SwiTDrK/qOW/xk/53qQrB2lL9oQT4vuiTpwSUJnaWDeVlfYk09JnfYWfSHzNWXy+F8aQQylMzuoKGDTtVDya7dpObwCFatJ1yvr1sGu067Y5+04a5fk6hRmPaQDFNEfkN14BF8xw4EQJA2M8Gna6U6bQmkwHRPFIcJpC6vmA6UV5Wvg8Rpgme7GN3R9i07/1+520CeWdYCXFlYWlFy6kpAhECDMO3uJwU="
5 changes: 0 additions & 5 deletions MANIFEST.in

This file was deleted.

16 changes: 16 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Guard against running Make commands outside a virtualenv or conda env
venv:
ifndef VIRTUAL_ENV
ifndef CONDA_PREFIX
$(error VIRTUAL / CONDA ENV is not set - please activate environment)
endif
endif

clean: venv
@echo "Removing build artifacts / temp files"
find . -name "*.pyc" -delete

deps: venv
pip install -U pip==18.1
pip install -Ue . --process-dependency-links

82 changes: 63 additions & 19 deletions mlxtend/feature_selection/sequential_feature_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,20 +19,64 @@
from sklearn.base import MetaEstimatorMixin
from ..externals.name_estimators import _name_estimators
from ..utils.base_compostion import _BaseXComposition
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold, cross_val_score
from joblib import Parallel, delayed


def fit_and_score(model, X, y, sample_weights,
                  train, test,
                  scoring):
    """Fit a clone of `model` on one fold and return its weighted score.

    Parameters
    ----------
    model : estimator
        Estimator to evaluate. A copy obtained via `clone(model)` is
        fitted; its `fit` must accept a `sample_weight` keyword.
    X, y : indexable
        Features and targets; both are indexed with `train` and `test`.
    sample_weights : array-like
        Per-sample weights aligned positionally with the rows of `X`.
    train, test : index arrays
        Positions of the training and test samples for this fold.
    scoring : callable
        Called as `scoring(estimator, X_test, y_test, sample_weight=...)`.

    Returns
    -------
    score
        Whatever `scoring` returns for the test portion of the fold.
    """
    # `train` / `test` are positional indices. Coerce the weights to an
    # ndarray so that lists work and pandas Series are not indexed by label.
    weights = np.asarray(sample_weights)

    # Fit on the training part of the fold, weighting each sample
    model_clone = clone(model)
    model_clone.fit(X[train], y[train], sample_weight=weights[train])

    # Score on the held-out part, using the matching weights
    return scoring(model_clone, X[test], y[test],
                   sample_weight=weights[test])


def cross_val_scores_weighted(model, X, y, scoring, sample_weights,
                              cv=5,
                              n_jobs=None,
                              verbose=0,
                              pre_dispatch='2*n_jobs'):
    """Cross-validate `model`, passing sample weights to fit and scoring.

    Parameters
    ----------
    model : estimator
        Estimator handed to `fit_and_score` for every fold.
    X, y : indexable
        Features and targets to split.
    scoring : callable
        Scorer forwarded to `fit_and_score`.
    sample_weights : array-like
        Per-sample weights forwarded to `fit_and_score`.
    cv : int, splitter object, or iterable of (train, test) (default: 5)
        An integer builds a shuffled `StratifiedKFold` with that many
        splits and a fixed `random_state=42`. An object exposing
        `split(X, y)` is used as the splitter. Anything else is iterated
        directly as (train, test) index pairs.
    n_jobs, verbose, pre_dispatch
        Passed unchanged to `Parallel`.

    Returns
    -------
    scores : list
        One score per fold, in the order the folds were generated.
    """
    # Resolve `cv` into an iterable of (train, test) index pairs.
    # Non-integer values used to be passed to StratifiedKFold(n_splits=...),
    # which fails for splitter objects and explicit split iterables.
    if hasattr(cv, 'split'):
        folds = cv.split(X, y)
    elif isinstance(cv, (int, np.integer)):
        folds = StratifiedKFold(n_splits=cv, shuffle=True,
                                random_state=42).split(X, y)
    else:
        folds = cv

    # Set up parallel processing
    parallel = Parallel(n_jobs=n_jobs,
                        verbose=verbose,
                        pre_dispatch=pre_dispatch)

    # Fit and score each fold independently
    scores = parallel(
        delayed(fit_and_score)(model, X, y, sample_weights,
                               train, test, scoring)
        for train, test in folds)

    return scores


def _calc_score(selector, X, y, indices, groups=None, **fit_params):
if selector.cv:
scores = cross_val_score(selector.est_,
X[:, indices], y,
groups=groups,
cv=selector.cv,
scoring=selector.scorer,
n_jobs=1,
pre_dispatch=selector.pre_dispatch,
fit_params=fit_params)
if selector.sample_weights is not None:
scores = cross_val_scores_weighted(selector.est_,
X[:, indices], y,
scoring=selector.scorer,
sample_weights=selector.sample_weights,
cv=selector.cv,
n_jobs=1,
pre_dispatch=selector.pre_dispatch,
)
else:
scores = cross_val_score(selector.est_,
X[:, indices], y,
groups=groups,
cv=selector.cv,
scoring=selector.scorer,
n_jobs=1,
pre_dispatch=selector.pre_dispatch,
fit_params=fit_params)

else:
selector.est_.fit(X[:, indices], y, **fit_params)
scores = np.array([selector.scorer(selector.est_, X[:, indices], y)])
Expand Down Expand Up @@ -66,7 +110,6 @@ def _get_featurenames(subsets_dict, feature_idx, custom_feature_names, X):


class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):

"""Sequential Feature Selection for Classification and Regression.
Parameters
Expand Down Expand Up @@ -174,19 +217,22 @@ class SequentialFeatureSelector(_BaseXComposition, MetaEstimatorMixin):
http://rasbt.github.io/mlxtend/user_guide/feature_selection/SequentialFeatureSelector/
"""

def __init__(self, estimator, k_features=1,
forward=True, floating=False,
verbose=0, scoring=None,
cv=5, n_jobs=1,
pre_dispatch='2*n_jobs',
clone_estimator=True,
fixed_features=None):
fixed_features=None,
sample_weights=None):

self.estimator = estimator
self.k_features = k_features
self.forward = forward
self.floating = floating
self.pre_dispatch = pre_dispatch
self.sample_weights = sample_weights
# Raise a meaningful error message if a
# cross-validation generator is passed in
if isinstance(cv, types.GeneratorType):
Expand Down Expand Up @@ -335,8 +381,8 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
'the number of elements in custom_feature_names '
'must equal the number of columns in X.')

if not isinstance(self.k_features, int) and\
not isinstance(self.k_features, tuple)\
if not isinstance(self.k_features, int) and \
not isinstance(self.k_features, tuple) \
and not isinstance(self.k_features, str):
raise AttributeError('k_features must be a positive integer'
', tuple, or string')
Expand All @@ -345,7 +391,7 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
self.k_features < 1 or self.k_features > X_.shape[1])):
raise AttributeError('k_features must be a positive integer'
' between 1 and X.shape[1], got %s'
% (self.k_features, ))
% (self.k_features,))

if isinstance(self.k_features, tuple):
if len(self.k_features) != 2:
Expand All @@ -364,7 +410,7 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
raise AttributeError('The min k_features value must be smaller'
' than the max k_features value.')

if isinstance(self.k_features, tuple) or\
if isinstance(self.k_features, tuple) or \
isinstance(self.k_features, str):

select_in_range = True
Expand Down Expand Up @@ -475,8 +521,8 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
self._exclusion(
feature_set=k_idx,
fixed_feature=(
{new_feature} |
self.fixed_features_set_),
{new_feature} |
self.fixed_features_set_),
X=X_,
y=y,
groups=groups,
Expand Down Expand Up @@ -519,7 +565,6 @@ def fit(self, X, y, custom_feature_names=None, groups=None, **fit_params):
# floating can lead to multiple same-sized subsets
if k not in self.subsets_ or (k_score >
self.subsets_[k]['avg_score']):

k_idx = tuple(sorted(k_idx))
self.subsets_[k] = {
'feature_idx': k_idx,
Expand Down Expand Up @@ -637,7 +682,6 @@ def _exclusion(self, feature_set, X, y, fixed_feature=None,
fixed_feature.issubset(set(p)))

for p, cv_scores in work:

all_avg_scores.append(np.nanmean(cv_scores))
all_cv_scores.append(cv_scores)
all_subsets.append(p)
Expand Down
6 changes: 0 additions & 6 deletions requirements.txt

This file was deleted.

4 changes: 2 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
[bdist_wheel]
universal = 1
[aliases]
test=pytest
76 changes: 12 additions & 64 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,18 @@
# Sebastian Raschka 2014-2016
# mlxtend Machine Learning Library Extensions
# Author: Sebastian Raschka <sebastianraschka.com>
#
# License: BSD 3 clause

from os.path import realpath, dirname, join
from setuptools import setup, find_packages
import mlxtend

VERSION = mlxtend.__version__
PROJECT_ROOT = dirname(realpath(__file__))

REQUIREMENTS_FILE = join(PROJECT_ROOT, 'requirements.txt')

with open(REQUIREMENTS_FILE) as f:
install_reqs = f.read().splitlines()

install_reqs.append('setuptools')


setup(name='mlxtend',
version=VERSION,
version='0.0.1',
description='Machine Learning Library Extensions',
author='Sebastian Raschka',
author_email='[email protected]',
url='https://github.com/rasbt/mlxtend',
url='https://github.com/pelucid/mlxtend',
packages=find_packages(),
package_data={'': ['LICENSE-BSD3.txt',
'LICENSE-CC-BY.txt',
'README.md',
'requirements.txt']
},
include_package_data=True,
install_requires=install_reqs,
extras_require={'testing': ['pytest'],
'docs': ['mkdocs']},
license='BSD 3-Clause',
platforms='any',
classifiers=[
'License :: OSI Approved :: BSD License',
'Development Status :: 5 - Production/Stable',
'Operating System :: Microsoft :: Windows',
'Operating System :: POSIX',
'Operating System :: Unix',
'Operating System :: MacOS',
'Programming Language :: Python :: 3.7',
'Topic :: Scientific/Engineering',
'Topic :: Scientific/Engineering :: Artificial Intelligence',
'Topic :: Scientific/Engineering :: Information Analysis',
'Topic :: Scientific/Engineering :: Image Recognition',
install_requires=[
"scipy>=1.2.1",
"numpy>=1.16.2",
"pandas>=0.24.2",
"scikit-learn>=0.20.3",
"matplotlib>=3.0.0",
"joblib>=0.13.2"
],
long_description="""
A library of Python tools and extensions for data science.
Contact
=============
If you have any questions or comments about mlxtend,
please feel free to contact me via
eMail: [email protected]
or Twitter: https://twitter.com/rasbt
This project is hosted at https://github.com/rasbt/mlxtend
The documentation can be found at http://rasbt.github.io/mlxtend/
""")
setup_requires=["pytest-runner"],
tests_require=["pytest"]
)

0 comments on commit 62baea6

Please sign in to comment.