Skip to content

Commit

Permalink
Merge pull request dssg#1 from dssg/triage_transfer
Browse files Browse the repository at this point in the history
Import code from triage
  • Loading branch information
thcrock authored Jul 17, 2017
2 parents 765faa3 + efd6e61 commit cacc2df
Show file tree
Hide file tree
Showing 23 changed files with 3,671 additions and 0 deletions.
24 changes: 24 additions & 0 deletions catwalk/db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import yaml
from sqlalchemy import create_engine
from sqlalchemy.engine.url import URL
from sqlalchemy.pool import QueuePool

from results_schema import *


def ensure_db(engine):
    """Create all tables declared on the results_schema ``Base`` metadata.

    Idempotent: ``create_all`` only creates tables that do not already
    exist in the target database.

    Args:
        engine: a SQLAlchemy engine connected to the target database
    """
    Base.metadata.create_all(engine)


def connect(poolclass=QueuePool):
    """Create a SQLAlchemy engine from credentials in ``./database.yaml``.

    The YAML file (resolved relative to the current working directory)
    must contain the keys ``host``, ``user``, ``db``, ``pass`` and
    ``port``.

    Args:
        poolclass: SQLAlchemy connection-pool class (default QueuePool)

    Returns:
        A SQLAlchemy Engine bound to the configured PostgreSQL database.

    Raises:
        IOError: if database.yaml cannot be opened
        KeyError: if a required key is missing from the profile
    """
    with open('database.yaml') as f:
        # safe_load only constructs plain YAML types; plain yaml.load can
        # instantiate arbitrary Python objects from tagged config input.
        profile = yaml.safe_load(f)
    dbconfig = {
        'host': profile['host'],
        'username': profile['user'],
        'database': profile['db'],
        'password': profile['pass'],
        'port': profile['port'],
    }
    # 'postgresql' is the canonical dialect name; 'postgres' is a
    # deprecated alias that newer SQLAlchemy releases reject.
    dburl = URL('postgresql', **dbconfig)
    return create_engine(dburl, poolclass=poolclass)
Empty file added catwalk/estimators/__init__.py
Empty file.
78 changes: 78 additions & 0 deletions catwalk/estimators/classifiers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# coding: utf-8

from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import LogisticRegression

from catwalk.estimators.transformers import CutOff

class ScaledLogisticRegression(BaseEstimator, ClassifierMixin):
    """
    A drop-in replacement for scikit-learn's LogisticRegression that
    scales features to [0, 1] with MinMaxScaler and then applies CutOff
    before fitting the logistic regression, via an internal Pipeline.

    The constructor accepts the same hyperparameters as
    sklearn.linear_model.LogisticRegression and forwards them unchanged.

    NOTE(review): sklearn convention is to only store constructor args in
    __init__ and build sub-estimators inside fit; instantiating the
    Pipeline here may interact oddly with clone()/get_params() — confirm
    against the sklearn estimator-development guidelines.
    """
    def __init__(self, penalty='l2', dual=False, tol=1e-4, C=1.0,
                 fit_intercept=True, intercept_scaling=1, class_weight=None,
                 random_state=None, solver='liblinear', max_iter=100,
                 multi_class='ovr', verbose=0, warm_start=False, n_jobs=1):

        # Store every hyperparameter under its own name (required by the
        # sklearn BaseEstimator get_params/set_params protocol).
        self.penalty = penalty
        self.dual = dual
        self.tol = tol
        self.C = C
        self.fit_intercept = fit_intercept
        self.intercept_scaling = intercept_scaling
        self.class_weight = class_weight
        self.random_state = random_state
        self.solver = solver
        self.max_iter = max_iter
        self.multi_class = multi_class
        self.verbose = verbose
        self.warm_start = warm_start
        self.n_jobs = n_jobs

        # Sub-estimators: scale to [0, 1], clip out-of-range values
        # (CutOff is the project-local transformer imported above), then
        # fit the logistic regression on the prepared features.
        self.minmax_scaler = MinMaxScaler()
        self.dsapp_cutoff = CutOff()
        self.lr = LogisticRegression(penalty=penalty, dual=dual, tol=tol, C=C,
                                     fit_intercept=fit_intercept, intercept_scaling=intercept_scaling, class_weight=class_weight,
                                     random_state=random_state, solver=solver, max_iter=max_iter,
                                     multi_class=multi_class, verbose=verbose, warm_start=warm_start, n_jobs=n_jobs)

        self.pipeline =Pipeline([
            ('minmax_scaler', self.minmax_scaler),
            ('dsapp_cutoff', self.dsapp_cutoff),
            ('lr', self.lr)
        ])


    def fit(self, X, y = None):
        """Fit the scaler/cutoff/regression pipeline on X, y.

        After fitting, the fitted attributes of the internal scaler and
        regression are copied onto self — presumably so this object
        exposes the same trailing-underscore attributes as a plain
        LogisticRegression (verify against callers).

        Returns:
            self
        """
        self.pipeline.fit(X, y)

        # Fitted MinMaxScaler attributes.
        self.min_ = self.pipeline.named_steps['minmax_scaler'].min_
        self.scale_ = self.pipeline.named_steps['minmax_scaler'].scale_
        self.data_min_ = self.pipeline.named_steps['minmax_scaler'].data_min_
        self.data_max_ = self.pipeline.named_steps['minmax_scaler'].data_max_
        self.data_range_ = self.pipeline.named_steps['minmax_scaler'].data_range_

        # Fitted LogisticRegression attributes.
        self.coef_ = self.pipeline.named_steps['lr'].coef_
        self.intercept_ = self.pipeline.named_steps['lr'].intercept_

        self.classes_ = self.pipeline.named_steps['lr'].classes_

        return self

    def predict_proba(self, X):
        """Return class-probability estimates from the fitted pipeline."""
        return self.pipeline.predict_proba(X)

    def predict_log_proba(self, X):
        """Return log of class-probability estimates from the fitted pipeline."""
        return self.pipeline.predict_log_proba(X)

    def predict(self, X):
        """Return predicted class labels from the fitted pipeline."""
        return self.pipeline.predict(X)

    def score(self, X, y):
        """Return the pipeline's mean accuracy on X against labels y."""
        return self.pipeline.score(X,y)
Loading

0 comments on commit cacc2df

Please sign in to comment.