Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

2 #3

Open
wants to merge 14 commits into
base: main
Choose a base branch
from
1,029 changes: 984 additions & 45 deletions Semana 1/S1TC1_arboles_ensamblajes.ipynb

Large diffs are not rendered by default.

1,659 changes: 1,414 additions & 245 deletions Semana 2/S2TC1_RandomForests_Boosting.ipynb

Large diffs are not rendered by default.

40 changes: 40 additions & 0 deletions Semana 3/model_deploy/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from flask import Flask
from flask_restx import Api, Resource, fields
from model_deployment import predict, CategoricalEncoder, DataFrameSelector

# Flask application wrapped by a flask-restx Api object.
app = Flask(__name__)

api = Api(app, version='1.0', title='Prediction API', description='Prediction API')

# Namespace under which the prediction resource is routed.
ns = api.namespace('predict', description='Regressor')

# Request parser: every feature is a required query-string argument
# (location='args') whose help text equals its own name.
parser = api.parser()
for _arg_name, _arg_type in (
    ('Year', int),
    ('Mileage', int),
    ('State', str),
    ('Make', str),
    ('Model', str),
):
    parser.add_argument(
        _arg_name, type=_arg_type, required=True, help=_arg_name, location='args')
# Do not leave the loop variables behind as module attributes.
del _arg_name, _arg_type

# Response model used for marshalling: a single string field named 'result'.
resource_fields = api.model('Resource', {'result': fields.String})


@ns.route('/')
class ModelApi(Resource):
    """Resource registered at the root of the ``predict`` namespace."""

    @api.doc(parser=parser)
    @api.marshal_with(resource_fields)
    def get(self):
        # Parse the declared query-string arguments, hand them to predict()
        # and wrap its return value under the 'result' key. The response is
        # marshalled through resource_fields and sent with HTTP status 200.
        query = parser.parse_args()
        payload = {"result": predict(query)}
        return payload, 200


if __name__ == '__main__':
    import os

    # The server binds to every interface (0.0.0.0). Running it with the
    # Werkzeug debugger permanently enabled would expose an interactive
    # console to the network, so debug mode is opt-in through FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true')
    app.run(debug=debug_enabled, use_reloader=False, host='0.0.0.0', port=5000)
134 changes: 134 additions & 0 deletions Semana 3/model_deploy/model_deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
#!/usr/bin/python

import pandas as pd
import numpy as np
import joblib



# funciones util
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils import check_array
from sklearn.preprocessing import LabelEncoder
from scipy import sparse
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import FeatureUnion

class CategoricalEncoder(BaseEstimator, TransformerMixin):
    """Encode categorical features as one-hot or ordinal numeric arrays.

    Parameters
    ----------
    encoding : {'onehot', 'onehot-dense', 'ordinal'}, default='onehot'
        'onehot' yields a SciPy CSR matrix, 'onehot-dense' the same data as
        a dense array, and 'ordinal' one integer code per input feature.
    categories : 'auto' or list of array-likes, default='auto'
        With 'auto' the categories are learned per column during ``fit``;
        otherwise ``categories[i]`` lists the values allowed in column ``i``.
    dtype : number type, default=np.float64
        dtype of the transformed output.
    handle_unknown : {'error', 'ignore'}, default='error'
        Whether an unseen category raises ``ValueError`` or is left out of
        the one-hot output. 'ignore' is rejected for ordinal encoding.
    """

    def __init__(self, encoding='onehot', categories='auto', dtype=np.float64,
                 handle_unknown='error'):
        self.encoding = encoding
        self.categories = categories
        self.dtype = dtype
        self.handle_unknown = handle_unknown

    def fit(self, X, y=None):
        """Validate the parameters and learn the categories of each column.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : ignored

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``encoding`` or ``handle_unknown`` is not a supported value,
            if 'ordinal' is combined with ``handle_unknown='ignore'``, or if
            explicit categories were given, ``handle_unknown='error'`` and X
            contains a value outside them.
        """
        if self.encoding not in ['onehot', 'onehot-dense', 'ordinal']:
            template = ("encoding should be either 'onehot', 'onehot-dense' "
                        "or 'ordinal', got %s")
            # Report the offending `encoding` value itself.
            raise ValueError(template % self.encoding)

        if self.handle_unknown not in ['error', 'ignore']:
            template = ("handle_unknown should be either 'error' or "
                        "'ignore', got %s")
            raise ValueError(template % self.handle_unknown)

        if self.encoding == 'ordinal' and self.handle_unknown == 'ignore':
            raise ValueError("handle_unknown='ignore' is not supported for"
                             " encoding='ordinal'")

        X = check_array(X, dtype=object, accept_sparse='csc', copy=True)
        n_samples, n_features = X.shape

        # One LabelEncoder per input column.
        self._label_encoders_ = [LabelEncoder() for _ in range(n_features)]

        for i, le in enumerate(self._label_encoders_):
            Xi = X[:, i]
            if self.categories == 'auto':
                le.fit(Xi)
            else:
                valid_mask = np.isin(Xi, self.categories[i])
                if not np.all(valid_mask):
                    if self.handle_unknown == 'error':
                        diff = np.unique(Xi[~valid_mask])
                        msg = ("Found unknown categories {0} in column {1}"
                               " during fit".format(diff, i))
                        raise ValueError(msg)
                # Categories were supplied by the caller: install them
                # (sorted) directly instead of fitting on the data.
                le.classes_ = np.array(np.sort(self.categories[i]))

        self.categories_ = [le.classes_ for le in self._label_encoders_]

        return self

    def transform(self, X):
        """Encode X using the categories learned in ``fit``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        Array of integer codes cast to ``dtype`` for 'ordinal', a dense
        array for 'onehot-dense', otherwise a SciPy CSR matrix.

        Raises
        ------
        ValueError
            If X contains an unknown category and ``handle_unknown='error'``.
        """
        X = check_array(X, accept_sparse='csc', dtype=object, copy=True)
        n_samples, n_features = X.shape
        X_int = np.zeros_like(X, dtype=int)
        X_mask = np.ones_like(X, dtype=bool)

        for i in range(n_features):
            valid_mask = np.isin(X[:, i], self.categories_[i])

            if not np.all(valid_mask):
                if self.handle_unknown == 'error':
                    diff = np.unique(X[~valid_mask, i])
                    msg = ("Found unknown categories {0} in column {1}"
                           " during transform".format(diff, i))
                    raise ValueError(msg)
                else:
                    # Set the problematic entries to an acceptable value and
                    # continue. They are flagged in `X_mask` and dropped
                    # from the one-hot output below.
                    X_mask[:, i] = valid_mask
                    X[:, i][~valid_mask] = self.categories_[i][0]
            X_int[:, i] = self._label_encoders_[i].transform(X[:, i])

        if self.encoding == 'ordinal':
            return X_int.astype(self.dtype, copy=False)

        mask = X_mask.ravel()
        # Column offset of each feature's one-hot group in the output matrix.
        n_values = [cats.shape[0] for cats in self.categories_]
        n_values = np.array([0] + n_values)
        indices = np.cumsum(n_values)

        column_indices = (X_int + indices[:-1]).ravel()[mask]
        row_indices = np.repeat(np.arange(n_samples, dtype=np.int32),
                                n_features)[mask]
        data = np.ones(n_samples * n_features)[mask]

        out = sparse.csc_matrix((data, (row_indices, column_indices)),
                                shape=(n_samples, indices[-1]),
                                dtype=self.dtype).tocsr()
        if self.encoding == 'onehot-dense':
            return out.toarray()
        else:
            return out

class DataFrameSelector(BaseEstimator, TransformerMixin):
    """Transformer that returns ``X[attribute_names]`` for its input ``X``."""

    def __init__(self, attribute_names):
        # Key(s) used later to index the object passed to transform().
        self.attribute_names = attribute_names

    def fit(self, X, y=None):
        """Learn nothing; return the transformer itself."""
        return self

    def transform(self, X):
        """Index ``X`` with the stored attribute names and return the result."""
        selected = X[self.attribute_names]
        return selected


def predict(args):
    """Load the persisted model and predict for one set of features.

    Parameters
    ----------
    args : mapping
        Must provide the keys 'Year', 'Mileage', 'State', 'Make' and
        'Model'.

    Returns
    -------
    Whatever ``model.predict`` returns for the one-row input frame.
    """
    # The model file is resolved relative to the current working directory
    # and is reloaded on every call.
    # NOTE(review): presumably the pickle references CategoricalEncoder and
    # DataFrameSelector from this module - confirm before moving them.
    model = joblib.load('model_xgb.pkl')
    print(model)

    # Build the frame column by column so each value keeps its own dtype.
    # Packing the mixed values into a single np.array would coerce the
    # integer Year and Mileage to strings.
    feature_names = ('Year', 'Mileage', 'State', 'Make', 'Model')
    input_df = pd.DataFrame({name: [args[name]] for name in feature_names})

    # Make prediction
    prediction = model.predict(input_df)
    print(f'Predicted price: {prediction}')
    return prediction
Binary file added model_deployment/Price_Car_Grupo4.pkl
Binary file not shown.
62 changes: 43 additions & 19 deletions model_deployment/api.py
Original file line number Diff line number Diff line change
@@ -1,45 +1,69 @@
#!/usr/bin/python
from flask import Flask
from flask_restplus import Api, Resource, fields
from flask_restx import Api, Resource, fields
import joblib
from m09_model_deployment import predict_proba
from flask_cors import CORS
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OrdinalEncoder

app = Flask(__name__)
CORS(app) # Enable CORS for all routes and origins

api = Api(
app,
version='1.0',
title='Phishing Prediction API',
description='Phishing Prediction API')
app,
version='1.0',
title='Predicción del precio de carro usado',
description='Predicción del precio de carro usado')

ns = api.namespace('predict',
description='Phishing Classifier')
ns = api.namespace('predict',
description='Valor del precio del carro a predecir')

parser = api.parser()

parser.add_argument(
'URL',
type=str,
required=True,
help='URL to be analyzed',
location='args')
'Year', type=int, required=True, help='Year', location='args')
parser.add_argument(
'Mileage', type=int, required=True, help='Mileage', location='args')
parser.add_argument(
'State', type=str, required=True, help='State', location='args')
parser.add_argument(
'Make', type=str, required=True, help='Make', location='args')
parser.add_argument(
'Model', type=str, required=True, help='Model', location='args')

resource_fields = api.model('Resource', {
'result': fields.String,
'result': fields.Float,
})

def predict_price(url):
    """Predict with the pickled model from one list of feature values.

    Parameters
    ----------
    url : list
        Despite its name, the values for Year, Mileage, State, Make and
        Model, in that order.

    Returns
    -------
    The output of ``clf.predict`` for the resulting one-row frame.
    """
    numeric_cols = ['Year', 'Mileage']
    categorical_cols = ['State', 'Make', 'Model']

    # The model file is resolved relative to the current working directory.
    model = joblib.load('Price_Car_Grupo4.pkl')

    # A flat list becomes one column; transposing turns it into one row.
    row = pd.DataFrame(url).T
    row.columns = numeric_cols + categorical_cols
    row[numeric_cols] = row[numeric_cols].astype(float)

    # NOTE(review): this encoder is fitted on the single incoming row, not
    # on the training data, so its codes are unlikely to match the ones the
    # model was trained with - confirm, and consider persisting the
    # training-time encoder alongside the model.
    encoder = OrdinalEncoder()
    row[categorical_cols] = encoder.fit_transform(row[categorical_cols])

    return model.predict(row)

@ns.route('/')
class PhishingApi(Resource):
class CarPrice(Resource):

@api.doc(parser=parser)
@api.marshal_with(resource_fields)
def get(self):
args = parser.parse_args()

features = [args['Year'], args['Mileage'], args['State'], args['Make'], args['Model']]

return {
"result": predict_proba(args['URL'])
"result": predict_price(features)
}, 200


if __name__ == '__main__':
app.run(debug=True, use_reloader=False, host='0.0.0.0', port=8888)
app.run(debug=True, use_reloader=False, host='0.0.0.0', port=5000)
38 changes: 22 additions & 16 deletions model_deployment/m09_model_deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,34 @@
import joblib
import sys
import os
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
import joblib
import os
os.chdir('..')
from xgboost import XGBRegressor
from sklearn.preprocessing import OrdinalEncoder


# Carga de datos de archivo .csv
dataTraining = pd.read_csv('https://raw.githubusercontent.com/davidzarruk/MIAD_ML_NLP_2023/main/datasets/dataTrain_carListings.zip')
dataTesting = pd.read_csv('https://raw.githubusercontent.com/davidzarruk/MIAD_ML_NLP_2023/main/datasets/dataTest_carListings.zip', index_col=0)

def predict_proba(url):
dataTotal= pd.concat([dataTraining,dataTesting], axis=0)
enc = OrdinalEncoder()
dataTotal[['State','Make','Model']] = enc.fit_transform(dataTotal[['State','Make','Model']])

clf = joblib.load(os.path.dirname(__file__) + '/phishing_clf.pkl')
X=dataTotal.iloc[:400000,:].drop(['Price'], axis=1)
y=dataTraining['Price']

url_ = pd.DataFrame([url], columns=['url'])

# Create features
keywords = ['https', 'login', '.php', '.html', '@', 'sign']
for keyword in keywords:
url_['keyword_' + keyword] = url_.url.str.contains(keyword).astype(int)
XTest=dataTotal.iloc[400000:,:].drop(['Price'], axis=1)

url_['lenght'] = url_.url.str.len() - 2
domain = url_.url.str.split('/', expand=True).iloc[:, 2]
url_['lenght_domain'] = domain.str.len()
url_['isIP'] = (url_.url.str.replace('.', '') * 1).str.isnumeric().astype(int)
url_['count_com'] = url_.url.str.count('com')
clf = XGBRegressor(max_depth=10, n_estimators=100, gamma=0, learning_rate=0.2,random_state=1)
clf.fit(X, y)

# Make prediction
p1 = clf.predict_proba(url_.drop('url', axis=1))[0,1]

return p1
joblib.dump(clf, 'Price_Car_Grupo4.pkl', compress=3)


if __name__ == "__main__":
Expand Down
Binary file modified model_deployment/phishing_clf.pkl
Binary file not shown.
Binary file added modelo2/Price_Car_Grupo4.pkl
Binary file not shown.
69 changes: 69 additions & 0 deletions modelo2/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
#!/usr/bin/python
from flask import Flask
from flask_restx import Api, Resource, fields
import joblib
from flask_cors import CORS
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OrdinalEncoder

# Flask application with CORS enabled for all routes and origins.
app = Flask(__name__)
CORS(app)

api = Api(
    app,
    version='1.0',
    title='Predicción del precio de carro usado',
    description='Predicción del precio de carro usado',
)

# Namespace under which the prediction resource is routed.
ns = api.namespace('predict', description='Valor del precio del carro a predecir')

# Request parser: every feature is a required query-string argument
# (location='args') whose help text equals its own name.
parser = api.parser()
for _arg_name, _arg_type in (
    ('Year', int),
    ('Mileage', int),
    ('State', str),
    ('Make', str),
    ('Model', str),
):
    parser.add_argument(
        _arg_name, type=_arg_type, required=True, help=_arg_name, location='args')
# Do not leave the loop variables behind as module attributes.
del _arg_name, _arg_type

# Response model used for marshalling: a single float field named 'result'.
resource_fields = api.model('Resource', {'result': fields.Float})

def predict_price(url):
    """Predict with the pickled model from one list of feature values.

    Parameters
    ----------
    url : list
        Despite its name, the values for Year, Mileage, State, Make and
        Model, in that order.

    Returns
    -------
    The output of ``clf.predict`` for the resulting one-row frame.
    """
    numeric_cols = ['Year', 'Mileage']
    categorical_cols = ['State', 'Make', 'Model']

    # The model file is resolved relative to the current working directory.
    model = joblib.load('Price_Car_Grupo4.pkl')

    # A flat list becomes one column; transposing turns it into one row.
    row = pd.DataFrame(url).T
    row.columns = numeric_cols + categorical_cols
    row[numeric_cols] = row[numeric_cols].astype(float)

    # NOTE(review): this encoder is fitted on the single incoming row, not
    # on the training data, so its codes are unlikely to match the ones the
    # model was trained with - confirm, and consider persisting the
    # training-time encoder alongside the model.
    encoder = OrdinalEncoder()
    row[categorical_cols] = encoder.fit_transform(row[categorical_cols])

    return model.predict(row)

@ns.route('/')
class CarPrice(Resource):
    """Resource registered at the root of the ``predict`` namespace."""

    @api.doc(parser=parser)
    @api.marshal_with(resource_fields)
    def get(self):
        # Collect the parsed query-string values in the order
        # predict_price() expects, then wrap its return value under the
        # 'result' key. The response is marshalled through resource_fields
        # and sent with HTTP status 200.
        args = parser.parse_args()
        feature_order = ('Year', 'Mileage', 'State', 'Make', 'Model')
        features = [args[name] for name in feature_order]
        payload = {"result": predict_price(features)}
        return payload, 200


if __name__ == '__main__':
    import os

    # The server binds to every interface (0.0.0.0). Running it with the
    # Werkzeug debugger permanently enabled would expose an interactive
    # console to the network, so debug mode is opt-in through FLASK_DEBUG.
    debug_enabled = os.environ.get('FLASK_DEBUG', '').strip().lower() in ('1', 'true')
    app.run(debug=debug_enabled, use_reloader=False, host='0.0.0.0', port=5000)
Loading