Skip to content

Commit

Permalink
MLOps
Browse files Browse the repository at this point in the history
  • Loading branch information
Santiloza89 authored Dec 8, 2023
1 parent 20bf468 commit 3a266c7
Show file tree
Hide file tree
Showing 2 changed files with 302 additions and 0 deletions.
123 changes: 123 additions & 0 deletions app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
import streamlit as st
import joblib
import pandas as pd
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import QuantileTransformer, StandardScaler
from sklearn.cluster import KMeans
from datetime import date, datetime
from funciones import season,divide_estaciones,rellenar,eliminar,rellenar_con_cols,eliminar_viento,dummies_elim,rellenar_ultimos,estandarizacion,preprocesamiento, agrego_columnas
from clases import NeuralNetworkClass, NeuralNetworkReg, MiniBatchGradientDescentRegressor
import pandas as pd
import numpy as np
from tensorflow_addons.metrics import RSquare


# Load the fitted pipelines every time the script runs. Both paths are
# relative, so they are resolved against the current working directory.
# `pipe` is used below for the yes/no rain prediction and `pipe_reg` for the
# rainfall amount.
# NOTE: joblib.load unpickles arbitrary Python objects, so only trusted model
# files should ever be placed at these paths.
# NOTE(review): the `funciones` / `clases` imports above are presumably what
# lets the pickled pipelines resolve their custom steps -- confirm before
# removing any of them.
pipe = joblib.load('regresion_logistica (2).pkl')
pipe_reg = joblib.load('gradiente-mini-batch (2).pkl')
# The neural-network pipelines are currently disabled.
#pipe_red_reg = joblib.load('red_neuronal_regresion1.pkl')
#pipe_red_clas = joblib.load('red_neuronal_clasificacion.pkl')

def get_user_input():
    """Render the submit form and report the state of its button.

    The form (key ``my_form``) contains only the 'Submit' button.

    Returns:
        Whatever ``st.form_submit_button`` returns for the 'Submit' button.
    """
    form = st.form(key='my_form')
    with form:
        pressed = st.form_submit_button(label='Submit')
    return pressed

st.title('TP Aprendizaje Automatico')

# CSV upload is currently disabled.
#st.title('Cargar CSV')

# Widget to upload a CSV file
#file = st.file_uploader('Cargar archivo CSV', type=['csv'])

#if file is not None:
#    # Read the CSV file and show its contents
#    df = pd.read_csv(file)
#    st.write('Datos del archivo CSV:')
#    st.write(df)

# One widget per raw feature. For the sliders the positional arguments are
# (label, min_value, max_value, default value).
selected_date = st.date_input('Date', date.today())
# Fix: the default used to be the misspelled 'Sidney', which is not one of the
# locations listed in the label.
location = st.text_input('Location (Sydney, SydneyAirport, Canberra, Melbourne y MelbourneAirport)', 'Sydney')
a1 = st.slider('MinTemp', -20.0, 40.0, 5.0)
a2 = st.slider('MaxTemp', -10.0, 60.0, 5.0)
a3 = st.slider('Rainfall', 0.0, 500.0, 5.0)
a4 = st.slider('Evaporation', 0.0, 30.0, 5.0)
a5 = st.slider('Sunshine', 0.0, 14.0, 5.0)
# Named `wind_gust_dir` rather than `dir` so the builtin is not shadowed.
wind_gust_dir = st.text_input('WindGustDir', 'N')
a6 = st.slider('WindGustSpeed', 0.0, 130.0, 5.0)
wind_dir_9am = st.text_input('WindDir9am', 'N')
wind_dir_3pm = st.text_input('WindDir3pm', 'N')
a7 = st.slider('WindSpeed9am', 0.0, 130.0, 5.0)
a8 = st.slider('WindSpeed3pm', 0.0, 130.0, 5.0)
a9 = st.slider('Humidity9am', 0.0, 100.0, 5.0)
a10 = st.slider('Humidity3pm', 0.0, 100.0, 5.0)
# Fix: the pressure defaults used to be 5.0, which lies outside the declared
# 500-1500 range of these sliders; 1013.0 is an in-range default.
a11 = st.slider('Pressure9am', 500.0, 1500.0, 1013.0)
a12 = st.slider('Pressure3pm', 500.0, 1500.0, 1013.0)
a13 = st.slider('Cloud9am', 0.0, 10.0, 5.0)
a14 = st.slider('Cloud3pm', 0.0, 10.0, 5.0)
a15 = st.slider('Temp9am', -20.0, 40.0, 5.0)
a16 = st.slider('Temp3pm', -10.0, 60.0, 5.0)
rain = st.text_input('RainToday (Yes, No)', 'No')


submit_button = get_user_input()
# When the 'Submit' button is pressed, perform the prediction
if submit_button:

    # Build the single-row record, keyed by the raw column names
    data_para_predecir = {
        'Date': selected_date.strftime('%Y-%m-%d'),
        'Location': location,
        'MinTemp': a1,
        'MaxTemp': a2,
        'Rainfall': a3,
        'Evaporation': a4,
        'Sunshine': a5,
        'WindGustDir': wind_gust_dir,
        'WindGustSpeed': a6,
        'WindDir9am': wind_dir_9am,
        'WindDir3pm': wind_dir_3pm,
        'WindSpeed9am': a7,
        'WindSpeed3pm': a8,
        'Humidity9am': a9,
        'Humidity3pm': a10,
        'Pressure9am': a11,
        'Pressure3pm': a12,
        'Cloud9am': a13,
        'Cloud3pm': a14,
        'Temp9am': a15,
        'Temp3pm': a16,
        'RainToday': rain,
    }

    data = pd.DataFrame([data_para_predecir])

    # Each pipeline receives its own copy of the frame. The preprocessing
    # helpers in funciones.py modify their input in place, so if the pickled
    # pipelines use them, sharing one frame would let the first predict()
    # change what the second one sees.

    # Rain classification
    prediction = pipe.predict(data.copy())
    prediction_clas = prediction[0]
    # Display the prediction
    st.header("Rain Tomorrow?")
    if prediction_clas == 1:
        st.write('Yes')
    else:
        st.write('No')

    # Rainfall regression
    prediction = pipe_reg.predict(data.copy())
    prediction_reg = prediction[0]
    # Display the prediction
    st.header("Rainfall Tomorrow?")
    st.write(prediction_reg)

    # The neural-network predictions are currently disabled.
    #prediction = pipe_red_reg.predict(data)
    #prediction_red_reg = prediction[0]
    # Display the prediction
    #st.header("Regresion de una red neuronal")
    #st.write(prediction_red_reg)

    #prediction = pipe_red_clas.predict(pd.DataFrame([data_para_predecir]))
    #prediction_red_clas = prediction[0]
    # Display the prediction
    #st.header("Clasificacion de una red neuronal")
    #st.write(prediction_red_clas)

179 changes: 179 additions & 0 deletions funciones.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
# funciones
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import FunctionTransformer

def season(date):
    """Return the season name for a date string in ``%Y-%m-%d`` format.

    December-February map to 'Verano', March-May to 'Otoño', June-August to
    'Invierno' and September-November to 'Primavera'.

    Args:
        date: Date string such as ``'2023-12-08'``.

    Returns:
        One of 'Verano', 'Otoño', 'Invierno' or 'Primavera'.

    Raises:
        ValueError: If ``date`` does not match the ``%Y-%m-%d`` format.
    """
    mes = datetime.strptime(date, '%Y-%m-%d').month
    # Seasons are three-month groups starting in December, so rotating
    # December to position 0 and dividing by 3 yields the group index 0..3.
    nombres = ('Verano', 'Otoño', 'Invierno', 'Primavera')
    return nombres[(mes % 12) // 3]

def divide_estaciones(df):
    """Add a 'season' column derived from the 'Date' column.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame with a 'Date' column of ``%Y-%m-%d`` strings.

    Returns:
        The same DataFrame, now carrying a 'season' column.
    """
    # Map every date string to its season name via season()
    estaciones = df['Date'].apply(season)
    df['season'] = estaciones
    return df

def rellenar(df):
    """Fill missing daily readings from the closest related column.

    Only missing values are replaced; readings that are present are kept.
    The frame is modified in place and also returned.

    Fill rules (target <- source):
        MaxTemp       <- Temp3pm
        MinTemp       <- Temp9am
        Pressure3pm   <- Pressure9am
        Cloud3pm      <- Cloud9am
        WindGustSpeed <- WindSpeed9am, then WindSpeed3pm

    Args:
        df: DataFrame containing all of the columns listed above.

    Returns:
        The same DataFrame with the gaps filled where a source value exists.
    """
    # Fill missing MaxTemp values with Temp3pm
    df['MaxTemp'] = df['MaxTemp'].fillna(df['Temp3pm'])

    # Fill missing MinTemp values with Temp9am
    df['MinTemp'] = df['MinTemp'].fillna(df['Temp9am'])

    # Fill missing Pressure3pm values with Pressure9am
    df['Pressure3pm'] = df['Pressure3pm'].fillna(df['Pressure9am'])

    # Fill missing Cloud3pm values with Cloud9am.
    # Fix: this used to start from df['Cloud9am'], which replaced every
    # Cloud3pm reading with the 9am one instead of only filling the gaps.
    # NOTE: models fitted on data produced by the old behaviour saw Cloud9am
    # in this column and should be refitted.
    df['Cloud3pm'] = df['Cloud3pm'].fillna(df['Cloud9am'])

    # Fill missing WindGustSpeed values with WindSpeed9am, then WindSpeed3pm
    df['WindGustSpeed'] = df['WindGustSpeed'].fillna(df['WindSpeed9am'])
    df['WindGustSpeed'] = df['WindGustSpeed'].fillna(df['WindSpeed3pm'])

    return df

def eliminar(df):
    """Return a copy of ``df`` without the 9am/3pm helper columns.

    Args:
        df: DataFrame containing every column listed in the body.

    Returns:
        A new DataFrame without those columns; ``df`` itself is not changed.

    Raises:
        KeyError: If any of the columns is missing from ``df``.
    """
    sobrantes = [
        'Temp9am', 'Temp3pm',
        'Pressure9am',
        'WindSpeed9am', 'WindSpeed3pm',
        'Cloud9am',
        'Humidity9am',
    ]
    return df.drop(columns=sobrantes)

def rellenar_con_cols(df):
    """Fill gaps in correlated column pairs using the partner column.

    The fills run in order, so each one sees the result of the previous one.
    The frame is modified in place and also returned.

    Args:
        df: DataFrame with 'Sunshine', 'Cloud3pm', 'Evaporation' and
            'MaxTemp' columns.

    Returns:
        The same DataFrame with the gaps filled where the partner column has
        a value.
    """
    # Each result is assigned back to the column. The previous
    # ``df[col].fillna(..., inplace=True)`` form works on an intermediate
    # Series: it warns in pandas 2.x and leaves ``df`` unchanged when
    # Copy-on-Write is enabled.

    # Sunshine and Cloud3pm are negatively correlated, so each one is filled
    # with the negative of the other where possible.
    df['Sunshine'] = df['Sunshine'].fillna(-df['Cloud3pm'])
    df['Cloud3pm'] = df['Cloud3pm'].fillna(-df['Sunshine'])

    # Evaporation and MaxTemp are positively correlated, so each one is
    # filled directly with the other.
    df['Evaporation'] = df['Evaporation'].fillna(df['MaxTemp'])
    df['MaxTemp'] = df['MaxTemp'].fillna(df['Evaporation'])

    return df

def eliminar_viento(df):
    """Return a copy of ``df`` without 'WindDir9am' and 'WindDir3pm'.

    Args:
        df: DataFrame containing both wind-direction columns.

    Returns:
        A new DataFrame without those two columns.

    Raises:
        KeyError: If either column is missing from ``df``.
    """
    return df.drop(columns=['WindDir9am', 'WindDir3pm'])

def dummies_elim(df):
    """One-hot encode the categorical features and drop 'Date'/'Location'.

    Args:
        df: DataFrame with 'season', 'WindGustDir', 'RainToday', 'Date' and
            'Location' columns.

    Returns:
        A new DataFrame holding the remaining columns followed by the
        ``season_*``, ``WindGustDir_*`` and ``RainToday_*`` indicator columns.
    """
    # Date and Location are not used as features
    sin_identificadores = df.drop(columns=['Date', 'Location'])

    # Encoding the three columns in one call appends their indicator columns
    # in the listed order, after the untouched columns.
    categoricas = ['season', 'WindGustDir', 'RainToday']
    return pd.get_dummies(sin_identificadores, columns=categoricas)

def rellenar_ultimos(df):
    """Impute whatever missing values remain after the earlier fill steps.

    Numeric columns are filled with their column mean, and 'WindGustDir' is
    filled with the constant 'Sin dato'. Both imputers are fitted on the
    frame passed in. The frame is modified in place and also returned.

    Args:
        df: DataFrame containing the numeric columns listed in the body and
            'WindGustDir'.

    Returns:
        The same DataFrame with the imputed values written back.
    """
    numericas = ['Sunshine', 'WindGustSpeed', 'Humidity3pm', 'Pressure3pm','Rainfall', 'MinTemp', 'Cloud3pm', 'MaxTemp', 'Evaporation']
    categoricas = ['WindGustDir']

    # Numeric gaps -> column mean
    imputador_media = SimpleImputer(strategy='mean')
    df[numericas] = imputador_media.fit_transform(df[numericas])

    # Categorical gaps -> the literal 'Sin dato'
    imputador_constante = SimpleImputer(strategy='constant', fill_value='Sin dato')
    df[categoricas] = imputador_constante.fit_transform(df[categoricas])

    return df

def agrego_columnas(df):
    """Align ``df`` to the fixed feature layout used by this module.

    Any indicator column that is absent is added to ``df`` in place with the
    value 0. 'RainToday_No' is then dropped and the result is restricted to
    the expected columns in a fixed order.

    Args:
        df: DataFrame holding the nine numeric feature columns plus any
            subset of the indicator columns.

    Returns:
        A new DataFrame with exactly the expected feature columns, in order.

    Raises:
        KeyError: If one of the numeric feature columns is missing.
    """
    numericas = ['MinTemp', 'MaxTemp', 'Rainfall', 'Evaporation', 'Sunshine',
                 'WindGustSpeed', 'Humidity3pm', 'Pressure3pm', 'Cloud3pm']
    indicadoras = ['season_Invierno', 'season_Otoño', 'season_Primavera', 'season_Verano',
                   'WindGustDir_E', 'WindGustDir_ENE', 'WindGustDir_ESE', 'WindGustDir_N',
                   'WindGustDir_NE', 'WindGustDir_NNE', 'WindGustDir_NNW',
                   'WindGustDir_NW', 'WindGustDir_S', 'WindGustDir_SE', 'WindGustDir_SSE',
                   'WindGustDir_SSW', 'WindGustDir_SW', 'WindGustDir_Sin dato',
                   'WindGustDir_W', 'WindGustDir_WNW', 'WindGustDir_WSW', 'RainToday_Yes']

    # Add every indicator column that is absent, filled with 0.
    # 'RainToday_No' is included so the drop below always succeeds.
    for nombre in indicadoras + ['RainToday_No']:
        if nombre not in df.columns:
            df[nombre] = 0

    sin_rain_no = df.drop(columns=['RainToday_No'])

    # Keep only the expected columns, numeric features first
    return sin_rain_no[numericas + indicadoras]



def estandarizacion(df):
    """Return a copy of ``df`` with its numeric feature columns standardized.

    A new ``StandardScaler`` is fitted on the frame passed in each time the
    function is called.

    Args:
        df: DataFrame containing the numeric columns listed in the body.

    Returns:
        A copy of ``df`` whose numeric columns hold the scaled values; the
        input frame is left untouched.
    """
    columnas = ['MinTemp', 'MaxTemp', 'Rainfall', 'Humidity3pm','Pressure3pm','Sunshine','WindGustSpeed','Cloud3pm','Evaporation']

    # Work on a copy so the caller keeps the unscaled values
    resultado = df.copy()
    resultado[columnas] = StandardScaler().fit_transform(df[columnas])
    return resultado


def preprocesamiento(df):
    """Run the full preprocessing chain over a raw weather DataFrame.

    The steps run in a fixed order, each one receiving the previous result.

    Args:
        df: Raw DataFrame with the original weather columns.

    Returns:
        The DataFrame produced by the last step, ``agrego_columnas``.
    """
    pasos = (
        divide_estaciones,   # derive the 'season' column from 'Date'
        rellenar,            # fill gaps from related 9am/3pm columns
        eliminar,            # drop the helper columns used for filling
        rellenar_con_cols,   # fill gaps from correlated columns
        eliminar_viento,     # drop WindDir9am / WindDir3pm
        rellenar_ultimos,    # impute whatever is still missing
        dummies_elim,        # one-hot encode, drop Date and Location
        agrego_columnas,     # add absent indicator columns, fix the order
    )
    for paso in pasos:
        df = paso(df)
    return df

0 comments on commit 3a266c7

Please sign in to comment.