Skip to content

Commit

Permalink
Create data_transformer.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KOSASIH authored Aug 7, 2024
1 parent 519f0f8 commit 8294671
Showing 1 changed file with 33 additions and 0 deletions.
33 changes: 33 additions & 0 deletions projects/DAPIO/data-processing/preprocessing/data_transformer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

class DataTransformer:
    """Impute, scale and one-hot encode the columns of a DataFrame.

    Columns are routed by dtype: ``int64``/``float64`` columns go through a
    median imputer followed by standard scaling, ``object`` columns go through
    a constant-fill imputer followed by one-hot encoding.  Columns of any
    other dtype are not routed to either branch; with ``ColumnTransformer``'s
    default ``remainder`` policy they do not appear in the output.
    """

    def __init__(self, data: pd.DataFrame):
        """Store the frame to preprocess.

        Args:
            data: Input table.  It is kept by reference, not copied.
        """
        self.data = data

    def create_pipeline(self) -> "ColumnTransformer":
        """Build an unfitted preprocessor matched to the dtypes of ``self.data``.

        Returns:
            A ``ColumnTransformer`` with a ``'num'`` branch (median imputation,
            then ``StandardScaler``) and a ``'cat'`` branch (fill with the
            string ``'missing'``, then ``OneHotEncoder`` that ignores
            categories unseen during fitting).
        """
        numeric_features = self.data.select_dtypes(
            include=['int64', 'float64']).columns
        categorical_features = self.data.select_dtypes(
            include=['object']).columns

        numeric_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ])

        categorical_transformer = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
            ('onehot', OneHotEncoder(handle_unknown='ignore')),
        ])

        return ColumnTransformer(transformers=[
            ('num', numeric_transformer, numeric_features),
            ('cat', categorical_transformer, categorical_features),
        ])

    @staticmethod
    def _to_frame(values, columns, index) -> pd.DataFrame:
        """Wrap a dense array or SciPy sparse matrix in a labelled DataFrame."""
        if hasattr(values, "tocsr"):
            # Sparse encoder output: keep it sparse instead of densifying a
            # potentially very wide one-hot matrix.
            return pd.DataFrame.sparse.from_spmatrix(
                values, index=index, columns=columns)
        return pd.DataFrame(values, index=index, columns=columns)

    def transform_data(self) -> pd.DataFrame:
        """Fit the preprocessor on ``self.data`` and return the transformed frame.

        ``fit_transform`` yields a bare array (or sparse matrix); it is wrapped
        back into a DataFrame carrying the fitted output feature names and the
        original row index, so the declared return type holds and ``self.data``
        remains a DataFrame after the call.

        Note: relies on ``get_feature_names_out`` being available on every
        step of both branches (scikit-learn 1.1 or newer).

        Returns:
            The transformed data, which also replaces ``self.data``.
        """
        preprocessor = self.create_pipeline()
        transformed = preprocessor.fit_transform(self.data)
        self.data = self._to_frame(
            transformed,
            preprocessor.get_feature_names_out(),
            self.data.index,
        )
        return self.data

0 comments on commit 8294671

Please sign in to comment.