Skip to content

Commit

Permalink
Create data_cleaner.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KOSASIH authored Aug 7, 2024
1 parent f173b45 commit 519f0f8
Showing 1 changed file with 29 additions and 0 deletions.
29 changes: 29 additions & 0 deletions projects/DAPIO/data-processing/preprocessing/data_cleaner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

class DataCleaner:
    """Impute, de-outlier and standardise a pandas DataFrame.

    The cleaner keeps a reference to the frame it was given in ``self.data``
    and every step both updates ``self.data`` and returns it, so steps can be
    run individually or through :meth:`preprocess_data`.
    """

    # Columns standardised by ``scale_data`` when the caller names none.
    DEFAULT_SCALE_COLUMNS = ("feature1", "feature2", "feature3")

    def __init__(self, data: pd.DataFrame):
        """Store *data* as the frame to be cleaned (no copy is taken)."""
        self.data = data

    def handle_missing_values(self) -> pd.DataFrame:
        """Fill NaNs in numeric columns with that column's mean.

        Non-numeric columns are left untouched; a mean cannot be computed for
        them and asking for one raises ``TypeError`` on recent pandas.

        Returns:
            The cleaned frame (the same object as ``self.data``).
        """
        column_means = self.data.mean(numeric_only=True)
        # In-place so the frame handed to __init__ is updated, as before.
        self.data.fillna(column_means, inplace=True)
        return self.data

    def remove_outliers(self, iqr_multiplier: float = 1.5) -> pd.DataFrame:
        """Drop rows that contain an IQR outlier in any numeric column.

        A value is an outlier when it lies below ``Q1 - k * IQR`` or above
        ``Q3 + k * IQR`` for its column, with ``k = iqr_multiplier``.

        Args:
            iqr_multiplier: Width of the accepted band in IQR units.

        Returns:
            A new frame without the offending rows (also stored on
            ``self.data``).
        """
        numeric = self.data.select_dtypes(include="number")
        if numeric.empty:
            # No numeric columns or no rows: nothing can be an outlier.
            return self.data

        q1 = numeric.quantile(0.25)
        q3 = numeric.quantile(0.75)
        iqr = q3 - q1
        lower = q1 - iqr_multiplier * iqr
        upper = q3 + iqr_multiplier * iqr

        # Reduce the cell-wise mask to one flag per row. Indexing with the
        # 2-D mask would only blank individual cells to NaN, not remove rows.
        has_outlier = ((numeric < lower) | (numeric > upper)).any(axis=1)

        # Copy so later column assignment does not write into a slice.
        self.data = self.data.loc[~has_outlier].copy()
        return self.data

    def scale_data(self, columns: "list[str] | None" = None) -> pd.DataFrame:
        """Standardise the selected columns with ``StandardScaler``.

        Args:
            columns: Column names to scale. Defaults to
                ``DEFAULT_SCALE_COLUMNS``.

        Returns:
            The frame with the selected columns replaced by scaled values.

        Raises:
            KeyError: If any requested column is absent from the data.
        """
        selected = list(
            self.DEFAULT_SCALE_COLUMNS if columns is None else columns
        )
        missing = [name for name in selected if name not in self.data.columns]
        if missing:
            raise KeyError(f"Columns not found in data: {missing}")
        if self.data.empty or not selected:
            # The scaler cannot be fitted on zero samples or zero features.
            return self.data

        scaler = StandardScaler()
        self.data[selected] = scaler.fit_transform(self.data[selected])
        return self.data

    def preprocess_data(
        self, columns: "list[str] | None" = None
    ) -> pd.DataFrame:
        """Run imputation, outlier removal and scaling, in that order.

        Args:
            columns: Forwarded to :meth:`scale_data`.

        Returns:
            The fully preprocessed frame.
        """
        self.handle_missing_values()
        self.remove_outliers()
        return self.scale_data(columns)

0 comments on commit 519f0f8

Please sign in to comment.