Skip to content

Commit

Permalink
Create data_analysis.py
Browse files Browse the repository at this point in the history
  • Loading branch information
KOSASIH authored May 10, 2024
1 parent 607fe54 commit 6972938
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions data_analytics/data_analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

class DataAnalysis:
    """Clean a transaction table, cluster its rows and summarise the result.

    The wrapped frame is stored on ``self.data``. ``analyze_data`` replaces
    that attribute with the cleaned frame, which additionally carries a
    ``cluster`` column.
    """

    # Columns handed to KMeans as features.
    _FEATURE_COLUMNS = ('amount', 'frequency')
    # Column that is one-hot encoded during preprocessing.
    _CATEGORY_COLUMN = 'transaction_type'

    def __init__(self, data):
        """Store *data* (a pandas DataFrame) for later analysis."""
        self.data = data

    def analyze_data(self, n_clusters=3):
        """Preprocess the data, assign clusters and return summary statistics.

        Steps:
          1. Drop every row that contains a missing value.
          2. One-hot encode ``transaction_type`` when that column is present.
          3. Fit KMeans (``random_state=0``) on ``amount`` and ``frequency``
             and store the labels in a ``cluster`` column.
          4. Build ``describe()`` statistics for the processed frame.

        Args:
            n_clusters: Number of KMeans clusters. Defaults to 3, the value
                that was previously hard-coded.

        Returns:
            The ``DataFrame.describe()`` table of the processed frame, with
            the ``count`` row set to the number of remaining rows, the
            ``mean`` row set to the per-column mean and the ``std`` row set
            to the per-column population standard deviation (``ddof=0``).

        Raises:
            KeyError: If ``amount`` or ``frequency`` is not a column.
            ValueError: If fewer than *n_clusters* rows remain once rows
                with missing values have been dropped.
        """
        cleaned = self.data.dropna()

        # Validate up front so the caller gets a clear message instead of a
        # failure from deep inside pandas indexing or scikit-learn.
        features = list(self._FEATURE_COLUMNS)
        missing = [name for name in features if name not in cleaned.columns]
        if missing:
            raise KeyError(
                "data is missing required column(s): " + ", ".join(missing)
            )
        if len(cleaned) < n_clusters:
            raise ValueError(
                "need at least {} complete row(s) to form {} clusters, "
                "got {}".format(n_clusters, n_clusters, len(cleaned))
            )

        # The category column is consumed by the encoding, so it is absent
        # when the method runs a second time on already processed data.
        # Skipping the step in that case keeps repeated calls working.
        if self._CATEGORY_COLUMN in cleaned.columns:
            cleaned = pd.get_dummies(cleaned, columns=[self._CATEGORY_COLUMN])

        kmeans = KMeans(n_clusters=n_clusters, random_state=0).fit(
            cleaned[features]
        )
        # assign() returns a new frame, so no write goes through a possible
        # view of the caller's original data.
        cleaned = cleaned.assign(cluster=kmeans.labels_)
        self.data = cleaned

        summary_stats = cleaned.describe()
        # Reduce only the columns describe() summarised, and reduce them per
        # column. np.mean(frame) / np.std(frame) forward axis=None to pandas,
        # which on pandas >= 2.0 collapses the whole frame into one scalar
        # for the mean.
        summarised = cleaned[summary_stats.columns]
        summary_stats.loc['count'] = len(cleaned)
        summary_stats.loc['mean'] = summarised.mean()
        # ddof=0 matches np.std's default, which the original override used;
        # describe() on its own reports the sample standard deviation.
        summary_stats.loc['std'] = summarised.std(ddof=0)

        return summary_stats

0 comments on commit 6972938

Please sign in to comment.