yashasvini121 · Sai-ganesh-0004 · Oct 14, 2024 · Oct 14, 2024 · Oct 19, 2024
diff --git a/App.py b/App.py
@@ -41,6 +41,7 @@
 	"- **Parkinson's Disease Detector**: Assess your risk of Parkinson's Disease with advanced machine learning algorithms."
 )
 
+
 # Parkinson's Disease Detector Section
 with st.expander("Parkinson's Disease Detector - More Information"):
 	st.subheader("Introduction")
@@ -107,3 +108,41 @@
 	- **GLD**:  The price of SPDR Gold Shares (GLD), which is the target variable representing gold prices.
 		"""
 	)
+st.write(
+    "- **Car Price Predictor**: Estimate the price of a car based on various features."
+)
+# Car Price Predictor Section
+with st.expander("Car Price Predictor - More Information"):
+    st.subheader("Introduction")
+    st.write(
+        """
+    The Car Price Predictor uses machine learning algorithms to estimate the price of a car based on various features. Factors such as the car's brand, year, mileage, and other specifications can significantly impact its resale value.
+        """
+    )
+
+    # Dataset section
+    st.subheader("Car Price Dataset")
+    st.write(
+        """
+    The dataset for this model contains information on various cars, including their features and selling prices. The goal is to predict the selling price based on features like brand, year, mileage, fuel type, seller type, transmission, and more.
+        """
+    )
+
+    # Input features section
+    st.subheader("Additional Variable Information")
+    st.write(
+        """
+    - **Name**: The name of the car.
+    - **Year**: The year the car was manufactured.
+    - **Selling Price**: The price at which the car is being sold.
+    - **Km Driven**: The total kilometers driven by the car.
+    - **Fuel**: The type of fuel used (e.g., petrol, diesel).
+    - **Seller Type**: The type of seller (e.g., individual, dealer).
+    - **Transmission**: The transmission type (e.g., manual, automatic).
+    - **Owner**: The number of previous owners.
+    - **Mileage**: The car's mileage in km/ltr/kg.
+    - **Engine**: The engine capacity.
+    - **Max Power**: The maximum power of the car.
+    - **Seats**: The number of seats in the car.
+        """
+    )
diff --git a/form_configs/car_price.json b/form_configs/car_price.json
@@ -0,0 +1,82 @@
+{
+  "Car Price Prediction Form": {
+    "Year of Manufacture": {
+      "type": "number",
+      "min_value": 2000,
+      "max_value": 2024,
+      "default_value": 2020,
+      "step": 1,
+      "field_name": "year"
+    },
+    "Kilometers Driven": {
+      "type": "number",
+      "min_value": 0,
+      "max_value": 500000,
+      "default_value": 10000,
+      "step": 1000,
+      "field_name": "km_driven"
+    },
+    "Fuel Type": {
+      "type": "dropdown",
+      "options": ["Diesel", "LPG", "Petrol"],
+      "default_value": "Petrol",
+      "field_name": "fuel"
+    },
+    "Seller Type": {
+      "type": "dropdown",
+      "options": ["Individual", "Trustmark Dealer"],
+      "default_value": "Individual",
+      "field_name": "seller_type"
+    },
+    "Transmission": {
+      "type": "dropdown",
+      "options": ["Manual", "Automatic"],
+      "default_value": "Manual",
+      "field_name": "transmission"
+    },
+    "Owner Type": {
+      "type": "dropdown",
+      "options": [
+        "First Owner",
+        "Second Owner",
+        "Third Owner",
+        "Fourth & Above Owner",
+        "Test Drive Car"
+      ],
+      "default_value": "First Owner",
+      "field_name": "owner"
+    },
+    "Mileage (km/ltr)": {
+      "type": "number",
+      "min_value": 0,
+      "max_value": 100,
+      "default_value": 15,
+      "step": 0.1,
+      "field_name": "mileage"
+    },
+    "Engine Size (cc)": {
+      "type": "number",
+      "min_value": 800,
+      "max_value": 5000,
+      "default_value": 1500,
+      "step": 100,
+      "field_name": "engine"
+    },
+    "Max Power (bhp)": {
+      "type": "number",
+      "min_value": 0,
+      "max_value": 500,
+      "default_value": 100,
+      "step": 10,
+      "field_name": "max_power"
+    },
+    "Number of Seats": {
+      "type": "number",
+      "min_value": 2,
+      "max_value": 10,
+      "default_value": 5,
+      "step": 1,
+      "field_name": "seats"
+    }
+  }
+}
diff --git a/models/car_price_prediction/data/car_data.csv b/models/car_price_prediction/data/car_data.csv
diff --git a/models/car_price_prediction/model.py b/models/car_price_prediction/model.py
@@ -0,0 +1,65 @@
+# model.py
+
+# Data manipulation and processing
+import pandas as pd
+import numpy as np
+
+# Machine Learning models
+from sklearn.linear_model import LinearRegression, Lasso
+from sklearn.model_selection import train_test_split, GridSearchCV
+from sklearn.metrics import r2_score, mean_squared_error
+from sklearn.preprocessing import StandardScaler
+
+# Load the dataset
+df = pd.read_csv('data/car_data.csv')
+
+# Data preprocessing
+df['mileage(km/ltr/kg)'] = pd.to_numeric(df['mileage(km/ltr/kg)'], errors='coerce')
+df['engine'] = pd.to_numeric(df['engine'], errors='coerce')
+df['max_power'] = pd.to_numeric(df['max_power'], errors='coerce')
+df['seats'] = pd.to_numeric(df['seats'], errors='coerce')
+
+# Fill missing values for specific columns with their median
+df['mileage(km/ltr/kg)'].fillna(df['mileage(km/ltr/kg)'].median(), inplace=True)
+df['engine'].fillna(df['engine'].median(), inplace=True)
+df['max_power'].fillna(df['max_power'].median(), inplace=True)
+df['seats'].fillna(df['seats'].median(), inplace=True)
+
+# Calculate the car's age
+df['age'] = 2024 - df['year']
+df.drop(columns=['year'], inplace=True)
+
+# One-hot encode the categorical columns
+df = pd.get_dummies(df, columns=['fuel', 'seller_type', 'transmission', 'owner'], drop_first=True)
+df.drop(columns=['name'], inplace=True)
+
+# Scale the features
+features_to_scale = ['km_driven', 'mileage(km/ltr/kg)', 'engine', 'max_power', 'seats', 'age']
+scaler = StandardScaler()
+df[features_to_scale] = scaler.fit_transform(df[features_to_scale])
+
+# Define features (X) and target (y)
+X = df.drop(columns=['selling_price'])
+y = df['selling_price']
+
+# Split the data into training and testing sets
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+# Train Linear Regression model
+lin_reg = LinearRegression()
+lin_reg.fit(X_train, y_train)
+
+# Train Lasso Regression model with hyperparameter tuning
+lasso_cv = GridSearchCV(Lasso(), {'alpha': np.logspace(-4, 0, 50)}, cv=5)
+lasso_cv.fit(X_train, y_train)
+best_alpha = lasso_cv.best_params_['alpha']
+lasso_reg = Lasso(alpha=best_alpha)
+lasso_reg.fit(X_train, y_train)
+
+# Save models and scaler for later use
+import joblib
+joblib.dump(lin_reg, 'models/linear_regression_model.pkl')
+joblib.dump(lasso_reg, 'models/lasso_regression_model.pkl')
+joblib.dump(scaler, 'models/scaler.pkl')
+
+print('Models trained and saved successfully.')