Skip to content
This repository has been archived by the owner on Jun 29, 2024. It is now read-only.

Advanced level tasks #63

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions Harshitha-Annam/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
**TASK-1: IMAGE CONVERTER**

A program that accepts images in multiple formats (JPEG, PNG, BMP, GIF) and converts them into a desired format using Python Imaging Library (PIL).

**TASK-2: DATA ANALYSIS WITH PANDAS**

A program that loads the "Iris" dataset from Seaborn and analyzes it using Pandas. It performs exploratory data analysis, cleaning, aggregation, visualizations, and correlation calculations.

**TASK-3: LINEAR REGRESSION WITH SCIKIT-LEARN**

A program that applies linear regression to predict house prices from the Boston housing dataset using scikit-learn. It compares train and test scores and plots residuals.

**TASK-4: IMAGE COMPRESSION**

Develop a Python tool for compressing images while maintaining quality. Explore compression techniques like RLE and DCT. Allow users to adjust compression quality, support various image formats, and provide output options. Optionally, include a user interface. Ensure code modularity, performance optimization, and test with diverse images, along with comprehensive documentation.
63 changes: 63 additions & 0 deletions Harshitha-Annam/TASK-1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Import all the necessary libraries
from PIL import Image
import os
# PIL is for opening, manipulating and saving images in different formats
# os is for handling file paths


# a function for handling the conversion of image formats
def convert_image(input_path, output_path, output_format):
    """Convert the image at ``input_path`` to ``output_format`` and save it.

    Args:
        input_path: Path of the image file to read.
        output_path: Path the converted image is written to.
        output_format: Pillow format name such as ``"JPEG"``, ``"PNG"``,
            ``"BMP"`` or ``"GIF"`` (case-insensitive). ``"JPG"`` is accepted
            as an alias for ``"JPEG"``.

    Returns:
        None. Any failure is reported on stdout instead of being raised.
    """
    requested = output_format.upper() if output_format else ""
    targets_jpeg = requested in ("JPEG", "JPG")
    # Pillow registers the writer under the name "JPEG" only, so map the
    # common "JPG" spelling onto it.
    save_format = "JPEG" if targets_jpeg else output_format

    try:
        with Image.open(input_path) as img:
            # JPEG cannot store an alpha channel or a palette, so saving
            # RGBA/LA/P/PA images directly fails. Converting to RGB first
            # makes e.g. PNG -> JPEG and GIF -> JPEG work; transparency is
            # discarded in the process.
            if targets_jpeg and img.mode in ("RGBA", "LA", "P", "PA"):
                img = img.convert("RGB")
            img.save(output_path, format=save_format)
    # Deliberately broad: this is the boundary where problems are reported to
    # the user, so no error should escape as a traceback.
    except Exception as e:
        print(f"Error: {e}")


# main function
def main():
    """Prompt for an image path and a target format, then convert the image.

    The converted file is written next to the input file as
    ``<name>_converted.<format>``. Invalid input is reported on stdout and
    the function returns without converting anything.
    """
    # get input file/image path from user; surrounding whitespace is typing
    # noise and would make the existence check fail
    input_path = input("Enter the input Image Path:").strip()

    # isfile() rather than exists(): a directory exists but cannot be
    # opened as an image
    if not os.path.isfile(input_path):
        print("The input file/image does not exist.")
        return

    # if input file/image exists ask user for the output format
    output_format = input("Enter the desired output format(eg., JPEG, PNG, BMP, GIF):").strip().upper()

    # "JPG" is the usual file extension, but Pillow's format name is "JPEG"
    if output_format == "JPG":
        output_format = "JPEG"

    # validate the desired output format given by the user
    valid_formats = ('JPEG', 'PNG', 'BMP', 'GIF')
    if output_format not in valid_formats:
        print("Invalid output format. Please choose a valid format(eg., JPEG, PNG, BMP, GIF)")
        return

    # build the output path from the input path without its extension
    file_name_without_ext = os.path.splitext(input_path)[0]
    output_path = f"{file_name_without_ext}_converted.{output_format.lower()}"

    # call the convert_image function and pass the arguments
    convert_image(input_path, output_path, output_format)


# run the interactive converter only when executed as a script
if __name__ == "__main__":
    main()
69 changes: 69 additions & 0 deletions Harshitha-Annam/TASK-2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# import necessary libraries

import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt


# load the iris dataset using load_dataset()
iris = sns.load_dataset('iris')


# EXPLORATORY DATA ANALYSIS
# A bare expression such as `iris.head()` is only echoed in a notebook/REPL;
# when run as a script its value is discarded, so print it explicitly.
print("First five rows of the Iris dataset: ")
print(iris.head())

print("Information about the columns in the Iris dataset: ")
iris.info()  # info() prints its report itself

print("A summary of basic analysis on Iris dataset: ")
print(iris.describe())


# DATA CLEANING

# check for missing values in each column of the Iris dataset
print(iris.isnull().sum())

# check for duplicate rows in the dataset (computed once and reused)
duplicate_count = iris.duplicated().sum()
print("Number of duplicates in Iris dataset: ")
print(duplicate_count)
if duplicate_count > 0:
    # remove duplicates
    iris = iris.drop_duplicates()

    # verify that the duplicates have been removed
    print(iris.duplicated().sum())


# AGGREGATION
# statistical summaries of the Iris dataset grouped by species
species_mean = iris.groupby('species').mean()
print(f"The mean of species in Iris dataset is :\n\n {species_mean}")

species_summary = iris.groupby('species').describe()
print(f"The statistical summary of species in Iris dataset:\n\n {species_summary}")


# VISUALISATION
# grid of scatterplots with pairwise relationships for each pair of features;
# the color of each point represents the Iris species
sns.pairplot(iris, hue='species')
plt.show()


# box plot of the dataset's columns
plt.figure(figsize=(6, 4))
plt.title('Box plot of Iris Dataset')
sns.boxplot(data=iris, orient='h')
# show this figure on its own, like the other plots, instead of leaving it
# pending until the heatmap's show() call
plt.show()


# CORRELATION
# correlation matrix of the numeric columns (species is categorical)
correlation_matrix = iris.drop(columns=['species']).corr()

# generate heatmap
plt.figure(figsize=(6, 4))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap of Iris Dataset')
plt.show()
64 changes: 64 additions & 0 deletions Harshitha-Annam/TASK-3.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# from sklearn.datasets import load_boston

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


# Load the Boston housing data from its original source.
data_url = "http://lib.stat.cmu.edu/datasets/boston"
# The separator is a regular expression, so it must be a raw string: "\s" in
# a normal string literal is an invalid escape sequence.
# NOTE(review): skiprows=22 presumably skips the file's descriptive header -
# confirm against the file.
raw_df = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
# Each record appears to span two consecutive rows of the file: the features
# are all columns of the even rows plus the first two columns of the odd
# rows, and the third column of the odd rows is the target.
data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
target = raw_df.values[1::2, 2]


# features and target
X = data
y = target


# split the data into training data (80%) and testing data (20%);
# the fixed random_state keeps the split reproducible
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=42)

# initialize the linear regression model and fit it to the training data
model = LinearRegression()
model.fit(Xtrain, ytrain)

# perform predictions
ytrain_pred = model.predict(Xtrain)
ytest_pred = model.predict(Xtest)

# compute the training and testing scores
train_score = model.score(Xtrain, ytrain)
test_score = model.score(Xtest, ytest)

# print the corresponding scores
print("The training score: ", train_score)
print("The testing score: ", test_score)


# visualize the residuals (predicted minus actual) against the predictions
train_residuals = ytrain_pred - ytrain
test_residuals = ytest_pred - ytest

plt.scatter(ytrain_pred, train_residuals, alpha=0.5)
# dashed zero line spanning the range of predicted values
plt.hlines(y=0, xmin=ytrain_pred.min(), xmax=ytrain_pred.max(), colors='r', linestyles='dashed')
plt.xlabel('Predicted Prices')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted Prices (Train)')
plt.show()

plt.scatter(ytest_pred, test_residuals, alpha=0.5)
plt.hlines(y=0, xmin=ytest_pred.min(), xmax=ytest_pred.max(), colors='r', linestyles='dashed')
plt.xlabel('Predicted Prices')
plt.ylabel('Residuals')
plt.title('Residuals vs Predicted Prices (Test)')
plt.show()
72 changes: 72 additions & 0 deletions Harshitha-Annam/TASK-4.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# import necessary libraries
from PIL import Image
import os


def get_size_format(b, factor = 1024, suffix = 'B'):
    """
    Render a byte count as a human-readable string with two decimals.

    The value is divided by `factor` until it drops below `factor`, and the
    matching unit prefix is placed in front of `suffix`.
        e.g, 1253656 => '1.20MB', 1253656678 => '1.17GB'
    """
    scaled = b
    for prefix in ("", "K", "M", "G", "T", "P", "E", "Z"):
        if scaled >= factor:
            # still too large for this unit: move on to the next one
            scaled /= factor
            continue
        return f"{scaled:.2f}{prefix}{suffix}"
    # larger than every listed unit: report in "Y"
    return f"{scaled:.2f}Y{suffix}"


def compress_image(image_path, new_size_ratio = 0.9, quality = 90, width = None, height = None, to_jpg = True):
    """
    Compress an image and save the result next to the original file.

    Args:
        image_path: Path of the image to compress.
        new_size_ratio: Scale factor applied to both dimensions when it is
            below 1.0. It takes precedence over `width`/`height`.
        quality: Quality setting passed to the image writer.
        width, height: Explicit target size, used only when `new_size_ratio`
            is not below 1.0 and both values are given.
        to_jpg: If True the result is saved as `<name>_compressed.jpg`,
            otherwise the original extension is kept.

    Returns:
        None. Progress and errors are reported on stdout.
    """
    # a zero or negative ratio cannot produce a valid image size
    if new_size_ratio <= 0:
        print(f"Error: new_size_ratio must be positive, got {new_size_ratio}")
        return

    try:
        # load image into memory
        img = Image.open(image_path)
    except Exception as e:
        print(f"Error loading image: {e}")
        return

    # Image.open() keeps the underlying file open, so run the rest inside a
    # `with` block to make sure the handle is released on every exit path.
    with img:
        # print the original image shape
        print("[*] Original Image Shape: ", img.size)

        # get original image size in bytes
        try:
            image_size = os.path.getsize(image_path)
            print("[*] Size before compression: ", get_size_format(image_size))
        except Exception as e:
            print(f"Error getting image size: {e}")
            return

        # resize the image if necessary
        if new_size_ratio < 1.0:
            # clamp to 1 pixel so very small images never get a zero dimension
            scaled_width = max(1, int(img.size[0] * new_size_ratio))
            scaled_height = max(1, int(img.size[1] * new_size_ratio))
            img = img.resize((scaled_width, scaled_height), Image.LANCZOS)
        elif width and height:
            img = img.resize((width, height), Image.LANCZOS)

        # split the filename and extension
        filename, ext = os.path.splitext(image_path)

        # `ext` already starts with a dot, so it must not be given another one
        new_name = f"{filename}_compressed.jpg" if to_jpg else f"{filename}_compressed{ext}"

        # save the compressed image
        try:
            # JPEG cannot store an alpha channel or a palette; convert such
            # images to RGB first (transparency is discarded)
            saving_as_jpeg = new_name.lower().endswith((".jpg", ".jpeg"))
            if saving_as_jpeg and img.mode in ("RGBA", "LA", "P", "PA"):
                img = img.convert("RGB")
            img.save(new_name, optimize = True, quality = quality)
        except Exception as e:
            print(f"Error saving compressed file: {e}")
            return

        # print the new image shape
        print("[+] New Image Shape:", img.size)

    try:
        # get new image size in bytes
        new_img_size = os.path.getsize(new_name)
        print("[*] Size after compression:", get_size_format(new_img_size))
        print(f"[*] Compressed image saved as: {new_name}")
    except Exception as e:
        print(f"Error getting new image size: {e}")

# Example usage
if __name__ == "__main__":
    # surrounding whitespace is typing noise and would make the path invalid
    image_path = input("Enter the path of the image to compress: ").strip()
    # width/height are not passed: with new_size_ratio below 1.0 the ratio
    # branch is taken and an explicit width/height would be ignored
    compress_image(image_path, new_size_ratio=0.8, quality=80)