diff --git a/Harshitha-Annam/README.md b/Harshitha-Annam/README.md new file mode 100644 index 0000000..2da95d0 --- /dev/null +++ b/Harshitha-Annam/README.md @@ -0,0 +1,15 @@ +**TASK-1: IMAGE CONVERTER** + + A program that accepts images in multiple formats (JPEG, PNG, BMP, GIF) and converts them into a desired format using the Python Imaging Library (PIL). + +**TASK-2: DATA ANALYSIS WITH PANDAS** + + A program that loads the "Iris" dataset from Seaborn and analyzes it using Pandas. It performs exploratory data analysis, cleaning, aggregation, visualizations, and correlation calculations. + +**TASK-3: LINEAR REGRESSION WITH SCIKIT-LEARN** + + A program that applies linear regression to predict house prices from the Boston housing dataset using scikit-learn. It compares train and test scores and plots residuals. + +**TASK-4: IMAGE COMPRESSION** + + A Python tool for compressing images while maintaining quality. Explore compression techniques like RLE and DCT. Allow users to adjust compression quality, support various image formats, and provide output options. Optionally, include a user interface. Ensure code modularity, performance optimization, and test with diverse images, along with comprehensive documentation. 
\ No newline at end of file
diff --git a/Harshitha-Annam/TASK-1.py b/Harshitha-Annam/TASK-1.py
new file mode 100644
index 0000000..9f1c640
--- /dev/null
+++ b/Harshitha-Annam/TASK-1.py
@@ -0,0 +1,73 @@
+"""Convert an image to JPEG, PNG, BMP or GIF using Pillow (PIL)."""
+
+# os is for handling file paths
+import os
+
+# PIL is for opening, manipulating and saving images in different formats
+from PIL import Image
+
+# output formats offered to the user
+VALID_FORMATS = ("JPEG", "PNG", "BMP", "GIF")
+
+# modes written to JPEG unchanged; every other mode (RGBA, P, LA, ...) is
+# converted to RGB first because JPEG has no alpha channel or palette
+JPEG_SAFE_MODES = ("1", "L", "RGB", "CMYK")
+
+
+def convert_image(input_path, output_path, output_format):
+    """Save the image at input_path to output_path in output_format.
+
+    Args:
+        input_path: path of the image to read.
+        output_path: path the converted image is written to.
+        output_format: Pillow format name such as "JPEG" or "PNG".
+
+    Returns:
+        True if the image was written, False if an error was reported.
+    """
+    try:
+        with Image.open(input_path) as img:
+            converted = img
+            if output_format.upper() == "JPEG" and img.mode not in JPEG_SAFE_MODES:
+                # transparency is discarded by this conversion
+                converted = img.convert("RGB")
+            converted.save(output_path, format=output_format)
+    except Exception as e:
+        # command-line boundary: report the problem instead of a traceback
+        print(f"Error: {e}")
+        return False
+    return True
+
+
+def main():
+    """Ask for an image path and a target format, then convert the image."""
+    # get input file/image path from user
+    input_path = input("Enter the input Image Path:").strip()
+
+    # isfile() rather than exists() so that a directory is rejected too
+    if not os.path.isfile(input_path):
+        print("The input file/image does not exist.")
+        return
+
+    # if input file/image exists ask user for the output format
+    output_format = input(
+        "Enter the desired output format(eg., JPEG, PNG, BMP, GIF):"
+    ).strip().upper()
+
+    # validate the desired output format given by the user
+    if output_format not in VALID_FORMATS:
+        print("Invalid output format. Please choose a valid format(eg., JPEG, PNG, BMP, GIF)")
+        return
+
+    # extract the file name without file extension
+    file_name_without_ext, _ = os.path.splitext(input_path)
+
+    # set the output image path
+    output_path = f"{file_name_without_ext}_converted.{output_format.lower()}"
+
+    if convert_image(input_path, output_path, output_format):
+        print(f"Converted image saved as: {output_path}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Harshitha-Annam/TASK-2.py b/Harshitha-Annam/TASK-2.py
new file mode 100644
index 0000000..c132a14
--- /dev/null
+++ b/Harshitha-Annam/TASK-2.py
@@ -0,0 +1,74 @@
+"""Exploratory data analysis of the Iris dataset with pandas and seaborn."""
+
+# import necessary libraries
+import matplotlib.pyplot as plt
+import pandas as pd
+import seaborn as sns
+
+
+def main():
+    """Load the Iris dataset, print its summaries and show the plots."""
+    # load the iris dataset using load_dataset()
+    iris = sns.load_dataset('iris')
+
+    # explore the loaded iris dataset (EXPLORATORY DATA ANALYSIS)
+    # head() and describe() only return a DataFrame, so outside a notebook
+    # the result has to be printed explicitly to become visible
+    print("First five rows of the Iris dataset: ")
+    print(iris.head())
+
+    print("Information about the columns in the Iris dataset: ")
+    iris.info()  # info() writes its report to stdout itself
+
+    print("A summary of basic analysis on Iris dataset: ")
+    print(iris.describe())
+
+    # DATA CLEANING
+    # check for missing values in the Iris dataset
+    print(iris.isnull().sum())
+
+    # check for duplicates in the dataset
+    duplicate_count = iris.duplicated().sum()
+    print("Number of duplicates in Iris dataset: ")
+    print(duplicate_count)
+    if duplicate_count > 0:
+        # remove duplicates
+        iris = iris.drop_duplicates()
+
+        # verifying if duplicates have been removed
+        print(iris.duplicated().sum())
+
+    # AGGREGATION
+    # statistical summary of the Iris dataset grouped by species
+    by_species = iris.groupby('species')
+    species_mean = by_species.mean()
+    print(f"The mean of species in Iris dataset is :\n\n {species_mean}")
+
+    species_summary = by_species.describe()
+    print(f"The statistical summary of species in Iris dataset:\n\n {species_summary}")
+
+    # VISUALISATION
+    # grid of scatterplots for each pair of features, coloured by species
+    sns.pairplot(iris, hue='species')
+    plt.show()
+
+    # box plot
+    plt.figure(figsize=(6, 4))
+    plt.title('Box plot of Iris Dataset')
+    sns.boxplot(data=iris, orient='h')
+    # show the box plot on its own, like the other figures
+    plt.show()
+
+    # correlation calculations & heatmaps
+    # correlation matrix of the numeric columns
+    correlation_matrix = iris.drop(columns=['species']).corr()
+
+    # generate heatmap
+    plt.figure(figsize=(6, 4))
+    sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
+    plt.title('Correlation Heatmap of Iris Dataset')
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Harshitha-Annam/TASK-3.py b/Harshitha-Annam/TASK-3.py
new file mode 100644
index 0000000..a61571d
--- /dev/null
+++ b/Harshitha-Annam/TASK-3.py
@@ -0,0 +1,74 @@
+"""Linear regression on the Boston housing data with scikit-learn."""
+
+# import necessary libraries
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error, r2_score
+
+# sklearn.datasets.load_boston is no longer available in current
+# scikit-learn releases, so the raw data file is downloaded instead
+DATA_URL = "http://lib.stat.cmu.edu/datasets/boston"
+
+
+def load_boston_data(url=DATA_URL):
+    """Read the raw Boston housing file and return (features, target).
+
+    The rows of the parsed file are consumed in pairs: an even row plus the
+    first two values of the following odd row form the features, and the
+    third value of the odd row is the target.
+    """
+    # raw string, because "\s" is an invalid escape in a normal literal
+    raw_df = pd.read_csv(url, sep=r"\s+", skiprows=22, header=None)
+    data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
+    target = raw_df.values[1::2, 2]
+    return data, target
+
+
+def plot_residuals(predicted, residuals, title):
+    """Scatter residuals against predictions with a dashed zero line."""
+    plt.scatter(predicted, residuals, alpha=0.5)
+    plt.hlines(y=0, xmin=predicted.min(), xmax=predicted.max(),
+               colors='r', linestyles='dashed')
+    plt.xlabel('Predicted Prices')
+    plt.ylabel('Residuals')
+    plt.title(title)
+    plt.show()
+
+
+def main():
+    """Fit the model, print train/test metrics and plot the residuals."""
+    # load the dataset
+    X, y = load_boston_data()
+
+    # split the data into training data and testing data
+    Xtrain, Xtest, ytrain, ytest = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # initialize the linear regression model and fit it to the training data
+    model = LinearRegression()
+    model.fit(Xtrain, ytrain)
+
+    # perform predictions
+    ytrain_pred = model.predict(Xtrain)
+    ytest_pred = model.predict(Xtest)
+
+    # compute and print the training and testing scores (R^2) and the MSE
+    print("The training score: ", model.score(Xtrain, ytrain))
+    print("The testing score: ", model.score(Xtest, ytest))
+    print("Training MSE: ", mean_squared_error(ytrain, ytrain_pred))
+    print("Testing MSE: ", mean_squared_error(ytest, ytest_pred))
+
+    # visualize the residuals (observed value minus predicted value)
+    plot_residuals(ytrain_pred, ytrain - ytrain_pred,
+                   'Residuals vs Predicted Prices (Train)')
+    plot_residuals(ytest_pred, ytest - ytest_pred,
+                   'Residuals vs Predicted Prices (Test)')
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Harshitha-Annam/TASK-4.py b/Harshitha-Annam/TASK-4.py
new file mode 100644
index 0000000..57eb118
--- /dev/null
+++ b/Harshitha-Annam/TASK-4.py
@@ -0,0 +1,110 @@
+"""Compress an image by downscaling and re-encoding it with Pillow."""
+
+# import necessary libraries
+from PIL import Image
+import os
+
+# modes written to JPEG unchanged; every other mode (RGBA, P, LA, ...) is
+# converted to RGB first because JPEG has no alpha channel or palette
+JPEG_SAFE_MODES = ("1", "L", "RGB", "CMYK")
+
+
+def get_size_format(b, factor=1024, suffix='B'):
+    """
+    Scale bytes to its proper byte format.
+    e.g, 1253656 => '1.20MB', 1253656678 => '1.17GB'
+    """
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
+        if b < factor:
+            return f"{b:.2f}{unit}{suffix}"
+        b /= factor
+    # anything that is still >= factor after "Z" is reported in "Y"
+    return f"{b:.2f}Y{suffix}"
+
+
+def compress_image(image_path, new_size_ratio=0.9, quality=90, width=None, height=None, to_jpg=True):
+    """Downscale and re-save an image to reduce its size on disk.
+
+    Args:
+        image_path: path of the image to compress.
+        new_size_ratio: factor applied to both dimensions when it is below
+            1.0; it takes precedence over width/height.
+        quality: quality value passed to Pillow's save().
+        width: target width, used only together with height and only when
+            new_size_ratio is 1.0 or more.
+        height: target height, see width.
+        to_jpg: write "NAME_compressed.jpg" when True, otherwise keep the
+            extension of the input file.
+
+    Returns:
+        The path of the compressed file, or None if an error was reported.
+    """
+    if new_size_ratio <= 0:
+        print("Error: new_size_ratio must be greater than 0")
+        return None
+
+    try:
+        # open the image; Pillow reads the pixel data lazily
+        source = Image.open(image_path)
+    except Exception as e:
+        print(f"Error loading image: {e}")
+        return None
+
+    # the with block closes the underlying file on every exit path
+    with source:
+        # print the original image shape
+        print("[*] Original Image Shape: ", source.size)
+
+        # get original image size in bytes
+        try:
+            image_size = os.path.getsize(image_path)
+        except OSError as e:
+            print(f"Error getting image size: {e}")
+            return None
+        print("[*] Size before compression: ", get_size_format(image_size))
+
+        # split the filename and extension
+        filename, ext = os.path.splitext(image_path)
+
+        # ext already starts with ".", so no extra dot is inserted before it
+        new_name = f"{filename}_compressed.jpg" if to_jpg else f"{filename}_compressed{ext}"
+
+        try:
+            # resize the image if necessary
+            img = source
+            if new_size_ratio < 1.0:
+                # max(1, ...) keeps tiny images from getting a 0-pixel side
+                new_size = tuple(max(1, int(side * new_size_ratio)) for side in source.size)
+                img = source.resize(new_size, Image.LANCZOS)
+            elif width and height:
+                img = source.resize((width, height), Image.LANCZOS)
+
+            if to_jpg and img.mode not in JPEG_SAFE_MODES:
+                # transparency is discarded by this conversion
+                img = img.convert("RGB")
+
+            # save the compressed image
+            img.save(new_name, optimize=True, quality=quality)
+        except Exception as e:
+            print(f"Error saving compressed file: {e}")
+            return None
+
+    # print the new image shape
+    print("[+] New Image Shape:", img.size)
+
+    try:
+        # get new image size in bytes
+        new_img_size = os.path.getsize(new_name)
+    except OSError as e:
+        print(f"Error getting new image size: {e}")
+    else:
+        print("[*] Size after compression:", get_size_format(new_img_size))
+        print(f"[*] Compressed image saved as: {new_name}")
+    return new_name
+
+
+# Example usage
+if __name__ == "__main__":
+    image_path = input("Enter the path of the image to compress: ").strip()
+    # width/height are not passed: they are ignored while new_size_ratio < 1.0
+    compress_image(image_path, new_size_ratio=0.8, quality=80)