From 6fdef64c75a108ab9c2d5e0c6357058739b96da5 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Sun, 8 May 2022 23:58:31 +0530 Subject: [PATCH 1/8] Assignment1_201050 --- 201050_Tejas_part1.ipynb | 975 +++++++++++++++++++++++++++++++++++++++ 201050_Tejas_part2.ipynb | 551 ++++++++++++++++++++++ 2 files changed, 1526 insertions(+) create mode 100644 201050_Tejas_part1.ipynb create mode 100644 201050_Tejas_part2.ipynb diff --git a/201050_Tejas_part1.ipynb b/201050_Tejas_part1.ipynb new file mode 100644 index 0000000..7053d2b --- /dev/null +++ b/201050_Tejas_part1.ipynb @@ -0,0 +1,975 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "name": "201050_Tejas_part1", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rvFM645NE-D2" + }, + "source": [ + "# Assignment 1 - Part 1\n", + "In this assignment, we will go through basic linear algebra, NumPy, and image manipulation using Python to get everyone on the same page.\n", + "\n", + "One of the aims of this assignment is to get you to start getting comfortable searching for useful library functions online. 
So in many of the functions you will implement, you will have to look up helper functions.\n", + "\n", + "\\\n", + "\n", + "## Instructions\n", + "* This notebook contains blocks of code; you are required to complete those blocks (where required).\n", + "* You are required to copy this notebook (\"copy to drive\" above) and complete the code.\n", + "* For submission, you'll be required to submit a shareable link for your copy of this notebook. (DO NOT CHANGE THE NAME OF THE FUNCTIONS)\n", + "\n", + "\\\n", + "\\\n", + "Also, I'd like to acknowledge Stanford CS131. This assignment is heavily based on the assignments from that course." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UhSVK4RoK9q5" + }, + "source": [ + "First, let's import some dependencies." + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cCKqyfhIE-EQ" + }, + "source": [ + "# Imports the print function from newer versions of python\n", + "from __future__ import print_function\n", + "\n", + "# Setup\n", + "\n", + "# The Random module implements pseudo-random number generators\n", + "import random \n", + "\n", + "# Numpy is the main package for scientific computing with Python. \n", + "# This will be one of our most used libraries in this project\n", + "import numpy as np\n", + "\n", + "# The Time library helps us time code runtimes\n", + "import time\n", + "\n", + "\n", + "# Some more magic so that the notebook will reload external python modules;\n", + "# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "%reload_ext autoreload" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "QLtp15rqE-EU" + }, + "source": [ + "# Part 1: Linear Algebra and NumPy Review\n", + "In this section, we will review linear algebra and learn how to use vectors and matrices in python using numpy."
# Part 1.1: the (4, 3) matrix M, the (1, 3) row vector a and the (3, 1)
# column vector b, written directly as 2-D literals.
M = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
a = np.array([[1, 1, 0]])
b = np.array([[-1], [2], [5]])

# Show each array together with its total number of elements.
print("M = \n", M)
print("The size of M is: ", M.size)
print()
print("a = ", a)
print("The size of a is: ", a.size)
print()
print("b = ", b)
print("The size of b is: ", b.size)
def dot_product(a, b):
    """Return the dot (matrix) product of ``a`` and ``b`` as a NumPy array.

    Args:
        a: numpy array of shape (x, n)
        b: numpy array of shape (n, x)
    Returns:
        out: numpy array of shape (x, x); for the (1, n) . (n, 1) case this
             is a (1, 1) array.
    """
    # np.array(...) guarantees an ndarray even when np.dot yields a scalar.
    return np.array(np.dot(a, b))
def complicated_matrix_function(M, a, b):
    """Compute (a . b) * (M . a.T).

    All inputs are expected to be 2-D arrays so that ``a.T`` is a real
    transpose.

    Args:
        M: numpy matrix of shape (x, n).
        a: numpy array of shape (1, n).
        b: numpy array of shape (n, 1).
    Returns:
        out: numpy matrix of shape (x, 1).
    """
    scale = np.array(np.dot(a, b))        # shape (1, 1)
    projected = np.array(np.dot(M, a.T))  # shape (x, 1)
    # The (1, 1) factor broadcasts over every row of the (x, 1) column.
    return scale * projected
def eigen_decomp(M):
    """Eigen-decompose a square matrix with ``np.linalg.eig``.

    Args:
        M: square numpy matrix of shape (m, m).
    Returns:
        w: numpy array of shape (m,) holding the eigenvalues.
        v: numpy array of shape (m, m); the column ``v[:, i]`` is the
           eigenvector corresponding to the eigenvalue ``w[i]``.
    """
    eigenvalues, eigenvectors = np.linalg.eig(M)
    return eigenvalues, eigenvectors
By top k\n", + " here we mean the eigenvalues with the top ABSOLUTE values (lookup\n", + " np.argsort for a hint on how to do so.)\n", + " (optional): Use the `eigen_decomp(M)` function you wrote above\n", + " as a helper function\n", + " Args:\n", + " M: numpy matrix of shape (m, m).\n", + " k: number of eigen values and respective vectors to return.\n", + " Returns:\n", + " eigenvalues: list of length k containing the top k eigenvalues\n", + " eigenvectors: list of length k containing the top k eigenvectors\n", + " of shape (m,)\n", + " \"\"\"\n", + " eigenvalues = []\n", + " eigenvectors = []\n", + " ### YOUR CODE HERE\n", + " w,v=eigen_decomp(M)\n", + " # L= np.concatenate(w,v, axis=0)\n", + " #w stores the eigenvalues of M \n", + " t=np.argsort(w) #this returns the indices which will sort the array of eigenvalues.\n", + " count=0\n", + " i=len(t)-1\n", + " while count\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "4 & 5 & 6 \\\\\n", + "1 & 2 & 3 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.42 & 0.85 \\\\\n", + "0 & 0.85 & 1.71 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.85 & 1.71 \\\\\n", + "0 & 0.45 & 0.85 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.42 & 0.85 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "$$\n", + "Second algorithm:\n", + "1. Take a pivot from the last row.\n", + "2. For each row above the pivot, calculate the factor f which makes the kth entry zero, and for every element in the row subtract the fth multiple of the corresponding element in the kth row\n", + "3. Repeat the above step untill the matrix is in rref\n", + "$$\\begin{bmatrix}\n", + "7 & 8 & 0 \\\\\n", + "0 & 0.42 & 0 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 0 & 0 \\\\\n", + "0 & 0.42 & 0 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "$$\n", + "\n", + "Steps for implementation:\n", + "1. 
# During forward elimination, entries whose magnitude is at most
# _REL_TOL * max|M| are treated as floating-point round-off and set to an
# exact zero, so a singular matrix does not grow a spurious pivot.
_REL_TOL = 1e-10


def swap_rows(M):
    """Bring the largest-magnitude entry of the pivot column to the first row.

    The pivot column is the first column of ``M`` containing a non-zero entry.
    The row whose entry in that column has the largest absolute value is
    exchanged with row 0 (partial pivoting).

    Args:
        M: 2-D numpy matrix of shape (m, n).
    Returns:
        Ms: a new matrix with the two rows exchanged; ``M`` itself is not
            modified. If ``M`` has no non-zero entry an unchanged copy is
            returned.
    """
    Ms = np.array(M)  # np.array copies, so the caller's matrix stays intact
    nonzero_cols = np.flatnonzero(np.any(Ms != 0, axis=0))
    if nonzero_cols.size == 0:
        return Ms
    col = nonzero_cols[0]
    # Compare magnitudes: a signed max would pick 0 over -3 and leave a zero pivot.
    top = int(np.argmax(np.abs(Ms[:, col])))
    Ms[[0, top]] = Ms[[top, 0]]
    return Ms


def apply_rows(M):
    """Zero out the pivot column in every row below the first row.

    The pivot is the first non-zero entry of row 0. For each lower row the
    factor ``f = row[pivot_col] / pivot`` is computed and ``f * row0`` is
    subtracted from that row.

    Args:
        M: 2-D numpy matrix of shape (m, n), normally the output of
           ``swap_rows``.
    Returns:
        Ms: a new float matrix whose pivot column is zero below row 0. If
            row 0 is entirely zero (or ``M`` has no rows) an unchanged float
            copy is returned instead of dividing by zero.
    """
    Ms = np.array(M, dtype=float)
    if Ms.shape[0] == 0:
        return Ms
    lead = np.flatnonzero(Ms[0])
    if lead.size == 0:
        return Ms
    col = lead[0]
    factors = Ms[1:, col] / Ms[0, col]
    Ms[1:] -= np.outer(factors, Ms[0])
    Ms[1:, col] = 0.0  # store exact zeros instead of round-off residue
    return Ms


def forward(M):
    """Return a row echelon form of ``M`` (pivots are not scaled to 1).

    Repeatedly applies ``swap_rows`` and ``apply_rows`` to the sub-matrix
    below and to the right of the previous pivot.

    Args:
        M: 2-D numpy matrix of shape (m, n).
    Returns:
        T: new float matrix of shape (m, n) in row echelon form.
    """
    T = np.array(M, dtype=float)
    if T.size == 0:
        return T
    tol = _REL_TOL * np.abs(T).max()
    T[np.abs(T) <= tol] = 0.0
    n_rows, n_cols = T.shape
    row = col = 0
    while row < n_rows and col < n_cols:
        block = apply_rows(swap_rows(T[row:, col:]))
        block[np.abs(block) <= tol] = 0.0  # drop cancellation noise
        T[row:, col:] = block
        lead = np.flatnonzero(block[0])
        if lead.size == 0:
            # The remaining block is entirely zero: nothing left to eliminate.
            break
        row += 1
        col += int(lead[0]) + 1  # continue to the right of this pivot
    return T


def backward(M):
    """Clear the entries above every pivot of a row echelon matrix.

    Rows are processed from the bottom up. The pivot of a row is its first
    non-zero entry; a multiple of the pivot row is subtracted from each row
    above it so that the pivot column becomes zero there.

    Args:
        M: 2-D numpy matrix of shape (m, n) in row echelon form, e.g. the
           output of ``forward``.
    Returns:
        Ms: new float matrix with zeros above (and below) every pivot;
            pivots keep their values. ``M`` is not modified.
    """
    Ms = np.array(M, dtype=float)
    for r in range(Ms.shape[0] - 1, 0, -1):
        lead = np.flatnonzero(Ms[r])
        if lead.size == 0:
            continue  # zero row: no pivot here
        c = lead[0]
        factors = Ms[:r, c] / Ms[r, c]
        Ms[:r] -= np.outer(factors, Ms[r])
        Ms[:r, c] = 0.0
    return Ms


def rref(M, normalize=False):
    """Reduce ``M`` with ``forward`` followed by ``backward``.

    Args:
        M: 2-D numpy matrix of shape (m, n).
        normalize: when True every pivot row is divided by its pivot, giving
            the canonical reduced row echelon form with leading ones. The
            default (False) leaves the pivots unscaled, as in the algorithm
            described in the assignment text.
    Returns:
        R: new float matrix of shape (m, n). Intermediate values are not
           rounded; apply ``np.around`` to the result for display if needed.
    """
    R = backward(forward(M))
    if normalize:
        for row in R:  # each ``row`` is a view, so the division is in place
            lead = np.flatnonzero(row)
            if lead.size:
                row /= row[lead[0]]
    return R
]\n", + " [ 0. 0.42 0. ]\n", + " [ 0. 0. -0.05]]\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G46pyDzAE-Ef" + }, + "source": [ + "## Part 1.6 (10 points)\n", + "\n", + "To wrap up our overview of NumPy, let's implement something fun — a helper function for computing the Euclidean distance between two $n$-dimensional points!\n", + "\n", + "In the 2-dimensional case, computing the Euclidean distance reduces to solving the Pythagorean theorem $c = \\sqrt{a^2 + b^2}$, where, given two points $(x_1, y_1)$ and $(x_2, y_2)$, $a = x_1 - x_2$ and $b = y_1 - y_2$.\n", + "\n", + "\n", + "More generally, given two $n$-dimensional vectors, the Euclidean distance can be computed by:\n", + "\n", + "1. Performing an elementwise subtraction between the two vectors, to get $n$ difference values.\n", + "2. Squaring each of the $n$ difference values, and summing the squares.\n", + "3. Taking the square root of our sum.\n", + "\n", + "Alternatively, the Euclidean distance between length-$n$ vectors $u$ and $v$ can be written as:\n", + "\n", + "$\n", + "\\quad\\textbf{distance}(u, v) = \\sqrt{\\sum_{i=1}^n (u_i - v_i)^2}\n", + "$\n", + "\n", + "\n", + "Try implementing this function: first using native Python with a `for` loop in the `euclidean_distance_native()` function, then in NumPy **without any loops** in the `euclidean_distance_numpy()` function.\n", + "We've added some `assert` statements here to help you check functionality (if it prints nothing, then your implementation is correct)!"
def euclidean_distance_native(u, v):
    """Computes the Euclidean distance between two vectors, represented as
    Python lists, using only native Python (no NumPy).

    Args:
        u (List[float]): A vector, represented as a list of floats.
        v (List[float]): A vector, represented as a list of floats.
    Returns:
        float: Euclidean distance between `u` and `v`.
    Raises:
        AssertionError: if either input is not a list or the lengths differ.
    """
    import math  # local import: the notebook's import cell does not provide it

    # First, run some checks:
    assert isinstance(u, list)
    assert isinstance(v, list)
    assert len(u) == len(v)

    # Accumulate the squared element-wise differences, then take the root.
    # (The accumulator is not called `sum`, so the builtin stays usable.)
    squared_sum = 0.0
    for u_i, v_i in zip(u, v):
        squared_sum += (u_i - v_i) ** 2
    return math.sqrt(squared_sum)


def euclidean_distance_numpy(u, v):
    """Computes the Euclidean distance between two vectors, represented as
    NumPy arrays, without any Python loops.

    Works for 1-D arrays as well as for 2-D row or column vectors: the
    difference is flattened before the inner product, so the result is always
    a scalar rather than an outer-product matrix.

    Args:
        u (np.ndarray): A vector, represented as a NumPy array.
        v (np.ndarray): A vector, represented as a NumPy array.
    Returns:
        float: Euclidean distance between `u` and `v`.
    Raises:
        AssertionError: if either input is not an ndarray or the shapes differ.
    """
    # First, run some checks:
    assert isinstance(u, np.ndarray)
    assert isinstance(v, np.ndarray)
    assert u.shape == v.shape

    diff = (u - v).ravel()
    # diff . diff is the sum of the squared differences.
    return np.sqrt(np.dot(diff, diff))
range(10000):\n", + " euclidean_distance_numpy(a_array, b_array)\n", + "print(\"NumPy:\", (time.time() - start_time), \"seconds\")" + ], + "metadata": { + "id": "E7Z38WwHhoNl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5aa04300-d529-4ac6-8f15-cc57bbc86980" + }, + "execution_count": 37, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Native: 1.5537786483764648 seconds\n", + "NumPy: 0.06700706481933594 seconds\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Mjik4mQXE-Ek" + }, + "source": [ + "Next, let's take a look at how these two implementations compare in terms of runtime:" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t4e6MfhHE-Em" + }, + "source": [ + "As you can see, doing vectorized calculations (i.e. no for loops) with NumPy results in significantly faster computations! " + ] + }, + { + "cell_type": "markdown", + "source": [ + "Congrats You've come to the end of this notebook. If you solved everything above, impressive. If not, you might need to read/think a bit more. You can always ask doubts. Also, Note that you should submit it even if you cannot solve everything. We might evaluate these using a script later." 
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

# --- Load the listings and make the two text columns numeric ---------------
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Stamatics /House_prediction.csv')

# (column, value to replace, replacement), applied in this order:
# '-' in `floor` becomes 0; `furniture` becomes a 1/0 flag.
for column, old_value, new_value in (
    ('floor', '-', 0),
    ('furniture', 'furnished', 1),
    ('furniture', 'not furnished', 0),
):
    df.loc[df[column] == old_value, column] = new_value

for column in ('floor', 'furniture'):
    df[column] = df[column].astype(int)

# Task: try to find a dependence of House Association Tax, property tax and
# fire insurance on other features -- not a formula, but something intuitive
# such as whether it increases/decreases with the number of rooms or with
# being furnished.


def _plot_city_mean(column, color):
    """Plot the per-city mean of `column` as a marked line on the current axes
    and return the Axes that pandas drew on."""
    per_city = df.groupby(['city'])[column].mean()
    return per_city.plot(color=color, linestyle='solid', marker='o', legend=True)


# Figure 1: rent and HOA share one set of axes.
plt.figure(1)
mean_rent = _plot_city_mean('rent amount (R$)', 'green')
mean_hoa = _plot_city_mean('hoa (R$)', 'red')

plt.figure(2)
mean_property_tax = _plot_city_mean('property tax (R$)', 'blue')

plt.figure(3)
mean_fire_insurance = _plot_city_mean('fire insurance (R$)', 'red')

# Figure 4: rooms, floor and parking spaces share one set of axes.
plt.figure(4)
mean_rooms = _plot_city_mean('rooms', 'red')
mean_floor = _plot_city_mean('floor', 'brown')
mean_floor = _plot_city_mean('parking spaces', 'green')

plt.figure(5)
mean_area = _plot_city_mean('area', 'blue')

plt.figure(6)
mean_total_rent = _plot_city_mean('total (R$)', 'purple')
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
# Dependence of the mean rent on individual listing features
# (number of rooms, floor, area and whether the apartment is furnished).


def plot_mean_rent_by(data, key, title, figsize=None, xlabel=None, ylabel=None, **bar_kwargs):
    """Bar-plot the mean of 'rent amount (R$)' grouped by `key` on a new figure.

    Args:
        data: DataFrame containing a 'rent amount (R$)' column.
        key: anything accepted by ``DataFrame.groupby`` -- a column name or a
            Series aligned with `data`.
        title: title of the axes.
        figsize: optional (width, height) passed to ``plt.subplots``.
        xlabel, ylabel: optional axis labels; when None the label left by
            pandas is kept.
        **bar_kwargs: forwarded to ``Series.plot.bar`` (e.g. color, edgecolor).
    Returns:
        The matplotlib Axes holding the bar chart.
    """
    _, ax = plt.subplots(figsize=figsize)
    data.groupby(key)['rent amount (R$)'].mean().plot.bar(ax=ax, **bar_kwargs)
    # Labels are applied after drawing: pandas' bar plot sets the x-label and
    # x-limits itself while drawing, so values set beforehand are overwritten.
    ax.set_title(title)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    return ax


ax_rooms = plot_mean_rent_by(df, 'rooms', 'Variation of rent with number of rooms')

ax_floor = plot_mean_rent_by(
    df, 'floor', 'Variation of rent with number of floors', figsize=(11, 15)
)

# Bucket the area to the nearest 100 and group by that Series directly, so
# df['area'] is never overwritten (and never left modified if the cell fails).
area_col_rounded = np.round(df['area'], -2)
ax_area = plot_mean_rent_by(
    df, area_col_rounded, 'Variation of rent with area', figsize=(16, 15),
    color='green', edgecolor='yellow', label='Area',
)

ax_furnished = plot_mean_rent_by(
    df, 'furniture', 'Variation of rent on whether apartment is furnished or not',
    xlabel='Furnished(1) or Not furnished(0)', ylabel='Mean rent',
)

# `mean_rooms` used to end up bound to the last Axes drawn in this cell; keep
# the name available in case a later cell still refers to it.
mean_rooms = ax_furnished
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAETCAYAAADzrOu5AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAckklEQVR4nO3de7QkZX3u8e8DyEVBAZkgDOCgjkkwiYTMAtTEeAvgJYFjNMHrqOQQT7zh5Sgaz4GgJMSsdTAcIwYDh0sURI2RGAUnCN4QYVBEkCgjDgHkMjgw3BQFf+ePejf0NHvv6T21e/Zs+H7W6rWr3qp6+63eb9fTdenqVBWSJPWxyVw3QJI0/xkmkqTeDBNJUm+GiSSpN8NEktSbYSJJ6s0waZLcmeQJ67nsK5J8cbbbNMLzPiPJVa3tB23o598YJFmZ5HmzVNeiJJVks9moT9NL8p4k/7Qey/1qkkuT3JHkzWNo10eS/K8R5pvNvndykvfPRl1zZV6GSZKzkxw1SfmBSW5cn41BVW1dVVeP8NwP2uBU1ceqar+ZPucsOAr4UGv7v47ziR6KG9rZ3BhsjOZyAzVKf6mqv66qP1uP6t8JnFdV21TVcevfyinb9fqqet9s1zsfJXlNkq+NMu+8DBPgFOCVSTJU/irgY1V176gVzfON4+OBK0aZcZ6v57wz1693kk3n8vnHbOR+P2yu/y/jNqfrV1Xz7gFsBawBnjlQth3wM+CpwN7AN4DbgBuADwGbD8xbwBuAq4AfDZQ9qQ2/EPg2cDtwLXDkwLL/1ea9sz2eBrwG+NrAPE8HLm5tvBh4+sC084H3AV8H7gC+COwwzbr+d2AFsBo4C9i5lf8Q+CXw09aOLSZZdiXwLuAy4B5gM2Bf4IL22nwHeNYobZtsvSd5vi2ADwI/bo8PTrQLeBZwHfB24Ob2f3ntFOv8bOC7A+PLgIsHxr8KHDSwju9o67gG+ASw5cC8LwIubet7AfBbrfy0odfvncCito5L2/reAvzlQF2bAIe31/4nwJnA9m3axLKHtGW/Msl6bQd8DlgF3NqGdxl6/f8GuIiu7312ov42/ZPAjW09vwI8ZWDaycDxwOeBu4BDgV8AP2/r928Dr9f/bK/XXcCJwI7AF9r//D+A7QbqHWd/ORL45za8JfDP7XW9je59s+Mky3wJuI/uvX4n8OTWjj8bmOc1rP1+XOv9zjr6Ynst39+Gd2j/p9vo3oNfBTbp0/fatN8GvtVet08AZ0w85yTrvAnwXuCa1t5TgcfMoN+ta30f0+pc1Z7jve05f729zve11/q2abfL497wj+sBfBT4p4HxPwcubcO/Q/cm2Ky92FcChw11rmXA9sBWA2UTYfIs4DfbC/pbwE08sPGa+OdtNlnnbXXeSreXtBnwsjb+2IE34A/p3gRbtfFjpljH59Bt0Pai21D/38HO0jrz86Z5jVa2zrxre66FdG/WF7R1+4M2vmBdbZtsvSd5vqOAC4FfARbQvYHeN/Ca3tvmeURrw90MbLgG6tmqdeId2rw3AdcD27RpPx14PVfSbXx3bq/9lcDrB96wNwP7AJvShcRKHgi4tV6/gXX8aHuep9KF8K+36W9p67dL+3/8I3D60LKnAo+i9auh9Xos8MfAI9u6fBL414Hp57f1/I1Wx6dpG9s2/XVtuYnQvnRoA7gGeEb7327JwEZxqE9cSBcgC9vr8632Wm1Jt7E+os077v5yJA+EyZ8D/9Zem03p3sOPnmK581k7PIbHX8ODw+T+9zvr6IusHSZ/A3ykzfcI4PeA9Ol7wOZ0G+23tjpfQhf8U4XJ6+g+UD4B2Br4F+C0GfS7da3vqXQfXLZp9f0AOGSy13LabfI4N/jjfAC/S5f4W7bxrwNvnWLew4DPDHWu5wzNc3+YTLL8B4Fjp3qTsHaYvAq4aGj5bwCvGej47x2Y9hfA2VM874nABwbGt26db
tFAZ15XmLxuYPxdE51woOwcYOm62jbZek/yfD8EXjAwvj+wcqBD/3TodbsZ2HeKur4KvJjuQ8EX6fYCDqDba7lsaB1fOTD+AeAjbfh4WpgNTP8+8PuTvX4D6zi4t3ARcHAbvhJ47sC0ndr/Y7OBZZ8wgz68J3DrwPj5DHywAPag27PYdJJlt23PN/EJ9WTg1KF5TmbyMHnFwPingeMHxt9EC7gN0F+O5IEweR1Dn96nWe58Zh4mzxkYn7YvsnaYHEW3oX3QtmF9+x7wTLo99wxMu2D4fzUw7VzgLwbGf3Um/W669aULup8DewxM+3Pg/Mley+ke8/WcCVX1NbpP7QcleSLdoa2PAyR5cpLPtZPxtwN/Tfcpd9C1U9WdZJ8k5yVZlWQN8PpJlp/KznSfOgZdQ/cpb8KNA8N304XEOuuqqjvpPhkunGL+yQyu5+OBlya5beJBF8o7rUfb1tneNrzzwPhPau3zWdPV/2W6N8Ez2/D5dG/E32/jg6Zq8+OBtw+t765DbZrMdPV9ZqCuK+kOAew4MP90/eqRSf4xyTWtX34F2Hbo/Mbg8tfQfZLcIcmmSY5J8sO27Mo2zw5TLDudmwaGfzrJ+OD6jrO/DDqNLqjOSPLjJB9I8oj1rGsyw6/NqH3x7+j2Cr6Y5Ookhw9NX5++tzNwfbWtdTO8zRg02ftqM0bsd81U6zux9z9c/0y2McD8PQE/4VTg1cArgXOqauJNcTzwn8Diqno08B5g+GR9MbWP052f2LWqHkO3mzux/HTLQfeJ4/FDZbvRHb6YqbXqSvIoukMlM6lrsL3X0n3S3Hbg8aiqOmaG9YzUXrr1/vEM2jpoOEy+zNRhMpVrgaOH1veRVXV6mz7KOg3X9/yh+rasqsH/x3R1vp3uU+U+rV8+s5UP9s1dB4Z3o/sEegvwcuBA4Hl0x7gXTbLs8HPPdP2Gjbu/PDBz1S+q6q+qag+6c44vontvj+IuusNjEx7Xtz0D7bqjqt5eVU8A/gh4W5LnjrDodH3vBmDh0AVEu01T12Tvq3tZ+0PA+v6vb6HrY8P1T/Tpket9KITJ8+hOUp8yUL4N3QnMO5P8GvA/ZljvNsDqqvpZkr3p3sgTVtGduJ3qOymfB56c5OVJNkvyp3SHKz43wzYAnA68NsmeSbag28P6ZlWtXI+6oDvB+YdJ9m+fdLdM8qwku4yw7LrWe6K9702yIMkOwP9uz7k+LqDb8O5Nd9jwCroOvw/dJ/pRfBR4fdvTTJJHJXlhkm3a9JuYfn2GfQQ4OsnjAdp6HjiD5beh++R/W5LtgSMmmeeVSfZI8ki6Qyyfqqr72rL30O2ZPpKuL6zLTNdv2Lj7y/2SPDvJb7a9tNvpNnC/HLGdlwIvbnt+T6I7GT0rkrwoyZPahn8N3Z7oKO2aru99gy4M3pzkEUleTNfPp3I68NYkuyfZmu5//4mawVWrU2l960y6fr1N69tv44H37U3ALkk2X1dd8zpM2kb1AroTT2cNTHoHXQDcQfdP/cQMq/4L4Kgkd9BtEM8ceM67gaOBr7fd132H2vQTuk9Vb6d7478TeFFV3TLDNlBV/wH8L7rj2jcATwQOnmk9A/VdS/fp9j10b/Zr6a7sWWc/WNd6N+8HltNd3fJduhO76/U9h6q6qy1/RVX9vBV/A7imqm4esY7ldB80PkR3EcQKumPAE/6GLvxuS/KOEar8e7p+9sXWNy6kC7dRfZDuBPAtbdmzJ5nnNLpj9jfSnRCf+FLeqXSHH64HvteWX5cTgT3a+s34e0gboL8MehzwKboguZJu7/O0EZt6LN1x/5voPlR+bMTlRrGY7gq3O+n634er6rx1LTRd32v9+cVtfDXwp3Qn1adyEt1r8RW6q9F+Rndua7a8iW7v7mrga3RHZk5q075Edxn2jUmm3YZNXJUgaY4lOZ/uhPSMvxUuzbV5vWciSdo4GCaSpN48zCVJ6s09E0lSb4aJJKm3h+QdNHfYYYdatGjRXDdDkuaVS
y655JaqWrA+yz4kw2TRokUsX758rpshSfNKkulu6zKtsR7mSvfjQ99N96toy1vZ9kmWpfuFwGVJtmvlSXJckhVJLkuy10A9S9v8VyVZOs42S5JmbkOcM3l2Ve1ZVUva+OHAuVW1mO5umBM3Tns+3bdNF9P9FsPx0IUP3W0n9qG75cAREwEkSdo4zMUJ+AN54D5apwAHDZSfWp0L6e6muhPdbcyXVdXqqrqV7ncJDtjQjZYkTW3cYVJ09zG6JMmhrWzHqrqhDd/IA7dRXsjat1G+rpVNVb6WJIcmWZ5k+apVq2ZzHSRJ6zDuE/C/W1XXJ/kVYFmS/xycWFWVZFa+NVlVJwAnACxZssRvYkrSBjTWPZOJ33lod3n9DN05j5va4Sva34k7wF7P2r/lsEsrm6pckrSRGFuYtPv3bzMxDOwHXE53C++JK7KW0v0kJq381e2qrn2BNe1w2DnAfkm2ayfe92tlkqSNxDgPc+1I9xOnE8/z8ao6O8nFwJlJDqH7fYY/afN/nu6H7lfQ/aTkawGqanWS9wEXt/mOqqrVY2y3JGmGHpI3elyyZEn5pUWpn0WH//tcN+EhY+UxL5zrJowkySUDX+OYEe/NJUnqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqbexhkmTTJN9O8rk2vnuSbyZZkeQTSTZv5Vu08RVt+qKBOt7dyr+fZP9xt1mSNDMbYs/kLcCVA+N/CxxbVU8CbgUOaeWHALe28mPbfCTZAzgYeApwAPDhJJtugHZLkkY01jBJsgvwQuCf2niA5wCfarOcAhzUhg9s47Tpz23zHwicUVX3VNWPgBXA3uNstyRpZsa9Z/JB4J3AL9v4Y4HbqureNn4dsLANLwSuBWjT17T57y+fZBlJ0kZgbGGS5EXAzVV1ybieY+j5Dk2yPMnyVatWbYinlCQ149wzeQbwR0lWAmfQHd76e2DbJJu1eXYBrm/D1wO7ArTpjwF+Mlg+yTL3q6oTqmpJVS1ZsGDB7K+NJGlKYwuTqnp3Ve1SVYvoTqB/qapeAZwHvKTNthT4bBs+q43Tpn+pqqqVH9yu9todWAxcNK52S5JmbrN1zzLr3gWckeT9wLeBE1v5icBpSVYAq+kCiKq6IsmZwPeAe4E3VNV9G77ZkqSpbJAwqarzgfPb8NVMcjVWVf0MeOkUyx8NHD2+FkqS+vAb8JKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSeptLm5Br2bR4f8+1014SFl5zAvnugnSw5Z7JpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3tYZJkleOkqZJOnha5Q9k3ePWCZJepia8jfgkzwfeAGwMMlxA5MeDdy7roqTbAl8BdiiPc+nquqIJLsDZwCPBS4BXlVVP0+yBXAq8DvAT4A/raqVra53A4cA9wFvrqpzZrqikqTxmW7P5MfAcuBndBv9icdZwP4j1H0P8JyqeiqwJ3BAkn2BvwWOraonAbfShQTt762t/Ng2H0n2AA4GngIcAHw4yaYzWUlJ0nhNuWdSVd8BvpPk41X1i5lWXFUF3NlGH9EeBTwHeHkrPwU4EjgeOLANA3wK+FCStPIzquoe4EdJVgB7A9+YaZskSeMxyjmTvZMsS/KDJFcn+VGSq0epPMmmSS4FbgaWAT8EbquqicNk1wEL2/BC4FqANn0N3aGw+8snWUaStBGYcs9kw
InAW+kOcd03k8qr6j5gzyTbAp8Bfm3GLRxRkkOBQwF22223cT2NJGkSo+yZrKmqL1TVzVX1k4nHTJ6kqm4DzgOeBmybZCLEdgGub8PXA7sCtOmPoTsRf3/5JMsMPscJVbWkqpYsWLBgJs2TJPU0Spicl+TvkjwtyV4Tj3UtlGRB2yMhyVbAHwBX0oXKS9psS4HPtuGz2jht+pfaeZezgIOTbNGuBFsMXDTi+kmSNoBRDnPt0/4uGSibOJE+nZ2AU9qVV5sAZ1bV55J8DzgjyfuBb9MdRqP9Pa2dYF9NdwUXVXVFkjOB79FdkvyGdvhMkrSRWGeYVNWz16fiqroM+O1Jyq+muxpruPxnwKTfrK+qo4Gj16cdkqTxG+V2KjsmOTHJF9r4HkkOWddykqSHj1HOmZwMnAPs3MZ/ABw2rgZJkuafUcJkh6o6E/gl3P8dEM9ZSJLuN0qY3JXksXQn3Wm3RFkz1lZJkuaVUa7mehvd5blPTPJ1YAEPXNorSdL0YdIu6/399vhVIMD31+deXZKkh65pD3O173O8rKruraorqupyg0SSNGyUw1xfT/Ih4BPAXROFVfWtsbVKkjSvjBIme7a/Rw2UjfINeEnSw8TYvgEvSXr4GOXSYEmSpmWYSJJ6M0wkSb2NcgKeJE8HFg3OX1WnjqlNkqR5Zp1hkuQ04InApTxwT64CDBNJEjDanskSYI/2q4eSJD3IKOdMLgceN+6GSJLmr1H2THYAvpfkIuCeicKq+qOxtUqSNK+MEiZHjrsRkqT5bZRvwH95QzREkjR/jfIb8PsmuTjJnUl+nuS+JLdviMZJkuaHUU7Afwh4GXAVsBXwZ8A/jLNRkqT5ZaRvwFfVCmDTqrqvqv4fcMB4myVJmk9GOQF/d5LNgUuTfAC4AW/DIkkaMEoovKrN90a6H8faFfjjcTZKkjS/jHI11zVJtgJ2qqq/2gBtkiTNM6NczfWHdPflOruN75nkrHE3TJI0f4xymOtIYG/gNoCquhTYfYxtkiTNM6OEyS+qas1QmTd9lCTdb5Srua5I8nJg0ySLgTcDF4y3WZKk+WSUPZM3AU+hu8nj6cDtwGHjbJQkaX4Z5Wquu4G/bA9Jkh5kyjBZ1xVb3oJekjRhuj2TpwHX0h3a+iaQDdIiSdK8M905k8cB7wF+A/h74A+AW6rqy6Pclj7JrknOS/K9JFckeUsr3z7JsiRXtb/btfIkOS7JiiSXJdlroK6lbf6rkizts8KSpNk3ZZi0mzqeXVVLgX2BFcD5Sd44Yt33Am+vqj3a8m9IsgdwOHBuVS0Gzm3jAM8HFrfHocDx0IUPcASwD933XY6YCCBJ0sZh2qu5kmyR5MXAPwNvAI4DPjNKxVV1Q1V9qw3fAVwJLAQOBE5ps50CHNSGDwROrc6FwLZJdgL2B5ZV1eqquhVYhnctlqSNynQn4E+lO8T1eeCvqury9X2SJIuA36Y797JjVd3QJt0I7NiGF9Kdo5lwXSubqlyStJGYbs/klXSHnN4CXJDk9va4Yya/tJhka+DTwGFVtdZyVVXM0rfpkxyaZHmS5atWrZqNKiVJI5runMkmVbVNezx64LFNVT16lMqTPIIuSD5WVf/Sim9qh69of29u5dfT3d5+wi6tbKry4faeUFVLqmrJggULRmmeJGmWjO1HrpIEOBG4sqr+z8Cks4CJK7KWAp8dKH91u6prX2BNOxx2DrBfku3aiff9WpkkaSMxyr251tcz6H5Y67tJLm1l7wGOAc5McghwDfAnbdrngRfQXTV2N/BagKpaneR9wMVtvqOqavUY2y1JmqGxhUlVfY2pv+j43EnmL7orxiar6yTgpNlrnSRpNvlb7pKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M
0wkSb0ZJpKk3gwTSVJvhokkqTfDRJLUm2EiSerNMJEk9WaYSJJ6M0wkSb0ZJpKk3sYWJklOSnJzkssHyrZPsizJVe3vdq08SY5LsiLJZUn2GlhmaZv/qiRLx9VeSdL6G+eeycnAAUNlhwPnVtVi4Nw2DvB8YHF7HAocD134AEcA+wB7A0dMBJAkaeMxtjCpqq8Aq4eKDwROacOnAAcNlJ9anQuBbZPsBOwPLKuq1VV1K7CMBweUJGmObehzJjtW1Q1t+EZgxza8ELh2YL7rWtlU5ZKkjcicnYCvqgJqtupLcmiS5UmWr1q1araqlSSNYEOHyU3t8BXt782t/Hpg14H5dmllU5U/SFWdUFVLqmrJggULZr3hkqSpbegwOQuYuCJrKfDZgfJXt6u69gXWtMNh5wD7JdmunXjfr5VJkjYim42r4iSnA88CdkhyHd1VWccAZyY5BLgG+JM2++eBFwArgLuB1wJU1eok7wMubvMdVVXDJ/UlSXNsbGFSVS+bYtJzJ5m3gDdMUc9JwEmz2DRJ0izzG/CSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb4aJJKk3w0SS1JthIknqzTCRJPVmmEiSejNMJEm9GSaSpN4ME0lSb/MmTJIckOT7SVYkOXyu2yNJesC8CJMkmwL/ADwf2AN4WZI95rZVkqQJ8yJMgL2BFVV1dVX9HDgDOHCO2yRJajab6waMaCFw7cD4dcA+gzMkORQ4tI3emeT7G6htDwc7ALfMdSPWJX871y3QHLBvzq7Hr++C8yVM1qmqTgBOmOt2PBQlWV5VS+a6HdIw++bGY74c5roe2HVgfJdWJknaCMyXMLkYWJxk9ySbAwcDZ81xmyRJzbw4zFVV9yZ5I3AOsClwUlVdMcfNejjx8KE2VvbNjUSqaq7bIEma5+bLYS5J0kbMMJEk9WaYSJJ6mxcn4LVhJfk1ujsMLGxF1wNnVdWVc9cqSRsz90y0liTvortdTYCL2iPA6d5gUxuzJK+d6zY8nHk1l9aS5AfAU6rqF0PlmwNXVNXiuWmZNL0k/1VVu811Ox6uPMylYb8EdgauGSrfqU2T5kySy6aaBOy4IduitRkmGnYYcG6Sq3jg5pq7AU8C3jhnrZI6OwL7A7cOlQe4YMM3RxMME62lqs5O8mS62/4PnoC/uKrum7uWSQB8Dti6qi4dnpDk/A3fHE3wnIkkqTev5pIk9WaYSJJ6M0ykaSR5c5Irk3xsFuo6Ksnz2vBhSR7Zv4XSxsFzJtI0kvwn8Lyqum6EeUP3nlrnJdRJVgJLqmrkn5xNsllV3Tvq/NKG5J6JNIUkHwGeAHwhyZok7xiYdnmSRe3x/SSnApcDv9f2ZD6a5IokX0yyVVvm5CQvSfJmuu/ynJfkvDbtzoG6X5Lk5IFlPpLkm8AHkjwxydlJLkny1XbrG2nOGSbSFKrq9cCPgWcDx04z62Lgw1X1FLovey4G/qGN3wb88VC9x03UW1XPHqEpuwBPr6q30f0Y1Juq6neAdwAfntlaSePh90yk/q6pqgsHxn808D2IS4BFPev/ZFXdl2Rr4OnAJ7sjagBs0bNuaVYYJtJo7mXtPfktB4bvGpr3noHh+4CtRqh/8OTllkPTJurfBLitqvYcoT5pg/IwlzSalcBeAEn2AnbvWd8dwDYD4zcl+fUkmwD/bbIFqup24EdJXtrakSRP7dkOaVYYJtJoPg1sn+QKunuU/aBnfScAZ0+cgAcOp7tVyAXADdMs9wrgkCTfAa6g+90Zac55abAkqTf3TCRJvRkmkqTeDBNJUm+GiSSpN8NEktSbYSJJ6s0wkST1ZphIknr7/3bRnaDyOGmxAAAAAElFTkSuQmCC\n" + }, 
# Finding recorded for the rent plots (markdown cell preceding this one):
# Rent clearly increases with increase in number of rooms and floors.
# Similarly with area. Not really a clear correspondence with furnishing.

# Dependencies: how does the mean house association tax (HOA) vary with the
# number of rooms, the floor, the (bucketed) area and furnishing?
HOA_COL = 'hoa (R$)'

# Bucket the area to the nearest 100 in a separate Series instead of
# overwriting df['area'] and restoring it afterwards; df is never mutated.
area_bucket = np.round(df['area'], -2)

# (group key, title, figsize, x-limits, x-label override, extra bar styling)
hoa_panels = [
    ('rooms', 'Variation of House Association Tax with number of rooms',
     None, None, None, {}),
    # Every panel, including this one, now plots HOA_COL: the floors figure
    # used to be drawn from 'rent amount (R$)' under an HOA title.
    ('floor', 'Variation of House Association Tax with number of floors',
     (11, 15), (0, 51), None, {}),
    (area_bucket, 'Variation of House Association Tax with area',
     None, (0, 1200), None,
     {'color': 'green', 'edgecolor': 'yellow', 'label': 'Area'}),
    ('furniture',
     'Variation of House Association Tax on whether apartment is furnished or not',
     None, None, 'Furnished(1) or Not furnished(0)', {}),
]

for group_key, title, figsize, xlim, xlabel, bar_style in hoa_panels:
    fig, ax = plt.subplots(figsize=figsize)
    if xlim is not None:
        # Kept in the original position (before plotting).
        # NOTE(review): pandas bar plots appear to reset the x-limits when
        # they draw, so this is probably a no-op - confirm.
        ax.set_xlim(*xlim)
    df.groupby(group_key)[HOA_COL].mean().plot.bar(ax=ax, **bar_style)
    # Labels go on after plotting so pandas' defaults do not replace them;
    # the y-label names the plotted quantity (it used to say "Mean rent").
    ax.set_title(title)
    ax.set_ylabel('Mean house association tax (R$)')
    if xlabel is not None:
        ax.set_xlabel(xlabel)
"b8aa1575-5979-41c6-c5f0-2694c5e8baf2" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
# Finding recorded for the HOA plots (markdown cell preceding this one):
# House association tax seems to increase with number of floors (maybe more
# likely to have a stronger association with an increase in number of
# floors). No clear dependence on other factors.
# NOTE(review): re-check the statement about floors against a floors figure
# that is actually drawn from 'hoa (R$)'.

# Dependencies: how does the mean property tax vary with the number of
# rooms, the floor, the (bucketed) area and furnishing?
TAX_COL = 'property tax (R$)'

# Bucket the area to the nearest 100 in a separate Series instead of
# overwriting df['area'] and restoring it afterwards; df is never mutated.
area_bucket = np.round(df['area'], -2)

# (group key, title, figsize, x-limits, x-label override, extra bar styling)
tax_panels = [
    ('rooms', 'Variation of Property Tax with number of rooms',
     None, None, None, {}),
    ('floor', 'Variation of Property Tax with number of floors',
     (11, 15), (0, 51), None, {}),
    (area_bucket, 'Variation of Property Tax with area',
     (16, 15), (0, 1200), None,
     {'color': 'green', 'edgecolor': 'yellow', 'label': 'Area'}),
    ('furniture',
     'Variation of Property Tax on whether apartment is furnished or not',
     None, None, 'Furnished(1) or Not furnished(0)', {}),
]

for group_key, title, figsize, xlim, xlabel, bar_style in tax_panels:
    fig, ax = plt.subplots(figsize=figsize)
    if xlim is not None:
        # Kept in the original position (before plotting).
        # NOTE(review): pandas bar plots appear to reset the x-limits when
        # they draw, so this is probably a no-op - confirm.
        ax.set_xlim(*xlim)
    df.groupby(group_key)[TAX_COL].mean().plot.bar(ax=ax, **bar_style)
    # Labels go on after plotting so pandas' defaults do not replace them;
    # the y-label names the plotted quantity (it used to say "Mean rent").
    ax.set_title(title)
    ax.set_ylabel('Mean property tax (R$)')
    if xlabel is not None:
        ax.set_xlabel(xlabel)
"height": 1000 + }, + "id": "TBD3b1FHherQ", + "outputId": "0f15c337-5c8a-4127-f9bb-d21d229b633f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "cE7foIo2h3Kj" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From dc8cb2ca261a1c77a7eeea9152f03535efda01e2 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Mon, 9 May 2022 00:01:41 +0530 Subject: [PATCH 2/8] Assignment-1_201050 --- .../Assignment_1/201050_Tejas_part1.ipynb | 975 ++++++++++++++++++ .../Assignment_1/201050_Tejas_part2.ipynb | 551 ++++++++++ 2 files changed, 1526 insertions(+) create mode 100644 Assignment/Assignment_1/201050_Tejas_part1.ipynb create mode 100644 Assignment/Assignment_1/201050_Tejas_part2.ipynb diff --git a/Assignment/Assignment_1/201050_Tejas_part1.ipynb b/Assignment/Assignment_1/201050_Tejas_part1.ipynb new file mode 100644 index 0000000..7053d2b --- /dev/null +++ b/Assignment/Assignment_1/201050_Tejas_part1.ipynb @@ -0,0 +1,975 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + }, + "colab": { + "name": "201050_Tejas_part1", + "provenance": [], + "collapsed_sections": [] + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rvFM645NE-D2" + }, + "source": [ + "# Assignment 1 - Part 1\n", + "In this assignment, we will go through basic linear algebra, NumPy, and image manipulation using Python to get everyone on the same page.\n", + "\n", + "One of the aims of this assignment is to get you to start getting comfortable searching for useful library functions online. 
So in many of the functions you will implement, you will have to look up helper functions.\n", + "\n", + "\\\n", + "\n", + "## Instructions\n", + "* This notebook contains blocks of code, you are required to complete those blocks (where required)\n", + "* You are required to copy this notebook (\"copy to drive\" above) and complete the code.\n", + "* For Submission, You'll be required to submit a sharable link for your copy of this notebook. (DO NOT CHANGE THE NAME OF THE FUNCTIONS)\n", + "\n", + "\\\n", + "\\\n", + "Also, I'd like to acknowledge the Stanford CS131. This assignment is highly based on the assignments from that course." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UhSVK4RoK9q5" + }, + "source": [ + "First, let's import some dependencies" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "cCKqyfhIE-EQ" + }, + "source": [ + "# Imports the print function from newer versions of python\n", + "from __future__ import print_function\n", + "\n", + "# Setup\n", + "\n", + "# The Random module implements pseudo-random number generators\n", + "import random \n", + "\n", + "# Numpy is the main package for scientific computing with Python. \n", + "# This will be one of our most used libraries in this project\n", + "import numpy as np\n", + "\n", + "# The Time library helps us time code runtimes\n", + "import time\n", + "\n", + "\n", + "# Some more magic so that the notebook will reload external python modules;\n", + "# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n", + "%load_ext autoreload\n", + "%autoreload 2\n", + "%reload_ext autoreload" + ], + "execution_count": 3, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "QLtp15rqE-EU" + }, + "source": [ + "# Part 1: Linear Algebra and NumPy Review\n", + "In this section, we will review linear algebra and learn how to use vectors and matrices in python using numpy."
# Part 1.1: M is a (4, 3) matrix, a is a (1, 3) row vector and b is a
# (3, 1) column vector. Each literal is written directly in its 2-D shape,
# so no reshape is needed afterwards.
M = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
a = np.array([[1, 1, 0]])
b = np.array([[-1], [2], [5]])

# np.size reports the number of elements (12, 3 and 3), not the shape.
print("M = \n", M)
print("The size of M is: ", np.size(M))
print()
print("a = ", a)
print("The size of a is: ", np.size(a))
print()
print("b = ", b)
print("The size of b is: ", np.size(b))
def dot_product(a, b):
    """Return the dot product of ``a`` and ``b`` as a NumPy array.

    Thin wrapper around ``np.dot``. The result is always wrapped in an
    ndarray, so even a scalar product (two 1-D inputs) comes back as a 0-d
    array and callers can rely on ``.shape`` being available.

    Args:
        a: numpy array of shape (x, n).
        b: numpy array of shape (n, x).
    Returns:
        numpy array of shape (x, x); shape (1, 1) when x = 1.
    """
    product = np.dot(a, b)
    return np.array(product)
def complicated_matrix_function(M, a, b):
    """Compute ``(a . b) * (M . a.T)``.

    All inputs must be two-dimensional arrays: ``a.T`` only turns the row
    vector into a column vector when ``a`` is 2-D.

    Args:
        M: numpy matrix of shape (x, n).
        a: numpy array of shape (1, n).
        b: numpy array of shape (n, 1).
    Returns:
        numpy matrix of shape (x, 1).
    """
    # (1, n) . (n, 1) -> (1, 1); broadcasts as a scalar factor below.
    scale = np.array(np.dot(a, b))
    # (x, n) . (n, 1) -> (x, 1)
    projected = np.array(np.dot(M, a.T))
    return scale * projected
def eigen_decomp(M):
    """Eigen-decompose ``M`` with ``np.linalg.eig``.

    Args:
        M: square numpy matrix of shape (m, m).
    Returns:
        w: numpy array of shape (m,) holding the eigenvalues, in the order
            ``np.linalg.eig`` returns them (not sorted).
        v: numpy array of shape (m, m); column ``v[:, i]`` is the
            eigenvector that belongs to eigenvalue ``w[i]``.
    """
    eigenvalues, eigenvectors = np.linalg.eig(M)
    return eigenvalues, eigenvectors
By top k\n", + " here we mean the eigenvalues with the top ABSOLUTE values (lookup\n", + " np.argsort for a hint on how to do so.)\n", + " (optional): Use the `eigen_decomp(M)` function you wrote above\n", + " as a helper function\n", + " Args:\n", + " M: numpy matrix of shape (m, m).\n", + " k: number of eigen values and respective vectors to return.\n", + " Returns:\n", + " eigenvalues: list of length k containing the top k eigenvalues\n", + " eigenvectors: list of length k containing the top k eigenvectors\n", + " of shape (m,)\n", + " \"\"\"\n", + " eigenvalues = []\n", + " eigenvectors = []\n", + " ### YOUR CODE HERE\n", + " w,v=eigen_decomp(M)\n", + " # L= np.concatenate(w,v, axis=0)\n", + " #w stores the eigenvalues of M \n", + " t=np.argsort(w) #this returns the indices which will sort the array of eigenvalues.\n", + " count=0\n", + " i=len(t)-1\n", + " while count\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "4 & 5 & 6 \\\\\n", + "1 & 2 & 3 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.42 & 0.85 \\\\\n", + "0 & 0.85 & 1.71 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.85 & 1.71 \\\\\n", + "0 & 0.45 & 0.85 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 8 & 9 \\\\\n", + "0 & 0.42 & 0.85 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "$$\n", + "Second algorithm:\n", + "1. Take a pivot from the last row.\n", + "2. For each row above the pivot, calculate the factor f which makes the kth entry zero, and for every element in the row subtract the fth multiple of the corresponding element in the kth row\n", + "3. Repeat the above step untill the matrix is in rref\n", + "$$\\begin{bmatrix}\n", + "7 & 8 & 0 \\\\\n", + "0 & 0.42 & 0 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "=>\n", + "\\begin{bmatrix}\n", + "7 & 0 & 0 \\\\\n", + "0 & 0.42 & 0 \\\\\n", + "0 & 0 & -0.05 \\end{bmatrix}\n", + "$$\n", + "\n", + "Steps for implementation:\n", + "1. 
Complete the function `swap_rows()`\n",
+        "2. Complete the function `apply_rows()`\n",
+        "3. Complete `forward()` and `backward()`\n",
+        "4. Finally implement `rref()` using the `forward()` and `backward()`\n",
+        "\n",
+        "Note: You can skip this part if you want."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qUFujiFAPYz6"
+      },
+      "source": [
+        "def swap_rows(M, tol=1e-10):\n",
+        "    \"\"\"Bring the largest-magnitude entry of the pivotal column into the first row.\n",
+        "\n",
+        "    The pivotal column is the first column holding an entry whose absolute\n",
+        "    value exceeds `tol`. The input is left untouched; a copy is returned.\n",
+        "    Args:\n",
+        "        M: numpy array of shape (m, n)\n",
+        "        tol: magnitudes at or below this threshold are treated as zero\n",
+        "    Returns:\n",
+        "        Ms: copy of M with the pivot row swapped into row 0 (a plain copy\n",
+        "            when M is empty or has no entry above `tol`)\n",
+        "    \"\"\"\n",
+        "    ### YOUR CODE HERE\n",
+        "    Ms = np.array(M)  # copy, so the caller's matrix is never modified in place\n",
+        "    if Ms.size == 0:\n",
+        "        return Ms\n",
+        "    magnitude = np.abs(Ms)\n",
+        "    pivot_cols = np.flatnonzero(magnitude.max(axis=0) > tol)\n",
+        "    if pivot_cols.size == 0:\n",
+        "        return Ms  # all-zero matrix: nothing to pivot on\n",
+        "    col = pivot_cols[0]\n",
+        "    # Compare by absolute value: a signed maximum could select a zero\n",
+        "    # entry as the pivot when the column also holds negative numbers.\n",
+        "    row = int(np.argmax(magnitude[:, col]))\n",
+        "    if row != 0:\n",
+        "        Ms[[0, row]] = Ms[[row, 0]]\n",
+        "    ### END YOUR CODE\n",
+        "    return Ms"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "S8lbAUSWWpyO"
+      },
+      "source": [
+        "def apply_rows(M, tol=1e-10):\n",
+        "    \"\"\"Zero out every entry below the pivot of the first row.\n",
+        "\n",
+        "    The pivot is the leading entry of row 0 (its first entry with magnitude\n",
+        "    above `tol`), so `swap_rows` should be applied first. For each lower row\n",
+        "    the factor f = row[k] / pivot is computed and f times row 0 is subtracted.\n",
+        "    Args:\n",
+        "        M: numpy array of shape (m, n)\n",
+        "        tol: magnitudes at or below this threshold are treated as zero\n",
+        "    Returns:\n",
+        "        Ms: float copy of M with all entries below the pivot equal to zero\n",
+        "    \"\"\"\n",
+        "    Ms = np.array(M, dtype=float)\n",
+        "    ### YOUR CODE HERE\n",
+        "    if Ms.shape[0] < 2:\n",
+        "        return Ms\n",
+        "    leading = np.flatnonzero(np.abs(Ms[0]) > tol)\n",
+        "    if leading.size == 0:\n",
+        "        return Ms  # first row is zero: there is no pivot to eliminate with\n",
+        "    k = leading[0]\n",
+        "    f = Ms[1:, k] / Ms[0, k]\n",
+        "    Ms[1:] -= np.outer(f, Ms[0])\n",
+        "    Ms[1:, k] = 0.0  # exact zeros below the pivot, free of round-off residue\n",
+        "    Ms[np.abs(Ms) <= tol] = 0.0\n",
+        "    ### END YOUR CODE\n",
+        "    return Ms"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "GnE_-JLxPYz7"
+      },
+      "source": [
+        "def forward(M, tol=1e-10):\n",
+        "    \"\"\"Return a row echelon form (ref) of M using the algo described above.\n",
+        "\n",
+        "    Step i pivots and eliminates inside the trailing block T[i:, i:] with\n",
+        "    `swap_rows` and `apply_rows`.\n",
+        "    Args:\n",
+        "        M: numpy array of shape (m, n).\n",
+        "        tol: magnitudes at or below this threshold are treated as zero\n",
+        "    Returns:\n",
+        "        T: ref of M as a float array (M itself is not modified)\n",
+        "    \"\"\"\n",
+        "    T = np.array(M, dtype=float)\n",
+        "    ### YOUR CODE HERE\n",
+        "    n_rows, n_cols = T.shape\n",
+        "    for i in range(min(n_rows - 1, n_cols)):\n",
+        "        T[i:, i:] = apply_rows(swap_rows(T[i:, i:], tol), tol)\n",
+        "    ### END YOUR CODE\n",
+        "    return T"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Wb7pPGP4XmJu"
+      },
+      "source": [
+        "def backward(M, tol=1e-10):\n",
+        "    \"\"\"Return the rref of a matrix that is already in row echelon form.\n",
+        "\n",
+        "    Working from the bottom row upwards, each pivot is scaled to 1 and the\n",
+        "    entries above it are eliminated.\n",
+        "    Args:\n",
+        "        M: numpy array of shape (m, n), in row echelon form.\n",
+        "        tol: magnitudes at or below this threshold are treated as zero\n",
+        "    Returns:\n",
+        "        Ms: rref of M as a float array (M itself is not modified)\n",
+        "    \"\"\"\n",
+        "    Ms = np.array(M, dtype=float)\n",
+        "    ### YOUR CODE HERE\n",
+        "    for r in range(Ms.shape[0] - 1, -1, -1):\n",
+        "        leading = np.flatnonzero(np.abs(Ms[r]) > tol)\n",
+        "        if leading.size == 0:\n",
+        "            continue  # a zero row carries no pivot\n",
+        "        c = leading[0]\n",
+        "        Ms[r] = Ms[r] / Ms[r, c]\n",
+        "        Ms[:r] -= np.outer(Ms[:r, c], Ms[r])\n",
+        "    Ms[np.abs(Ms) <= tol] = 0.0  # also turns -0.0 into 0.0\n",
+        "    ### END YOUR CODE\n",
+        "    return Ms"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XLq81xzXYR85"
+      },
+      "source": [
+        "def rref(M, tol=1e-10):\n",
+        "    \"\"\"Return the rref of M using the algo described above.\n",
+        "    Args:\n",
+        "        M: numpy array of shape (m, n).\n",
+        "        tol: magnitudes at or below this threshold are treated as zero\n",
+        "    Returns:\n",
+        "        Ms: rref of M as a float array (M itself is not modified)\n",
+        "    \"\"\"\n",
+        "    ### YOUR CODE HERE\n",
+        "    Ms = backward(forward(M, tol), tol)\n",
+        "    ### END YOUR CODE\n",
+        "    return Ms"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "Eiz6EbsWPYz8",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "2108d9b5-6017-4dc3-d523-583491bd0d4b"
+      },
+      "source": [
+        "# Let's define M.\n",
+        "M = np.array([[1,2,3],[4,5,6],[7,8,9]])\n",
+        "# Now let's calculate its rref.\n",
+        "# Note that your code may be evaluated on other test cases as well\n",
+        "Mrref = rref(M)\n",
+        "print(Mrref)\n",
+        "# M has rank 2, so the last row of its rref must be all zeros.\n",
+        "assert np.allclose(Mrref, [[1, 0, -1], [0, 1, 2], [0, 0, 0]])"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "G46pyDzAE-Ef"
+      },
+      "source": [
+        "## Part 1.6 (10 points)\n",
+        "\n",
+        "To wrap up our overview of NumPy, let's implement something fun — a helper function for computing the Euclidean distance between two $n$-dimensional points!\n",
+        "\n",
+        "In the 2-dimensional case, computing the Euclidean distance reduces to solving the Pythagorean theorem $c = \\sqrt{a^2 + b^2}$, where, given two points $(x_1, y_1)$ and $(x_2, y_2)$, $a = x_1 - x_2$ and $b = y_1 - y_2$.\n",
+        "\n",
+        "\n",
+        "More generally, given two $n$-dimensional vectors, the Euclidean distance can be computed by:\n",
+        "\n",
+        "1. Performing an elementwise subtraction between the two vectors, to get $n$ difference values.\n",
+        "2. Squaring each of the $n$ difference values, and summing the squares.\n",
+        "3. Taking the square root of our sum.\n",
+        "\n",
+        "Alternatively, the Euclidean distance between length-$n$ vectors $u$ and $v$ can be written as:\n",
+        "\n",
+        "$\n",
+        "\\quad\\textbf{distance}(u, v) = \\sqrt{\\sum_{i=1}^n (u_i - v_i)^2}\n",
+        "$\n",
+        "\n",
+        "\n",
+        "Try implementing this function: first using native Python with a `for` loop in the `euclidean_distance_native()` function, then in NumPy **without any loops** in the `euclidean_distance_numpy()` function.\n",
+        "We've added some `assert` statements here to help you check functionality (if it prints nothing, then your implementation is correct)!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "5xvHopPqO29C"
+      },
+      "source": [
+        "def euclidean_distance_native(u, v):\n",
+        "    \"\"\"Computes the Euclidean distance between two vectors, represented as Python\n",
+        "    lists.\n",
+        "    Args:\n",
+        "        u (List[float]): A vector, represented as a list of floats.\n",
+        "        v (List[float]): A vector, represented as a list of floats.\n",
+        "    Returns:\n",
+        "        float: Euclidean distance between `u` and `v`.\n",
+        "    \"\"\"\n",
+        "    # First, run some checks:\n",
+        "    assert isinstance(u, list)\n",
+        "    assert isinstance(v, list)\n",
+        "    assert len(u) == len(v)\n",
+        "\n",
+        "    # Compute the distance!\n",
+        "    # Notes:\n",
+        "    #  1) Try breaking this problem down: first, we want to get\n",
+        "    #     the difference between corresponding elements in our\n",
+        "    #     input arrays. Then, we want to square these differences.\n",
+        "    #     Finally, we want to sum the squares and square root the\n",
+        "    #     sum.\n",
+        "\n",
+        "    ### YOUR CODE HERE\n",
+        "    squared_total = 0  # named so that the builtin `sum` is not shadowed\n",
+        "    for a, b in zip(u, v):\n",
+        "        squared_total += (a - b) ** 2\n",
+        "    out = np.sqrt(squared_total)\n",
+        "    ### END YOUR CODE\n",
+        "    return out"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "wvLuK8MuO3LH"
+      },
+      "source": [
+        "def euclidean_distance_numpy(u, v):\n",
+        "    \"\"\"Computes the Euclidean distance between two vectors, represented as NumPy\n",
+        "    arrays.\n",
+        "\n",
+        "    The arrays may have any (identical) shape; row vectors, column vectors and\n",
+        "    1-D arrays all yield a single scalar.\n",
+        "    Args:\n",
+        "        u (np.ndarray): A vector, represented as a NumPy array.\n",
+        "        v (np.ndarray): A vector, represented as a NumPy array.\n",
+        "    Returns:\n",
+        "        float: Euclidean distance between `u` and `v`.\n",
+        "    \"\"\"\n",
+        "    # First, run some checks:\n",
+        "    assert isinstance(u, np.ndarray)\n",
+        "    assert isinstance(v, np.ndarray)\n",
+        "    assert u.shape == v.shape\n",
+        "\n",
+        "    # Compute the distance!\n",
+        "    # Note:\n",
+        "    #  1) You shouldn't need any loops\n",
+        "    #  2) Some functions you can Google that might be useful:\n",
+        "    #         np.sqrt(), np.sum()\n",
+        "    #  3) Try breaking this problem down: first, we want to get\n",
+        "    #     the difference between corresponding elements in our\n",
+        "    #     input arrays. Then, we want to square these differences.\n",
+        "    #     Finally, we want to sum the squares and square root the\n",
+        "    #     sum.\n",
+        "\n",
+        "    ### YOUR CODE HERE\n",
+        "    # Flatten first: np.dot on 2-D inputs would return a matrix, not a scalar.\n",
+        "    diff = np.ravel(u - v)\n",
+        "    out = np.sqrt(np.dot(diff, diff))\n",
+        "    ### END YOUR CODE\n",
+        "    return out"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "wu9MimVJE-Eg"
+      },
+      "source": [
+        "## Testing native Python function\n",
+        "assert euclidean_distance_native([7.0], [6.0]) == 1.0\n",
+        "assert euclidean_distance_native([7.0, 0.0], [3.0, 3.0]) == 5.0\n",
+        "assert euclidean_distance_native([7.0, 0.0, 0.0], [3.0, 0.0, 3.0]) == 5.0"
+      ],
+      "execution_count": 35,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "kJDk88g1E-Ej"
+      },
+      "source": [
+        "## Testing NumPy function\n",
+        "assert euclidean_distance_numpy(\n",
+        "    np.array([7.0]),\n",
+        "    np.array([6.0])\n",
+        ") == 1.0\n",
+        "assert euclidean_distance_numpy(\n",
+        "    np.array([7.0, 0.0]),\n",
+        "    np.array([3.0, 3.0])\n",
+        ") == 5.0\n",
+        "assert euclidean_distance_numpy(\n",
+        "    np.array([7.0, 0.0, 0.0]),\n",
+        "    np.array([3.0, 0.0, 3.0])\n",
+        ") == 5.0"
+      ],
+      "execution_count": 36,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Mjik4mQXE-Ek"
+      },
+      "source": [
+        "Next, let's take a look at how these two implementations compare in terms of runtime:"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "n = 1000\n",
+        "\n",
+        "# Create some length-n lists and/or n-dimensional arrays\n",
+        "a = [0.0] * n\n",
+        "b = [10.0] * n\n",
+        "a_array = np.array(a)\n",
+        "b_array = np.array(b)\n",
+        "\n",
+        "# Compute runtime for native implementation\n",
+        "start_time = time.time()\n",
+        "for i in range(10000):\n",
+        "    euclidean_distance_native(a, b)\n",
+        "print(\"Native:\", (time.time() - start_time), \"seconds\")\n",
+        "\n",
+        "# Compute runtime for numpy implementation\n",
+        "# Start by grabbing the current time in seconds\n",
+        "start_time = time.time()\n",
+        "for i in range(10000):\n",
+        "    euclidean_distance_numpy(a_array, b_array)\n",
+        "print(\"NumPy:\", (time.time() - start_time), \"seconds\")"
+      ],
+      "metadata": {
+        "id": "E7Z38WwHhoNl",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5aa04300-d529-4ac6-8f15-cc57bbc86980"
+      },
+      "execution_count": 37,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Native: 1.5537786483764648 seconds\n",
+            "NumPy: 0.06700706481933594 seconds\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "t4e6MfhHE-Em"
+      },
+      "source": [
+        "As you can see, doing vectorized calculations (i.e. no for loops) with NumPy results in significantly faster computations! "
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Congrats! You've come to the end of this notebook. If you solved everything above, impressive. If not, you might need to read/think a bit more. You can always ask doubts. Also, note that you should submit it even if you cannot solve everything. We might evaluate these using a script later."
+      ],
+      "metadata": {
+        "id": "XvFE0Q5bhx6-"
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/Assignment/Assignment_1/201050_Tejas_part2.ipynb b/Assignment/Assignment_1/201050_Tejas_part2.ipynb
new file mode 100644
index 0000000..29c06fa
--- /dev/null
+++ b/Assignment/Assignment_1/201050_Tejas_part2.ipynb
@@ -0,0 +1,551 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "201050_Tejas_part2.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "JsM9yumHP9iu"
+      },
+      "outputs": [],
+      "source": [
+        "import pandas as pd\n",
+        "import numpy as np\n",
+        "import matplotlib.pyplot as plt\n",
+        "import seaborn as sns\n",
+        "\n",
+        "# Location of the listings CSV; edit this one line to run outside this Drive.\n",
+        "DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/Stamatics /House_prediction.csv'\n",
+        "\n",
+        "df = pd.read_csv(DATA_PATH)\n",
+        "# A '-' in the floor column is encoded as floor 0.\n",
+        "df['floor'] = df['floor'].replace('-', '0').astype(int)\n",
+        "# Furnishing becomes 1 (furnished) / 0 (not furnished).\n",
+        "df['furniture'] = df['furniture'].map({'furnished': 1, 'not furnished': 0}).astype(int)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Try to find a dependence of House Association Tax, property tax, and fire insurance on other features.\n",
+        "Not a formula but something intuitive, like whether it is increasing/decreasing with the number of rooms or with being furnished or not."
+      ],
+      "metadata": {
+        "id": "GE0gbTHVD7lt"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Mean of each feature per city. Each inner list is drawn on one figure.\n",
+        "CITY_FIGURES = [\n",
+        "    [('rent amount (R$)', 'green'), ('hoa (R$)', 'red')],\n",
+        "    [('property tax (R$)', 'blue')],\n",
+        "    [('fire insurance (R$)', 'red')],\n",
+        "    [('rooms', 'red'), ('floor', 'brown'), ('parking spaces', 'green')],\n",
+        "    [('area', 'blue')],\n",
+        "    [('total (R$)', 'purple')],\n",
+        "]\n",
+        "\n",
+        "by_city = df.groupby('city')\n",
+        "for feature_colors in CITY_FIGURES:\n",
+        "    fig, ax = plt.subplots()\n",
+        "    for feature, color in feature_colors:\n",
+        "        by_city[feature].mean().plot(ax=ax, color=color, linestyle='solid',\n",
+        "                                     marker='o', legend=True)\n",
+        "    ax.set_ylabel('Mean per city')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "UCuFbuVdECfb",
+        "outputId": "19129155-31a9-434d-cae2-6fccbbaf7e04"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {
+            "needs_background": "light"
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Porto Alegre has the least average area, rooms, parking spaces, and rent. Campinas has relatively higher area but lower property tax and rent. "
+      ],
+      "metadata": {
+        "id": "6nnM3uMAyrql"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# How does the mean rent vary with rooms, floor, area and furnishing?\n",
+        "RENT_COL = 'rent amount (R$)'\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('rooms')[RENT_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of rent with number of rooms', ylabel='Mean rent')\n",
+        "\n",
+        "fig, ax = plt.subplots(figsize=(11, 15))\n",
+        "df.groupby('floor')[RENT_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of rent with number of floors', ylabel='Mean rent')\n",
+        "\n",
+        "# Bin the area to the nearest 100 in a separate Series rather than overwriting\n",
+        "# (and later restoring) df['area'], so an interrupted run cannot leave df altered.\n",
+        "area_bin = np.round(df['area'], -2)\n",
+        "fig, ax = plt.subplots(figsize=(16, 15))\n",
+        "df.groupby(area_bin)[RENT_COL].mean().plot.bar(ax=ax, color='green',\n",
+        "                                               edgecolor='yellow', label='Area')\n",
+        "ax.set(title='Variation of rent with area', ylabel='Mean rent')\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('furniture')[RENT_COL].mean().plot.bar(ax=ax)\n",
+        "# Labels are applied after plotting so the custom x label is the one displayed.\n",
+        "ax.set(title='Variation of rent on whether apartment is furnished or not',\n",
+        "       xlabel='Furnished(1) or Not furnished(0)', ylabel='Mean rent')"
+      ],
+      "metadata": {
+        "id": "WK_u47GLP2pq",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "outputId": "669c5565-c01f-40b9-c63e-a11ec72b6887"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "
" + ], + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEZCAYAAAB1mUk3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAdx0lEQVR4nO3deZhddZ3n8fdHIsieANUIWSgaAgq2IsaA4qOMYAigBmdQQJRII2lGbLC1W0HtDsMyhrEfF1qFQQgQRRBpHVDQEFlVZAlrWKUIgSSyRBJ2RAOf+eP8Yh/KW9u9dauS1Of1PPepc35n+f7O3T73LPeWbBMRESPba4a7AxERMfwSBhERkTCIiIiEQUREkDCIiAgSBhERQcJgrSTpOUl/2+Syh0q6YrD71I+6e0h6oPT9gKGu3yxJZ0j6116mnyDp+0PZp/6SZEnbD1PtHSXdLulZSccMRx/i1RIGw0zSLySd2KB9mqTHJI0a6Dptb2R7YT9qd5Y3hL/UsH2+7SkDrTkITgS+Vfr+/9pZqNF2N8v2UbZPKuvdU9KS1ns4InweuNr2xrZPG+7ORMJgdXAe8DFJ6tb+ceB82yv7u6LBeHMbRtsAd/dnxjV8O9c6TT4eA3m8JSnvVe1mO7dhvAHrA08D7661jQH+CLwFmAz8FngKeBT4FrBubV4DRwMPAA/V2rYvw/sDtwHPAIuBE2rLPlLmfa7c3gF8Avh1bZ53AjeXPt4MvLM27RrgJOA3wLPAFcAWvWzrkUAXsBy4FNi6tD8IvAK8WPqxXoNlFwFfAO4EXgJGAbsD15f75g5gz/70rdF2d6v1utKXVfN/CVgJbFLGTwK+UYbPBU4GNizLvFJb79bACcBFwJzSj7uBSb3cRwaOKo/nU8C3AZVpJwDfr83bWeYfVdvmk8t98hzwU2Bz4Pzy+N8MdHardQywEPgD8FXgNbXpfw/cC6wA5gLb9Pa8a7AtHyzb+1Tp2xtL+1XAy1TP8eeAHRosew1wSnn8XgS2p/fn4tZUz6nlVM+xI2vTTgB+BHy/PAYLgB2A44EnqF4XU2rzf6LcJ88CDwGHDvf7xJC8Fw13B3IzwHeBs2rj/wDcXobfRvWmN6q8+O8FPlOb18A8YDNg/VrbqjDYE/g7qr3ANwOPAweUaa96Myltn6CEQVnnCqq9lFHAIWV88zL9Gqo38h2oQu0aYFYP2/je8oazK7Ae8B/AdbXpi4C9e7mPFgG3A+NLrbHAk8B+ZdveV8Y7+upbo+1uUO864H+U4SvKuvatTftQGT4XOLl2Xy/ptp4TqN709gPWAb4C3NBLXQM/A0YDE4BlwNTauvoKgy5gO2BT4B7gd8De5fGbA5zTrdbV5XGeUOb9ZJk2razrjWXZLwPX9/a867YdOwDPl8fltVSHhbooH2RKXz/Zy/1wDVVo71zqb0nvz8XrgO9QBfku5X57b7fHYJ/a/fAQVci/lupDykNl3g2pgnPHMr4VsPNwv0cMxS27XquH84ADJb2ujB9W2rB9i+0bbK+0vQj4v8B7ui3/FdvLbb/YfcW2r7G9wPYrtu8ELmiwfE/2Bx6w/b1S/wLgPuADtXnOsf27UvsiqhdiI4cCs23favslqk9l75DU2c++AJxme3Gp9THgctuXl22bB8ynetMdaN8auRZ4TzkE8mbgtDL+OuDtVG8+/fXr0s+Xge9R7fH1Zpbtp2w/QvVmPZB+n2P7QdtPAz8HHrT9S1eHG38EvLXb/KeW584jwDeo3mSh2jv5iu17y7L/G9hF0ja1ZXt83gEHAZfZnmf7z8C/U4XyOwewLefavrvUn0IPz0VJ44E9gC/Y/qPt24GzqF5Hq/zK9tza/dBBdT//GbgQ6JQ0usz7CvAmSevbftR2vw5nrekSBqsB27+m+tR8gKTtqA4N/QBA0g6SflZOJj9D9aLcotsqFve0bkm7Sbpa0jJJT1O9yLsv3
5OtgYe7tT1M9al8lcdqwy8AG/VnXbafo/okP7aH+Rupb+c2wIclPbXqBryL6pPcQPvWyLVUn/R3pTqsMI8qRHcHumw/OYB1de/H6/o4zt5Kvx+vDb/YYLz7uur36cNUjxNU9+83a/ftckC8+vHq8XnHXz/er5T5m328e3subg0st/1sg2mrdL8f/lDCedU4wEa2n6cKsqOARyVdJukNA+jzGithsPqYQ/VJ5mPAXNurnrynU30Cmmh7E+CLVC/Kut5+evYHVMdSx9veFDijtnxfP1n7e6o3hboJwNI+lutzXZI2pDqePZB11fu7GPie7dG124a2Zw1wPT25HtgR+BBwre17qLZ9P6qgaHa9rXge2KA2/vpBWOf42vAEqscJqvv3H7rdv+vbvr42f2/b2/3xVqnV7OPd23Px98BmkjZuMG3Ayh7E+6g+WNxHdRh3rZcwWH3MoTq2eyTlEFGxMdUxzOfKJ5T/OcD1bkz1qemPkiYDH61NW0a1S9zTdxIuB3aQ9FFJoyQdBOxEdUx7oC4ADpe0i6T1qPZwbiyHvprxfapDBPtIWkfS68qlneP6sWxf243tF4BbqE6Srnrzv57qE2NPYfA4sLmkTfu7EQN0O/BuSRNKjeMHYZ3/ImlMOdRyLPDD0n4GcLyknQEkbSrpwwNY70XA/pL2kvRa4HNUJ/6v732xHvX4XLS9uKz3K+V58GbgCKrnyIBI2rJc1r1h6e9zVM+VtV7CYDVR3hSvpzqBdWlt0j9TvYE/S/UJ5Yd/tXDvPgWcKOlZ4N+oXqSrar5AuWKjHA7YvVufngTeT/VCfpLqJOD7bf9hgH3A9i+BfwX+k+qqqO2Agwe6ntr6FlOd5Pwi1Zv7YuBf6Mdzuq/trrmW6gTjTbXxjenhfIHt+6hCb2FZ79aN5mtWOS/yQ6orqm6huVDu7pKyrtuBy4CzS62fAKcCF5bDk3cB+w6gr/dT7eX+B9Uh0A8AH7D9p2Y62Y/n4iFUJ9R/D/wEmFmecwP1GuCzZT3LqQ4NDvQD2Bpp1SVrERExgmXPICIiEgYREZEwiIgIEgYREUHCICIiqH6nY420xRZbuLOzc7i7ERGxRrnlllv+YLuje/saGwadnZ3Mnz9/uLsREbFGkdT9Zz2AHCaKiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERLAGf+ksImJt1XncZU0vu2jW/k0tlz2DiIhIGERERMIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgI+hEGkmZLekLSXQ2mfU6SJW1RxiXpNEldku6UtGtt3umSHii36bX2t0laUJY5TZIGa+MiIqJ/+rNncC4wtXujpPHAFOCRWvO+wMRymwGcXubdDJgJ7AZMBmZKGlOWOR04srbcX9WKiIj26jMMbF8HLG8w6evA5wHX2qYBc1y5ARgtaStgH2Ce7eW2VwDzgKll2ia2b7BtYA5wQGubFBERA9XUOQNJ04Cltu/oNmkssLg2vqS09da+pEF7REQMoQH/aqmkDYAvUh0iGlKSZlAdfmLChAlDXT4iYq3VzJ7BdsC2wB2SFgHjgFslvR5YCoyvzTuutPXWPq5Be0O2z7Q9yfakjo6OJroeERGNDDgMbC+w/Te2O213Uh3a2dX2Y8ClwGHlqqLdgadtPwrMBaZIGlNOHE8B5pZpz0javVxFdBhwySBtW0RE9FN/Li29APgtsKOkJZKO6GX2y4GFQBfwXeBTALaXAycBN5fbiaWNMs9ZZZkHgZ83tykREdGsPs8Z2D6kj+mdtWEDR/cw32xgdoP2+cCb+upHRES0T76BHBERCYOIiEgYREQECYOIiCBhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAT9CANJsyU9IemuWttXJd0n6U5JP5E0ujbteEldku6XtE+tfWpp65J0XK19W0k3l
vYfSlp3MDcwIiL61p89g3OBqd3a5gFvsv1m4HfA8QCSdgIOBnYuy3xH0jqS1gG+DewL7AQcUuYFOBX4uu3tgRXAES1tUUREDFifYWD7OmB5t7YrbK8sozcA48rwNOBC2y/ZfgjoAiaXW5fthbb/BFwITJMk4L3AxWX584ADWtymiIgYoME4Z/D3wM/L8FhgcW3aktLWU/vmwFO1YFnVHhERQ2hUKwtL+hKwEjh/cLrTZ70ZwAyACRMmDEXJiBhmncdd1vSyi2btP4g9Wbs1vWcg6RPA+4FDbbs0LwXG12YbV9p6an8SGC1pVLf2hmyfaXuS7UkdHR3Ndj0iIrppKgwkTQU+D3zQ9gu1SZcCB0taT9K2wETgJuBmYGK5cmhdqpPMl5YQuRo4sCw/HbikuU2JiIhm9efS0guA3wI7Sloi6QjgW8DGwDxJt0s6A8D23cBFwD3AL4Cjbb9czgl8GpgL3AtcVOYF+ALwWUldVOcQzh7ULYyIiD71ec7A9iENmnt8w7Z9CnBKg/bLgcsbtC+kutooIiKGSb6BHBERCYOIiEgYREQECYOIiCBhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAT9CANJsyU9IemuWttmkuZJeqD8HVPaJek0SV2S7pS0a22Z6WX+ByRNr7W/TdKCssxpkjTYGxkREb3rz57BucDUbm3HAVfanghcWcYB9gUmltsM4HSowgOYCewGTAZmrgqQMs+RteW614qIiDbrMwxsXwcs79Y8DTivDJ8HHFBrn+PKDcBoSVsB+wDzbC+3vQKYB0wt0zaxfYNtA3Nq64qIiCHS7DmDLW0/WoYfA7Ysw2OBxbX5lpS23tqXNGhvSNIMSfMlzV+2bFmTXY+IiO5aPoFcPtF7EPrSn1pn2p5ke1JHR8dQlIyIGBFGNbnc45K2sv1oOdTzRGlfCoyvzTeutC0F9uzWfk1pH9dg/ohYjXQed1lLyy+atf8g9STapdk9g0uBVVcETQcuqbUfVq4q2h14uhxOmgtMkTSmnDieAswt056RtHu5iuiw2roiImKI9LlnIOkCqk/1W0haQnVV0CzgIklHAA8DHymzXw7sB3QBLwCHA9heLukk4OYy34m2V52U/hTVFUvrAz8vt4iIGEJ9hoHtQ3qYtFeDeQ0c3cN6ZgOzG7TPB97UVz8iIqJ98g3kiIhIGERERMIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHzv1oaEcOglV8PzS+HRm+yZxAREQmDiIhIGEREBAmDiIggYRARESQMIiKChEFERNDi9wwk/RPwScDAAqr/ebwVcCGwOXAL8HHbf5K0HjAHeBvwJHCQ7UVlPccDRwAvA8fYnttKvyIiWjXSvtPRdBhIGgscA+xk+0VJFwEHA/sBX7d9oaQzqN7kTy9/V9jeXtLBwKnAQZJ2KsvtDGwN/FLSDrZfbmnLYq030l6sEe3U6mGiUcD6kkYBGwCPAu8FLi7TzwMOKMPTyjhl+l6SVNovtP2S7YeALmByi/2KiIgBaDoMbC8F/h14hCoEnqY6LPSU7ZVltiXA2DI8Flhcll1Z5t+83t5gmYiIGAJNh4GkMVSf6relOryzITB1kPrVU80ZkuZLmr9s2bJ2loqIGFFaOUy0N/CQ7WW2/wz8GNgDGF0OGwGMA5aW4aXAeIAyfVOqE8l/aW+wzKvYPtP2JNuTOjo6Wuh6RETUtRIGjwC7S9qgHPvfC7gHuBo4sMwzHbikDF9axinTr7Lt0n6wpPUkbQtMBG5qoV8RETFATV9NZPtGSRcDtwIrgduAM4HLgAslnVzazi6LnA18T1IXsJzqCiJs312uRLqnrOfoXEkUETG0Wvqege2ZwMxuzQtpcDWQ7T8CH+5hPacAp7TSl4iIaF6+gRwREQmDiIjIv72MGLBWvvkM+fZzrJ6yZ
xAREQmDiIhIGEREBDlnEIMgvx4asebLnkFERCQMIiIiYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIighbDQNJoSRdLuk/SvZLeIWkzSfMkPVD+jinzStJpkrok3Slp19p6ppf5H5A0vdWNioiIgWl1z+CbwC9svwF4C3AvcBxwpe2JwJVlHGBfYGK5zQBOB5C0GTAT2A2YDMxcFSARETE0mg4DSZsC7wbOBrD9J9tPAdOA88ps5wEHlOFpwBxXbgBGS9oK2AeYZ3u57RXAPGBqs/2KiIiBa2XPYFtgGXCOpNsknSVpQ2BL24+WeR4DtizDY4HFteWXlLae2v+KpBmS5kuav2zZsha6HhERda2EwShgV+B0228Fnue/DgkBYNuAW6jxKrbPtD3J9qSOjo7BWm1ExIjXShgsAZbYvrGMX0wVDo+Xwz+Uv0+U6UuB8bXlx5W2ntojImKINB0Gth8DFkvasTTtBdwDXAqsuiJoOnBJGb4UOKxcVbQ78HQ5nDQXmCJpTDlxPKW0RUTEEGn1fyD/I3C+pHWBhcDhVAFzkaQjgIeBj5R5Lwf2A7qAF8q82F4u6STg5jLfibaXt9iviIgYgJbCwPbtwKQGk/ZqMK+Bo3tYz2xgdit9iYiI5uUbyBERkTCIiIiEQUREkDCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERASt/3ObWE10HndZ08sumrX/IPYkItZE2TOIiIiEQUREDEIYSFpH0m2SflbGt5V0o6QuST8s/x8ZSeuV8a4yvbO2juNL+/2S9mm1TxERMTCDsWdwLHBvbfxU4Ou2twdWAEeU9iOAFaX962U+JO0EHAzsDEwFviNpnUHoV0RE9FNLYSBpHLA/cFYZF/Be4OIyy3nAAWV4WhmnTN+rzD8NuND2S7YfArqAya30KyIiBqbVPYNvAJ8HXinjmwNP2V5ZxpcAY8vwWGAxQJn+dJn/L+0NlomIiCHQdBhIej/whO1bBrE/fdWcIWm+pPnLli0bqrIREWu9VvYM9gA+KGkRcCHV4aFvAqMlrfr+wjhgaRleCowHKNM3BZ6stzdY5lVsn2l7ku1JHR0dLXQ9IiLqmg4D28fbHme7k+oE8FW2DwWuBg4ss00HLinDl5ZxyvSrbLu0H1yuNtoWmAjc1Gy/IiJi4NrxDeQvABdKOhm4DTi7tJ8NfE9SF7CcKkCwfbeki4B7gJXA0bZfbkO/IiKiB4MSBravAa4pwwtpcDWQ7T8CH+5h+VOAUwajLxERMXD5BnJERCQMIiIiYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIigoRBRESQMIiICBIGERFBwiAiImjPfzob0TqPu6zpZRfN2n8QexIR0X/ZM4iIiIRBRES0EAaSxku6WtI9ku6WdGxp30zSPEkPlL9jSrsknSapS9KdknatrWt6mf8BSdNb36yIiBiIVvYMVgKfs70TsDtwtKSdgOOAK21PBK4s4wD7AhPLbQZwOlThAcwEdgMmAzNXBUhERAyNpsPA9qO2by3DzwL3AmOBacB5ZbbzgAPK8DRgjis3AKMlbQXsA8yzvdz2CmAeMLXZfkVExMANytVEkjqBtwI3AlvafrRMegzYsgyPBRbXFltS2npqb1orV/RAruqJiJGn5RPIkjYC/hP4jO1n6tNsG3CrNWq1ZkiaL2n+smXLBmu1EREjXkthIOm1VEFwvu0fl+bHy+Efyt8nSvtSYHxt8XGlraf2v2L7TNuTbE/q6OhopesREVHTytVEAs4G7rX9tdqkS4FVVwRNBy6ptR9WriraHXi6HE6aC0yRNKacOJ5S2iIiYoi0cs5gD+DjwAJJt5e2LwKzgIskHQE8DHykTLsc2A/oAl4ADgewvVzSScDNZb4TbS9voV8RETFATYeB7V8D6mHyX
g3mN3B0D+uaDcxuti8REdGafAM5IiISBhERkTCIiAgSBhERQcIgIiJIGEREBAmDiIggYRARESQMIiKChEFERJAwiIgIEgYREUHCICIiSBhERAQJg4iIIGEQEREkDCIigoRBRESQMIiICFajMJA0VdL9krokHTfc/YmIGElWizCQtA7wbWBfYCfgEEk7DW+vIiJGjtUiDIDJQJfthbb/BFwITBvmPkVEjBiyPdx9QNKBwFTbnyzjHwd2s/3pbvPNAGaU0R2B+5ssuQXwhyaXbcVw1R3O2tnmkVF7pNUdztqt1t3Gdkf3xlEtrHDI2T4TOLPV9Uiab3vSIHRpjag7nLWzzSOj9kirO5y121V3dTlMtBQYXxsfV9oiImIIrC5hcDMwUdK2ktYFDgYuHeY+RUSMGKvFYSLbKyV9GpgLrAPMtn13G0u2fKhpDas7nLWzzSOj9kirO5y121J3tTiBHBERw2t1OUwUERHDKGEQEREJg4iISBi0laQ3SNpL0kbd2qcOQe3Jkt5ehneS9FlJ+7W7boN+zBmGmu8q2ztlCGrtJmmTMry+pP8l6aeSTpW0aRvrHiNpfN9ztqX2upIOk7R3Gf+opG9JOlrSa9tc+28l/bOkb0r6mqSjVt3/0ZoRfQJZ0uG2z2nTuo8BjgbuBXYBjrV9SZl2q+1d21G3rH8m1e88jQLmAbsBVwPvA+baPqVNdbtfDizgvwFXAdj+YJvq3mR7chk+kup+/wkwBfip7VntqFvq3Q28pVwRdybwAnAxsFdp/+9tqvs08DzwIHAB8CPby9pRq0Ht86meWxsATwEbAT+m2mbZnt6muscA7weuA/YDbiv1PwR8yvY17ag7YtgesTfgkTauewGwURnuBOZTBQLAbW3ergVUl+huADwDbFLa1wfubGPdW4HvA3sC7yl/Hy3D72lj3dtqwzcDHWV4Q2BBm+/re+vb323a7e3cZqo9+ynA2cAy4BfAdGDjNm/zneXvKOBxYJ0yrjY/vxbUam0AXFOGJwzBa2pTYBZwH7AceJLqg94sYHQb674eOJ3qhzw3B04o98NFwFaDWWutP0wk6c4ebguALdtY+jW2nwOwvYjqjXFfSV+jetG000rbL9t+AXjQ9jOlHy8Cr7Sx7iTgFuBLwNOuPqm9aPta29e2se5rJI2RtDnVJ9NlALafB1a2sS7AXZIOL8N3SJoEIGkH4M9trGvbr9i+wvYRwNbAd4CpwMI21oXq/l4X2JjqTXnV4bD1gLYeJuK/vhu1HtUeCbYfGYK6FwErgD1tb2Z7c6q93hVlWrucC9wDLKbau3+Raq/oV8AZg1qpnWm6OtyoPrnsAmzT7dYJ/L6Nda8CdunWNgqYA7zc5m2+EdigDL+m1r4p3T69tqn+OOBHwLdo495Xrd4iqjfAh8rfrUr7RrTx03ntPj2X6nDNjVQBsBC4luowUbvq9vhJeNVj38ba/1S28WHgGOBK4LtUn1hntrHuscCdpdZ9wOGlvQO4rs3bfH8z0wbzce7+Whrs5/Zaf85A0tnAObZ/3WDaD2x/tE11x1F9Qn+swbQ9bP+mHXXL+tez/VKD9i2o3igXtKt2t3r7A3vY/uJQ1GtQfwNgS9sPDUGtTYBtqQJ/ie3H21xvB9u/a2eNPupvDWD795JGA3tTvVnd1Oa6OwNvBO6yfV87a3WrewXwS+C8VY+tpC2BTwDvs713m+reYfstZfhk21+uTVtg++8GrdbaHgYREa2SNAY4jur/rPxNaX6c6jfUZtle0aa6JwL/x+WQc619+1L3wEGrlTCIiGheO69KHMq6CYOIiBZIesT2hDW97mrxq6UREaszSXf2NIk2XpU4lHUTBhERfdsS2IfqUtI6AdevDXUTBhERffsZ1ZdIb+8+QdI1a0PdnDOIiIi1/xvIERHRt4RBREQkDCIiImEQ0S+q5PUSa608uSN6IKlT0v3lH/TcBZwt6S5JCyQdVOaRpK82aN9T0rWSLpG0UNIsSYdKuqnMt
12Z78Nl2TskXTd8WxsjXS4tjejdRKr/ETAWOAp4C7AFcHN5834n1a/idm+ntL2R6vfvFwJn2Z4s6VjgH4HPAP8G7GN7afnBt4hhkT2DiN49bPsG4F3ABa7+T8TjVD9R/fZe2gFutv1o+QXZB4ErSvsCqp9QB/gNcG75D23rDMkWRTSQMIjo3fMtLFv/GfFXauOvUPbKbR8FfBkYD9xS/kFPxJBLGET0z6+AgyStI6kDeDdwUy/t/SJpO9s32v43qn9dOSz/5D4i5wwi+ucnwDuAOwADn7f9mKSe2t/Qz/V+VdJEqt+aubKsJ2LI5ecoIiIih4kiIiJhEBERJAwiIoKEQUREkDCIiAgSBhERQcIgIiJIGEREBPD/AeQTqGVmkZmnAAAAAElFTkSuQmCC\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {
+            "needs_background": "light"
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Rent clearly increases with increase in number of rooms and floors. Similarly with area. Not really a clear correspondence with furnishing."
+      ],
+      "metadata": {
+        "id": "e_eqJSo-zsBZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# How does the mean House Association Tax vary with rooms, floor, area and furnishing?\n",
+        "HOA_COL = 'hoa (R$)'\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('rooms')[HOA_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of House Association Tax with number of rooms', ylabel='Mean HOA')\n",
+        "\n",
+        "# This chart used to plot 'rent amount (R$)' under an HOA title.\n",
+        "fig, ax = plt.subplots(figsize=(11, 15))\n",
+        "df.groupby('floor')[HOA_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of House Association Tax with number of floors', ylabel='Mean HOA')\n",
+        "\n",
+        "# Bin the area to the nearest 100 in a separate Series rather than overwriting\n",
+        "# (and later restoring) df['area'], so an interrupted run cannot leave df altered.\n",
+        "area_bin = np.round(df['area'], -2)\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby(area_bin)[HOA_COL].mean().plot.bar(ax=ax, color='green',\n",
+        "                                              edgecolor='yellow', label='Area')\n",
+        "ax.set(title='Variation of House Association Tax with area', ylabel='Mean HOA')\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('furniture')[HOA_COL].mean().plot.bar(ax=ax)\n",
+        "# Labels are applied after plotting so the custom x label is the one displayed.\n",
+        "ax.set(title='Variation of House Association Tax on whether apartment is furnished or not',\n",
+        "       xlabel='Furnished(1) or Not furnished(0)', ylabel='Mean HOA')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "CApEEQq5b3sB",
+        "outputId": "b8aa1575-5979-41c6-c5f0-2694c5e8baf2"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
"
+            ],
+            "image/png": "\n"
+          },
+          "metadata": {
+            "needs_background": "light"
+          }
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "House association tax seems to increase with number of floors (maybe more likely to have a stronger association with an increase in number of floors). No clear dependence on other factors. TODO: re-verify the floors trend against the `hoa (R$)` column."
+      ],
+      "metadata": {
+        "id": "Ewo9MzNhz-3s"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# How does the mean property tax vary with rooms, floor, area and furnishing?\n",
+        "TAX_COL = 'property tax (R$)'\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('rooms')[TAX_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of Property Tax with number of rooms', ylabel='Mean property tax')\n",
+        "\n",
+        "fig, ax = plt.subplots(figsize=(11, 15))\n",
+        "df.groupby('floor')[TAX_COL].mean().plot.bar(ax=ax)\n",
+        "ax.set(title='Variation of Property Tax with number of floors', ylabel='Mean property tax')\n",
+        "\n",
+        "# Bin the area to the nearest 100 in a separate Series rather than overwriting\n",
+        "# (and later restoring) df['area'], so an interrupted run cannot leave df altered.\n",
+        "area_bin = np.round(df['area'], -2)\n",
+        "fig, ax = plt.subplots(figsize=(16, 15))\n",
+        "df.groupby(area_bin)[TAX_COL].mean().plot.bar(ax=ax, color='green',\n",
+        "                                              edgecolor='yellow', label='Area')\n",
+        "ax.set(title='Variation of Property Tax with area', ylabel='Mean property tax')\n",
+        "\n",
+        "fig, ax = plt.subplots()\n",
+        "df.groupby('furniture')[TAX_COL].mean().plot.bar(ax=ax)\n",
+        "# Labels are applied after plotting so the custom x label is the one displayed.\n",
+        "ax.set(title='Variation of Property Tax on whether apartment is furnished or not',\n",
+        "       xlabel='Furnished(1) or Not furnished(0)', ylabel='Mean property tax')"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 1000
+        },
+        "id": "TBD3b1FHherQ",
+        "outputId": "0f15c337-5c8a-4127-f9bb-d21d229b633f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/plain": [
+              "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "cE7foIo2h3Kj" + }, + "execution_count": null, + "outputs": [] + } + ] +} \ No newline at end of file From e7078bf9a60ee4ff0fc74b405ecc143bf8d85a35 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Mon, 9 May 2022 00:02:00 +0530 Subject: [PATCH 3/8] Delete 201050_Tejas_part2.ipynb --- 201050_Tejas_part2.ipynb | 551 --------------------------------------- 1 file changed, 551 deletions(-) delete mode 100644 201050_Tejas_part2.ipynb diff --git a/201050_Tejas_part2.ipynb b/201050_Tejas_part2.ipynb deleted file mode 100644 index 29c06fa..0000000 --- a/201050_Tejas_part2.ipynb +++ /dev/null @@ -1,551 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "name": "201050_Tejas_part2.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - } - }, - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "JsM9yumHP9iu" - }, - "outputs": [], - "source": [ - "import pandas as pd\n", - "import numpy as np \n", - "import matplotlib.pyplot as plt\n", - "import seaborn as sns\n", - "\n", - "df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Stamatics /House_prediction.csv')\n", - "# print (df.head(2))\n", - "df.loc[df['floor']=='-', \"floor\"]=0\n", - "df.loc[df['furniture']=='furnished', \"furniture\"]=1\n", - "df.loc[df['furniture']=='not furnished', \"furniture\"]=0\n", - "df['floor'] = df['floor'].astype(int)\n", - "df['furniture']= df['furniture'].astype(int)\n", - "# print (df.head(2))\n" - ] - }, - { - "cell_type": "markdown", - "source": [ - "Try to find a dependence of House Association Tax, property tax, and fire insurance on other features.\n", - "Not a formula but 
something intuitive like if it is increasing/decreasing with no of rooms or being furnished or not.# New Section" - ], - "metadata": { - "id": "GE0gbTHVD7lt" - } - }, - { - "cell_type": "code", - "source": [ - "\n", - "plt.figure(1)\n", - "mean_rent= df.groupby(['city'])['rent amount (R$)'].mean().plot(color = 'green',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_hoa= df.groupby(['city'])['hoa (R$)'].mean().plot(color = 'red',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "\n", - "plt.figure(2)\n", - "mean_property_tax= df.groupby(['city'])['property tax (R$)'].mean().plot(color = 'blue',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "\n", - "plt.figure(3)\n", - "mean_fire_insurance= df.groupby(['city'])['fire insurance (R$)'].mean().plot(color = 'red',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "\n", - "\n", - "# print(plt.ylim())\n", - "plt.figure(4)\n", - "mean_rooms= df.groupby(['city'])['rooms'].mean().plot(color = 'red',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_floor= df.groupby(['city'])['floor'].mean().plot(color = 'brown',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_floor= df.groupby(['city'])['parking spaces'].mean().plot(color = 'green',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "# parking spaces\n", - "\n", - "plt.figure(5)\n", - "mean_area= df.groupby(['city'])['area'].mean().plot(color = 'blue',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "\n", - "plt.figure(6)\n", - "mean_total_rent= df.groupby(['city'])['total (R$)'].mean().plot(color = 'purple',\n", - " linestyle = 'solid', marker = 'o',legend=True)\n", - "\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "UCuFbuVdECfb", - "outputId": "19129155-31a9-434d-cae2-6fccbbaf7e04" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Porto Alegre has the least average area, rooms, parking spaces, and rent. Campinas has relatively higher area but lower property tax and rent. " - ], - "metadata": { - "id": "6nnM3uMAyrql" - } - }, - { - "cell_type": "code", - "source": [ - "" - ], - "metadata": { - "id": "sfRnLgF7yrYj" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "" - ], - "metadata": { - "id": "0ubGUhtizh9_" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "#dependancies \n", - "#maybe try to find a dependency of room rent on number of rooms? \n", - "#can plot the mean of rent \n", - "plt.figure()\n", - "plt.title('Variation of rent with number of rooms')\n", - "mean_rooms= df.groupby(['rooms'])['rent amount (R$)'].mean().plot.bar()\n", - "plt.figure(figsize=(11,15))\n", - "plt.title('Variation of rent with number of floors')\n", - "plt.xlim([0, 51])\n", - "# mean_rooms= df.groupby(['floor'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['floor'])['rent amount (R$)'].mean().plot.bar()\n", - "\n", - "area_col_rounded= np.round(df['area'], -2)\n", - "area_col=df['area']\n", - "df['area']= area_col_rounded\n", - "plt.figure(figsize=(16,15))\n", - "plt.title('Variation of rent with area')\n", - "plt.xlim([0, 1200])\n", - "# mean_rooms= df.groupby(['area'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['area'])['rent amount (R$)'].mean().plot.bar(color ='green',\n", - " edgecolor ='yellow', label ='Area')\n", - "\n", - "plt.figure()\n", - "plt.title('Variation of rent on whether apartment is furnished or not')\n", - "plt.xlabel('Furnished(1) or Not furnished(0)')\n", - "plt.ylabel('Mean 
rent')\n", - "mean_rooms= df.groupby(['furniture'])['rent amount (R$)'].mean().plot.bar()\n", - "\n", - "# print(len(mean_rooms))\n", - "# values = list(mean_rooms.values)\n", - "# names= list(mean_rooms.keys)\n", - "# print((values))\n", - "# names= list(mean_rooms.keys())\n", - "# x=[100*i for i in range(0,12)]\n", - "# x.append(1600)\n", - "# x.append(2000)\n", - "# x.append(12700)\n", - "# x.append(24600)\n", - "# x.append(46300)\n", - "# print(mean_rooms)\n", - "# print(x)\n", - "# print(len(values))\n", - "# plt.bar(x,values)\n", - "# print(type(mean_rooms))\n", - "# .plot(color = 'red',\n", - " # linestyle = 'solid', marker = 'o',legend=True)\n", - "# plt.hist(ar)\n", - "\n", - "df['area']= area_col\n", - "\n", - "\n", - "# plt.figure()\n", - "# plt.title('Variation of House association tax with number of rooms')\n", - "# mean_rooms= df.groupby(['rooms'])['hoa (R$)'].mean().plot(color = 'blue',\n", - "# linestyle = 'solid', marker = 'o',legend=True) #doesn't reeally seem to have an association \n", - "# plt.figure()\n", - "# plt.title('Variation of property tax with number of rooms')\n", - "# mean_rooms= df.groupby(['rooms'])['property tax (R$)'].mean().plot(color = 'blue',\n", - "# linestyle = 'solid', marker = 'o',legend=True) #doesn't reeally seem to have an association \n", - "\n" - ], - "metadata": { - "id": "WK_u47GLP2pq", - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "outputId": "669c5565-c01f-40b9-c63e-a11ec72b6887" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "Rent clearly increases with increase in number of rooms and floors. Similary with area. Not really a clear correspondence with furnishing." - ], - "metadata": { - "id": "e_eqJSo-zsBZ" - } - }, - { - "cell_type": "code", - "source": [ - "#dependancies \n", - "#maybe try to find a dependency of room rent on number of rooms? \n", - "#can plot the mean of rent \n", - "plt.figure()\n", - "plt.title('Variation of House Association Tax with number of rooms')\n", - "mean_rooms= df.groupby(['rooms'])['hoa (R$)'].mean().plot.bar()\n", - "plt.figure(figsize=(11,15))\n", - "plt.title('Variation of House Association Tax with number of floors')\n", - "plt.xlim([0, 51])\n", - "# mean_rooms= df.groupby(['floor'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['floor'])['rent amount (R$)'].mean().plot.bar()\n", - "\n", - "area_col_rounded= np.round(df['area'], -2)\n", - "area_col=df['area']\n", - "plt.figure()\n", - "df['area']= area_col_rounded\n", - "plt.title('Variation of House Association Tax with area')\n", - "plt.xlim([0, 1200])\n", - "# mean_rooms= df.groupby(['area'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['area'])['hoa (R$)'].mean().plot.bar(color ='green',\n", - " edgecolor ='yellow', label ='Area')\n", - "\n", - "plt.figure()\n", - "plt.title('Variation of House Association Tax on whether apartment is furnished or not')\n", - "plt.xlabel('Furnished(1) or Not furnished(0)')\n", - "plt.ylabel('Mean rent')\n", - "mean_rooms= df.groupby(['furniture'])['hoa (R$)'].mean().plot.bar()\n", - "\n", - "df['area']= area_col\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 1000 - }, - "id": "CApEEQq5b3sB", - "outputId": 
"b8aa1575-5979-41c6-c5f0-2694c5e8baf2" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAEsCAYAAAAvq2MDAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nO3de7wdVXn/8c83BAj3awiQBIIQiqASIAIWq3iD4A2sN1AhIDZaQaS1VVBbRJoWrRRFARslApaLFEVSSoGAXH5CgQSIQLhIyi1ECOF+M2rg+f2x1kkmO3ufs/ecOefsk/m+X6/9OrPXzDx7zZzZz8ysWXtGEYGZmdXDiKGugJmZDR4nfTOzGnHSNzOrESd9M7MacdI3M6sRJ30zsxpx0h8ikl6S9LqS835S0lVV16mNz91H0gO57gcN9uevbiT9UNI/9GP+0ttQt5O0TV6+NXqZJiTtMJj1Wh046bdB0hWSvtmk/EBJT0ga2WnMiFg/Ih5s47Mn5I17+WdExHkRsV+nn1mBbwI/yHX/ZeNISQ9LendD2eGSfj1oNWyTpH3zev3KUNUhIj4XESe1M62k6yR9pmH+trahTuRE2/N6TdLvC+8/WeVn9SYiHs3L92qu1yrLb+U46bfnHOBTktRQfihwXkQsazdQmR1EF9kWmD/UlajIVOAZ4LChrkg3yYl2/YhYH3gU+ECh7Lyhrt9gGObf0b5FhF99vIB1gOeBtxXKNgGWArsCewL/CzwHPA78AFirMG0ARwEPAA8VynbIw+8D7gBeABYC3yjM+2ie9qX8egtwOPDrwjR/DszJdZwD/Hlh3HXAScCNwIvAVcDmvSzrXwELSAlxFrB1Lv8/4DXg97keazeZ92Hg3Q1ljXV9fa7Tc6QdyAcb6vqZZvMCAk4Fnszr6S7gDXnc2sB38rpaDPwQWKeXZVwvr4uDgT8CkwvjRgH/ATyd6zgHGFOoz4N53oeAT+byEcDXgUdy/c4FNirEfCtwU463EDg8l58N/FNhe7oMWAI8m4fH5XHTgVdJ29tLpLMtWHkb2ih/7pJcj68DI4rrMa+jZ3PdD2hju1/+/6SXbZy0/T0FjM/vd82fs1OTmCcC38/DawIvA/9a+J4tBTYFJuTlG9nH8n+O9L16DjgdUItlKfMdfT8wL89zE/CmwvTHkb4TLwL3AB8a6jzVdj4b6goMlxfwI+DHhfefBebl4T2AvfMGOgG4Fzi2YYOanTfmdQplPV/YfYE3kpLHm0iJ66A8bvnGX4h3OCuS4ab5C3Zo/vxD8vvN8vjr8sa5Y/5SXQec3GIZ35m/vLuTEun3gRsK4x+mIak3zL/K+Ia6rknaoXwVWCt/3ovAnxXq2irp7w/cBmxM2gG8HtgqjzuVtIPaFNgA+C/gX3qp56H5i79Gnvb7Df/X/wLWzeP3ADYk7SheKNR1K2CXPPzpvFyvA9YHfgH8NI/bNi/jIXn5NwMm5XFnsyLpbwZ8OH/uBsB/Ar8s1GulddNkGzoXuDTPOwH4LXBkYT3+ibRDXwP4a+B3tEiQzf6f9L2NTwd+RdrG7gKO7mUbuysP/zlp27ylMO43zbb7Xpb/srxNbEPa4U1p8bkdfUeB3Ug78L3yOpua18faefqPAluTvrMfJ+28thrqPNVWLhvqCgyXF+lo7TlgVH5/I/A3LaY9FrikYYN6Z8M0y7+wTeb/LnBqHl5p489lh7MiGR4K3Now//+y4mjyOuDrhXGfB65o8blnAd8uvF8/J4sJ+f3yJNBi/odJR2LPFV6vFOr6F8AT5CPQXHYB+cym8YvdsJzvJCWyvRvmV/7CbV8oewv5aK1FPa8GvpuHD8nJYs38/tM0HNXl8vXy8nyYhrMI4Brg84X3f5bX20jg+OK20DDf2eSk32TcJODZwvuV1k1xGyIlpT8COxfGfRa4rrAeFxTGrZvn3bKPbb7l/5tVt/E1STvlu4Ara
H3E3XM0vxnpaPmrwGN5WzsROK3Zdt/L8r+18P4i4Lg2v8+9fkeBM4GTGua5H3h7i3jzgAPb+eyhfrlNv00R8WvSUfBBkrYnnS6eDyBpR0mX5Yu6LwD/DGzeEGJhq9iS9pJ0raQlkp4nnbI2zt/K1qTT+aJHgLGF908Uhl8hfcH6jBURL5GaOca2mL6ZgyJi454XaSdTjL8wIl7rpa5NRcSvSKfkpwNPSpohaUNgNCmJ3SbpOUnPkZLO6GZxJI0H3gH0tE9fSmrSeV9+/1PgSuBCSb+T9G1Ja0bEy6Qjus8Bj0v6b0k7FZar+D94hJTwxwDjSUezvZK0rqR/l/RI3oZuADburfdKweakpNtYh6bbQES8kgdbbQfN6tfrNh4RfyLtxN4AnBI5EzaKiN8Dc4G3A28DriftZPfJZde3W6esrW27xHd0W+BLPdtU3q7Gk/7XSDpM0rzCuDc0ideVnPQ7cy7pwt+ngCsjYnEuPxO4D5gYERuSjl4aL/o2/RJk55OaJ8ZHxEakNume+XubD9Jp+rYNZdsAi/qYr89YktYjHZGVidUq/nhJxe2uWNeXSQm8x5bFmSPitIjYA9iZ1Fz196Qd8e9JTS09O5uNIl2IbOZQ0nb/X5KeILXRjyKdvhMRf4qIEyNiZ1Lzw/vJF3sj4sqIeA+paec+UpNfz3IV/wfbAMtIzXQLge37XDPwJdIZwl55G3pbLm9nO3iKdGbRWIeq/m/QxzYuaSxwAvAT4BRJa/cS63rSmdtupGsm15Oa7/Yk7eya6et70K/6N/mMhcD04gFMRKwbERdI2pb0vz+a1Iy6MXB3k3hdyUm/M+cC7ya1jZ5TKN+A1N77Uj76++sO424APBMRSyXtCXyiMG4J6QJqq/7YlwM7SvqEpJGSPk5Kipd1WAdITS1HSJqUv7T/TGpvfbhErGZuIR2NfVnSmpL2BT4AXJjHzwP+Mh/17gAc2TOjpDfnM6Kei39LgdfyWcOPgFMlbZGnHStp/xZ1mEpqRphUeH0YeK+kzSS9Q9Ib8xH2C6Rk+pqkMbmL7nrAH0jNWD1nLBcAfyNpO0nrk9bbzyL16joPeLekj+X/z2aSJjWp1waknddzkjYlJdCixbTYBiJ1a7wImC5pg5yU/pZ0QboqLbfx3KvtbFLz4JGk6yW9dUW9nrQjvSci/khuuiE1yS1pMU/L5e9v/Vv4EfC5vM1J0nqS3idpA1JTX5C+m0g6gnSkPyw46XcgJ7+bSP/0WYVRf0dK1C+SNpafdRj688A3Jb0I/CPpC9zzma+QLpLdmE8l926o09Oko9EvkZpivgy8PyKe6rAORMTVwD8APyd9cbcn9XCpRP6CfwA4gHR0egZwWETclyc5ldQ2vZi0Uy12EdyQtG6fJTVdPA38ax73FdKF1JvzqfvVpKPmleR1ty1wekQ8UXjNyvMfQjq7uJiUIO4lJaifkr4rf0s6qn+G1BTRkzhm5mluIPWMWQp8IS/zo8B7Sf+fZ0g7tl2brJ7vktq7nwJuJjVRFX0P+IikZyWd1mT+L5B2hg+Seuqcn+tVld628WOALYB/yM06R5AOHv6iRaybSMvac1R/D2mdtTrKh76Xvz/1X0VEzCUd3P2AtM0tIF0bISLuAU4hXTtbTOqEcWOJOg0JtWh6MzOz1ZCP9M3MasRJ38ysRpz0zcxqxEnfzKxGnPTNzGqkq+8mt/nmm8eECROGuhpmZsPKbbfd9lRENP1Velcn/QkTJjB37tyhroaZ2bAiqfHWLMu5ecfMrEac9M3MasRJ38ysRpz0zcxqxEnfzKxG+kz6kkZJulXSbyTNl3RiLt9O0i2SFkj6maS1cvna+f2CPH5CIdbxufz+Xm59a2ZmA6SdI/0/kB4jtivp3uNT8i1qv0V6pN8OpFuP9tz7/EjSY952IN0q91sAknYm3aZ3F2AKcEabTwUyM7OK9Jn0I3kpv10zv4L05JuLc/k5wEF5+EBWPGDkYuBd+SELBwIXRsQfIuIh0
v2p96xkKczMrC1ttelLWkPSPNLT4WeTnvn5XH4yEKQHG/c8j3Ms+VmTefzzpEfuLS9vMk/xs6ZJmitp7pIlrR6iY2YDZemyLUlP/uv9laaz4aatX+Tmx7FNkrQxcAmwUx+zlBYRM4AZAJMnT/YTXswG2aiRi0lX7noXJyzueyLrOh313omI54BrgbcAG0vq2WmMY8VDmBeRnhpPHr8R6dF2y8ubzGNmZoOgnd47o/MRPpLWAd5DenbotcBH8mRTgUvz8Kz8njz+V/m5mbOAg3Pvnu2AicCtVS2ImZn1rZ3mna2Ac3JPmxHARRFxmaR7gAsl/RNwB3BWnv4s4KeSFpAeBH0wQETMl3QR6SHIy4CjcrORmZkNkj6TfkTcCezWpPxBmvS+iYilwEdbxJoOTO+8mmZmVgX/ItfMrEac9M3MasRJ38ysRpz0zcxqxEnfzKxGnPTNzGrESd/MrEac9M3MasRJ38ysRpz0zcxqxEnfzKxGnPTNzGrESd/MrEac9M3MasRJ38ysRpz0zcxqxEnfzKxGnPQrtnTZloD6fKXpzMwGVzvPyLUOjBq5GJ3Y93RxwuKBr4yZWQMf6ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY30mfQljZd0raR7JM2X9MVc/g1JiyTNy6/3FuY5XtICSfdL2r9QPiWXLZB03MAskpmZtdJOP/1lwJci4nZJGwC3SZqdx50aEd8pTixpZ+BgYBdga+BqSTvm0acD7wEeA+ZImhUR91SxIGZm1rc+k35EPA48nodflHQvMLaXWQ4ELoyIPwAPSVoA7JnHLYiIBwEkXZinddI3MxskHbXpS5oA7AbckouOlnSnpJmSNsllY4GFhdkey2Wtys3MbJC0nfQlrQ/8HDg2Il4AzgS2ByaRzgROqaJCkqZJmitp7pIlS6oIaWZmWVtJX9KapIR/XkT8AiAiFkfEqxHxGvAjVjThLALGF2Yfl8tala8kImZExOSImDx69OhOl8fMzHrRTu8dAWcB90bEvxXKtypM9iHg7jw8CzhY0tqStgMmArcCc4CJkraTtBbpYu+sahbDzMza0U7vnX2AQ4G7JM3LZV8FDpE0CQjgYeCzABExX9JFpAu0y4CjIuJVAElHA1cCawAzI2J+hctiZmZ9aKf3zq9JN4FvdHkv80wHpjcpv7y3+czMbGD5F7lmZjXipG9mViNO+mZmNeKkb2ZWI076ZmY14qRvZlYjTvpmZjXipG9mViNO+mZmNeKkb2ZWI076ZmY14qRvZlYjTvpmZhVZumxL0v0pe3+l6YZGO7dWNjOzNowauRid2Pd0ccLiga9MCz7SNzOrESd9M7MacdLvYsOhfdDMhhe36Xex4dA+aGbDi4/0zcxqxEnfzKxGnPTNzGrESd/MrEac9M3MasRJ38ysRpz0zcxqxEnfzKxG+kz6ksZLulbSPZLmS/piLt9U0mxJD+S/m+RySTpN0gJJd0ravRBrap7+AUlTB26xzMysmXaO9JcBX4qInYG9gaMk7QwcB1wTEROBa/J7gAOAifk1DTgT0k4COAHYC9gTOKFnR2FmZoOjz6QfEY9HxO15+EXgXmAscCBwTp7sHOCgPHwgcG4kNwMbS9oK2B+YHRHPRMSzwGxgSqVLY2ZmveqoTV/SBGA34BZgTEQ8nkc9AYzJw2OBhYXZHstlrcobP2OapLmS5i5ZsqST6pmZWR/aTvqS1gd+DhwbES8Ux0VEAFFFhSJiRkRMjojJo0ePriKkmZllbSV9SWuSEv55EfGLXLw4N9uQ/z6ZyxcB4wuzj8tlrcrNzGyQtNN7R8BZwL0R8W+FUbOAnh44U4FLC+WH5V48ewPP52agK4H9JG2SL+Dul8vMzGyQtHM//X2AQ4G7JM3LZV8FTgYuknQk8AjwsTzucuC9wALgFeAIgIh4RtJJwJw83Tcj4plKlsLMzNrSZ9KPiF+THtHUzLuaTB/AUS1izQRmdlJBs9XR0mVbMmpk7
w+/WbpsDKNGPjFINbK68JOzzIZAO09F8xPRbCD4NgxmZjXipG9mViNO+mZmNeKkb2ZWI076ZmY14qRvZlYjTvpmZjXipG9mViNO+mZmNeKkb2ZWI076ZmY14qRvZlYjTvpmZjXipG9mViNO+mZmNeKkb2ZWI076ZmY14qRvZlYjTvpmbVi6bEvSo6J7f6XpzLqXn5Fr1oZ2nmkLfq6tdT8f6ZuZ1YiTvplZjTjpW9dop93cbeZm/eM2/RpZumxLRo3su8156bIxjBr5xCDUaGXttJu7zdysf/pM+pJmAu8HnoyIN+SybwB/BSzJk301Ii7P444HjgReBY6JiCtz+RTge8AawI8j4uRqF8X64ouRZtZO887ZwJQm5adGxKT86kn4OwMHA7vkec6QtIakNYDTgQOAnYFD8rRmZjaI+jzSj4gbJE1oM96BwIUR8QfgIUkLgD3zuAUR8SCApAvztPd0XGMzMyutPxdyj5Z0p6SZkjbJZWOBhYVpHstlrcrNzGwQlU36ZwLbA5OAx4FTqqqQpGmS5kqau2TJkr5nMLPa8C+j+69U752IWH6lT9KPgMvy20XA+MKk43IZvZQ3xp4BzACYPHlylKmfma2e3Bmh/0od6UvaqvD2Q8DdeXgWcLCktSVtB0wEbgXmABMlbSdpLdLF3lnlq23dwEddZsNPO102LwD2BTaX9BhwArCvpElAAA8DnwWIiPmSLiJdoF0GHBURr+Y4RwNXkrpszoyI+ZUvjQ0qH3WZDT/t9N45pEnxWb1MPx2Y3qT8cuDyjmpnZmaV8m0YzMxqxEnfzKxGnPTNzGrESd/MrEac9M3MasRJ38ysRpz0zcxqxEnfzKxGnPTNzGrESd9smPM9kKwTfkau2TDneyBZJ3ykb2ZWI076ZmY14qRvZlYjTvpmZjXipG+rJfdoMWvOvXdsteQeLWbN+UjfzKxGnPTNzGrESd/MrEac9M3MasRJ38ysRpz0zcxqxEnfzKxGnPTNzGrESd/MrEb6TPqSZkp6UtLdhbJNJc2W9ED+u0kul6TTJC2QdKek3QvzTM3TPyBp6sAsjpmZ9aadI/2zgSkNZccB10TEROCa/B7gAGBifk0DzoS0kwBOAPYC9gRO6NlRmJnZ4Okz6UfEDcAzDcUHAufk4XOAgwrl50ZyM7CxpK2A/YHZEfFMRDwLzGbVHYmZmQ2wsm36YyLi8Tz8BDAmD48FFhameyyXtSpfhaRpkuZKmrtkyZKS1TMzs2b6fSE3IgKICurSE29GREyOiMmjR4+uKqyZmVE+6S/OzTbkv0/m8kXA+MJ043JZq3IzMxtEZZP+LKCnB85U4NJC+WG5F8/ewPO5GehKYD9Jm+QLuPvlMjMza2KgHgTU50NUJF0A7AtsLukxUi+ck4GLJB0JPAJ8LE9+OfBeYAHwCnAEQEQ8I+kkYE6e7psR0Xhx2MzMsoF6EFCfST8iDmkx6l1Npg3gqBZxZgIzO6qdmZlVyr/INTOrESd9M7MacdI3M6sRJ30zsxpx0jczqxEnfTOzGnHSNzOrESd9M7MacdI3M6sRJ30zsxpx0jczqxEnfTOzGnHSNzOrkdon/YG6Z7WZWTfq89bKq7uBume1mVk3qv2RvpnVVx3P9Gt/pG9m9VXHM30f6ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY046ZuZ1YiTvplZjfQr6Ut6WNJdkuZJmpvLNpU0W9ID+e8muVySTpO0QNKdknavYgHMzKx9VRzpvyMiJkXE5Pz+OOCaiJgIXJPfAxwATMyvacCZFXy2mZl1YCCadw4EzsnD5wAHFcrPjeRmYGNJWw3A55uZWQv9TfoBXCXpNknTctmYiHg8Dz8BjMnDY4GFhXkfy2VmZjZI+nvDtbdGxCJJWwCzJd1XHBkRISk6CZh3HtMAttlmm35Wz8zMivp1pB8Ri/LfJ4FLgD2BxT3NNvnvk3nyRcD4wuzjclljzBkRMTkiJo8ePbo/1TMzs
walk76k9SRt0DMM7AfcDcwCpubJpgKX5uFZwGG5F8/ewPOFZqC21fH+12ZmVelP884Y4BJJPXHOj4grJM0BLpJ0JPAI8LE8/eXAe4EFwCvAEWU+tI73vzYzq0rppB8RDwK7Nil/GnhXk/IAjir7eWZm1n/+Ra6ZWY046ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY046ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY046ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY046ZuZ1YiTvplZjTjpm5nViJO+mVmNOOmbmdWIk76ZWY046ZuZ1cigJ31JUyTdL2mBpOMG+/PNzOpsUJO+pDWA04EDgJ2BQyTtPJh1MDOrs8E+0t8TWBARD0bEH4ELgQMHuQ5mZrWliBi8D5M+AkyJiM/k94cCe0XE0YVppgHT8ts/A+5vI/TmwFMVVrWb43Vz3aqO18116/Z43Vy3quN1c92qjtdurG0jYnSzESMrqkhlImIGMKOTeSTNjYjJVdWhm+N1c92qjtfNdev2eN1ct6rjdXPdqo5XRazBbt5ZBIwvvB+Xy8zMbBAMdtKfA0yUtJ2ktYCDgVmDXAczs9oa1OadiFgm6WjgSmANYGZEzK8gdEfNQcM8XjfXrep43Vy3bo/XzXWrOl43163qeP2ONagXcs3MbGj5F7lmZjXipG9mViNO+mZmNdJ1/fTbIWkn0i95x+aiRcCsiLh36Go1sCRtChARz3RjvLrwerPhbthdyJX0FeAQ0i0cHsvF40jdPy+MiJOHqm49JI2hsEOKiMUl42wDfBt4F/AcIGBD4FfAcRHx8FDGK8StZHmrjlVVvOGw3upE0kbAFFY+6LsyIp4bulqBpJHAkcCHgK1z8SLgUuCsiPhTiZj7Awex8rJeGhFXlK7nMEz6vwV2aVyBud///IiYWDJuFclhEvBDYCNW/OhsHClRfD4ibu8w3v8C3wUujohXc9kawEeBYyNi7yGOV9nyDsC6q7JuXbveCjErTYRVn01XFU/SYcAJwFWsvO7eA5wYEed2GG8j4HhSYt0CCOBJUqI+uZP1J+kC0v/wHFY+IJ0KbBoRH++wbt8FdgTObYh3GPBARHyxk3jLRcSwegH3ke4r0Vi+LXB/iXiTgJuBe4Gr8+u+XLZ7h7Hmke4l1Fi+N/CbEnV7oMy4QYxX2fIOwLqrsm5du97yfIcB/wecCXw9v36Yyw4rEe8ruY7HAZ/Kr+N6yoYyHuleXBs3Kd8E+G2Jul2Z67dloWzLXHZVh7Fafn7JujWdh3Sm2fF2t3z+sjMO1Yt0NLMA+B/SDxVmAFfksikl4g1WclhQom4XAmcAe5FOF7fOw2cAF3VBvMqWdwDWXZV169r1luepOhH+FlizSflaZZJNlfFyrI2alG9Usm4tDxR7G9di+ptJZ38jCmUjgI8Dt5So253Am5uU7wnc1Wm8ntewu5AbEVdI2pG04MVTxTmRT707tF5E3NLkc26WtF6Hsf5H0n+TTscW5rLxpCOxMm1wh5HaCE+k4bQYOKsL4lW5vFWvuyrjdfN6g3Tk16yd9rU8rlOvkXZsjzSUb5XHDWW86cDtkq5ixbrbhtS8c1KJuj0i6cvAOZGbdHNT7+GF+O06GPgWcIakZ0nrfmPStZ+DS9TtcOBMSRuwonlnPPB8HlfKsGvTr5qk04Dtaf4FfCgKt31uM94BNG+7vLyaGneXKpe36nXXzf+LitfbVOAfSe3cqyTCiDi7w3hTgB8ADzTE2wE4Ojq8iDgA8TYB9mfV6xfPdhKnEOs40v9iTC5+grRD/1aU7KUlaTOAiHi6zPwNsbZk5euNT/QrXt2TPnRvcij0Bljl6j0legNUHa8uhsN6qzIR5ngjqO5seiDidWXPpxYXrC+NiPtKxqu8p5KTfoUKPQF6jhpK9wTI8aruDVB1vMqWdwDWXZV169r11hC3yq6zYtUkfWuUTBhVxWvo+
fQYqQmlvz2fKukWWXV38qp7Ki1X9mLA6vIibTwnk3rvPAM8nYdPpsnFsT5iteoJcBwd9gTI8w5Kb4B+xKtseQdg3VVZt65db3neYg+02fSjB1qOtx8rOkv8OL96OkvsN5TxqL7n03eBy0mJ+a35dXAu+16n2wnVXgCv9AL98vnLzri6vCpODpX1BMjzVN0boOp4VfZ8qHrddXOvjKqXtepEeC8woUn5dsC9Qxmvt+RJuZ5PlXWLpPru5JX2VOp5DbveOwNgQkR8q1gQ6ULJyZKO6DBWlT0BYEVvgNMl9ZzybwxcS7neAM16F2zUj3hVLm/V624gemVU9X+oelmr7IEG6fYsjzUpXwSsOcTxqu75tFTSmyNiTkP5m4GlHcY6FrhGUtML1iXqVnVPJcBt+uQVejXNv4DviYh3dxCrsSdAAIvpR08ASXvlOP8H7AS8Bbgn+nmRuad3AekU9lMlY1S2vFWvu4rrthaprfZ3wO2kC2v7APOBGdH5BfWeun2QFT1G+rOsVfdAOx74GKltuhjvYNLvEv6lgnjbkM6UysSrsufTHqTfWzTrFnlURNzWYbyqL1hXeoEenPQbk8MWubjnC3hypys3X70fB9wcES8VyqdE5xeGTgAOIB0pzSZtTNeR9vRXRsT0DuM1ezTlO0n9iImID3YSr0n8v2DFD0eu6nDevYD7IuJ5SeuS/ie7kxLrP0fE8x3GOwa4JCLKHDk3xjqP9D9Yh5QM1gMuId2LRxExtUTM7YG/JCWYV0ntt+dHxAsl61h1d9fXt4h3TzfEq1rV3SJzzB2AXUlNWJUsp6RNyxw8rqRsu1AdXsARHU5/DOnL+0vgYeDAwrjbS3z+XaTHSq4LvABsmMvXAe4sEe924D+AfYG357+P5+G3l4h3a2H4M8AdpN4GN9L5z+vnAyPz8AzgVNJFtROAX5So2/OkI/P/B/w1sHk/toM789+RpAOCNfJ7lfw/HEPqkfF14CbgdNKp/D3AvgO5TXfLC9is5HyVdbzI8d5U4TJd27OdAYeS2uR/nL/HXygRb5+8bPNJvwCfTTrjXwi8pXQ9h/qf380v4NEOp78LWD8PTwDmAl/M7+8o8fl3NBvO7+eViDcC+Ju88UzKZQ/2Y/0U6zcHGJ2H10GB2TgAAAbcSURBVKPDn4lTuKBHww6y5LLekZd3P9KvZpeQ2nynAht0GOtuUg+MTYAXSd00AUZR7sLmXYUdx7rAdXl4m5LbSaWJsI/P+p8S85xcSIZ7AA+Sfqj1CB0ebFB9z6dXc11OAnbu57q5uzA8h7xjy//jMgcHtwJvJDXpPgW8NZfvDtxYtp61v5Ar6c5Wo1jR3tquEZGbdCLiYUn7AhdL2pZyP4f/o6R1I+IV0pelp84bUeLn8BHxGnCqpP/MfxfTv2cqjMjNYyNIzRxL8ue8LGlZh7HulnRERPwE+I2kyRExN99yo8yPnyIv71XAVZLWJDWVHQJ8BxjdQayzSD0z1gC+BvynpAdJvWMuLFE3SOv9VWBtYP1c4UdzPTt1EamJ7h2RmyVyc8Xhedx+nQSTtHurUaTuoZ16X0Qcl4e/A3w8Iubk/+35wOQOYk2I6jpeQLq/zaGk7WKWpJeBC0j96h/uMNafJI2NiEXAS8DLufwPpG2nU2tGxF0AkpZExK8BIuJ2SeuUiJdUeRQwHF+k0/VJpG5VxdcE4HcdxvoV+Qi6UDaSdIHt1RJ1W7tF+ebAGytY9veR2svLzv8w6ajtofx3q1y+Ph0enZOOVs8mnb7eQkr0DwLXA7uWqFvLI2Zg3RLxtga2zsMbAx8B9iy53r5ISjY/Iu1Mjsjlo4EbSsSrugvoq3lbvrbJ6/cl4t3Liqa7mxvGdXpGeBXwZWBMoWwM6ej/6hJ1azyr3BP4N9JF3Zs6jLUvqSnmm6TbTtxEap6cDfxdibr9pjB8UMO4uzuN1/PyhVzpLOAnkfeiDePOj4hPdBBrHLAsmlwEkrRPRNzYv9oOD/lC7JiIeKjEv
BuS+m+PBB6L8g+g2TEifltm3sEgaRfg9aQvb6mf6BdiVdYDLc97N/ChiHigybiFETG+w3hfAD5AauZ5G6mZ7BekTgSvi4hDO4hVdS+vOyJityblAt4WEdd3GG8j4BOk++D3dFUtdRsGSR8k7cheaSjfHvhwRHy705jg3jtmw94A9ED7COkI/P4m4w6KiF+WqOO+pAvqPclwIanDw8yI6KgpsOIecp+IiPM7mafD+FtExJMDFb8MJ32z1VjhOslqES93xT2K1GQ0idRR4tI87vaIaHU9YsApPz+5we3AbqRc2+lZyNGkawtP5e6fM4E3kXoIHhkRd5eqp5O+2epL0qMRsc3qEk/SXaTuii9JmgBcDPw0Ir7Xqqmmj3gbkm5+N47UM+n8wrgzIuLzHcR6jVWfGTCO1MQTEfG6Dus2PyJ2ycP/Dfw4Ii7JZ03TI2KfTuL1qH3vHbPhruIeaN0er+oecj8hddn8OfBpSR8GPhERfyD1zurE35N+OPn3saLXzUMRsV2JesHK+XmLiLgEICKuU3qwSr+DmtnwNIb0U/3GtnuRepCsTvEWS5oUEfMA8hH/+0lNH28sUbftI+LDefiXkr4G/CpfRO1IRJwi6Wek7tALST13+tOUcrGks0m9gS6RdCzpl+DvBB4tG9RJ32z4u4z0o8B5jSMkXbeaxTsMWOnCb74QfJikfy9Rt7UljYj0mw4iYrqkRcAN5N9PdCIiHgM+mncas0k/zColIr6Wf3twAeneSmsD00gXwD9ZNq7b9M2stiR9m/RL3qsbyqcA34+IiR3G24l0D59bSL932D4i7i7Ts6hF/J920sW1aQwnfTOzVQ11z6KBukGim3fMzJo7kXSht11/BexR7FkkaUJEfI9yF5nHkW7C92PStQGR7vN/SolYyznpm1ltdXnPosmkW3Z8jdQjaJ6k33f6K+FGTvpmVmdd27Moqr9BIlQRwMxsGOvmnkU9MXp6BL2P9FyNfvGFXDOzGhkx1BUwM7PB46RvZlYjTvpmZjXipG/WIUllHn1n1hWc9M0aSPqlpNskzZc0LZe9JOkUSb8B3iLpU5JulTRP0r/37AgknSlpbp73xCFdELMmnPTNVvXpiNiD9OOYYyRtBqwH3BIRuwJPAx8H9omISaR7rPTcAOtrETGZ9LCLt0t60+BX36w199M3W9Uxkj6Uh8cDE0mJ/ee57F3AHsCc9ChV1gF6Hon3sXx2MBLYCtiZ9BB0s67gpG9WkH86/27S05leyT/QGQUsjYhXeyYjPYT8+IZ5twP+DnhzRDyb74U+arDqbtYON++YrWwj4Nmc8Hei+dOTrgE+ImkLSM9GzfdX2RB4GXhe0hjggMGqtFm7fKRvtrIrgM9Jupf0AOqbGyeIiHskfR24StII4E/AURFxs6Q7gPuAhcCNg1hvs7b4NgxmZjXi5h0zsxpx0jczqxEnfTOzGnHSNzOrESd9M7MacdI3M6sRJ30zsxpx0jczq5H/D5n805EAjiIlAAAAAElFTkSuQmCC\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "markdown", - "source": [ - "House association tax seems to increase with number of floors(maybe more likely to have a stronger association with an increase in number of floors). No clear dependence on other factors. " - ], - "metadata": { - "id": "Ewo9MzNhz-3s" - } - }, - { - "cell_type": "code", - "source": [ - "#dependancies \n", - "#maybe try to find a dependency of room rent on number of rooms? \n", - "#can plot the mean of rent \n", - "plt.figure()\n", - "plt.title('Variation of Property Tax with number of rooms')\n", - "mean_rooms= df.groupby(['rooms'])['property tax (R$)'].mean().plot.bar()\n", - "plt.figure(figsize=(11,15))\n", - "plt.title('Variation of Property Tax with number of floors')\n", - "plt.xlim([0, 51])\n", - "# mean_rooms= df.groupby(['floor'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['floor'])['property tax (R$)'].mean().plot.bar()\n", - "\n", - "area_col_rounded= np.round(df['area'], -2)\n", - "area_col=df['area']\n", - "df['area']= area_col_rounded\n", - "plt.figure(figsize=(16,15))\n", - "plt.title('Variation of Property Tax with area')\n", - "plt.xlim([0, 1200])\n", - "# mean_rooms= df.groupby(['area'])['rent amount (R$)'].mean().plot(color = 'red',\n", - "# linestyle = 'solid', marker = 'o',legend=True)\n", - "mean_rooms= df.groupby(['area'])['property tax (R$)'].mean().plot.bar(color ='green',\n", - " edgecolor ='yellow', label ='Area')\n", - "\n", - "plt.figure()\n", - "plt.title('Variation of Property Tax on whether apartment is furnished or not')\n", - "plt.xlabel('Furnished(1) or Not furnished(0)')\n", - "plt.ylabel('Mean rent')\n", - "mean_rooms= df.groupby(['furniture'])['property tax (R$)'].mean().plot.bar()\n", - "\n", - "df['area']= area_col\n" - ], - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - 
"height": 1000 - }, - "id": "TBD3b1FHherQ", - "outputId": "0f15c337-5c8a-4127-f9bb-d21d229b633f" - }, - "execution_count": null, - "outputs": [ - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - }, - { - "output_type": "display_data", - "data": { - "text/plain": [ - "
" - ], - "image/png": "\n" - }, - "metadata": { - "needs_background": "light" - } - } - ] - }, - { - "cell_type": "code", - "source": [ - "" - ], - "metadata": { - "id": "cE7foIo2h3Kj" - }, - "execution_count": null, - "outputs": [] - } - ] -} \ No newline at end of file From bb7c2d7c4d271eff3ee5e21918b0a1718d273499 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Mon, 9 May 2022 00:02:07 +0530 Subject: [PATCH 4/8] Delete 201050_Tejas_part1.ipynb --- 201050_Tejas_part1.ipynb | 975 --------------------------------------- 1 file changed, 975 deletions(-) delete mode 100644 201050_Tejas_part1.ipynb diff --git a/201050_Tejas_part1.ipynb b/201050_Tejas_part1.ipynb deleted file mode 100644 index 7053d2b..0000000 --- a/201050_Tejas_part1.ipynb +++ /dev/null @@ -1,975 +0,0 @@ -{ - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.8" - }, - "colab": { - "name": "201050_Tejas_part1", - "provenance": [], - "collapsed_sections": [] - } - }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "rvFM645NE-D2" - }, - "source": [ - "# Assignment 1 - Part 1\n", - "In this assignment, we will go through basic linear algebra, NumPy, and image manipulation using Python to get everyone on the same page.\n", - "\n", - "One of the aims of this assignment is to get you to start getting comfortable searching for useful library functions online. 
So in many of the functions you will implement, you will have to look up helper functions.\n", - "\n", - "\\\n", - "\n", - "## Instructions\n", - "* This notebook contain blocks of code, you are required to complete those blocks(where required)\n", - "* You are required to copy this notebook (\"copy to drive\" above) and complete the code.\n", - "* For Submission, You'll be required to submit a sharable link for your copy of this notebook. (DO NOT CHANGE THE NAME OF THE FUNCTIONS)\n", - "\n", - "\\\n", - "\\\n", - "Also, I'd like to acknowledge the Stanford CS131. This assignment is highly based on the assignments from that course." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "UhSVK4RoK9q5" - }, - "source": [ - "First Let's import some dependencies" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "cCKqyfhIE-EQ" - }, - "source": [ - "# Imports the print function from newer versions of python\n", - "from __future__ import print_function\n", - "\n", - "# Setup\n", - "\n", - "# The Random module implements pseudo-random number generators\n", - "import random \n", - "\n", - "# Numpy is the main package for scientific computing with Python. \n", - "# This will be one of our most used libraries in this project\n", - "import numpy as np\n", - "\n", - "# The Time library helps us time code runtimes\n", - "import time\n", - "\n", - "\n", - "# Some more magic so that the notebook will reload external python modules;\n", - "# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython\n", - "%load_ext autoreload\n", - "%autoreload 2\n", - "%reload_ext autoreload" - ], - "execution_count": 3, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "collapsed": true, - "id": "QLtp15rqE-EU" - }, - "source": [ - "# Part 1: Linear Algebra and NumPy Review\n", - "In this section, we will review linear algebra and learn how to use vectors and matrices in python using numpy." 
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E8HDYpc0E-EV" - }, - "source": [ - "## Part 1.1 (5 points)\n", - "First, let's test whether you can define the following matrices and vectors using numpy. Look up `np.array()` for help. In the next code block, define $M$ as a $(4, 3)$ matrix, $a$ as a $(1, 3)$ row vector and $b$ as a $(3, 1)$ column vector:\n", - "\n", - "$$M = \\begin{bmatrix}\n", - "1 & 2 & 3 \\\\\n", - "4 & 5 & 6 \\\\\n", - "7 & 8 & 9 \\\\\n", - "10 & 11 & 12 \\end{bmatrix}\n", - "$$\n", - "\n", - "$$a = \\begin{bmatrix}\n", - "1 & 1 & 0\n", - "\\end{bmatrix}\n", - "$$\n", - "\n", - "$$b = \\begin{bmatrix}\n", - "-1 \\\\ 2 \\\\ 5\n", - "\\end{bmatrix} \n", - "$$ " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "mETk2NCME-EX", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "d04f7792-4d53-46e8-968f-1e76faa87539" - }, - "source": [ - "### YOUR CODE HERE\n", - "M = np.array([[1,2,3], [4,5,6], [7,8,9], [10,11,12]]).reshape(4,3)\n", - "a = np.array([1,1,0]).reshape(1,3)\n", - "b=np.array([[-1], [2], [5]]).reshape(3,1)\n", - "### END CODE HERE\n", - "print(\"M = \\n\", M)\n", - "print(\"The size of M is: \", np.size(M))\n", - "print()\n", - "print(\"a = \", a)\n", - "print(\"The size of a is: \", np.size(a))\n", - "print()\n", - "print(\"b = \", b)\n", - "print(\"The size of b is: \", np.size(b))" - ], - "execution_count": 4, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "M = \n", - " [[ 1 2 3]\n", - " [ 4 5 6]\n", - " [ 7 8 9]\n", - " [10 11 12]]\n", - "The size of M is: 12\n", - "\n", - "a = [[1 1 0]]\n", - "The size of a is: 3\n", - "\n", - "b = [[-1]\n", - " [ 2]\n", - " [ 5]]\n", - "The size of b is: 3\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "rSta4NheE-EZ" - }, - "source": [ - "## Part 1.2 (5 points)\n", - "Implement the `dot_product()` method below and check that it returns the correct answer for $a^Tb$." 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "C5ZRjCE2MVOU" - }, - "source": [ - "def dot_product(a, b):\n", - " \"\"\"Implement dot product between the two vectors: a and b.\n", - " (optional): While you can solve this using for loops, we recommend\n", - " that you look up `np.dot()` online and use that instead.\n", - " Args:\n", - " a: numpy array of shape (x, n)\n", - " b: numpy array of shape (n, x)\n", - " Returns:\n", - " out: numpy array of shape (x, x) (scalar if x = 1)\n", - " \"\"\"\n", - " out = None\n", - " ### YOUR CODE HERE\n", - " out= np.array(np.dot(a,b))\n", - " pass\n", - " ### END YOUR CODE\n", - " return out" - ], - "execution_count": 5, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "pbLIS5vIE-Ea", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "1df8eea3-e803-4d83-8289-d909684726ee" - }, - "source": [ - "\n", - "# Now, let's test out this dot product. Your answer should be [[1]].\n", - "aDotB = dot_product(a, b)\n", - "print(aDotB)\n", - "\n", - "print(\"The size is: \", aDotB.shape)" - ], - "execution_count": 6, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[[1]]\n", - "The size is: (1, 1)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0rGfcRU1E-Eb" - }, - "source": [ - "## Part 1.3 (5 points)\n", - "Implement the `complicated_matrix_function()` method and use it to compute $(ab)Ma^T$\n", - "\n", - "IMPORTANT NOTE: The `complicated_matrix_function()` method expects all inputs to be two dimensional numpy arrays, as opposed to 1-D arrays. 
This is an important distinction, because 2-D arrays can be transposed, while 1-D arrays cannot.\n", - "\n", - "To transpose a 2-D array, you can use the syntax `array.T` " - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "dglQmbuLNOk6" - }, - "source": [ - "def complicated_matrix_function(M, a, b):\n", - " \"\"\"Implement (a * b) * (M * a.T).\n", - " (optional): Use the `dot_product(a, b)` function you wrote above\n", - " as a helper function.\n", - " Args:\n", - " M: numpy matrix of shape (x, n).\n", - " a: numpy array of shape (1, n).\n", - " b: numpy array of shape (n, 1).\n", - " Returns:\n", - " out: numpy matrix of shape (x, 1).\n", - " \"\"\"\n", - " out = None\n", - " ### YOUR CODE HERE\n", - " pass\n", - " c=dot_product(a,b)\n", - " d=dot_product(M,a.T)\n", - " out=c* d\n", - " ### END YOUR CODE\n", - " return out" - ], - "execution_count": 7, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "da_uQQLhE-Ec", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "0a0918fb-5563-4a10-a14d-e0c8857d4987" - }, - "source": [ - "# Your answer should be $[[3], [9], [15], [21]]$ of shape(4, 1).\n", - "ans = complicated_matrix_function(M, a, b)\n", - "print(ans)\n", - "print()\n", - "print(\"The size is: \", ans.shape)" - ], - "execution_count": 8, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[[ 3]\n", - " [ 9]\n", - " [15]\n", - " [21]]\n", - "\n", - "The size is: (4, 1)\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6CWXxSSOE-Ed", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "dd5a355a-20bc-4b96-99df-963ef65d4990" - }, - "source": [ - "M_2 = np.array(range(4)).reshape((2,2))\n", - "a_2 = np.array([[1,1]])\n", - "b_2 = np.array([[10, 10]]).T\n", - "print(M_2.shape)\n", - "print(a_2.shape)\n", - "print(b_2.shape)\n", - "print()\n", - "\n", - "# Your answer should be $[[20], [100]]$ of shape(2, 1).\n", - "ans = 
complicated_matrix_function(M_2, a_2, b_2)\n", - "print(ans)\n", - "print()\n", - "print(\"The size is: \", ans.shape)" - ], - "execution_count": 9, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "(2, 2)\n", - "(1, 2)\n", - "(2, 1)\n", - "\n", - "[[ 20]\n", - " [100]]\n", - "\n", - "The size is: (2, 1)\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4fHLxLl4E-Ee" - }, - "source": [ - "## Part 1.4 (10 points) [Optional/Bonus]\n", - "Implement `eigen_decomp()` and `get_eigen_values_and_vectors()` methods. In this method, perform eigenvalue decomposition on the following matrix and return the largest k eigen values and corresponding eigen vectors (k is specified in the method calls below).\n", - "\n", - "$$M = \\begin{bmatrix}\n", - "1 & 2 & 3 \\\\\n", - "4 & 5 & 6 \\\\\n", - "7 & 8 & 9 \\end{bmatrix}\n", - "$$\n" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "RfaCSoRMOIc8" - }, - "source": [ - "def eigen_decomp(M):\n", - " \"\"\"Implement eigenvalue decomposition.\n", - " (optional): You might find the `np.linalg.eig` function useful.\n", - " Args:\n", - " matrix: numpy matrix of shape (m, n)\n", - " Returns:\n", - " w: numpy array of shape (m, m) such that the column v[:,i] is the eigenvector corresponding to the eigenvalue w[i].\n", - " v: Matrix where every column is an eigenvector.\n", - " \"\"\"\n", - " w = None\n", - " v = None\n", - " ### YOUR CODE HERE\n", - " w,v= np.linalg.eig(M)\n", - " pass\n", - " ### END YOUR CODE\n", - " return w, v" - ], - "execution_count": 10, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "YB120rb4ONBH" - }, - "source": [ - "def get_eigen_values_and_vectors(M, k):\n", - " \"\"\"Return top k eigenvalues and eigenvectors of matrix M. 
By top k\n", - " here we mean the eigenvalues with the top ABSOLUTE values (lookup\n", - " np.argsort for a hint on how to do so.)\n", - " (optional): Use the `eigen_decomp(M)` function you wrote above\n", - " as a helper function\n", - " Args:\n", - " M: numpy matrix of shape (m, m).\n", - " k: number of eigen values and respective vectors to return.\n", - " Returns:\n", - " eigenvalues: list of length k containing the top k eigenvalues\n", - " eigenvectors: list of length k containing the top k eigenvectors\n", - " of shape (m,)\n", - " \"\"\"\n", - " eigenvalues = []\n", - " eigenvectors = []\n", - " ### YOUR CODE HERE\n", - " w,v=eigen_decomp(M)\n", - " # L= np.concatenate(w,v, axis=0)\n", - " #w stores the eigenvalues of M \n", - " t=np.argsort(w) #this returns the indices which will sort the array of eigenvalues.\n", - " count=0\n", - " i=len(t)-1\n", - " while count\n", - "\\begin{bmatrix}\n", - "7 & 8 & 9 \\\\\n", - "4 & 5 & 6 \\\\\n", - "1 & 2 & 3 \\end{bmatrix}\n", - "=>\n", - "\\begin{bmatrix}\n", - "7 & 8 & 9 \\\\\n", - "0 & 0.42 & 0.85 \\\\\n", - "0 & 0.85 & 1.71 \\end{bmatrix}\n", - "=>\n", - "\\begin{bmatrix}\n", - "7 & 8 & 9 \\\\\n", - "0 & 0.85 & 1.71 \\\\\n", - "0 & 0.45 & 0.85 \\end{bmatrix}\n", - "=>\n", - "\\begin{bmatrix}\n", - "7 & 8 & 9 \\\\\n", - "0 & 0.42 & 0.85 \\\\\n", - "0 & 0 & -0.05 \\end{bmatrix}\n", - "$$\n", - "Second algorithm:\n", - "1. Take a pivot from the last row.\n", - "2. For each row above the pivot, calculate the factor f which makes the kth entry zero, and for every element in the row subtract the fth multiple of the corresponding element in the kth row\n", - "3. Repeat the above step untill the matrix is in rref\n", - "$$\\begin{bmatrix}\n", - "7 & 8 & 0 \\\\\n", - "0 & 0.42 & 0 \\\\\n", - "0 & 0 & -0.05 \\end{bmatrix}\n", - "=>\n", - "\\begin{bmatrix}\n", - "7 & 0 & 0 \\\\\n", - "0 & 0.42 & 0 \\\\\n", - "0 & 0 & -0.05 \\end{bmatrix}\n", - "$$\n", - "\n", - "Steps for implementation:\n", - "1. 
Complete the function `swap_rows()`\n", - "2. Complete the function `apply_row()`\n", - "3. Complete `forward()` and `backward()`\n", - "4. Finally implement `rref()` using the `forward()` and `backward()`\n", - "\n", - "Note: You can skip this part if you want." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "qUFujiFAPYz6" - }, - "source": [ - "def swap_rows(M):\n", - " \"\"\"Implement row swapping to make the largest element in the pivotial column to be the first row.\n", - " Args:\n", - " matrix: numpy matrix of shape (m, n)\n", - " Returns:\n", - " Ms: matrix with swapped row\n", - " \"\"\"\n", - " out = None\n", - " ### YOUR CODE HERE\n", - " pass\n", - " Ms=M\n", - " flag=0\n", - " for i in range(0,np.shape(M)[1]):\n", - " if(np.max(abs(M[:,i]))!=0): #finds the first non zero column\n", - " c=(np.max(M[:,i])) #finds the maximum in it\n", - " col=i #col is the index of the pivotal column \n", - " flag=1\n", - " break\n", - " if(flag==1):\n", - " for i in range(np.shape(M)[0]):\n", - " if(M[i][col]==c):\n", - " t= Ms[0,:].copy() \n", - " Ms[0,:]=Ms[i,:]\n", - " Ms[i,:]=t #swaps the topmost row and the row with c. 
\n", - " ### END YOUR CODE\n", - " return Ms" - ], - "execution_count": 25, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "S8lbAUSWWpyO" - }, - "source": [ - "def apply_rows(M):\n", - " \"\"\"For each row below the pivot, calculate the factor f which makes the kth\n", - " entry zero, and for every element in the row subtract the fth multiple of the\n", - " corresponding element in the kth row.\n", - " Args:\n", - " matrix: numpy matrix of shape (m, n)\n", - " Returns:\n", - " Ms: matrix with all other entries of the pivotal col zero\n", - " \"\"\"\n", - " out = None\n", - " Ms= np.around(M.astype(float),3)\n", - " ### YOUR CODE HERE\n", - " for i in range(1,np.shape(Ms)[0]): \n", - " f= Ms[i,0]/Ms[0,0]\n", - " # print(f)\n", - " Ms[i,:]= Ms[i,:]- (Ms[i,0]*Ms[0,:])/Ms[0,0]\n", - " # print(Ms[i,:])\n", - " Ms= np.around(Ms,3)\n", - " pass\n", - " ### END YOUR CODE\n", - " return Ms" - ], - "execution_count": 70, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "GnE_-JLxPYz7" - }, - "source": [ - "def forward(M):\n", - " \"\"\"Return a partial ref using the algo described above\n", - " Args:\n", - " M: numpy matrix of shape (m, n).\n", - " Returns:\n", - " Ms: ref of M\n", - " \"\"\"\n", - " out = None\n", - " Ms= np.around(M.astype(float),3)\n", - " T=np.around(M.astype(float),3)\n", - " ### YOUR CODE HERE\n", - " pass\n", - " for i in range(0,np.shape(M)[0]-1):\n", - " Ms=T[i:np.shape(M)[0], i:np.shape(M)[1]] #at i=0, this is the whole matrix \n", - " # print(\"Ms before algo in the \", i, \"th iteration is : \", Ms)\n", - " Ms=backward(Ms)\n", - " # print(\"Ms in the \", i, \"th iteration is : \", Ms)\n", - " T[i:np.shape(M)[0], i:np.shape(M)[1]]=Ms\n", - " ### END YOUR CODE\n", - " return T" - ], - "execution_count": 71, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Wb7pPGP4XmJu" - }, - "source": [ - "def backward(M):\n", - " \"\"\"Return a rref using the algo described above\n", - " 
Args:\n", - " M: numpy matrix of shape (m, n).\n", - " Returns:\n", - " Ms: rref of M\n", - " \"\"\"\n", - " out = None\n", - " ### YOUR CODE HERE\n", - " pass\n", - " Ms=M\n", - " #This does bckward prop for lowest row/pivot\n", - " ### YOUR CODE HERE\n", - " r= np.shape(Ms)[0]-1\n", - " c= np.shape(Ms)[1]-1\n", - " for i in (range(0, r)):\n", - " f= Ms[i,c]/Ms[r,c]\n", - " Ms[i,:]= Ms[i,:]- (Ms[c,:]*f)\n", - " Ms= np.around(Ms,3)\n", - " pass\n", - " ### END YOUR CODE\n", - " return Ms\n", - " ### END YOUR CODE\n", - " return out" - ], - "execution_count": 82, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "XLq81xzXYR85" - }, - "source": [ - "def rref(M):\n", - " \"\"\"Return a rref using the algo descrbed above\n", - " Args:\n", - " M: numpy matrix of shape (m, n).\n", - " Returns:\n", - " Ms: ref of M\n", - " \"\"\"\n", - " out = None\n", - " ### YOUR CODE HERE\n", - " pass\n", - " Ms= forward(M)\n", - " ### YOUR CODE HERE\n", - " pass\n", - " r= np.shape(Ms)[0]\n", - " c= np.shape(Ms)[1]\n", - " for i in range(0,np.shape(M)[0]-1):\n", - " Ms=T[0:r-i, 0:c-i]\n", - " # print(\"Ms before algo in the \", i, \"th iteration is : \", Ms)\n", - " Ms= backward(Ms)\n", - " # print(\"Ms in the \", i, \"th iteration is : \", Ms)\n", - " T[0:r-i, 0:c-i]=Ms\n", - " # print(\"T in the\", i,\" th iteration is : \" ,T)\n", - " ### END YOUR CODE\n", - " return T" - ], - "execution_count": 90, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "Eiz6EbsWPYz8", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "2108d9b5-6017-4dc3-d523-583491bd0d4b" - }, - "source": [ - "# Let's define M.\n", - "M = np.array([[1,2,3],[4,5,6],[7,8,9]])\n", - "# Now let's calculate it's rref.\n", - "# Note that your code may be evaluated on other test cases as well\n", - "Mrref = rref(M)\n", - "print(Mrref)\n" - ], - "execution_count": 91, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "[[ 7. 0. 0. 
]\n", - " [ 0. 0.42 0. ]\n", - " [ 0. 0. -0.05]]\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "G46pyDzAE-Ef" - }, - "source": [ - "## Part 1.6 (10 points)\n", - "\n", - "To wrap up our overview of NumPy, let's implement something fun — a helper function for computing the Euclidean distance between two $n$-dimensional points!\n", - "\n", - "In the 2-dimensional case, computing the Euclidean distance reduces to solving the Pythagorean theorem $c = \\sqrt{a^2 + b^2}$. where, given two points $(x_1, y_1)$ and $(x_2, y_2)$, $a = x_1 - x_2$ and $b = y_1 - y_2$.\n", - "\n", - "\n", - "More generally, given two $n$-dimensional vectors, the Euclidean distance can be computed by:\n", - "\n", - "1. Performing an elementwise subtraction between the two vectors, to get $n$ difference values.\n", - "2. Squaring each of the $n$ difference values, and summing the squares.\n", - "4. Taking the square root of our sum.\n", - "\n", - "Alternatively, the Euclidean distance between length-$n$ vectors $u$ and $v$ can be written as:\n", - "\n", - "$\n", - "\\quad\\textbf{distance}(u, v) = \\sqrt{\\sum_{i=1}^n (u_i - v_i)^2}\n", - "$\n", - "\n", - "\n", - "Try implementing this function: first using native Python with a `for` loop in the `euclidean_distance_native()` function, then in NumPy **without any loops** in the `euclidean_distance_numpy()` function.\n", - "We've added some `assert` statements here to help you check functionality (if it prints nothing, then your implementation is correct)!" 
- ] - }, - { - "cell_type": "code", - "metadata": { - "id": "5xvHopPqO29C" - }, - "source": [ - "def euclidean_distance_native(u, v):\n", - " \"\"\"Computes the Euclidean distance between two vectors, represented as Python\n", - " lists.\n", - " Args:\n", - " u (List[float]): A vector, represented as a list of floats.\n", - " v (List[float]): A vector, represented as a list of floats.\n", - " Returns:\n", - " float: Euclidean distance between `u` and `v`.\n", - " \"\"\"\n", - " # First, run some checks:\n", - " assert isinstance(u, list)\n", - " assert isinstance(v, list)\n", - " assert len(u) == len(v)\n", - " sum=0\n", - " for i in range(0,len(u)):\n", - " sum+=((u[i]-v[i])**2)\n", - " # print(i)\n", - " # Compute the distance!\n", - " # Notes:\n", - " # 1) Try breaking this problem down: first, we want to get\n", - " # the difference between corresponding elements in our\n", - " # input arrays. Then, we want to square these differences.\n", - " # Finally, we want to sum the squares and square root the\n", - " # sum.\n", - " out = np.sqrt(sum)\n", - " ### YOUR CODE HERE\n", - " pass\n", - " ### END YOUR CODE\n", - " return out" - ], - "execution_count": 33, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "wvLuK8MuO3LH" - }, - "source": [ - "def euclidean_distance_numpy(u, v):\n", - " \"\"\"Computes the Euclidean distance between two vectors, represented as NumPy\n", - " arrays.\n", - " Args:\n", - " u (np.ndarray): A vector, represented as a NumPy array.\n", - " v (np.ndarray): A vector, represented as a NumPy array.\n", - " Returns:\n", - " float: Euclidean distance between `u` and `v`.\n", - " \"\"\"\n", - " # First, run some checks:\n", - " assert isinstance(u, np.ndarray)\n", - " assert isinstance(v, np.ndarray)\n", - " assert u.shape == v.shape\n", - " x= u-v \n", - " out=np.dot(x.T,x)\n", - " out= np.sqrt(out) \n", - " # Compute the distance!\n", - " # Note:\n", - " # 1) You shouldn't need any loops\n", - " # 2) Some functions you 
can Google that might be useful:\n", - " # np.sqrt(), np.sum()\n", - " # 3) Try breaking this problem down: first, we want to get\n", - " # the difference between corresponding elements in our\n", - " # input arrays. Then, we want to square these differences.\n", - " # Finally, we want to sum the squares and square root the\n", - " # sum.\n", - "\n", - " ### YOUR CODE HERE\n", - " return out \n", - " pass\n", - " ### END YOUR CODE" - ], - "execution_count": 34, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "wu9MimVJE-Eg" - }, - "source": [ - "## Testing native Python function\n", - "assert euclidean_distance_native([7.0], [6.0]) == 1.0\n", - "assert euclidean_distance_native([7.0, 0.0], [3.0, 3.0]) == 5.0\n", - "assert euclidean_distance_native([7.0, 0.0, 0.0], [3.0, 0.0, 3.0]) == 5.0" - ], - "execution_count": 35, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "kJDk88g1E-Ej" - }, - "source": [ - "## Testing NumPy function\n", - "assert euclidean_distance_numpy(\n", - " np.array([7.0]),\n", - " np.array([6.0])\n", - ") == 1.0\n", - "assert euclidean_distance_numpy(\n", - " np.array([7.0, 0.0]),\n", - " np.array([3.0, 3.0])\n", - ") == 5.0\n", - "assert euclidean_distance_numpy(\n", - " np.array([7.0, 0.0, 0.0]),\n", - " np.array([3.0, 0.0, 3.0])\n", - ") == 5.0" - ], - "execution_count": 36, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "n = 1000\n", - "\n", - "# Create some length-n lists and/or n-dimensional arrays\n", - "a = [0.0] * n\n", - "b = [10.0] * n\n", - "a_array = np.array(a)\n", - "b_array = np.array(b)\n", - "\n", - "# Compute runtime for native implementation\n", - "start_time = time.time()\n", - "for i in range(10000):\n", - " euclidean_distance_native(a, b)\n", - "print(\"Native:\", (time.time() - start_time), \"seconds\")\n", - "\n", - "# Compute runtime for numpy implementation\n", - "# Start by grabbing the current time in seconds\n", - "start_time = time.time()\n", - "for i in 
range(10000):\n", - " euclidean_distance_numpy(a_array, b_array)\n", - "print(\"NumPy:\", (time.time() - start_time), \"seconds\")" - ], - "metadata": { - "id": "E7Z38WwHhoNl", - "colab": { - "base_uri": "https://localhost:8080/" - }, - "outputId": "5aa04300-d529-4ac6-8f15-cc57bbc86980" - }, - "execution_count": 37, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Native: 1.5537786483764648 seconds\n", - "NumPy: 0.06700706481933594 seconds\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Mjik4mQXE-Ek" - }, - "source": [ - "Next, let's take a look at how these two implementations compare in terms of runtime:" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t4e6MfhHE-Em" - }, - "source": [ - "As you can see, doing vectorized calculations (i.e. no for loops) with NumPy results in significantly faster computations! " - ] - }, - { - "cell_type": "markdown", - "source": [ - "Congrats You've come to the end of this notebook. If you solved everything above, impressive. If not, you might need to read/think a bit more. You can always ask doubts. Also, Note that you should submit it even if you cannot solve everything. We might evaluate these using a script later." 
- ], - "metadata": { - "id": "XvFE0Q5bhx6-" - } - } - ] -} \ No newline at end of file From 982d011213c438861dfd548ac8cc98188765fbeb Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Fri, 27 May 2022 10:04:28 +0530 Subject: [PATCH 5/8] assignment 2 --- Assignment/Assignment_2/201050_Tejas_A2.ipynb | 926 ++++++++++++++++++ 1 file changed, 926 insertions(+) create mode 100644 Assignment/Assignment_2/201050_Tejas_A2.ipynb diff --git a/Assignment/Assignment_2/201050_Tejas_A2.ipynb b/Assignment/Assignment_2/201050_Tejas_A2.ipynb new file mode 100644 index 0000000..3eaf44c --- /dev/null +++ b/Assignment/Assignment_2/201050_Tejas_A2.ipynb @@ -0,0 +1,926 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "rvFM645NE-D2" + }, + "source": [ + "# Assignment 2\n", + "In this assignment, we will go through Perceptron, Linear Classifiers, Loss Functions, Gradient Descent and Back Propagation.\n", + "\n", + "\n", + "PS. this one is not from Stanford's course.\n", + "\n", + "\n", + "\n", + "\\\n", + "\n", + "## Instructions\n", + "* This notebook contain blocks of code, you are required to complete those blocks(where required)\n", + "* You are required to copy this notebook (\"copy to drive\" above) and complete the code.(DO NOT CHANGE THE NAME OF THE FUNCTIONS)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "QLtp15rqE-EU" + }, + "source": [ + "# Part 1: Perceptron\n", + "In this section, we will see how to implement a perceptron. Goal would be for you to delve into the mathematics.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Zao4e-DphaGA" + }, + "source": [ + "## Intro\n", + "What's a perceptron? It's an algorithm modelled on biological computational model to classify things into binary classes. It's a supervides learning algorithm, meaning that you need to provide labelled data containing features and the actual classifications. 
A perceptron would take these features as input and spit out a binary value (0 or 1). While training the model with training data, we try to minimise the error and learn the parameters involved." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wDTUoAd6ixm-" + }, + "source": [ + "**How does it work?**\\\n", + "A perceptron is modelled on a biological neuron. A neuron has input dendrites and the output is carried by axons. Similarly, a perceptron takes inputs called \"features\". After processing, a perceptron gives output. For computation, it has a \"weight\" vector which is multipled with feature vector. An activation function is added to introduce some non linearities and the output is given out.\\\n", + "It can be represented as: $$ f=\\sum_{i=1}^{m} w_ix_i +b$$\n", + "\n", + "Let's implement this simple function to give an output.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "id": "iXezofBIgzId" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "class perceptron():\n", + " def __init__(self,num_input_features=8):\n", + " self.weights = np.random.randn(num_input_features) # 1-D array of shape (num_input_features,)\n", + " self.bias = np.random.random() # scalar float drawn from [0, 1)\n", + "\n", + " def activation(self,x):\n", + " '''\n", + " Heaviside step activation: returns 1 when x >= 0, otherwise 0.\n", + " '''\n", + " if(x>=0):\n", + " return 1\n", + " else:\n", + " return 0\n", + " pass\n", + "\n", + " def forward(self,x: np.ndarray):\n", + " '''\n", + "\n", + " Computes f = w . x + b with the randomly initialised weights and bias,\n", + " which are stored in `self.weights` and `self.bias`,\n", + " and passes f through `self.activation` before returning.\n", + " \n", + " x : input features, one value per weight\n", + " return : a binary value (0 or 1) as the output of the perceptron\n", + " '''\n", + " # YOUR CODE HERE\n", + " f= np.dot((self.weights).T, x) + self.bias\n", + " t= self.activation(f)\n", + " return t\n", + " pass\n", + " # YOUR CODE HERE" + ] + }, + { + "cell_type":
"code", + "execution_count": 3, + "metadata": { + "id": "oSKwDFAyocVo" + }, + "outputs": [], + "source": [ + "np.random.seed(0)\n", + "perc = perceptron(8) \n", + "# print(perc.forward(np.arange(8)))\n", + "assert perc.forward(np.arange(8))==1 #check what this does " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "NWTTg1e9r7uM" + }, + "source": [ + "# Part 2: Linear Classifier\n", + "In this section, we will see how to implement a linear Classifier.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DYDO4GcHr7uM" + }, + "source": [ + "## Intro\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-HFvjH06r7uN" + }, + "source": [ + "**How does it work?**\n", + "\n", + "Linear Classifier uses the following function: $$Y = WX+b$$ Where, $W$ is a 2d array of weights with shape (#features, #classes).\n", + "\n", + "\n", + "Let's implement this classifier.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "id": "9A13CEkGr7uN" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "# Multi-class (not binary) classifier: num_classes defaults to 5, so forward() returns one raw score per class (not probabilities); bias has shape (num_classes,)\n", + "class LinearClassifier():\n", + " def __init__(self,num_input_features=32,num_classes=5):\n", + " self.weights = np.random.randn(num_input_features,num_classes) # shape (num_input_features, num_classes), (32,5) by default; a single-output classifier would use (32,1)\n", + " self.bias = np.random.rand(num_classes) # shape (num_classes,), i.e. (5,) by default; broadcasts against the (1,5) product in forward()\n", + "\n", + " def forward(self,x: np.ndarray):\n", + " '''\n", + " x: input features, e.g. shape (1,32) for one example with the default sizes\n", + " computes x . W + b with the randomly initialised weights and bias,\n", + " which are stored in `self.weights` and `self.bias`\n", + " return one score per class for each row of x, e.g. shape (1,5)\n", + " '''\n", + " # YOUR CODE HERE \n", + " Y= np.dot(x,self.weights)+self.bias # x is of shape (1,32) -> one
value for each feature, so Y is (1,5)\n", + " # Y= np.dot( self.weights,x)+self.bias\n", + " return Y\n", + " pass\n", + " # YOUR CODE HERE" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "zgzPxyTsr7uN", + "outputId": "c0868437-90ad-4f22-e8be-856673a53b24" + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "array([[ 1.30208164, 5.58136003, 0.87793013, -4.7332119 , 4.81172123]])" + ] + }, + "metadata": {}, + "execution_count": 5 + } + ], + "source": [ + "\n", + "np.random.seed(0)\n", + "lc = LinearClassifier()\n", + "# print(lc)\n", + "(lc.forward(np.random.rand(1,32))) # passing a feature vector randomly of size (1,32)\n", + "# bias = np.random.rand(5) \n", + "# print(np.shape(bias))\n", + "# Should be close to:\n", + "# array([[ 1.30208164, 5.58136003, 0.87793013, -4.7332119 , 4.81172123]])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true, + "id": "ZVgOVzJetuqo" + }, + "source": [ + "# Part 3: Loss Functions, Gradient descent and Backpropagation\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4pXryjpctuqy" + }, + "source": [ + "## Intro\n", + "\n", + "Loss Functions tells how \"off\" the output od our model is. Based upon the application, you can use several different loss functions. Formally, A loss function is a function $L:(z,y)\\in\\mathbb{R}\\times Y\\longmapsto L(z,y)\\in\\mathbb{R}$ that takes as inputs the predicted value $z$ corresponding to the real data value yy and outputs how different they are We'll implement L1 loss, L2 loss, Logistic loss, hinge loss and cross entropy loss functions."
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "QGRb8BHotuqy" + }, + "source": [ + "### **L1 loss**\n", + "L1 loss is the linear loss function $L = \\dfrac{1}{2}(y−z) $\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "YxVh6IL2tuqz" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "def L1Loss(z,y):\n", + " '''\n", + " y : True output.\n", + " z : Predicted output.\n", + " return : L = (y - z) / 2, the signed linear loss defined above\n", + " '''\n", + " L= 1/2*(y-z)\n", + " return L\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2xy8ZS84cKtQ" + }, + "source": [ + "### **L2 loss**\n", + "L2 loss is the quadratic loss function or the least square error function $L = \\dfrac{1}{2}(y−z)^2 $\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "id": "JThp5P-KcKtS" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "def L2Loss(z,y):\n", + " '''\n", + " y : True output. \n", + " z : Predicted output. \n", + " return : L = (y - z)**2 / 2\n", + " '''\n", + " L= 1/2*(y-z)**2\n", + " return L" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Z2JNLnWYcLSC" + }, + "source": [ + "### **Hinge Loss**\n", + "Hinge loss is: $ L = max( 0, 1 - yz ) $" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "gQ1YM4J-cLSC" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "def hingeLoss(z,y):\n", + " '''\n", + " y : True output. \n", + " z : Predicted output. \n", + " return : L = max(0, 1 - y*z); uses the builtin max, so y and z must be scalars\n", + " '''\n", + " t= max(0, 1-y*z)\n", + " return t\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "m15_MjradMNY" + }, + "source": [ + "### **Cross Entropy Loss**\n", + "Another very famous loss function is Cross Entropy loss: $ L = −[ylog(z)+(1−y)log(1−z)] $."
+ ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "snJLqhszdMNY" + }, + "outputs": [], + "source": [ + "import numpy as np # Binary cross-entropy: the loss grows without bound when y is 1 and z is close to 0, or vice versa\n", + "def CELoss(z,y):\n", + " '''\n", + " y : True output; 0 or 1 for binary classification.\n", + " z : Predicted output; must lie strictly between 0 and 1, otherwise np.log yields -inf or nan.\n", + " return : L = -[y*log(z) + (1-y)*log(1-z)]\n", + " '''\n", + " L= -1*(y*np.log(z)+(1-y)*np.log(1-z))\n", + " return L\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OsRPsfzxyEVL" + }, + "source": [ + "### **0-1 Loss**\n", + "Loss Function used by perceptron is: $ \\begin{cases} \n", + " 0=z-y & z=y \\\\\n", + " 1=\\dfrac{z-y}{z-y} & z\\neq y\n", + " \\end{cases} $." + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "id": "5sA7GxLHyEVM" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "def zeroOneLoss(z,y):\n", + " '''\n", + " y : True output. \n", + " z : Predicted output. \n", + " return : L, 0 if z == y else 1\n", + " '''\n", + " # loss is 0 only when the prediction equals the true output exactly, otherwise 1\n", + " if(z==y):\n", + " return 0\n", + " else:\n", + " return 1\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "CWhbibHcgRR8" + }, + "source": [ + "## Cost Function\n", + "The cost function $J$ is commonly used to assess the performance of a model, and is defined with the loss function $L$ as follows:\n", + "$$\\boxed{J(\\theta)=\\sum_{i=1}^mL(h_\\theta(x^{(i)}), y^{(i)})}$$\n", + "where $h_\\theta$ is the hypothesis function i.e. the function used to predict the output."
# %%
# Maps the name accepted by cost() to the loss function that implements it.
lossFunctions = {
    "l1" : L1Loss,
    "l2" : L2Loss,
    "hinge" : hingeLoss,
    "cross-entropy" : CELoss,
    "0-1" : zeroOneLoss
}


def cost(Z : np.ndarray, Y : np.ndarray, loss : str):
    '''
    Total cost J: the sum of the chosen loss over all examples.

    Z : a numpy array of predictions.
    Y : a numpy array of true values.
    loss : a key of lossFunctions ("l1", "l2", "hinge", "cross-entropy", "0-1").
    return : J, the sum of loss(Z[i], Y[i]) over i in range(len(Y)).
    raises : KeyError if `loss` is not a key of lossFunctions.
    '''
    loss_func = lossFunctions[loss]
    # The accumulator must start at 0: starting from None makes the first
    # `+=` raise a TypeError.
    J = 0
    for i in range(len(Y)):
        J += loss_func(Z[i], Y[i])
    return J


# %% [markdown]
# ## Gradient Descent and Back Propagation
# Gradient Descent is an algorithm that minimizes the loss function by calculating its gradient. By noting $\alpha\in\mathbb{R}$ the learning rate, the update rule for gradient descent is expressed with the learning rate $\alpha$ and the cost function $J$ as follows:
#
# $$\boxed{ W \longleftarrow W -\alpha\nabla J( W )}$$

# %% [markdown]
# But we need to find the partial derivative of Loss function wrt every parameter to know what is the slight change that we need to apply to our parameters. This becomes particularly hard if we have more than 1 layer in our algorithm. Here's where **Back Propagation** comes in. It's a way to find gradients wrt every parameter using the chain rule. Backpropagation is a method to update the weights in the neural network by taking into account the actual output and the desired output.
The derivative with respect to weight $w$ is computed using the chain rule and is of the following form:\n", + "\n", + "$$\\boxed{\\frac{\\partial L(z,y)}{\\partial w}=\\frac{\\partial L(z,y)}{\\partial a}\\times\\frac{\\partial a}{\\partial z}\\times\\frac{\\partial z}{\\partial w}}$$\n", + "\n", + " \n", + "As a result, the weight is updated as follows:\n", + "\n", + "$$\\boxed{w\\longleftarrow w-\\alpha\\frac{\\partial L(z,y)}{\\partial w}}$$\n", + "\n", + "So, in a neural network, weights are updated as follows:\n", + "\n", + "* Step 1: Take a batch of training data.\n", + "* Step 2: Perform forward propagation to obtain the corresponding loss.\n", + "* Step 3: Backpropagate the loss to get the gradients.\n", + "* Step 4: Use the gradients to update the weights of the network.\n", + "\n", + "\n", + "Bonus Problem\n", + " \n", + "Now, assuming that you know Back Propagation (read a bit about it, if you don't), we'll now implement an image classification model on CIFAR-10." 
# %%
import tensorflow as tf

# Display the version
print(tf.__version__)

# other imports
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import Input, Conv2D, Dense, Flatten, Dropout
from tensorflow.keras.layers import GlobalMaxPooling2D, MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.models import Model

# %%
# Load in the data
cifar10 = tf.keras.datasets.cifar10
# The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes,
# with 6000 images per class. There are 50000 training images and 10000 test images.

# Distribute it to train and test set
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# x is (number of images, 32, 32, 3 colour channels); y holds one class label
# per image with shape (number of images, 1).
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# Reduce pixel values from [0, 255] to [0, 1]
x_train, x_test = x_train / 255.0, x_test / 255.0

# flatten the label values from shape (n, 1) to (n,)
y_train, y_test = y_train.flatten(), y_test.flatten()

# %%
# Visualize the data by plotting the first nine training images with their labels.
key = {0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat', 4: 'deer',
       5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'}
f, axarr = plt.subplots(3, 3)
for i in range(3):
    for j in range(3):
        idx = 3 * i + j
        axarr[i][j].imshow(x_train[idx])
        axarr[i][j].set_title(key[y_train[idx]])
# Lay the grid out only after images and titles exist, so that the spacing
# takes the titles into account.
f.tight_layout()

# %%
# number of classes: set() keeps only the distinct label values
K = len(set(y_train))
print("number of classes:", K)

# Build the model using the functional API.
# input layer
visible = Input(shape=x_train[0].shape)  # (32, 32, 3)

# hidden layers: two convolution + max-pooling stages, then a dense layer
conv1 = Conv2D(32, (3, 3), activation='relu')(visible)
pool1 = MaxPooling2D(pool_size=(2, 2))(conv1)
conv2 = Conv2D(64, (3, 3), activation='relu')(pool1)
pool2 = MaxPooling2D(pool_size=(2, 2))(conv2)
# NOTE(review): a third stage (conv3/pool3) used to be created here from
# pool1/conv2 and was never connected to the output, so it was dead code and
# has been dropped. If a third stage is wanted it must take pool2 as its
# input and feed Flatten -- that changes the architecture and the results.
flat = Flatten()(pool2)
hidden1 = Dense(10, activation='relu')(flat)

# output layer: one softmax unit per class
output = Dense(K, activation='softmax')(hidden1)
model = Model(inputs=visible, outputs=output)

# model description
model.summary()

# %%
# Compile
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
val_accuracy: 0.4593\n", + "Epoch 2/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 1.4196 - accuracy: 0.4759 - val_loss: 1.3474 - val_accuracy: 0.5011\n", + "Epoch 3/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 1.3043 - accuracy: 0.5190 - val_loss: 1.2650 - val_accuracy: 0.5349\n", + "Epoch 4/50\n", + "1563/1563 [==============================] - 63s 41ms/step - loss: 1.2301 - accuracy: 0.5452 - val_loss: 1.2137 - val_accuracy: 0.5570\n", + "Epoch 5/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 1.1710 - accuracy: 0.5705 - val_loss: 1.1862 - val_accuracy: 0.5702\n", + "Epoch 6/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 1.1214 - accuracy: 0.5873 - val_loss: 1.1861 - val_accuracy: 0.5733\n", + "Epoch 7/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 1.0811 - accuracy: 0.6043 - val_loss: 1.1200 - val_accuracy: 0.5905\n", + "Epoch 8/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 1.0517 - accuracy: 0.6149 - val_loss: 1.0898 - val_accuracy: 0.6027\n", + "Epoch 9/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 1.0182 - accuracy: 0.6263 - val_loss: 1.0966 - val_accuracy: 0.6031\n", + "Epoch 10/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.9975 - accuracy: 0.6360 - val_loss: 1.0835 - val_accuracy: 0.6082\n", + "Epoch 11/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.9774 - accuracy: 0.6430 - val_loss: 1.0664 - val_accuracy: 0.6211\n", + "Epoch 12/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.9573 - accuracy: 0.6501 - val_loss: 1.0489 - val_accuracy: 0.6255\n", + "Epoch 13/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.9350 - accuracy: 0.6576 - val_loss: 1.0746 - val_accuracy: 0.6233\n", + "Epoch 14/50\n", + "1563/1563 
[==============================] - 62s 40ms/step - loss: 0.9212 - accuracy: 0.6622 - val_loss: 1.0336 - val_accuracy: 0.6384\n", + "Epoch 15/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.9069 - accuracy: 0.6675 - val_loss: 1.0467 - val_accuracy: 0.6305\n", + "Epoch 16/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.8918 - accuracy: 0.6744 - val_loss: 1.0304 - val_accuracy: 0.6312\n", + "Epoch 17/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.8801 - accuracy: 0.6791 - val_loss: 1.0254 - val_accuracy: 0.6370\n", + "Epoch 18/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.8679 - accuracy: 0.6831 - val_loss: 1.0184 - val_accuracy: 0.6392\n", + "Epoch 19/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.8544 - accuracy: 0.6868 - val_loss: 1.1225 - val_accuracy: 0.6222\n", + "Epoch 20/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.8466 - accuracy: 0.6914 - val_loss: 1.0388 - val_accuracy: 0.6328\n", + "Epoch 21/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.8297 - accuracy: 0.6958 - val_loss: 1.0181 - val_accuracy: 0.6418\n", + "Epoch 22/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.8222 - accuracy: 0.6987 - val_loss: 1.0307 - val_accuracy: 0.6401\n", + "Epoch 23/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.8180 - accuracy: 0.7007 - val_loss: 1.0310 - val_accuracy: 0.6439\n", + "Epoch 24/50\n", + "1563/1563 [==============================] - 62s 40ms/step - loss: 0.8049 - accuracy: 0.7052 - val_loss: 1.0461 - val_accuracy: 0.6341\n", + "Epoch 25/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7983 - accuracy: 0.7070 - val_loss: 1.0432 - val_accuracy: 0.6411\n", + "Epoch 26/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7916 - 
accuracy: 0.7116 - val_loss: 1.0347 - val_accuracy: 0.6439\n", + "Epoch 27/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7816 - accuracy: 0.7135 - val_loss: 1.0814 - val_accuracy: 0.6300\n", + "Epoch 28/50\n", + "1563/1563 [==============================] - 63s 41ms/step - loss: 0.7763 - accuracy: 0.7178 - val_loss: 1.0354 - val_accuracy: 0.6500\n", + "Epoch 29/50\n", + "1563/1563 [==============================] - 63s 41ms/step - loss: 0.7692 - accuracy: 0.7182 - val_loss: 1.0428 - val_accuracy: 0.6473\n", + "Epoch 30/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.7603 - accuracy: 0.7201 - val_loss: 1.0590 - val_accuracy: 0.6435\n", + "Epoch 31/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.7528 - accuracy: 0.7243 - val_loss: 1.0498 - val_accuracy: 0.6509\n", + "Epoch 32/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.7473 - accuracy: 0.7252 - val_loss: 1.0959 - val_accuracy: 0.6316\n", + "Epoch 33/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.7462 - accuracy: 0.7249 - val_loss: 1.0841 - val_accuracy: 0.6359\n", + "Epoch 34/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7369 - accuracy: 0.7289 - val_loss: 1.0814 - val_accuracy: 0.6388\n", + "Epoch 35/50\n", + "1563/1563 [==============================] - 63s 41ms/step - loss: 0.7287 - accuracy: 0.7315 - val_loss: 1.0593 - val_accuracy: 0.6459\n", + "Epoch 36/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7255 - accuracy: 0.7350 - val_loss: 1.0535 - val_accuracy: 0.6532\n", + "Epoch 37/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7183 - accuracy: 0.7357 - val_loss: 1.0714 - val_accuracy: 0.6474\n", + "Epoch 38/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7138 - accuracy: 0.7381 - val_loss: 1.0699 - val_accuracy: 0.6470\n", + 
"Epoch 39/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7058 - accuracy: 0.7393 - val_loss: 1.0864 - val_accuracy: 0.6468\n", + "Epoch 40/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.7033 - accuracy: 0.7404 - val_loss: 1.0904 - val_accuracy: 0.6413\n", + "Epoch 41/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.6999 - accuracy: 0.7420 - val_loss: 1.1323 - val_accuracy: 0.6451\n", + "Epoch 42/50\n", + "1563/1563 [==============================] - 63s 40ms/step - loss: 0.6904 - accuracy: 0.7440 - val_loss: 1.0680 - val_accuracy: 0.6539\n", + "Epoch 43/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6854 - accuracy: 0.7448 - val_loss: 1.0869 - val_accuracy: 0.6483\n", + "Epoch 44/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6857 - accuracy: 0.7465 - val_loss: 1.1113 - val_accuracy: 0.6442\n", + "Epoch 45/50\n", + "1563/1563 [==============================] - 63s 41ms/step - loss: 0.6802 - accuracy: 0.7480 - val_loss: 1.0838 - val_accuracy: 0.6474\n", + "Epoch 46/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6781 - accuracy: 0.7507 - val_loss: 1.1039 - val_accuracy: 0.6472\n", + "Epoch 47/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6673 - accuracy: 0.7518 - val_loss: 1.1137 - val_accuracy: 0.6500\n", + "Epoch 48/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6655 - accuracy: 0.7544 - val_loss: 1.1335 - val_accuracy: 0.6410\n", + "Epoch 49/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6588 - accuracy: 0.7549 - val_loss: 1.1107 - val_accuracy: 0.6471\n", + "Epoch 50/50\n", + "1563/1563 [==============================] - 64s 41ms/step - loss: 0.6582 - accuracy: 0.7560 - val_loss: 1.1022 - val_accuracy: 0.6497\n" + ] + } + ], + "source": [ + "# Fit\n", + "'''\n", + " YOUR CODE 
HERE\n", + "'''\n", + "r = model.fit(x_train, y_train, validation_data=(x_test, y_test), epochs=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "id": "RDq_RE6osSh8", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 283 + }, + "outputId": "6015ee91-cae4-48a6-ae5f-357dc16e6636" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Original label is cat and predicted label is cat\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/plain": [ + "
" + ], + "image/png": "\n" + }, + "metadata": { + "needs_background": "light" + } + } + ], + "source": [ + "# label mapping\n", + " \n", + "labels = '''airplane automobile bird cat deer dog frog horse ship truck'''.split()\n", + " \n", + "# select the image from our test dataset\n", + "image_number = 0\n", + " \n", + "# display the image\n", + "plt.imshow(x_test[image_number])\n", + " \n", + "# load the image in an array\n", + "n = np.array(x_test[image_number])\n", + " \n", + "# reshape it\n", + "p = n.reshape(1, 32, 32, 3)\n", + " \n", + "# pass in the network for prediction and\n", + "# save the predicted label\n", + "predicted_label = labels[model.predict(p).argmax()]\n", + " \n", + "# load the original label\n", + "original_label = labels[y_test[image_number]]\n", + " \n", + "# display the result\n", + "print(\"Original label is {} and predicted label is {}\".format(\n", + " original_label, predicted_label))" + ] + }, + { + "cell_type": "code", + "source": [ + "" + ], + "metadata": { + "id": "5VIj4PyVxG7G" + }, + "execution_count": null, + "outputs": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "201050_Tejas_A2.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} \ No newline at end of file From a73efb5d7d325ddadfeede4f7d600a210baf0523 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Tue, 21 Jun 2022 20:08:17 +0530 Subject: [PATCH 6/8] Create assignment 3 folder --- Assignment/Assignment_3/ReadMe.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 Assignment/Assignment_3/ReadMe.md diff --git a/Assignment/Assignment_3/ReadMe.md b/Assignment/Assignment_3/ReadMe.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/Assignment/Assignment_3/ReadMe.md @@ -0,0 +1 @@ + From df6ddf4a7d653966ae37ef6f258a431d6ce7393f Mon Sep 
17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Tue, 21 Jun 2022 20:09:23 +0530 Subject: [PATCH 7/8] Assignment-3_201050 --- Assignment/Assignment_3/201050_Tejas_A3.ipynb | 405 ++++++++++++++++++ 1 file changed, 405 insertions(+) create mode 100644 Assignment/Assignment_3/201050_Tejas_A3.ipynb diff --git a/Assignment/Assignment_3/201050_Tejas_A3.ipynb b/Assignment/Assignment_3/201050_Tejas_A3.ipynb new file mode 100644 index 0000000..757a0b2 --- /dev/null +++ b/Assignment/Assignment_3/201050_Tejas_A3.ipynb @@ -0,0 +1,405 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RNN for image classification on MNIST dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Importing libraries:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt \n", + "import torchvision\n", + "from torchvision import datasets\n", + "from torchvision.transforms import ToTensor\n", + "from torchvision.transforms import transforms \n", + "import torch.nn as nn \n", + "from torch import optim,utils\n", + "import os\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Loading the data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Dataset MNIST\n", + " Number of datapoints: 60000\n", + " Root location: /Users/tejasr/Documents/IITK/Semesters/sem 4 /Stamatics project /data\n", + " Split: Train\n", + " StandardTransform\n", + "Transform: Compose(\n", + " ToTensor()\n", + " )\n", + "Dataset MNIST\n", + " Number of datapoints: 10000\n", + " Root location: /Users/tejasr/Documents/IITK/Semesters/sem 4 /Stamatics project /data\n", + " Split: Test\n", + " StandardTransform\n", + "Transform: Compose(\n", + " 
# %%
# Where MNIST is downloaded to / read from. Override with the MNIST_DATA_DIR
# environment variable instead of hard-coding a machine-specific path.
DATA_DIR = os.environ.get("MNIST_DATA_DIR", "data")

transform = transforms.Compose([transforms.ToTensor()])

train_data = datasets.MNIST(train=True, root=DATA_DIR, download=True, transform=transform)
test_data = datasets.MNIST(train=False, root=DATA_DIR, download=True, transform=transform)
print(train_data)
print(test_data)

# %%
batch_size = 100

trainLoader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
testLoader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size, shuffle=False)

# %% [markdown]
# ### Data Visualisation:

# %%
# Exploring the dataset

def imshow(img):
    """Show an image tensor after moving its first axis to the last position."""
    # assumes img is channel-first (C, H, W) -- TODO confirm against callers
    npimg = img.numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))


# get one batch of (shuffled) training images
dataiter = iter(trainLoader)
# Use the builtin next(): the iterator's .next() method is not available in
# current PyTorch releases.
images, labels = next(dataiter)
images_shape = images.shape
labels_shape = labels.shape
print("Image tensor is of size", images_shape, "\nLabels tensor is of size", labels_shape)

# %% [markdown]
# The image tensor is of size [100,1,28,28], meaning that each batch has 100 images, and each image is of size 28*28 pixels, and the corresponding labels size is 100, giving us the number classification of each digit.

# %% [markdown]
# numpy.squeeze() removes single dimensional entities from the array, here a [1,28,28] image is converted into a [28,28] image which can be plotted

# %%
# Show the first 25 images of the batch in a 5x5 grid.
figure, axes = plt.subplots(5, 5)
for ax, img in zip(axes.flat, images):
    ax.axis('off')
    ax.imshow(img.numpy().squeeze(), cmap='inferno')

# %% [markdown]
# ### Defining the model:

# %%
steps = 28            # number of time steps: one image row per step
input_size = 28       # represents the size of the input at each time unit
hidden_size = 60
output_size = 10      # equal to the number of classes = 10.
n_epochs = 30
num_layers = 1        # default, number of stacked LSTM layers
learning_rate = 0.0001

# %%
class RNN(nn.Module):
    """LSTM followed by a linear layer applied to the last time step.

    forward() takes x of shape (batch, steps, input_size), since the LSTM is
    built with batch_first=True, and returns (batch, output_size) scores.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(RNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(self.hidden_size, self.output_size)

    def forward(self, x):
        # Initial hidden and cell state. new_zeros allocates them on the same
        # device and with the same dtype as x, so the model also works when it
        # has been moved off the CPU.
        h0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        c0 = x.new_zeros(self.num_layers, x.size(0), self.hidden_size)
        # The LSTM returns the per-step outputs and the final (h, c) pair;
        # only the outputs are needed here.
        out, _ = self.lstm(x, (h0, c0))
        # Classify from the output of the last time step.
        out = self.fc(out[:, -1, :])
        return out
# %%
model = RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, output_size=output_size)
print(model)

# %% [markdown]
# ### Defining the loss and the accuracy function:

# %%
criterion = nn.CrossEntropyLoss()  # We use cross entropy loss
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)  # Adam: Adaptive Moment Estimation

# %%
def accuracy(testLoader, model):
    """Return the fraction of correctly classified images in testLoader.

    testLoader : iterable of (images, labels) batches.
    model      : module mapping a (batch, steps, input_size) tensor to
                 per-class scores of shape (batch, number of classes).
    return     : correct / total as a float.
    """
    correct = 0
    total = 0
    # Evaluate in eval mode and without building the autograd graph; the
    # previous training mode is restored afterwards.
    was_training = model.training
    model.eval()
    with torch.no_grad():
        for images, labels in testLoader:
            images = images.reshape(-1, steps, input_size)
            output = model(images)  # [batch, number of classes]
            # The index of the largest score along dim 1 is the predicted digit.
            predictions = output.argmax(dim=1)
            assert len(predictions) == len(labels)
            correct += (predictions == labels).sum().item()
            total += len(predictions)
    model.train(was_training)
    return correct / total

# %% [markdown]
# ### Finally, training the model:

# %%
print(f"We iterate over a total of {n_epochs} epochs and train the model")
for epoch in range(n_epochs):
    running_loss = 0.0
    n_batches = 0
    for images, labels in trainLoader:
        optimizer.zero_grad()
        images = images.reshape(-1, steps, input_size)
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        n_batches += 1
    # Report every second epoch. The loss shown is the mean over the epoch's
    # batches; the loss of the last batch alone is too noisy to compare
    # between epochs.
    if (epoch + 1) % 2 == 0:
        print(f"Loss in epoch {epoch+1} of {n_epochs} is: {running_loss / n_batches:.4f}, with an accuracy of {accuracy(testLoader, model):.4f} on the test dataset")

# %%
# Final evaluation: reuse accuracy() instead of repeating its loop.
final_accuracy = accuracy(testLoader, model)
print(f"Final accuracy with the trained model is {final_accuracy:.4f} on the test dataset")

# %% [markdown]
# #### We get a final accuracy of 96.5% with the help of the RNN(LSTM) model!
# ##### The model gets higher accuracies in a much smaller number of epochs with around a hundred hidden layers, but that model reaches almost peak accuracy in one epoch.
" + ] + } + ], + "metadata": { + "interpreter": { + "hash": "52703fc3410d475a421fa6bbc0062a51698cb2a16eb528a89f070312507882d6" + }, + "kernelspec": { + "display_name": "Python 3.9.7 ('base')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From d2affdc8f900852108a6325b2a1e816e67a0fac2 Mon Sep 17 00:00:00 2001 From: Tejas Ramakrishnan <82379532+tejasr20@users.noreply.github.com> Date: Thu, 23 Jun 2022 07:36:18 +0530 Subject: [PATCH 8/8] Delete ReadMe.md --- Assignment/Assignment_3/ReadMe.md | 1 - 1 file changed, 1 deletion(-) delete mode 100644 Assignment/Assignment_3/ReadMe.md diff --git a/Assignment/Assignment_3/ReadMe.md b/Assignment/Assignment_3/ReadMe.md deleted file mode 100644 index 8b13789..0000000 --- a/Assignment/Assignment_3/ReadMe.md +++ /dev/null @@ -1 +0,0 @@ -