From 5b27c1799384f73641e2807064407e16ab1155c3 Mon Sep 17 00:00:00 2001
From: fedepacher <fede_pacher@hotmail.com>
Date: Sun, 11 Jun 2023 09:21:26 -0300
Subject: [PATCH] refact(#1): refactor unnested function

---
 etl.ipynb | 898 +++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 713 insertions(+), 185 deletions(-)

diff --git a/etl.ipynb b/etl.ipynb
index cf635f4..b597816 100644
--- a/etl.ipynb
+++ b/etl.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 437,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -23,14 +23,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 438,
+   "execution_count": 88,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_70350/3456366765.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
+      "/tmp/ipykernel_415942/3456366765.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n",
       "  df_movies = pd.read_csv(url)\n"
      ]
     }
@@ -43,7 +43,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 439,
+   "execution_count": 51,
    "metadata": {},
    "outputs": [
     {
@@ -276,7 +276,7 @@
        "[5 rows x 24 columns]"
       ]
      },
-     "execution_count": 439,
+     "execution_count": 51,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -287,7 +287,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 440,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -296,7 +296,7 @@
        "(45466, 24)"
       ]
      },
-     "execution_count": 440,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -315,7 +315,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 441,
+   "execution_count": 89,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -324,7 +324,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 442,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -372,7 +372,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 443,
+   "execution_count": 90,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -381,7 +381,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 444,
+   "execution_count": 91,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -390,7 +390,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 445,
+   "execution_count": 55,
    "metadata": {},
    "outputs": [
     {
@@ -438,7 +438,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 446,
+   "execution_count": 92,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -447,7 +447,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 447,
+   "execution_count": 28,
    "metadata": {},
    "outputs": [
     {
@@ -491,23 +491,55 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Create funtion to convert string to dictionary"
+    "## Create function to convert string to list"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 448,
+   "execution_count": 93,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def str_to_list(value):\n",
+    "    \"\"\"Convert string to list.\n",
+    "\n",
+    "    Args:\n",
+    "        value (str): String value to be converted to a list.\n",
+    "\n",
+    "    Returns:\n",
+    "        list: The parsed value (expected to be a list), or an empty list if the input is NaN or parses to a bool or float.\n",
+    "    \"\"\"\n",
+    "    if pd.isna(value):\n",
+    "        return []\n",
+    "    else:\n",
+    "        value = ast.literal_eval(value)\n",
+    "        if isinstance(value, bool) or isinstance(value, float):\n",
+    "            return []\n",
+    "        return value"
+   ]
+  },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Create function to convert string to dictionary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 95,
    "metadata": {},
    "outputs": [],
    "source": [
     "def str_to_dict(value):\n",
-    "    \"\"\"Convert string to dictionary\n",
+    "    \"\"\"Convert string to dictionary.\n",
     "\n",
     "    Args:\n",
-    "        value (str): String valu to be converted to dictionary\n",
+    "        value (str): String value to be converted to dictionary.\n",
     "\n",
     "    Returns:\n",
-    "        None : Return None if NaN value is present\n",
+    "        dict or None: The parsed value, or None if the input is NaN.\n",
     "    \"\"\"\n",
     "    if pd.isna(value):\n",
     "        return None\n",
@@ -515,6 +547,40 @@
     "        return ast.literal_eval(value)"
    ]
   },
+  {
+   "attachments": {},
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Function to get elements of list of list of dictionaries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 96,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_list(funct, dataframe='', column='', key_name=''):\n",
+    "    \"\"\"Get the list of key elements from a dictionary.\n",
+    "\n",
+    "    Args:\n",
+    "        funct (callable): Function to convert str to list or dict.\n",
+    "        dataframe (pd.DataFrame, optional): Dataframe to extract information from; the given column is overwritten in place with the converted values. Defaults to ''.\n",
+    "        column (str, optional): Column of the dataframe to get information. Defaults to ''.\n",
+    "        key_name (str, optional): Dictionary key to get value. Defaults to ''. \n",
+    "\n",
+    "    Returns:\n",
+    "        list: One list per row, holding the `key_name` value of each dictionary in that row that contains the key.\n",
+    "    \"\"\"\n",
+    "    dataframe[column] = dataframe[column].apply(funct)\n",
+    "    column_list = dataframe[column].to_list()\n",
+    "\n",
+    "    return_list = [[data[key_name] for data in inter_list if key_name in data] for inter_list in column_list]\n",
+    "\n",
+    "    return return_list"
+   ]
+  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -525,7 +591,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 449,
+   "execution_count": 97,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -534,7 +600,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 450,
+   "execution_count": 99,
    "metadata": {},
    "outputs": [
     {
@@ -620,7 +686,7 @@
        "4  /7qwE57OVZmMJChBpLEbJEmzUydk.jpg  "
       ]
      },
-     "execution_count": 450,
+     "execution_count": 99,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -632,7 +698,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 451,
+   "execution_count": 100,
    "metadata": {},
    "outputs": [
     {
@@ -718,7 +784,7 @@
        "4  /nts4iOmNnq7GNicycMJ9pSAn204.jpg    /7qwE57OVZmMJChBpLEbJEmzUydk.jpg  "
       ]
      },
-     "execution_count": 451,
+     "execution_count": 100,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -732,7 +798,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 452,
+   "execution_count": 35,
    "metadata": {},
    "outputs": [
     {
@@ -741,7 +807,7 @@
        "(45466, 4)"
       ]
      },
-     "execution_count": 452,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -752,16 +818,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 453,
+   "execution_count": 102,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "KeyError",
+     "evalue": "\"['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'] not found in axis\"",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mKeyError\u001b[0m                                  Traceback (most recent call last)",
+      "\u001b[0;32m/tmp/ipykernel_415942/1005892911.py\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_nested\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'belongs_to_collection_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'belongs_to_collection_poster_path'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'belongs_to_collection_backdrop_path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+      "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    309\u001b[0m                     \u001b[0mstacklevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstacklevel\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    310\u001b[0m                 )\n\u001b[0;32m--> 311\u001b[0;31m             \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    312\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    313\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   4955\u001b[0m                 \u001b[0mweight\u001b[0m  \u001b[0;36m1.0\u001b[0m     \u001b[0;36m0.8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4956\u001b[0m         \"\"\"\n\u001b[0;32m-> 4957\u001b[0;31m         return super().drop(\n\u001b[0m\u001b[1;32m   4958\u001b[0m             \u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4959\u001b[0m             \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m   4265\u001b[0m         \u001b[0;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4266\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4267\u001b[0;31m                 \u001b[0mobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4269\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[0;34m(self, labels, axis, level, errors, consolidate, only_slice)\u001b[0m\n\u001b[1;32m   4309\u001b[0m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4310\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4311\u001b[0;31m                 \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   4312\u001b[0m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   4313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m   6659\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6660\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6661\u001b[0;31m                 \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{list(labels[mask])} not found in axis\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   6662\u001b[0m             \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   6663\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mKeyError\u001b[0m: \"['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'] not found in axis\""
+     ]
+    }
+   ],
    "source": [
     "df_nested.drop(columns=['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'], inplace=True)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 454,
+   "execution_count": 103,
    "metadata": {},
    "outputs": [
     {
@@ -910,7 +993,7 @@
        "2  Grumpy Old Men Collection  "
       ]
      },
-     "execution_count": 454,
+     "execution_count": 103,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -923,7 +1006,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 455,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
@@ -972,40 +1055,525 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 456,
+   "execution_count": 104,
    "metadata": {},
    "outputs": [],
    "source": [
     "columns = ['genres', 'production_companies', 'production_countries', 'spoken_languages']\n",
+    "key = 'name'\n",
+    "df = pd.DataFrame()\n",
+    "for _, column in enumerate(columns):\n",
+    "    elements_list = []\n",
+    "    elements_list = get_list(str_to_list, dataframe=df_movies, column=column, key_name=key)\n",
     "\n",
-    "for column in columns:\n",
-    "    df_movies[column] = df_movies[column].apply(str_to_dict)\n",
-    "    df_nested = pd.json_normalize(df_movies[column]).fillna('')\n",
-    "    nested_columns = df_nested.columns\n",
-    "    df_nested_1 = pd.DataFrame()\n",
-    "    for nested_column in nested_columns:\n",
-    "        df_aux = pd.json_normalize(df_nested[nested_column]).fillna('')\n",
-    "        if 'production_countries' in column:\n",
-    "            df_aux.rename(columns={'iso_3166_1': 'id'}, inplace=True)\n",
-    "        if 'spoken_languages' in column:\n",
-    "            df_aux.rename(columns={'iso_639_1': 'id'}, inplace=True)\n",
-    "        df_aux.drop(columns='id', axis=1, inplace=True)\n",
-    "        new_columns_names = {col : f'{column}_{nested_column}_{col}' for col in df_aux.columns}\n",
-    "        df_aux.rename(columns=new_columns_names, inplace=True)\n",
-    "        df_nested_1 = pd.concat([df_nested_1, df_aux], axis=1)\n",
-    "\n",
-    "    # convert columns to a list of columns\n",
-    "    column_name = f'{column}_name'\n",
-    "    column_name_list = df_nested_1.columns.to_list()\n",
-    "    df_nested_1[column_name] = df_nested_1.values.tolist()\n",
-    "    df_nested_1.drop(columns=column_name_list, axis=1, inplace=True)\n",
-    "    df_movies = pd.concat([df_movies, df_nested_1], axis=1)\n",
-    "    df_movies.drop(columns=column, inplace=True)"
+    "    df_aux = pd.DataFrame({column: elements_list})\n",
+    "    df = pd.concat([df, df_aux], axis=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_movies.drop(columns=columns, inplace=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>genres</th>\n",
+       "      <th>production_companies</th>\n",
+       "      <th>production_countries</th>\n",
+       "      <th>spoken_languages</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>[Animation, Comedy, Family]</td>\n",
+       "      <td>[Pixar Animation Studios]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>[Adventure, Fantasy, Family]</td>\n",
+       "      <td>[TriStar Pictures, Teitler Film, Interscope Co...</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English, Français]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>[Romance, Comedy]</td>\n",
+       "      <td>[Warner Bros., Lancaster Gate]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>[Comedy, Drama, Romance]</td>\n",
+       "      <td>[Twentieth Century Fox Film Corporation]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>[Comedy]</td>\n",
+       "      <td>[Sandollar Productions, Touchstone Pictures]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                         genres  \\\n",
+       "0   [Animation, Comedy, Family]   \n",
+       "1  [Adventure, Fantasy, Family]   \n",
+       "2             [Romance, Comedy]   \n",
+       "3      [Comedy, Drama, Romance]   \n",
+       "4                      [Comedy]   \n",
+       "\n",
+       "                                production_companies  \\\n",
+       "0                          [Pixar Animation Studios]   \n",
+       "1  [TriStar Pictures, Teitler Film, Interscope Co...   \n",
+       "2                     [Warner Bros., Lancaster Gate]   \n",
+       "3           [Twentieth Century Fox Film Corporation]   \n",
+       "4       [Sandollar Productions, Touchstone Pictures]   \n",
+       "\n",
+       "         production_countries     spoken_languages  \n",
+       "0  [United States of America]            [English]  \n",
+       "1  [United States of America]  [English, Français]  \n",
+       "2  [United States of America]            [English]  \n",
+       "3  [United States of America]            [English]  \n",
+       "4  [United States of America]            [English]  "
+      ]
+     },
+     "execution_count": 105,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>budget</th>\n",
+       "      <th>id</th>\n",
+       "      <th>original_language</th>\n",
+       "      <th>overview</th>\n",
+       "      <th>popularity</th>\n",
+       "      <th>release_date</th>\n",
+       "      <th>revenue</th>\n",
+       "      <th>runtime</th>\n",
+       "      <th>status</th>\n",
+       "      <th>title</th>\n",
+       "      <th>vote_average</th>\n",
+       "      <th>vote_count</th>\n",
+       "      <th>return</th>\n",
+       "      <th>belongs_to_collection_name</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>30000000.0</td>\n",
+       "      <td>862</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
+       "      <td>21.946943</td>\n",
+       "      <td>1995-10-30</td>\n",
+       "      <td>373554033.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>5415.0</td>\n",
+       "      <td>12.451801</td>\n",
+       "      <td>Toy Story Collection</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>65000000.0</td>\n",
+       "      <td>8844</td>\n",
+       "      <td>en</td>\n",
+       "      <td>When siblings Judy and Peter discover an encha...</td>\n",
+       "      <td>17.015539</td>\n",
+       "      <td>1995-12-15</td>\n",
+       "      <td>262797249.0</td>\n",
+       "      <td>104.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>2413.0</td>\n",
+       "      <td>4.043035</td>\n",
+       "      <td></td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15602</td>\n",
+       "      <td>en</td>\n",
+       "      <td>A family wedding reignites the ancient feud be...</td>\n",
+       "      <td>11.7129</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>Grumpy Old Men Collection</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16000000.0</td>\n",
+       "      <td>31357</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
+       "      <td>3.859495</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>81452156.0</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>5.090760</td>\n",
+       "      <td></td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>11862</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Just when George Banks has recovered from his ...</td>\n",
+       "      <td>8.387519</td>\n",
+       "      <td>1995-02-10</td>\n",
+       "      <td>76578911.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>173.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>Father of the Bride Collection</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       budget     id original_language  \\\n",
+       "0  30000000.0    862                en   \n",
+       "1  65000000.0   8844                en   \n",
+       "2         0.0  15602                en   \n",
+       "3  16000000.0  31357                en   \n",
+       "4         0.0  11862                en   \n",
+       "\n",
+       "                                            overview popularity release_date  \\\n",
+       "0  Led by Woody, Andy's toys live happily in his ...  21.946943   1995-10-30   \n",
+       "1  When siblings Judy and Peter discover an encha...  17.015539   1995-12-15   \n",
+       "2  A family wedding reignites the ancient feud be...    11.7129   1995-12-22   \n",
+       "3  Cheated on, mistreated and stepped on, the wom...   3.859495   1995-12-22   \n",
+       "4  Just when George Banks has recovered from his ...   8.387519   1995-02-10   \n",
+       "\n",
+       "       revenue  runtime    status                        title  vote_average  \\\n",
+       "0  373554033.0     81.0  Released                    Toy Story           7.7   \n",
+       "1  262797249.0    104.0  Released                      Jumanji           6.9   \n",
+       "2          0.0    101.0  Released             Grumpier Old Men           6.5   \n",
+       "3   81452156.0    127.0  Released            Waiting to Exhale           6.1   \n",
+       "4   76578911.0    106.0  Released  Father of the Bride Part II           5.7   \n",
+       "\n",
+       "   vote_count     return      belongs_to_collection_name  \n",
+       "0      5415.0  12.451801            Toy Story Collection  \n",
+       "1      2413.0   4.043035                                  \n",
+       "2        92.0   0.000000       Grumpy Old Men Collection  \n",
+       "3        34.0   5.090760                                  \n",
+       "4       173.0   0.000000  Father of the Bride Collection  "
+      ]
+     },
+     "execution_count": 108,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_movies.head()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 457,
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>budget</th>\n",
+       "      <th>id</th>\n",
+       "      <th>original_language</th>\n",
+       "      <th>overview</th>\n",
+       "      <th>popularity</th>\n",
+       "      <th>release_date</th>\n",
+       "      <th>revenue</th>\n",
+       "      <th>runtime</th>\n",
+       "      <th>status</th>\n",
+       "      <th>title</th>\n",
+       "      <th>vote_average</th>\n",
+       "      <th>vote_count</th>\n",
+       "      <th>return</th>\n",
+       "      <th>belongs_to_collection_name</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>production_companies</th>\n",
+       "      <th>production_countries</th>\n",
+       "      <th>spoken_languages</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>30000000.0</td>\n",
+       "      <td>862</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Led by Woody, Andy's toys live happily in his ...</td>\n",
+       "      <td>21.946943</td>\n",
+       "      <td>1995-10-30</td>\n",
+       "      <td>373554033.0</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Toy Story</td>\n",
+       "      <td>7.7</td>\n",
+       "      <td>5415.0</td>\n",
+       "      <td>12.451801</td>\n",
+       "      <td>Toy Story Collection</td>\n",
+       "      <td>[Animation, Comedy, Family]</td>\n",
+       "      <td>[Pixar Animation Studios]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>65000000.0</td>\n",
+       "      <td>8844</td>\n",
+       "      <td>en</td>\n",
+       "      <td>When siblings Judy and Peter discover an encha...</td>\n",
+       "      <td>17.015539</td>\n",
+       "      <td>1995-12-15</td>\n",
+       "      <td>262797249.0</td>\n",
+       "      <td>104.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Jumanji</td>\n",
+       "      <td>6.9</td>\n",
+       "      <td>2413.0</td>\n",
+       "      <td>4.043035</td>\n",
+       "      <td></td>\n",
+       "      <td>[Adventure, Fantasy, Family]</td>\n",
+       "      <td>[TriStar Pictures, Teitler Film, Interscope Co...</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English, Français]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>15602</td>\n",
+       "      <td>en</td>\n",
+       "      <td>A family wedding reignites the ancient feud be...</td>\n",
+       "      <td>11.7129</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Grumpier Old Men</td>\n",
+       "      <td>6.5</td>\n",
+       "      <td>92.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>Grumpy Old Men Collection</td>\n",
+       "      <td>[Romance, Comedy]</td>\n",
+       "      <td>[Warner Bros., Lancaster Gate]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>16000000.0</td>\n",
+       "      <td>31357</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Cheated on, mistreated and stepped on, the wom...</td>\n",
+       "      <td>3.859495</td>\n",
+       "      <td>1995-12-22</td>\n",
+       "      <td>81452156.0</td>\n",
+       "      <td>127.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Waiting to Exhale</td>\n",
+       "      <td>6.1</td>\n",
+       "      <td>34.0</td>\n",
+       "      <td>5.090760</td>\n",
+       "      <td></td>\n",
+       "      <td>[Comedy, Drama, Romance]</td>\n",
+       "      <td>[Twentieth Century Fox Film Corporation]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>0.0</td>\n",
+       "      <td>11862</td>\n",
+       "      <td>en</td>\n",
+       "      <td>Just when George Banks has recovered from his ...</td>\n",
+       "      <td>8.387519</td>\n",
+       "      <td>1995-02-10</td>\n",
+       "      <td>76578911.0</td>\n",
+       "      <td>106.0</td>\n",
+       "      <td>Released</td>\n",
+       "      <td>Father of the Bride Part II</td>\n",
+       "      <td>5.7</td>\n",
+       "      <td>173.0</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>Father of the Bride Collection</td>\n",
+       "      <td>[Comedy]</td>\n",
+       "      <td>[Sandollar Productions, Touchstone Pictures]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       budget     id original_language  \\\n",
+       "0  30000000.0    862                en   \n",
+       "1  65000000.0   8844                en   \n",
+       "2         0.0  15602                en   \n",
+       "3  16000000.0  31357                en   \n",
+       "4         0.0  11862                en   \n",
+       "\n",
+       "                                            overview popularity release_date  \\\n",
+       "0  Led by Woody, Andy's toys live happily in his ...  21.946943   1995-10-30   \n",
+       "1  When siblings Judy and Peter discover an encha...  17.015539   1995-12-15   \n",
+       "2  A family wedding reignites the ancient feud be...    11.7129   1995-12-22   \n",
+       "3  Cheated on, mistreated and stepped on, the wom...   3.859495   1995-12-22   \n",
+       "4  Just when George Banks has recovered from his ...   8.387519   1995-02-10   \n",
+       "\n",
+       "       revenue  runtime    status                        title  vote_average  \\\n",
+       "0  373554033.0     81.0  Released                    Toy Story           7.7   \n",
+       "1  262797249.0    104.0  Released                      Jumanji           6.9   \n",
+       "2          0.0    101.0  Released             Grumpier Old Men           6.5   \n",
+       "3   81452156.0    127.0  Released            Waiting to Exhale           6.1   \n",
+       "4   76578911.0    106.0  Released  Father of the Bride Part II           5.7   \n",
+       "\n",
+       "   vote_count     return      belongs_to_collection_name  \\\n",
+       "0      5415.0  12.451801            Toy Story Collection   \n",
+       "1      2413.0   4.043035                                   \n",
+       "2        92.0   0.000000       Grumpy Old Men Collection   \n",
+       "3        34.0   5.090760                                   \n",
+       "4       173.0   0.000000  Father of the Bride Collection   \n",
+       "\n",
+       "                         genres  \\\n",
+       "0   [Animation, Comedy, Family]   \n",
+       "1  [Adventure, Fantasy, Family]   \n",
+       "2             [Romance, Comedy]   \n",
+       "3      [Comedy, Drama, Romance]   \n",
+       "4                      [Comedy]   \n",
+       "\n",
+       "                                production_companies  \\\n",
+       "0                          [Pixar Animation Studios]   \n",
+       "1  [TriStar Pictures, Teitler Film, Interscope Co...   \n",
+       "2                     [Warner Bros., Lancaster Gate]   \n",
+       "3           [Twentieth Century Fox Film Corporation]   \n",
+       "4       [Sandollar Productions, Touchstone Pictures]   \n",
+       "\n",
+       "         production_countries     spoken_languages  \n",
+       "0  [United States of America]            [English]  \n",
+       "1  [United States of America]  [English, Français]  \n",
+       "2  [United States of America]            [English]  \n",
+       "3  [United States of America]            [English]  \n",
+       "4  [United States of America]            [English]  "
+      ]
+     },
+     "execution_count": 109,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_movies = pd.concat([df_movies, df], axis=1)\n",
+    "df_movies.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
    "metadata": {},
    "outputs": [
     {
@@ -1014,7 +1582,7 @@
        "(45466, 18)"
       ]
      },
-     "execution_count": 457,
+     "execution_count": 110,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1025,7 +1593,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 458,
+   "execution_count": 111,
    "metadata": {},
    "outputs": [
     {
@@ -1051,10 +1619,10 @@
       " 11  vote_count                  45460 non-null  float64\n",
       " 12  return                      45466 non-null  float64\n",
       " 13  belongs_to_collection_name  45466 non-null  object \n",
-      " 14  genres_name                 45466 non-null  object \n",
-      " 15  production_companies_name   45466 non-null  object \n",
-      " 16  production_countries_name   45466 non-null  object \n",
-      " 17  spoken_languages_name       45466 non-null  object \n",
+      " 14  genres                      45466 non-null  object \n",
+      " 15  production_companies        45466 non-null  object \n",
+      " 16  production_countries        45466 non-null  object \n",
+      " 17  spoken_languages            45466 non-null  object \n",
       "dtypes: float64(6), object(12)\n",
       "memory usage: 6.2+ MB\n"
      ]
@@ -1074,7 +1642,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 459,
+   "execution_count": 112,
    "metadata": {},
    "outputs": [
     {
@@ -1100,10 +1668,10 @@
       " 11  vote_count                  45376 non-null  float64\n",
       " 12  return                      45379 non-null  float64\n",
       " 13  belongs_to_collection_name  45379 non-null  object \n",
-      " 14  genres_name                 45379 non-null  object \n",
-      " 15  production_companies_name   45379 non-null  object \n",
-      " 16  production_countries_name   45379 non-null  object \n",
-      " 17  spoken_languages_name       45379 non-null  object \n",
+      " 14  genres                      45379 non-null  object \n",
+      " 15  production_companies        45379 non-null  object \n",
+      " 16  production_countries        45379 non-null  object \n",
+      " 17  spoken_languages            45379 non-null  object \n",
       "dtypes: float64(6), object(12)\n",
       "memory usage: 6.6+ MB\n"
      ]
@@ -1124,14 +1692,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 460,
+   "execution_count": 113,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/tmp/ipykernel_70350/235543016.py:2: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.\n",
+      "/tmp/ipykernel_415942/235543016.py:2: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.\n",
       "  filter = df_movies['release_date'].str.contains(regex_filter)\n"
      ]
     }
@@ -1153,7 +1721,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 461,
+   "execution_count": 114,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1170,7 +1738,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 462,
+   "execution_count": 115,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1179,7 +1747,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 463,
+   "execution_count": 116,
    "metadata": {},
    "outputs": [
     {
@@ -1217,10 +1785,10 @@
        "      <th>vote_count</th>\n",
        "      <th>return</th>\n",
        "      <th>belongs_to_collection_name</th>\n",
-       "      <th>genres_name</th>\n",
-       "      <th>production_companies_name</th>\n",
-       "      <th>production_countries_name</th>\n",
-       "      <th>spoken_languages_name</th>\n",
+       "      <th>genres</th>\n",
+       "      <th>production_companies</th>\n",
+       "      <th>production_countries</th>\n",
+       "      <th>spoken_languages</th>\n",
        "      <th>release_year</th>\n",
        "    </tr>\n",
        "  </thead>\n",
@@ -1241,10 +1809,10 @@
        "      <td>5415.0</td>\n",
        "      <td>12.451801</td>\n",
        "      <td>Toy Story Collection</td>\n",
-       "      <td>[Animation, Comedy, Family, , , , , ]</td>\n",
-       "      <td>[Pixar Animation Studios, , , , , , , , , , , ...</td>\n",
-       "      <td>[United States of America, , , , , , , , , , ,...</td>\n",
-       "      <td>[English, , , , , , , , , , , , , , , , , , ]</td>\n",
+       "      <td>[Animation, Comedy, Family]</td>\n",
+       "      <td>[Pixar Animation Studios]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
        "      <td>1995</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1263,10 +1831,10 @@
        "      <td>2413.0</td>\n",
        "      <td>4.043035</td>\n",
        "      <td></td>\n",
-       "      <td>[Adventure, Fantasy, Family, , , , , ]</td>\n",
+       "      <td>[Adventure, Fantasy, Family]</td>\n",
        "      <td>[TriStar Pictures, Teitler Film, Interscope Co...</td>\n",
-       "      <td>[United States of America, , , , , , , , , , ,...</td>\n",
-       "      <td>[English, Français, , , , , , , , , , , , , , ...</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English, Français]</td>\n",
        "      <td>1995</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1285,10 +1853,10 @@
        "      <td>92.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>Grumpy Old Men Collection</td>\n",
-       "      <td>[Romance, Comedy, , , , , , ]</td>\n",
-       "      <td>[Warner Bros., Lancaster Gate, , , , , , , , ,...</td>\n",
-       "      <td>[United States of America, , , , , , , , , , ,...</td>\n",
-       "      <td>[English, , , , , , , , , , , , , , , , , , ]</td>\n",
+       "      <td>[Romance, Comedy]</td>\n",
+       "      <td>[Warner Bros., Lancaster Gate]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
        "      <td>1995</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1307,10 +1875,10 @@
        "      <td>34.0</td>\n",
        "      <td>5.090760</td>\n",
        "      <td></td>\n",
-       "      <td>[Comedy, Drama, Romance, , , , , ]</td>\n",
-       "      <td>[Twentieth Century Fox Film Corporation, , , ,...</td>\n",
-       "      <td>[United States of America, , , , , , , , , , ,...</td>\n",
-       "      <td>[English, , , , , , , , , , , , , , , , , , ]</td>\n",
+       "      <td>[Comedy, Drama, Romance]</td>\n",
+       "      <td>[Twentieth Century Fox Film Corporation]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
        "      <td>1995</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -1329,10 +1897,10 @@
        "      <td>173.0</td>\n",
        "      <td>0.000000</td>\n",
        "      <td>Father of the Bride Collection</td>\n",
-       "      <td>[Comedy, , , , , , , ]</td>\n",
-       "      <td>[Sandollar Productions, Touchstone Pictures, ,...</td>\n",
-       "      <td>[United States of America, , , , , , , , , , ,...</td>\n",
-       "      <td>[English, , , , , , , , , , , , , , , , , , ]</td>\n",
+       "      <td>[Comedy]</td>\n",
+       "      <td>[Sandollar Productions, Touchstone Pictures]</td>\n",
+       "      <td>[United States of America]</td>\n",
+       "      <td>[English]</td>\n",
        "      <td>1995</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -1368,36 +1936,29 @@
        "3        34.0   5.090760                                   \n",
        "4       173.0   0.000000  Father of the Bride Collection   \n",
        "\n",
-       "                              genres_name  \\\n",
-       "0   [Animation, Comedy, Family, , , , , ]   \n",
-       "1  [Adventure, Fantasy, Family, , , , , ]   \n",
-       "2           [Romance, Comedy, , , , , , ]   \n",
-       "3      [Comedy, Drama, Romance, , , , , ]   \n",
-       "4                  [Comedy, , , , , , , ]   \n",
+       "                         genres  \\\n",
+       "0   [Animation, Comedy, Family]   \n",
+       "1  [Adventure, Fantasy, Family]   \n",
+       "2             [Romance, Comedy]   \n",
+       "3      [Comedy, Drama, Romance]   \n",
+       "4                      [Comedy]   \n",
        "\n",
-       "                           production_companies_name  \\\n",
-       "0  [Pixar Animation Studios, , , , , , , , , , , ...   \n",
+       "                                production_companies  \\\n",
+       "0                          [Pixar Animation Studios]   \n",
        "1  [TriStar Pictures, Teitler Film, Interscope Co...   \n",
-       "2  [Warner Bros., Lancaster Gate, , , , , , , , ,...   \n",
-       "3  [Twentieth Century Fox Film Corporation, , , ,...   \n",
-       "4  [Sandollar Productions, Touchstone Pictures, ,...   \n",
-       "\n",
-       "                           production_countries_name  \\\n",
-       "0  [United States of America, , , , , , , , , , ,...   \n",
-       "1  [United States of America, , , , , , , , , , ,...   \n",
-       "2  [United States of America, , , , , , , , , , ,...   \n",
-       "3  [United States of America, , , , , , , , , , ,...   \n",
-       "4  [United States of America, , , , , , , , , , ,...   \n",
+       "2                     [Warner Bros., Lancaster Gate]   \n",
+       "3           [Twentieth Century Fox Film Corporation]   \n",
+       "4       [Sandollar Productions, Touchstone Pictures]   \n",
        "\n",
-       "                               spoken_languages_name  release_year  \n",
-       "0      [English, , , , , , , , , , , , , , , , , , ]          1995  \n",
-       "1  [English, Français, , , , , , , , , , , , , , ...          1995  \n",
-       "2      [English, , , , , , , , , , , , , , , , , , ]          1995  \n",
-       "3      [English, , , , , , , , , , , , , , , , , , ]          1995  \n",
-       "4      [English, , , , , , , , , , , , , , , , , , ]          1995  "
+       "         production_countries     spoken_languages  release_year  \n",
+       "0  [United States of America]            [English]          1995  \n",
+       "1  [United States of America]  [English, Français]          1995  \n",
+       "2  [United States of America]            [English]          1995  \n",
+       "3  [United States of America]            [English]          1995  \n",
+       "4  [United States of America]            [English]          1995  "
       ]
      },
-     "execution_count": 463,
+     "execution_count": 116,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1416,7 +1977,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 464,
+   "execution_count": 117,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1433,7 +1994,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 465,
+   "execution_count": 118,
    "metadata": {},
    "outputs": [
     {
@@ -1453,7 +2014,7 @@
        "Name: id, Length: 45346, dtype: int64"
       ]
      },
-     "execution_count": 465,
+     "execution_count": 118,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1464,7 +2025,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 466,
+   "execution_count": 119,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1481,7 +2042,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 467,
+   "execution_count": 120,
    "metadata": {},
    "outputs": [
     {
@@ -1490,7 +2051,7 @@
        "Int64Index([], dtype='int64')"
       ]
      },
-     "execution_count": 467,
+     "execution_count": 120,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1501,7 +2062,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 468,
+   "execution_count": 121,
    "metadata": {},
    "outputs": [
     {
@@ -1650,7 +2211,7 @@
        "max    14075.000000  1.239638e+07   2020.000000  "
       ]
      },
-     "execution_count": 468,
+     "execution_count": 121,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1669,7 +2230,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 469,
+   "execution_count": 122,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1680,7 +2241,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 470,
+   "execution_count": 123,
    "metadata": {},
    "outputs": [
     {
@@ -1760,7 +2321,7 @@
        "4  [{'credit_id': '52fe44959251416c75039ed7', 'de...  11862  "
       ]
      },
-     "execution_count": 470,
+     "execution_count": 123,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1771,7 +2332,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 471,
+   "execution_count": 124,
    "metadata": {},
    "outputs": [
     {
@@ -1795,39 +2356,6 @@
     "df_credits.info()"
    ]
   },
-  {
-   "attachments": {},
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Function to get elements of list of list of dictionaries"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 472,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "def get_list(dataframe='', column='', key_name=''):\n",
-    "    \"\"\"Get the list of key elements from a dictionary.\n",
-    "\n",
-    "    Args:\n",
-    "        dataframe (str, optional): Dataframe to extract information. Defaults to ''.\n",
-    "        column (str, optional): Column of the dataframe to get information. Defaults to ''.\n",
-    "        key_name (str, optional): Dictionary key to get value. Defaults to ''.\n",
-    "\n",
-    "    Returns:\n",
-    "        List: List of list of elements of the dictionary\n",
-    "    \"\"\"\n",
-    "    dataframe[column] = dataframe[column].apply(str_to_dict)\n",
-    "    column_list = dataframe[column].to_list()\n",
-    "\n",
-    "    return_list = [[data[key_name] for data in inter_list if key_name in data] for inter_list in column_list]\n",
-    "\n",
-    "    return return_list"
-   ]
-  },
   {
    "attachments": {},
    "cell_type": "markdown",
@@ -1838,7 +2366,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 473,
+   "execution_count": 125,
    "metadata": {},
    "outputs": [
     {
@@ -1911,7 +2439,7 @@
        "4  [Steve Martin, Diane Keaton, Martin Short, Kim...   Alan Silvestri  11862"
       ]
      },
-     "execution_count": 473,
+     "execution_count": 125,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1922,7 +2450,7 @@
     "df_actors = pd.DataFrame()\n",
     "for _, (column, key) in enumerate(zip(columns, key_list)):\n",
     "    elements_list = []\n",
-    "    elements_list = get_list(dataframe=df_credits, column=column, key_name=key)\n",
+    "    elements_list = get_list(str_to_dict, dataframe=df_credits, column=column, key_name=key)\n",
     "    if 'crew' in column:\n",
     "        new_list = [element[0] if len(element) > 0 else 'No Director' for element in elements_list]\n",
     "        elements_list = new_list.copy()\n",
@@ -1934,7 +2462,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 474,
+   "execution_count": 126,
    "metadata": {},
    "outputs": [
     {
@@ -1954,7 +2482,7 @@
        "Name: id, Length: 45432, dtype: int64"
       ]
      },
-     "execution_count": 474,
+     "execution_count": 126,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1973,7 +2501,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 475,
+   "execution_count": 127,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1990,7 +2518,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 476,
+   "execution_count": 128,
    "metadata": {},
    "outputs": [
     {
@@ -1999,7 +2527,7 @@
        "Int64Index([], dtype='int64')"
       ]
      },
-     "execution_count": 476,
+     "execution_count": 128,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2018,7 +2546,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 477,
+   "execution_count": 129,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2035,7 +2563,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 478,
+   "execution_count": 130,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2052,7 +2580,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 479,
+   "execution_count": 131,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2062,7 +2590,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 480,
+   "execution_count": 132,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -2079,7 +2607,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 481,
+   "execution_count": 133,
    "metadata": {},
    "outputs": [
     {
@@ -2089,7 +2617,7 @@
        "Name: cast, dtype: object"
       ]
      },
-     "execution_count": 481,
+     "execution_count": 133,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2109,7 +2637,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 482,
+   "execution_count": 134,
    "metadata": {},
    "outputs": [
     {
@@ -2129,17 +2657,17 @@
        "vote_count                                                                  0.0\n",
        "return                                                                      0.0\n",
        "belongs_to_collection_name                                                     \n",
-       "genres_name                                                    [, , , , , , , ]\n",
-       "production_companies_name     [, , , , , , , , , , , , , , , , , , , , , , ,...\n",
-       "production_countries_name     [, , , , , , , , , , , , , , , , , , , , , , ,...\n",
-       "spoken_languages_name                    [, , , , , , , , , , , , , , , , , , ]\n",
+       "genres                                                                       []\n",
+       "production_companies                                                         []\n",
+       "production_countries                                                         []\n",
+       "spoken_languages                                                             []\n",
        "release_year                                                               2017\n",
        "cast                                                                        NaN\n",
        "director                                                                    NaN\n",
        "Name: 42783, dtype: object"
       ]
      },
-     "execution_count": 482,
+     "execution_count": 134,
      "metadata": {},
      "output_type": "execute_result"
     }