From 5b27c1799384f73641e2807064407e16ab1155c3 Mon Sep 17 00:00:00 2001 From: fedepacher Date: Sun, 11 Jun 2023 09:21:26 -0300 Subject: [PATCH] refact(#1): refactor unnested function --- etl.ipynb | 898 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 713 insertions(+), 185 deletions(-) diff --git a/etl.ipynb b/etl.ipynb index cf635f4..b597816 100644 --- a/etl.ipynb +++ b/etl.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 437, + "execution_count": 18, "metadata": {}, "outputs": [], "source": [ @@ -23,14 +23,14 @@ }, { "cell_type": "code", - "execution_count": 438, + "execution_count": 88, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_70350/3456366765.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n", + "/tmp/ipykernel_415942/3456366765.py:3: DtypeWarning: Columns (10) have mixed types. Specify dtype option on import or set low_memory=False.\n", " df_movies = pd.read_csv(url)\n" ] } @@ -43,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 439, + "execution_count": 51, "metadata": {}, "outputs": [ { @@ -276,7 +276,7 @@ "[5 rows x 24 columns]" ] }, - "execution_count": 439, + "execution_count": 51, "metadata": {}, "output_type": "execute_result" } @@ -287,7 +287,7 @@ }, { "cell_type": "code", - "execution_count": 440, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -296,7 +296,7 @@ "(45466, 24)" ] }, - "execution_count": 440, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -315,7 +315,7 @@ }, { "cell_type": "code", - "execution_count": 441, + "execution_count": 89, "metadata": {}, "outputs": [], "source": [ @@ -324,7 +324,7 @@ }, { "cell_type": "code", - "execution_count": 442, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -372,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 443, + "execution_count": 90, "metadata": {}, "outputs": [], "source": [ @@ -381,7 +381,7 @@ }, { "cell_type": "code", - "execution_count": 444, + "execution_count": 91, "metadata": {}, "outputs": [], "source": [ @@ -390,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 445, + "execution_count": 55, "metadata": {}, "outputs": [ { @@ -438,7 +438,7 @@ }, { "cell_type": "code", - "execution_count": 446, + "execution_count": 92, "metadata": {}, "outputs": [], "source": [ @@ -447,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 447, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -491,23 +491,55 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Create funtion to convert string to dictionary" + "## Create funtion to convert string to list" ] }, { "cell_type": "code", - "execution_count": 448, + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "def str_to_list(value):\n", + " \"\"\"Convert string to list.\n", + "\n", + " Args:\n", + " value (str): String valu to be converted to dictionary.\n", + "\n", + " Returns:\n", + " None : Return an empty list if NaN, bool or float value.\n", + " \"\"\"\n", + " if pd.isna(value):\n", + " return []\n", + " else:\n", + " value = ast.literal_eval(value)\n", + " if isinstance(value, bool) or isinstance(value, float):\n", + " return []\n", + " return value" + ] + }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create function to convert string to dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": 95, "metadata": {}, "outputs": [], "source": [ "def str_to_dict(value):\n", - " \"\"\"Convert string to dictionary\n", + " \"\"\"Convert string to dictionary.\n", "\n", " Args:\n", - " value (str): String valu to be converted to dictionary\n", + " value (str): String valu to be converted to dictionary.\n", "\n", " Returns:\n", - " None : Return None if NaN value is present\n", + " None : Return None if NaN value is present.\n", " \"\"\"\n", " if pd.isna(value):\n", " return None\n", @@ -515,6 +547,40 @@ " return ast.literal_eval(value)" ] }, + { + "attachments": {}, + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Function to get elements of list of list of dictionaries" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "def get_list(funct, dataframe='', column='', key_name=''):\n", + " \"\"\"Get the list of key elements from a dictionary.\n", + "\n", + " Args:\n", + " funct (callable): Function to convert str to list or dict.\n", + " dataframe (str, optional): Dataframe to extract information. Defaults to ''.\n", + " column (str, optional): Column of the dataframe to get information. Defaults to ''.\n", + " key_name (str, optional): Dictionary key to get value. Defaults to ''. \n", + "\n", + " Returns:\n", + " List: List of list of elements of the dictionary\n", + " \"\"\"\n", + " dataframe[column] = dataframe[column].apply(funct)\n", + " column_list = dataframe[column].to_list()\n", + "\n", + " return_list = [[data[key_name] for data in inter_list if key_name in data] for inter_list in column_list]\n", + "\n", + " return return_list" + ] + }, { "attachments": {}, "cell_type": "markdown", @@ -525,7 +591,7 @@ }, { "cell_type": "code", - "execution_count": 449, + "execution_count": 97, "metadata": {}, "outputs": [], "source": [ @@ -534,7 +600,7 @@ }, { "cell_type": "code", - "execution_count": 450, + "execution_count": 99, "metadata": {}, "outputs": [ { @@ -620,7 +686,7 @@ "4 /7qwE57OVZmMJChBpLEbJEmzUydk.jpg " ] }, - "execution_count": 450, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } @@ -632,7 +698,7 @@ }, { "cell_type": "code", - "execution_count": 451, + "execution_count": 100, "metadata": {}, "outputs": [ { @@ -718,7 +784,7 @@ "4 /nts4iOmNnq7GNicycMJ9pSAn204.jpg /7qwE57OVZmMJChBpLEbJEmzUydk.jpg " ] }, - "execution_count": 451, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } @@ -732,7 +798,7 @@ }, { "cell_type": "code", - "execution_count": 452, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -741,7 +807,7 @@ "(45466, 4)" ] }, - "execution_count": 452, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -752,16 +818,33 @@ }, { "cell_type": "code", - "execution_count": 453, + "execution_count": 102, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'] not found in axis\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/tmp/ipykernel_415942/1005892911.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf_nested\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'belongs_to_collection_id'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'belongs_to_collection_poster_path'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'belongs_to_collection_backdrop_path'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 309\u001b[0m \u001b[0mstacklevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mstacklevel\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 310\u001b[0m )\n\u001b[0;32m--> 311\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 312\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 313\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapper\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4955\u001b[0m \u001b[0mweight\u001b[0m \u001b[0;36m1.0\u001b[0m \u001b[0;36m0.8\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4956\u001b[0m \"\"\"\n\u001b[0;32m-> 4957\u001b[0;31m return super().drop(\n\u001b[0m\u001b[1;32m 4958\u001b[0m \u001b[0mlabels\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4959\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4265\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32min\u001b[0m \u001b[0maxes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4266\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlabels\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4267\u001b[0;31m \u001b[0mobj\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_drop_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4268\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4269\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minplace\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36m_drop_axis\u001b[0;34m(self, labels, axis, level, errors, consolidate, only_slice)\u001b[0m\n\u001b[1;32m 4309\u001b[0m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4310\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 4311\u001b[0;31m \u001b[0mnew_axis\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdrop\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlabels\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4312\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnew_axis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/anaconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36mdrop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 6659\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0many\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6660\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0;34m\"ignore\"\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 6661\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mKeyError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"{list(labels[mask])} not found in axis\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6662\u001b[0m \u001b[0mindexer\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexer\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m~\u001b[0m\u001b[0mmask\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 6663\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdelete\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mindexer\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: \"['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'] not found in axis\"" + ] + } + ], "source": [ "df_nested.drop(columns=['belongs_to_collection_id', 'belongs_to_collection_poster_path', 'belongs_to_collection_backdrop_path'], inplace=True)" ] }, { "cell_type": "code", - "execution_count": 454, + "execution_count": 103, "metadata": {}, "outputs": [ { @@ -910,7 +993,7 @@ "2 Grumpy Old Men Collection " ] }, - "execution_count": 454, + "execution_count": 103, "metadata": {}, "output_type": "execute_result" } @@ -923,7 +1006,7 @@ }, { "cell_type": "code", - "execution_count": 455, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -972,40 +1055,525 @@ }, { "cell_type": "code", - "execution_count": 456, + "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "columns = ['genres', 'production_companies', 'production_countries', 'spoken_languages']\n", + "key = 'name'\n", + "df = pd.DataFrame()\n", + "for _, column in enumerate(columns):\n", + " elements_list = []\n", + " elements_list = get_list(str_to_list, dataframe=df_movies, column=column, key_name=key)\n", "\n", - "for column in columns:\n", - " df_movies[column] = df_movies[column].apply(str_to_dict)\n", - " df_nested = pd.json_normalize(df_movies[column]).fillna('')\n", - " nested_columns = df_nested.columns\n", - " df_nested_1 = pd.DataFrame()\n", - " for nested_column in nested_columns:\n", - " df_aux = pd.json_normalize(df_nested[nested_column]).fillna('')\n", - " if 'production_countries' in column:\n", - " df_aux.rename(columns={'iso_3166_1': 'id'}, inplace=True)\n", - " if 'spoken_languages' in column:\n", - " df_aux.rename(columns={'iso_639_1': 'id'}, inplace=True)\n", - " df_aux.drop(columns='id', axis=1, inplace=True)\n", - " new_columns_names = {col : f'{column}_{nested_column}_{col}' for col in df_aux.columns}\n", - " df_aux.rename(columns=new_columns_names, inplace=True)\n", - " df_nested_1 = pd.concat([df_nested_1, df_aux], axis=1)\n", - "\n", - " # convert columns to a list of columns\n", - " column_name = f'{column}_name'\n", - " column_name_list = df_nested_1.columns.to_list()\n", - " df_nested_1[column_name] = df_nested_1.values.tolist()\n", - " df_nested_1.drop(columns=column_name_list, axis=1, inplace=True)\n", - " df_movies = pd.concat([df_movies, df_nested_1], axis=1)\n", - " df_movies.drop(columns=column, inplace=True)" + " df_aux = pd.DataFrame({column: elements_list})\n", + " df = pd.concat([df, df_aux], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "metadata": {}, + "outputs": [], + "source": [ + "df_movies.drop(columns=columns, inplace=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
genresproduction_companiesproduction_countriesspoken_languages
0[Animation, Comedy, Family][Pixar Animation Studios][United States of America][English]
1[Adventure, Fantasy, Family][TriStar Pictures, Teitler Film, Interscope Co...[United States of America][English, Français]
2[Romance, Comedy][Warner Bros., Lancaster Gate][United States of America][English]
3[Comedy, Drama, Romance][Twentieth Century Fox Film Corporation][United States of America][English]
4[Comedy][Sandollar Productions, Touchstone Pictures][United States of America][English]
\n", + "
" + ], + "text/plain": [ + " genres \\\n", + "0 [Animation, Comedy, Family] \n", + "1 [Adventure, Fantasy, Family] \n", + "2 [Romance, Comedy] \n", + "3 [Comedy, Drama, Romance] \n", + "4 [Comedy] \n", + "\n", + " production_companies \\\n", + "0 [Pixar Animation Studios] \n", + "1 [TriStar Pictures, Teitler Film, Interscope Co... \n", + "2 [Warner Bros., Lancaster Gate] \n", + "3 [Twentieth Century Fox Film Corporation] \n", + "4 [Sandollar Productions, Touchstone Pictures] \n", + "\n", + " production_countries spoken_languages \n", + "0 [United States of America] [English] \n", + "1 [United States of America] [English, Français] \n", + "2 [United States of America] [English] \n", + "3 [United States of America] [English] \n", + "4 [United States of America] [English] " + ] + }, + "execution_count": 105, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
budgetidoriginal_languageoverviewpopularityrelease_daterevenueruntimestatustitlevote_averagevote_countreturnbelongs_to_collection_name
030000000.0862enLed by Woody, Andy's toys live happily in his ...21.9469431995-10-30373554033.081.0ReleasedToy Story7.75415.012.451801Toy Story Collection
165000000.08844enWhen siblings Judy and Peter discover an encha...17.0155391995-12-15262797249.0104.0ReleasedJumanji6.92413.04.043035
20.015602enA family wedding reignites the ancient feud be...11.71291995-12-220.0101.0ReleasedGrumpier Old Men6.592.00.000000Grumpy Old Men Collection
316000000.031357enCheated on, mistreated and stepped on, the wom...3.8594951995-12-2281452156.0127.0ReleasedWaiting to Exhale6.134.05.090760
40.011862enJust when George Banks has recovered from his ...8.3875191995-02-1076578911.0106.0ReleasedFather of the Bride Part II5.7173.00.000000Father of the Bride Collection
\n", + "
" + ], + "text/plain": [ + " budget id original_language \\\n", + "0 30000000.0 862 en \n", + "1 65000000.0 8844 en \n", + "2 0.0 15602 en \n", + "3 16000000.0 31357 en \n", + "4 0.0 11862 en \n", + "\n", + " overview popularity release_date \\\n", + "0 Led by Woody, Andy's toys live happily in his ... 21.946943 1995-10-30 \n", + "1 When siblings Judy and Peter discover an encha... 17.015539 1995-12-15 \n", + "2 A family wedding reignites the ancient feud be... 11.7129 1995-12-22 \n", + "3 Cheated on, mistreated and stepped on, the wom... 3.859495 1995-12-22 \n", + "4 Just when George Banks has recovered from his ... 8.387519 1995-02-10 \n", + "\n", + " revenue runtime status title vote_average \\\n", + "0 373554033.0 81.0 Released Toy Story 7.7 \n", + "1 262797249.0 104.0 Released Jumanji 6.9 \n", + "2 0.0 101.0 Released Grumpier Old Men 6.5 \n", + "3 81452156.0 127.0 Released Waiting to Exhale 6.1 \n", + "4 76578911.0 106.0 Released Father of the Bride Part II 5.7 \n", + "\n", + " vote_count return belongs_to_collection_name \n", + "0 5415.0 12.451801 Toy Story Collection \n", + "1 2413.0 4.043035 \n", + "2 92.0 0.000000 Grumpy Old Men Collection \n", + "3 34.0 5.090760 \n", + "4 173.0 0.000000 Father of the Bride Collection " + ] + }, + "execution_count": 108, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_movies.head()" ] }, { "cell_type": "code", - "execution_count": 457, + "execution_count": 109, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
budgetidoriginal_languageoverviewpopularityrelease_daterevenueruntimestatustitlevote_averagevote_countreturnbelongs_to_collection_namegenresproduction_companiesproduction_countriesspoken_languages
030000000.0862enLed by Woody, Andy's toys live happily in his ...21.9469431995-10-30373554033.081.0ReleasedToy Story7.75415.012.451801Toy Story Collection[Animation, Comedy, Family][Pixar Animation Studios][United States of America][English]
165000000.08844enWhen siblings Judy and Peter discover an encha...17.0155391995-12-15262797249.0104.0ReleasedJumanji6.92413.04.043035[Adventure, Fantasy, Family][TriStar Pictures, Teitler Film, Interscope Co...[United States of America][English, Français]
20.015602enA family wedding reignites the ancient feud be...11.71291995-12-220.0101.0ReleasedGrumpier Old Men6.592.00.000000Grumpy Old Men Collection[Romance, Comedy][Warner Bros., Lancaster Gate][United States of America][English]
316000000.031357enCheated on, mistreated and stepped on, the wom...3.8594951995-12-2281452156.0127.0ReleasedWaiting to Exhale6.134.05.090760[Comedy, Drama, Romance][Twentieth Century Fox Film Corporation][United States of America][English]
40.011862enJust when George Banks has recovered from his ...8.3875191995-02-1076578911.0106.0ReleasedFather of the Bride Part II5.7173.00.000000Father of the Bride Collection[Comedy][Sandollar Productions, Touchstone Pictures][United States of America][English]
\n", + "
" + ], + "text/plain": [ + " budget id original_language \\\n", + "0 30000000.0 862 en \n", + "1 65000000.0 8844 en \n", + "2 0.0 15602 en \n", + "3 16000000.0 31357 en \n", + "4 0.0 11862 en \n", + "\n", + " overview popularity release_date \\\n", + "0 Led by Woody, Andy's toys live happily in his ... 21.946943 1995-10-30 \n", + "1 When siblings Judy and Peter discover an encha... 17.015539 1995-12-15 \n", + "2 A family wedding reignites the ancient feud be... 11.7129 1995-12-22 \n", + "3 Cheated on, mistreated and stepped on, the wom... 3.859495 1995-12-22 \n", + "4 Just when George Banks has recovered from his ... 8.387519 1995-02-10 \n", + "\n", + " revenue runtime status title vote_average \\\n", + "0 373554033.0 81.0 Released Toy Story 7.7 \n", + "1 262797249.0 104.0 Released Jumanji 6.9 \n", + "2 0.0 101.0 Released Grumpier Old Men 6.5 \n", + "3 81452156.0 127.0 Released Waiting to Exhale 6.1 \n", + "4 76578911.0 106.0 Released Father of the Bride Part II 5.7 \n", + "\n", + " vote_count return belongs_to_collection_name \\\n", + "0 5415.0 12.451801 Toy Story Collection \n", + "1 2413.0 4.043035 \n", + "2 92.0 0.000000 Grumpy Old Men Collection \n", + "3 34.0 5.090760 \n", + "4 173.0 0.000000 Father of the Bride Collection \n", + "\n", + " genres \\\n", + "0 [Animation, Comedy, Family] \n", + "1 [Adventure, Fantasy, Family] \n", + "2 [Romance, Comedy] \n", + "3 [Comedy, Drama, Romance] \n", + "4 [Comedy] \n", + "\n", + " production_companies \\\n", + "0 [Pixar Animation Studios] \n", + "1 [TriStar Pictures, Teitler Film, Interscope Co... \n", + "2 [Warner Bros., Lancaster Gate] \n", + "3 [Twentieth Century Fox Film Corporation] \n", + "4 [Sandollar Productions, Touchstone Pictures] \n", + "\n", + " production_countries spoken_languages \n", + "0 [United States of America] [English] \n", + "1 [United States of America] [English, Français] \n", + "2 [United States of America] [English] \n", + "3 [United States of America] [English] \n", + "4 [United States of America] [English] " + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_movies = pd.concat([df_movies, df], axis=1)\n", + "df_movies.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 110, "metadata": {}, "outputs": [ { @@ -1014,7 +1582,7 @@ "(45466, 18)" ] }, - "execution_count": 457, + "execution_count": 110, "metadata": {}, "output_type": "execute_result" } @@ -1025,7 +1593,7 @@ }, { "cell_type": "code", - "execution_count": 458, + "execution_count": 111, "metadata": {}, "outputs": [ { @@ -1051,10 +1619,10 @@ " 11 vote_count 45460 non-null float64\n", " 12 return 45466 non-null float64\n", " 13 belongs_to_collection_name 45466 non-null object \n", - " 14 genres_name 45466 non-null object \n", - " 15 production_companies_name 45466 non-null object \n", - " 16 production_countries_name 45466 non-null object \n", - " 17 spoken_languages_name 45466 non-null object \n", + " 14 genres 45466 non-null object \n", + " 15 production_companies 45466 non-null object \n", + " 16 production_countries 45466 non-null object \n", + " 17 spoken_languages 45466 non-null object \n", "dtypes: float64(6), object(12)\n", "memory usage: 6.2+ MB\n" ] @@ -1074,7 +1642,7 @@ }, { "cell_type": "code", - "execution_count": 459, + "execution_count": 112, "metadata": {}, "outputs": [ { @@ -1100,10 +1668,10 @@ " 11 vote_count 45376 non-null float64\n", " 12 return 45379 non-null float64\n", " 13 belongs_to_collection_name 45379 non-null object \n", - " 14 genres_name 45379 non-null object \n", - " 15 production_companies_name 45379 non-null object \n", - " 16 production_countries_name 45379 non-null object \n", - " 17 spoken_languages_name 45379 non-null object \n", + " 14 genres 45379 non-null object \n", + " 15 production_companies 45379 non-null object \n", + " 16 production_countries 45379 non-null object \n", + " 17 spoken_languages 45379 non-null object \n", "dtypes: float64(6), object(12)\n", "memory usage: 6.6+ MB\n" ] @@ -1124,14 +1692,14 @@ }, { "cell_type": "code", - "execution_count": 460, + "execution_count": 113, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/tmp/ipykernel_70350/235543016.py:2: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.\n", + "/tmp/ipykernel_415942/235543016.py:2: UserWarning: This pattern is interpreted as a regular expression, and has match groups. To actually get the groups, use str.extract.\n", " filter = df_movies['release_date'].str.contains(regex_filter)\n" ] } @@ -1153,7 +1721,7 @@ }, { "cell_type": "code", - "execution_count": 461, + "execution_count": 114, "metadata": {}, "outputs": [], "source": [ @@ -1170,7 +1738,7 @@ }, { "cell_type": "code", - "execution_count": 462, + "execution_count": 115, "metadata": {}, "outputs": [], "source": [ @@ -1179,7 +1747,7 @@ }, { "cell_type": "code", - "execution_count": 463, + "execution_count": 116, "metadata": {}, "outputs": [ { @@ -1217,10 +1785,10 @@ " vote_count\n", " return\n", " belongs_to_collection_name\n", - " genres_name\n", - " production_companies_name\n", - " production_countries_name\n", - " spoken_languages_name\n", + " genres\n", + " production_companies\n", + " production_countries\n", + " spoken_languages\n", " release_year\n", " \n", " \n", @@ -1241,10 +1809,10 @@ " 5415.0\n", " 12.451801\n", " Toy Story Collection\n", - " [Animation, Comedy, Family, , , , , ]\n", - " [Pixar Animation Studios, , , , , , , , , , , ...\n", - " [United States of America, , , , , , , , , , ,...\n", - " [English, , , , , , , , , , , , , , , , , , ]\n", + " [Animation, Comedy, Family]\n", + " [Pixar Animation Studios]\n", + " [United States of America]\n", + " [English]\n", " 1995\n", " \n", " \n", @@ -1263,10 +1831,10 @@ " 2413.0\n", " 4.043035\n", " \n", - " [Adventure, Fantasy, Family, , , , , ]\n", + " [Adventure, Fantasy, Family]\n", " [TriStar Pictures, Teitler Film, Interscope Co...\n", - " [United States of America, , , , , , , , , , ,...\n", - " [English, Français, , , , , , , , , , , , , , ...\n", + " [United States of America]\n", + " [English, Français]\n", " 1995\n", " \n", " \n", @@ -1285,10 +1853,10 @@ " 92.0\n", " 0.000000\n", " Grumpy Old Men Collection\n", - " [Romance, Comedy, , , , , , ]\n", - " [Warner Bros., Lancaster Gate, , , , , , , , ,...\n", - " [United States of America, , , , , , , , , , ,...\n", - " [English, , , , , , , , , , , , , , , , , , ]\n", + " [Romance, Comedy]\n", + " [Warner Bros., Lancaster Gate]\n", + " [United States of America]\n", + " [English]\n", " 1995\n", " \n", " \n", @@ -1307,10 +1875,10 @@ " 34.0\n", " 5.090760\n", " \n", - " [Comedy, Drama, Romance, , , , , ]\n", - " [Twentieth Century Fox Film Corporation, , , ,...\n", - " [United States of America, , , , , , , , , , ,...\n", - " [English, , , , , , , , , , , , , , , , , , ]\n", + " [Comedy, Drama, Romance]\n", + " [Twentieth Century Fox Film Corporation]\n", + " [United States of America]\n", + " [English]\n", " 1995\n", " \n", " \n", @@ -1329,10 +1897,10 @@ " 173.0\n", " 0.000000\n", " Father of the Bride Collection\n", - " [Comedy, , , , , , , ]\n", - " [Sandollar Productions, Touchstone Pictures, ,...\n", - " [United States of America, , , , , , , , , , ,...\n", - " [English, , , , , , , , , , , , , , , , , , ]\n", + " [Comedy]\n", + " [Sandollar Productions, Touchstone Pictures]\n", + " [United States of America]\n", + " [English]\n", " 1995\n", " \n", " \n", @@ -1368,36 +1936,29 @@ "3 34.0 5.090760 \n", "4 173.0 0.000000 Father of the Bride Collection \n", "\n", - " genres_name \\\n", - "0 [Animation, Comedy, Family, , , , , ] \n", - "1 [Adventure, Fantasy, Family, , , , , ] \n", - "2 [Romance, Comedy, , , , , , ] \n", - "3 [Comedy, Drama, Romance, , , , , ] \n", - "4 [Comedy, , , , , , , ] \n", + " genres \\\n", + "0 [Animation, Comedy, Family] \n", + "1 [Adventure, Fantasy, Family] \n", + "2 [Romance, Comedy] \n", + "3 [Comedy, Drama, Romance] \n", + "4 [Comedy] \n", "\n", - " production_companies_name \\\n", - "0 [Pixar Animation Studios, , , , , , , , , , , ... \n", + " production_companies \\\n", + "0 [Pixar Animation Studios] \n", "1 [TriStar Pictures, Teitler Film, Interscope Co... \n", - "2 [Warner Bros., Lancaster Gate, , , , , , , , ,... \n", - "3 [Twentieth Century Fox Film Corporation, , , ,... \n", - "4 [Sandollar Productions, Touchstone Pictures, ,... \n", - "\n", - " production_countries_name \\\n", - "0 [United States of America, , , , , , , , , , ,... \n", - "1 [United States of America, , , , , , , , , , ,... \n", - "2 [United States of America, , , , , , , , , , ,... \n", - "3 [United States of America, , , , , , , , , , ,... \n", - "4 [United States of America, , , , , , , , , , ,... \n", + "2 [Warner Bros., Lancaster Gate] \n", + "3 [Twentieth Century Fox Film Corporation] \n", + "4 [Sandollar Productions, Touchstone Pictures] \n", "\n", - " spoken_languages_name release_year \n", - "0 [English, , , , , , , , , , , , , , , , , , ] 1995 \n", - "1 [English, Français, , , , , , , , , , , , , , ... 1995 \n", - "2 [English, , , , , , , , , , , , , , , , , , ] 1995 \n", - "3 [English, , , , , , , , , , , , , , , , , , ] 1995 \n", - "4 [English, , , , , , , , , , , , , , , , , , ] 1995 " + " production_countries spoken_languages release_year \n", + "0 [United States of America] [English] 1995 \n", + "1 [United States of America] [English, Français] 1995 \n", + "2 [United States of America] [English] 1995 \n", + "3 [United States of America] [English] 1995 \n", + "4 [United States of America] [English] 1995 " ] }, - "execution_count": 463, + "execution_count": 116, "metadata": {}, "output_type": "execute_result" } @@ -1416,7 +1977,7 @@ }, { "cell_type": "code", - "execution_count": 464, + "execution_count": 117, "metadata": {}, "outputs": [], "source": [ @@ -1433,7 +1994,7 @@ }, { "cell_type": "code", - "execution_count": 465, + "execution_count": 118, "metadata": {}, "outputs": [ { @@ -1453,7 +2014,7 @@ "Name: id, Length: 45346, dtype: int64" ] }, - "execution_count": 465, + "execution_count": 118, "metadata": {}, "output_type": "execute_result" } @@ -1464,7 +2025,7 @@ }, { "cell_type": "code", - "execution_count": 466, + "execution_count": 119, "metadata": {}, "outputs": [], "source": [ @@ -1481,7 +2042,7 @@ }, { "cell_type": "code", - "execution_count": 467, + "execution_count": 120, "metadata": {}, "outputs": [ { @@ -1490,7 +2051,7 @@ "Int64Index([], dtype='int64')" ] }, - "execution_count": 467, + "execution_count": 120, "metadata": {}, "output_type": "execute_result" } @@ -1501,7 +2062,7 @@ }, { "cell_type": "code", - "execution_count": 468, + "execution_count": 121, "metadata": {}, "outputs": [ { @@ -1650,7 +2211,7 @@ "max 14075.000000 1.239638e+07 2020.000000 " ] }, - "execution_count": 468, + "execution_count": 121, "metadata": {}, "output_type": "execute_result" } @@ -1669,7 +2230,7 @@ }, { "cell_type": "code", - "execution_count": 469, + "execution_count": 122, "metadata": {}, "outputs": [], "source": [ @@ -1680,7 +2241,7 @@ }, { "cell_type": "code", - "execution_count": 470, + "execution_count": 123, "metadata": {}, "outputs": [ { @@ -1760,7 +2321,7 @@ "4 [{'credit_id': '52fe44959251416c75039ed7', 'de... 11862 " ] }, - "execution_count": 470, + "execution_count": 123, "metadata": {}, "output_type": "execute_result" } @@ -1771,7 +2332,7 @@ }, { "cell_type": "code", - "execution_count": 471, + "execution_count": 124, "metadata": {}, "outputs": [ { @@ -1795,39 +2356,6 @@ "df_credits.info()" ] }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Function to get elements of list of list of dictionaries" - ] - }, - { - "cell_type": "code", - "execution_count": 472, - "metadata": {}, - "outputs": [], - "source": [ - "def get_list(dataframe='', column='', key_name=''):\n", - " \"\"\"Get the list of key elements from a dictionary.\n", - "\n", - " Args:\n", - " dataframe (str, optional): Dataframe to extract information. Defaults to ''.\n", - " column (str, optional): Column of the dataframe to get information. Defaults to ''.\n", - " key_name (str, optional): Dictionary key to get value. Defaults to ''.\n", - "\n", - " Returns:\n", - " List: List of list of elements of the dictionary\n", - " \"\"\"\n", - " dataframe[column] = dataframe[column].apply(str_to_dict)\n", - " column_list = dataframe[column].to_list()\n", - "\n", - " return_list = [[data[key_name] for data in inter_list if key_name in data] for inter_list in column_list]\n", - "\n", - " return return_list" - ] - }, { "attachments": {}, "cell_type": "markdown", @@ -1838,7 +2366,7 @@ }, { "cell_type": "code", - "execution_count": 473, + "execution_count": 125, "metadata": {}, "outputs": [ { @@ -1911,7 +2439,7 @@ "4 [Steve Martin, Diane Keaton, Martin Short, Kim... Alan Silvestri 11862" ] }, - "execution_count": 473, + "execution_count": 125, "metadata": {}, "output_type": "execute_result" } @@ -1922,7 +2450,7 @@ "df_actors = pd.DataFrame()\n", "for _, (column, key) in enumerate(zip(columns, key_list)):\n", " elements_list = []\n", - " elements_list = get_list(dataframe=df_credits, column=column, key_name=key)\n", + " elements_list = get_list(str_to_dict, dataframe=df_credits, column=column, key_name=key)\n", " if 'crew' in column:\n", " new_list = [element[0] if len(element) > 0 else 'No Director' for element in elements_list]\n", " elements_list = new_list.copy()\n", @@ -1934,7 +2462,7 @@ }, { "cell_type": "code", - "execution_count": 474, + "execution_count": 126, "metadata": {}, "outputs": [ { @@ -1954,7 +2482,7 @@ "Name: id, Length: 45432, dtype: int64" ] }, - "execution_count": 474, + "execution_count": 126, "metadata": {}, "output_type": "execute_result" } @@ -1973,7 +2501,7 @@ }, { "cell_type": "code", - "execution_count": 475, + "execution_count": 127, "metadata": {}, "outputs": [], "source": [ @@ -1990,7 +2518,7 @@ }, { "cell_type": "code", - "execution_count": 476, + "execution_count": 128, "metadata": {}, "outputs": [ { @@ -1999,7 +2527,7 @@ "Int64Index([], dtype='int64')" ] }, - "execution_count": 476, + "execution_count": 128, "metadata": {}, "output_type": "execute_result" } @@ -2018,7 +2546,7 @@ }, { "cell_type": "code", - "execution_count": 477, + "execution_count": 129, "metadata": {}, "outputs": [], "source": [ @@ -2035,7 +2563,7 @@ }, { "cell_type": "code", - "execution_count": 478, + "execution_count": 130, "metadata": {}, "outputs": [], "source": [ @@ -2052,7 +2580,7 @@ }, { "cell_type": "code", - "execution_count": 479, + "execution_count": 131, "metadata": {}, "outputs": [], "source": [ @@ -2062,7 +2590,7 @@ }, { "cell_type": "code", - "execution_count": 480, + "execution_count": 132, "metadata": {}, "outputs": [], "source": [ @@ -2079,7 +2607,7 @@ }, { "cell_type": "code", - "execution_count": 481, + "execution_count": 133, "metadata": {}, "outputs": [ { @@ -2089,7 +2617,7 @@ "Name: cast, dtype: object" ] }, - "execution_count": 481, + "execution_count": 133, "metadata": {}, "output_type": "execute_result" } @@ -2109,7 +2637,7 @@ }, { "cell_type": "code", - "execution_count": 482, + "execution_count": 134, "metadata": {}, "outputs": [ { @@ -2129,17 +2657,17 @@ "vote_count 0.0\n", "return 0.0\n", "belongs_to_collection_name \n", - "genres_name [, , , , , , , ]\n", - "production_companies_name [, , , , , , , , , , , , , , , , , , , , , , ,...\n", - "production_countries_name [, , , , , , , , , , , , , , , , , , , , , , ,...\n", - "spoken_languages_name [, , , , , , , , , , , , , , , , , , ]\n", + "genres []\n", + "production_companies []\n", + "production_countries []\n", + "spoken_languages []\n", "release_year 2017\n", "cast NaN\n", "director NaN\n", "Name: 42783, dtype: object" ] }, - "execution_count": 482, + "execution_count": 134, "metadata": {}, "output_type": "execute_result" }