diff --git a/content/descriptive_statistics/_index.md b/content/descriptive_statistics/_index.md index 7d2f2bf..f38666b 100755 --- a/content/descriptive_statistics/_index.md +++ b/content/descriptive_statistics/_index.md @@ -51,7 +51,7 @@ Für eine erste Demo bist Du verantwortlich: - Berechne 2-3 Statistiken und erstelle 2-3 Visualisierungen basierend auf den Informationen im [Datensatz](/2023-2024-ZK_Data_Librarian_Modul_3/organisation/dataset/). - Nutze `pandas` zur Berechnung der Statistiken und `seaborn` für die Visualisierungen. -Lade bis spätestens **24.03.23** Deinen Report in Form eines *Jupyter Notebooks* in der [Dateiablage](https://zbiw.th-koeln.de/moodle/mod/assign/view.php?id=12281&forceview=1) in Moodle hoch. +Lade bis spätestens **18.03.24** Deinen Report in Form eines *Jupyter Notebooks* in der [Dateiablage](https://zbiw.th-koeln.de/moodle/mod/assign/view.php?id=12281&forceview=1) in Moodle hoch. {{% /customnotice %}} diff --git a/content/solutions.files/2.8.solutions_na.ipynb b/content/solutions.files/2.8.solutions_na.ipynb index bdb8135..58aa6e0 100644 --- a/content/solutions.files/2.8.solutions_na.ipynb +++ b/content/solutions.files/2.8.solutions_na.ipynb @@ -80,6 +80,136 @@ "df.isna().sum()" ] }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 keine Angabe\n", + "1 keine Angabe\n", + "2 45 to 54 years\n", + "3 keine Angabe\n", + "4 keine Angabe\n", + " ... \n", + "436285 35 to 44 years\n", + "436286 10 to 19 years\n", + "436287 60 to 64 years\n", + "436288 35 to 44 years\n", + "436289 75 years and over\n", + "Name: Age Range, Length: 436290, dtype: object" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Age Range'].fillna(\"keine Angabe\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Patron Type Code 0\n", + "Patron Type Definition 0\n", + "Total Checkouts 0\n", + "Total Renewals 0\n", + "Age Range 912\n", + "Home Library Code 2\n", + "Home Library Definition 0\n", + "Circulation Active Month 39513\n", + "Circulation Active Year 39513\n", + "Notice Preference Code 42989\n", + "Notice Preference Definition 0\n", + "Provided Email Address 0\n", + "Year Patron Registered 0\n", + "Within San Francisco County 1207\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2 45 to 54 years\n", + "5 45 to 54 years\n", + "6 55 to 59 years\n", + "7 45 to 54 years\n", + "9 45 to 54 years\n", + " ... \n", + "436285 35 to 44 years\n", + "436286 10 to 19 years\n", + "436287 60 to 64 years\n", + "436288 35 to 44 years\n", + "436289 75 years and over\n", + "Name: Age Range, Length: 435378, dtype: object" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df['Age Range'].dropna()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Patron Type Code 0\n", + "Patron Type Definition 0\n", + "Total Checkouts 0\n", + "Total Renewals 0\n", + "Age Range 912\n", + "Home Library Code 2\n", + "Home Library Definition 0\n", + "Circulation Active Month 39513\n", + "Circulation Active Year 39513\n", + "Notice Preference Code 42989\n", + "Notice Preference Definition 0\n", + "Provided Email Address 0\n", + "Year Patron Registered 0\n", + "Within San Francisco County 1207\n", + "dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.isna().sum()" + ] + }, { "cell_type": "code", "execution_count": 4,