Skip to content

Commit

Permalink
aktualizovana pracovni verze notebooku
Browse files Browse the repository at this point in the history
  • Loading branch information
PetraVidnerova committed Nov 10, 2024
1 parent 174a67b commit 64eb53c
Showing 1 changed file with 91 additions and 40 deletions.
131 changes: 91 additions & 40 deletions lessons/pydata/homework_revisited/index_na_hodinu.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,24 @@
"np.random.seed(42)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import warnings\n",
"from sklearn.exceptions import ConvergenceWarning\n",
"warnings.filterwarnings(\"ignore\", category=ConvergenceWarning)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ÚKOL 1**: Načtěte data, vyhoďte přebytečné sloupce, vyberte vstupy a výstupy a připravte rozdělení na trénovací a testovací množinu.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
Expand All @@ -33,7 +51,7 @@
"outputs": [],
"source": [
"fish_data = pd.read_csv(\"fish_data.csv\", index_col=0)\n",
"# fish_data = fish_data.drop(columns=[____])\n",
"# fish_data = fish_data.drop(columns=[___])\n",
"fish_data"
]
},
Expand Down Expand Up @@ -73,8 +91,14 @@
"source": [
"from sklearn.model_selection import train_test_split \n",
"\n",
"X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y)\n",
"# X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=X[\"Species\"])"
"X_train_raw, X_test_raw, y_train, y_test = ..."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ÚKOL 2**: Překódujte kategorické proměnné a přeškálujte všechny sloupce."
]
},
{
Expand All @@ -91,22 +115,19 @@
"outputs": [],
"source": [
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.compose import make_column_transformer\n",
"\n",
"categorical_columns = [____] \n",
"categorical_columns = ___ \n",
"\n",
"encoder = OneHotEncoder()\n",
"encoder.fit(X_train_raw[categorical_columns])\n",
"column_names = encoder.get_feature_names_out()\n",
" \n",
"def transform_species(X_raw):\n",
" X_res = X_raw.drop(columns=[\"Species\"])\n",
" X_res = X_res.reindex(columns=list(X_res.columns)+list(column_names))\n",
" X_res[list(column_names)] = encoder.transform(X_raw[categorical_columns]).toarray() \n",
" return X_res\n",
"transformer = make_column_transformer(\n",
" (_______, _____),\n",
" remainder=\"passthrough\"\n",
")\n",
"\n",
"X_train_onehot = transform_species(X_train_raw)\n",
"X_test_onehot = transform_species(X_test_raw)\n",
"X_train_onehot"
"X_train_onehot = transformer._______(X_train_raw)\n",
"X_test_onehot = transformer.________(X_test_raw)\n",
"\n",
"pd.DataFrame(X_train_onehot, columns=transformer.get_feature_names_out())"
]
},
{
Expand All @@ -124,11 +145,11 @@
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"scaler.____(X_train_onehot)\n",
"scaler = ______\n",
"scaler._____(_____)\n",
"\n",
"X_train = scaler.____(X_train_onehot)\n",
"X_test = scaler.____(X_test_onehot)"
"X_train = scaler.transform(X_train_onehot)\n",
"X_test = scaler.transform(X_test_onehot)"
]
},
{
Expand All @@ -151,6 +172,13 @@
" * C, float, optional (default=1.0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
Expand Down Expand Up @@ -207,6 +235,13 @@
"``` "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**ÚKOL 3**: Dopište funkci `fit_and_eval` dle instrukcí."
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -218,11 +253,13 @@
"def fit_and_eval(X_train, y_train, X_test, y_test, model, name):\n",
" \"\"\" 1. Natrénuje model na trénovací množině.\n",
" 2. Spočte hodnoty metrik na trénovací i testovací množině.\n",
" vrátí slovník ve tvaru {\"název metriky train\": hodnota , \"název metriky test\": hodnota} \n",
" vrátí slovník ve tvaru {\"název metriky\": hodnota} \n",
" \"\"\" \n",
" ...\n",
" # zde dopiš kód\n",
" ... \n",
" return {\n",
" .... \n",
" \"MSE_test\": ____,\n",
" \"MSE_train\": ____\n",
" }"
]
},
Expand Down Expand Up @@ -296,8 +333,8 @@
"y_real_test = test_data.pop(\"Weight\")\n",
"X_real_test = test_data \n",
"\n",
"X_real_test = transform_species(X_real_test)\n",
"X_real_test_scaled = scaler.transform(X_real_test)"
"X_real_test_transformed = _____\n",
"X_real_test_scaled = _______"
]
},
{
Expand All @@ -309,7 +346,8 @@
"y_pred_test = best_model.predict(X_real_test_scaled)\n",
"\n",
"print(f\"MAE {mean_absolute_error(y_real_test, y_pred_test):.3f}\")\n",
"print(f\"MSE {mean_squared_error(y_real_test, y_pred_test):.3f}\")"
"print(f\"MSE {mean_squared_error(y_real_test, y_pred_test):.3f}\")\n",
"print(f\"R2 {r2_score(y_real_test, y_pred_test):.3f}\")"
]
},
{
Expand Down Expand Up @@ -342,36 +380,49 @@
"metadata": {},
"outputs": [],
"source": [
"is_bream = X_real_test[\"Species_Bream\"] == 1 \n",
"bream = X_real_test[is_bream][\"Length3\"]\n",
"\n",
"bream_weights = y_real_test[is_bream]\n",
"is_bream = X_real_test[\"Species\"] == \"Bream\"\n",
"predicted_bream_weights = best_model.predict(X_real_test_scaled[is_bream])\n",
"\n",
"is_roach = X_real_test[\"Species_Roach\"] == 1\n",
"roach = X_real_test[is_roach][\"Length3\"]\n",
"roach_weights = y_real_test[is_roach]\n",
"is_roach = X_real_test[\"Species\"] == \"Roach\"\n",
"predicted_roach_weights = best_model.predict(X_real_test_scaled[is_roach])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"result_bream = pd.DataFrame()\n",
"result_bream[\"length\"] = X_real_test[is_bream][\"Length3\"]\n",
"result_bream[\"weight\"] = y_real_test[is_bream]\n",
"result_bream[\"predicted\"] = predicted_bream_weights\n",
"result_bream = result_bream.sort_values(\"length\")\n",
"\n",
"result_roach = pd.DataFrame()\n",
"result_roach[\"length\"] = X_real_test[is_roach][\"Length3\"]\n",
"result_roach[\"weight\"] = y_real_test[is_roach]\n",
"result_roach[\"predicted\"] = predicted_roach_weights\n",
"result_roach = result_roach.sort_values(\"length\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import matplotlib.pyplot as plt \n",
"%matplotlib inline\n",
"\n",
"fig, ax = plt.subplots(1, 2)\n",
"\n",
"ax[0].scatter(bream, bream_weights, label=\"true weight\");\n",
"ax[0].scatter(bream, predicted_bream_weights, label=\"prediction\");\n",
"ax[0].plot(result_bream[\"length\"], result_bream[\"weight\"], label=\"true weight\", marker=\"o\");\n",
"ax[0].plot(result_bream[\"length\"], result_bream[\"predicted\"], label=\"prediction\", marker=\"o\");\n",
"ax[0].legend()\n",
"ax[0].set_title(\"Bream\")\n",
"\n",
"ax[1].scatter(roach, roach_weights, label=\"true weight\");\n",
"ax[1].scatter(roach, predicted_roach_weights, label=\"prediction\");\n",
"ax[1].plot(result_roach[\"length\"], result_roach[\"weight\"], label=\"true weight\", marker=\"o\");\n",
"ax[1].plot(result_roach[\"length\"], result_roach[\"predicted\"], label=\"prediction\", marker=\"o\");\n",
"ax[1].legend()\n",
"ax[1].set_title(\"Roach\");"
]
Expand All @@ -393,9 +444,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.13"
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

0 comments on commit 64eb53c

Please sign in to comment.