From 7975a6c2233289a815de189a0fb6b7f9816c90bd Mon Sep 17 00:00:00 2001
From: nikk-nikaznan <nik.k.nikaznan@outlook.com>
Date: Tue, 3 Dec 2024 10:51:43 +0000
Subject: [PATCH 1/2] add title and just year as x-axis label

---
 episodes/14-looping-data-sets.md | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/episodes/14-looping-data-sets.md b/episodes/14-looping-data-sets.md
index 94d7ddbe4..7a22bbeab 100644
--- a/episodes/14-looping-data-sets.md
+++ b/episodes/14-looping-data-sets.md
@@ -192,12 +192,11 @@ to either filter out those columns or tell pandas to ignore them.
 This solution builds a useful legend by using the [string `split` method][split-method] to
 extract the `region` from the path 'data/gapminder\_gdp\_a\_specific\_region.csv'.
 
-```python
-import glob
+```import glob
 import pandas as pd
 import matplotlib.pyplot as plt
 fig, ax = plt.subplots(1,1)
-for filename in glob.glob('data/gapminder_gdp*.csv'):
+for filename in glob.glob('/Users/nikkhadijahnikaznan/Downloads/data/gapminder_gdp*.csv'):
     dataframe = pd.read_csv(filename)
     # extract <region> from the filename, expected to be in the format 'data/gapminder_gdp_<region>.csv'.
     # we will split the string using the split method and `_` as our separator,
@@ -207,13 +206,21 @@ for filename in glob.glob('data/gapminder_gdp*.csv'):
     # convenient abstractions for working with filesystem paths and could solve this as well:
     # from pathlib import Path
     # region = Path(filename).stem.split('_')[-1]
-    region = filename.split('_')[-1][:-4] 
+    region = filename.split('_')[-1][:-4]
+    # extract the years: skip the first column, then take the second '_'-separated field of each heading
+    headings = dataframe.columns[1:]
+    years = headings.str.split('_').str.get(1)
     # pandas raises errors when it encounters non-numeric columns in a dataframe computation
     # but we can tell pandas to ignore them with the `numeric_only` parameter
     dataframe.mean(numeric_only=True).plot(ax=ax, label=region)
     # NOTE: another way of doing this selects just the columns with gdp in their name using the filter method
     # dataframe.filter(like="gdp").mean().plot(ax=ax, label=region)
-
+# set the title and x-axis tick labels; `years` holds the values from the last file read in the loop
+ax.set_title('GDP Per Capita for Regions Over Time')
+ax.set_xticks(range(len(years)))
+ax.set_xticklabels(years)
+ax.set_xlabel('Year')
+plt.tight_layout()
 plt.legend()
 plt.show()
 ```

From 2c5f20eadbafa1ef3ead51a6fa9fcec42114c545 Mon Sep 17 00:00:00 2001
From: nikk-nikaznan <nik.k.nikaznan@outlook.com>
Date: Tue, 3 Dec 2024 10:53:54 +0000
Subject: [PATCH 2/2] cleaned up

---
 episodes/14-looping-data-sets.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/episodes/14-looping-data-sets.md b/episodes/14-looping-data-sets.md
index 7a22bbeab..1a7af685c 100644
--- a/episodes/14-looping-data-sets.md
+++ b/episodes/14-looping-data-sets.md
@@ -192,11 +192,12 @@ to either filter out those columns or tell pandas to ignore them.
 This solution builds a useful legend by using the [string `split` method][split-method] to
 extract the `region` from the path 'data/gapminder\_gdp\_a\_specific\_region.csv'.
 
-```import glob
+```python
+import glob
 import pandas as pd
 import matplotlib.pyplot as plt
 fig, ax = plt.subplots(1,1)
-for filename in glob.glob('/Users/nikkhadijahnikaznan/Downloads/data/gapminder_gdp*.csv'):
+for filename in glob.glob('data/gapminder_gdp*.csv'):
     dataframe = pd.read_csv(filename)
     # extract <region> from the filename, expected to be in the format 'data/gapminder_gdp_<region>.csv'.
     # we will split the string using the split method and `_` as our separator,