forked from epfl-ada/Projects
-
Notifications
You must be signed in to change notification settings - Fork 2
/
data_loading.py
62 lines (44 loc) · 1.56 KB
/
data_loading.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
import pandas as pd
import os
class YearLoader:
    """
    Load the CSV tables of one year into pandas DataFrames.

    Each table listed in ``default_mapping_csv`` is read from
    ``../data/<year>/<french name><separator><year>.csv``, resolved
    relative to the directory of this module.

    Parameters
    ----------
    year : int or str
        Year of the data to load. It must be convertible with ``int()``;
        the value is inserted as given into the directory and file names.

    Attributes
    ----------
    default_encoding : str
        Encoding used to read the CSV files into DataFrames.
    default_mapping_csv : dict
        Mapping from English table names to French CSV base names.
    year : int or str
        The year passed to the constructor, stored unchanged.
    dataframes : dict
        Dictionary containing all DataFrames for the year (keys in English).

    Notes
    -----
    Errors raised by ``int(year)`` or by ``pandas.read_csv`` (for example
    when a file cannot be opened) propagate unchanged to the caller.
    """

    def __init__(self, year):
        self.default_encoding = "ISO-8859-1"
        self.default_mapping_csv = {
            "characteristics": "caracteristiques",
            "locations": "lieux",
            "passengers": "usagers",
            "vehicles": "vehicules",
        }
        self.year = year

        # Convert once; the helpers only need the numeric value for their
        # year-specific exceptions.
        year_number = int(year)

        self.dataframes = {}
        for english_name, french_name in self.default_mapping_csv.items():
            self.dataframes[english_name] = pd.read_csv(
                self._csv_path(french_name, year),
                delimiter=self._csv_delimiter(french_name, year_number),
                encoding=self.default_encoding,
            )

    @staticmethod
    def _csv_path(french_name, year, base_dir=None):
        """
        Build the path of the CSV file named ``french_name`` for ``year``.

        ``base_dir`` defaults to the directory containing this module.
        """
        if base_dir is None:
            base_dir = os.path.dirname(__file__)

        # File names use "_" before the year up to 2016 and "-" afterwards.
        separator = "_" if int(year) <= 2016 else "-"

        # Format only the relative template: formatting the already-joined
        # path would treat any "{" or "}" in the base directory as a
        # replacement field.
        relative_path = "../data/{2}/{0}{1}{2}.csv".format(
            french_name, separator, year
        )
        return os.path.join(base_dir, relative_path)

    @staticmethod
    def _csv_delimiter(french_name, year):
        """Return the field delimiter to use for ``french_name`` in ``year``."""
        # The 2009 "caracteristiques" file is read as tab-separated; every
        # other file is read as comma-separated.
        if int(year) == 2009 and french_name == "caracteristiques":
            return "\t"
        return ","

    def get_dataframe(self, name):
        """
        Return the DataFrame stored under the English table name ``name``.

        Raises ``KeyError`` if no table with that name was loaded.
        """
        return self.dataframes[name]