-
Notifications
You must be signed in to change notification settings - Fork 1
/
parallel.py
39 lines (29 loc) · 1.22 KB
/
parallel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from ipyparallel import Client
# Connect to the ipyparallel cluster; Client() is called with no arguments,
# so whatever default connection settings the library resolves are used.
c = Client()
# Ids of the engines known to the client. As a bare expression this only
# displays a value in an interactive session; in a script it is discarded.
c.ids
# View over all engines, obtained by slicing the client with [:].
dview = c[:]
# Make calls on this view wait for their results instead of returning
# asynchronous handles.
dview.block = True
# Smoke test: run a trivial function through the all-engines view.
# The return value is not stored.
dview.apply(lambda : "Hello, World")
# Load-balanced view, used further down for the parallel map.
lview = c.load_balanced_view()
# Same synchronous behaviour for the load-balanced view.
lview.block = True
import pandas
# Read the flight records. header=None tells pandas the file has no header
# row, so the column names are assigned explicitly right after.
dat = pandas.read_csv(
    'bayArea.csv',
    header=None,
    encoding='latin1',
)
dat.columns = (
    'Year', 'Month', 'DayofMonth', 'DayOfWeek',
    'DepTime', 'CRSDepTime', 'ArrTime', 'CRSArrTime',
    'UniqueCarrier', 'FlightNum', 'TailNum',
    'ActualElapsedTime', 'CRSElapsedTime', 'AirTime',
    'ArrDelay', 'DepDelay', 'Origin', 'Dest', 'Distance',
    'TaxiIn', 'TaxiOut', 'Cancelled', 'CancellationCode', 'Diverted',
    'CarrierDelay', 'WeatherDelay', 'NASDelay', 'SecurityDelay',
    'LateAircraftDelay',
)

# Import statsmodels under the name `sm` on the engines reached by dview;
# the function mapped later refers to `sm` by that name.
dview.execute('import statsmodels.api as sm')

# Keep only the columns needed for the per-destination regression.
dat2 = dat.loc[:, ('DepDelay', 'Year', 'Dest', 'Origin')]
dests = dat2['Dest'].unique()

# Ship the reduced frame and the destination list to the engines under the
# same names they have locally.
mydict = {'dat2': dat2, 'dests': dests}
dview.push(mydict)
def f(id):
    """Fit an OLS regression of DepDelay on Year for a single destination.

    The names ``dat2``, ``dests`` and ``sm`` are not parameters; they are
    looked up in the namespace the function runs in (the script pushes
    ``dat2``/``dests`` and imports ``sm`` on the engines beforehand).

    Args:
        id: Position in ``dests`` of the destination to model.

    Returns:
        The ``params`` attribute of the fitted OLS results, i.e. the
        estimated coefficients for the ``const`` and ``Year`` regressors.
    """
    # Rows whose destination matches the requested entry of `dests`.
    is_target = dat2.Dest == dests[id]
    flights = sm.add_constant(dat2.loc[is_target, :])

    # Design matrix: intercept column added above plus the year.
    design = flights.loc[:, ('const', 'Year')]
    fitted = sm.OLS(flights.DepDelay, design).fit()
    return fitted.params
import time
# Time the parallel fit. The start and end readings are captured so the
# elapsed time is actually computed; bare time.time() calls only display a
# value in an interactive session and are discarded when run as a script.
start_time = time.perf_counter()
# One task per destination index, distributed through the load-balanced view.
parallel_result = lview.map(f, range(len(dests)))
# Serial equivalent, for timing comparison (f needs `sm` available locally):
#result = map(f, range(len(dests)))
elapsed = time.perf_counter() - start_time
print("Parallel map over %d destinations took %.3f s" % (len(dests), elapsed))
# some NaN values because all 'Year' values are the same for some destinations
print(parallel_result)