Commit c7b24c0

added materials from 2017, with a few initial edits on interactive jobs

paciorek committed Sep 14, 2018
1 parent 1c2a7b7 commit c7b24c0

Showing 10 changed files with 1,935 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Makefile
@@ -0,0 +1,10 @@
all: intro.html intro_slides.html

intro.html: intro.md
	pandoc -s -o intro.html intro.md

intro_slides.html: intro.md
	pandoc -s --webtex -t slidy -o intro_slides.html intro.md

clean:
	rm -rf intro.html intro_slides.html
9 changes: 9 additions & 0 deletions calc.py
@@ -0,0 +1,9 @@
import numpy as np
import time

n = 10000
# simulate an n x n matrix of standard normal draws
x = np.random.normal(0, 1, size=(n, n))
print(time.time())
# form the symmetric crossproduct matrix x.T @ x
x = x.T.dot(x)
print(time.time())
# Cholesky factor of the crossproduct; the printed timestamps bracket each step
U = np.linalg.cholesky(x)
print(time.time())
534 changes: 534 additions & 0 deletions intro.html

Large diffs are not rendered by default.

686 changes: 686 additions & 0 deletions intro.md

Large diffs are not rendered by default.

596 changes: 596 additions & 0 deletions intro_slides.html

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions job.sh
@@ -0,0 +1,16 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=test
#
# Account:
#SBATCH --account=co_stat
#
# Partition:
#SBATCH --partition=savio2
#
# Wall clock limit (30 seconds here):
#SBATCH --time=00:00:30
#
## Command(s) to run:
module load python/3.2.3 numpy
python3 calc.py >& calc.out
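
As a usage sketch (not part of this commit), a script like this would typically be submitted from a login node with sbatch; in line with the commit message's note on interactive jobs, srun can instead request an interactive shell. The account, partition, and time flags below simply mirror the ones in job.sh and are assumptions, not output of this repository.

# submit the batch script above and check on it
sbatch job.sh
squeue -u $USER
# or request an interactive session with comparable resources
srun --account=co_stat --partition=savio2 --time=00:30:00 --pty bash -i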
27 changes: 27 additions & 0 deletions parallel-multi.R
@@ -0,0 +1,27 @@
library(doMPI)

cl <- startMPIcluster()  # by default, starts one fewer worker than the number of MPI processes
registerDoMPI(cl)
clusterSize(cl) # just to check

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# restrict to only columns we need to reduce copying time
dat2 <- subset(dat, select = c('delay', 'origin', 'dest'))

# some overhead in copying 'dat2' to worker processes...
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat2, dest == destVal)
    summary(sub$delay)
}


results

closeCluster(cl)
mpi.quit()
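
A doMPI script like this one is launched under MPI rather than with plain Rscript. A minimal launch sketch, assuming a Slurm allocation with several MPI tasks (the module names and task count are illustrative, not part of this commit):

# inside a job that requested multiple MPI tasks, e.g. --ntasks=8
module load r openmpi    # assumed module names; adjust to the cluster
mpirun R CMD BATCH --no-save parallel-multi.R parallel-multi.Rout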
18 changes: 18 additions & 0 deletions parallel-one.R
@@ -0,0 +1,18 @@
library(doParallel)

nCores <- as.numeric(Sys.getenv('SLURM_CPUS_ON_NODE'))
registerDoParallel(nCores)

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat, dest == destVal)
    summary(sub$delay)
}

results
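
Because this version sizes its worker pool from SLURM_CPUS_ON_NODE, it is meant to run inside a single-node Slurm allocation. One hedged way to launch it (the CPU count and time limit are illustrative, not from this commit):

# grab an interactive single-node allocation, then run the script in it
srun --nodes=1 --cpus-per-task=24 --time=00:30:00 --pty bash -i
R CMD BATCH --no-save parallel-one.R parallel-one.Rout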
39 changes: 39 additions & 0 deletions parallel.py
@@ -0,0 +1,39 @@
from IPython.parallel import Client
c = Client()
c.ids

dview = c[:]
dview.block = True
dview.apply(lambda : "Hello, World")

lview = c.load_balanced_view()
lview.block = True

import pandas
dat = pandas.read_csv('bayArea.csv', header = None)
dat.columns = ('Year','Month','DayofMonth','DayOfWeek','DepTime','CRSDepTime','ArrTime','CRSArrTime','UniqueCarrier','FlightNum','TailNum','ActualElapsedTime','CRSElapsedTime','AirTime','ArrDelay','DepDelay','Origin','Dest','Distance','TaxiIn','TaxiOut','Cancelled','CancellationCode','Diverted','CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay')

dview.execute('import statsmodels.api as sm')

dat2 = dat.loc[:, ('DepDelay','Year','Dest','Origin')]
dests = dat2.Dest.unique()

mydict = dict(dat2 = dat2, dests = dests)
dview.push(mydict)

def f(id):
    sub = dat2.loc[dat2.Dest == dests[id], :]
    sub = sm.add_constant(sub)
    model = sm.OLS(sub.DepDelay, sub.loc[:, ('const', 'Year')])
    results = model.fit()
    return results.params

import time
time.time()
parallel_result = lview.map(f, range(len(dests)))
#result = map(f, range(len(dests)))
time.time()

# some NaN values because all 'Year' values are the same for some destinations

parallel_result
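
This script assumes an IPython controller and engines are already running. With the legacy IPython.parallel interface used here, a minimal way to start them before running the driver (the engine count and sleep are illustrative assumptions) might be:

# start a controller plus worker engines, then run the driver script
ipcluster start -n 4 &
sleep 30                 # give the engines time to register
python parallel.py
ipcluster stop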
Binary file added savioOverview.jpeg
