Commit c7b24c0

added materials from 2017, with a few initial edits on interactive jobs

paciorek committed Sep 14, 2018
1 parent 1c2a7b7 commit c7b24c0

Showing 10 changed files with 1,935 additions and 0 deletions.
10 changes: 10 additions & 0 deletions Makefile
@@ -0,0 +1,10 @@
all: intro.html intro_slides.html

intro.html: intro.md
	pandoc -s -o intro.html intro.md

intro_slides.html: intro.md
	pandoc -s --webtex -t slidy -o intro_slides.html intro.md

clean:
	rm -rf intro.html intro_slides.html
9 changes: 9 additions & 0 deletions calc.py
@@ -0,0 +1,9 @@
import numpy as np
import time

n = 10000
# simulate an n x n matrix of standard normal draws
x = np.random.normal(0, 1, size=(n, n))
print(time.time())
# form the symmetric crossproduct matrix x.T @ x
x = x.T.dot(x)
print(time.time())
# Cholesky factor of the crossproduct; the printed timestamps bracket each step
U = np.linalg.cholesky(x)
print(time.time())
534 changes: 534 additions & 0 deletions intro.html

Large diffs are not rendered by default.

686 changes: 686 additions & 0 deletions intro.md

Large diffs are not rendered by default.

596 changes: 596 additions & 0 deletions intro_slides.html

Large diffs are not rendered by default.

16 changes: 16 additions & 0 deletions job.sh
@@ -0,0 +1,16 @@
#!/bin/bash
# Job name:
#SBATCH --job-name=test
#
# Account:
#SBATCH --account=co_stat
#
# Partition:
#SBATCH --partition=savio2
#
# Wall clock limit (30 seconds here):
#SBATCH --time=00:00:30
#
## Command(s) to run:
module load python/3.2.3 numpy
python3 calc.py >& calc.out
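
As a usage sketch (not part of this commit), a script like this would typically be submitted from a login node with sbatch; in line with the commit message's note on interactive jobs, srun can instead request an interactive shell. The account, partition, and time flags below simply mirror the ones in job.sh and are assumptions, not output of this repository.

# submit the batch script above and check on it
sbatch job.sh
squeue -u $USER
# or request an interactive session with comparable resources
srun --account=co_stat --partition=savio2 --time=00:30:00 --pty bash -i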
27 changes: 27 additions & 0 deletions parallel-multi.R
@@ -0,0 +1,27 @@
library(doMPI)

cl <- startMPIcluster()  # by default, starts one fewer worker than the number of MPI processes
registerDoMPI(cl)
clusterSize(cl) # just to check

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

# restrict to only columns we need to reduce copying time
dat2 <- subset(dat, select = c('delay', 'origin', 'dest'))

# some overhead in copying 'dat2' to worker processes...
results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat2, dest == destVal)
    summary(sub$delay)
}


results

closeCluster(cl)
mpi.quit()
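
A doMPI script like this one is launched under MPI rather than with plain Rscript. A minimal launch sketch, assuming a Slurm allocation with several MPI tasks (the module names and task count are illustrative, not part of this commit):

# inside a job that requested multiple MPI tasks, e.g. --ntasks=8
module load r openmpi    # assumed module names; adjust to the cluster
mpirun R CMD BATCH --no-save parallel-multi.R parallel-multi.Rout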
18 changes: 18 additions & 0 deletions parallel-one.R
@@ -0,0 +1,18 @@
library(doParallel)

nCores <- as.numeric(Sys.getenv('SLURM_CPUS_ON_NODE'))
registerDoParallel(nCores)

dat <- read.csv('/global/scratch/paciorek/bayArea.csv', header = FALSE,
                stringsAsFactors = FALSE)
names(dat)[16:18] <- c('delay', 'origin', 'dest')
table(dat$dest)

destVals <- unique(dat$dest)

results <- foreach(destVal = destVals) %dopar% {
    sub <- subset(dat, dest == destVal)
    summary(sub$delay)
}

results
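
Because this version sizes its worker pool from SLURM_CPUS_ON_NODE, it is meant to run inside a single-node Slurm allocation. One hedged way to launch it (the CPU count and time limit are illustrative, not from this commit):

# grab an interactive single-node allocation, then run the script in it
srun --nodes=1 --cpus-per-task=24 --time=00:30:00 --pty bash -i
R CMD BATCH --no-save parallel-one.R parallel-one.Rout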
39 changes: 39 additions & 0 deletions parallel.py
@@ -0,0 +1,39 @@
from IPython.parallel import Client
c = Client()
c.ids

dview = c[:]
dview.block = True
dview.apply(lambda : "Hello, World")

lview = c.load_balanced_view()
lview.block = True

import pandas
dat = pandas.read_csv('bayArea.csv', header = None)
dat.columns = ('Year','Month','DayofMonth','DayOfWeek','DepTime','CRSDepTime','ArrTime','CRSArrTime','UniqueCarrier','FlightNum','TailNum','ActualElapsedTime','CRSElapsedTime','AirTime','ArrDelay','DepDelay','Origin','Dest','Distance','TaxiIn','TaxiOut','Cancelled','CancellationCode','Diverted','CarrierDelay','WeatherDelay','NASDelay','SecurityDelay','LateAircraftDelay')

dview.execute('import statsmodels.api as sm')

dat2 = dat.loc[:, ('DepDelay','Year','Dest','Origin')]
dests = dat2.Dest.unique()

mydict = dict(dat2 = dat2, dests = dests)
dview.push(mydict)

def f(id):
    sub = dat2.loc[dat2.Dest == dests[id], :]
    sub = sm.add_constant(sub)
    model = sm.OLS(sub.DepDelay, sub.loc[:, ('const', 'Year')])
    results = model.fit()
    return results.params

import time
time.time()
parallel_result = lview.map(f, range(len(dests)))
#result = map(f, range(len(dests)))
time.time()

# some NaN values because all 'Year' values are the same for some destinations

parallel_result
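
This script assumes an IPython controller and engines are already running. With the legacy IPython.parallel interface used here, a minimal way to start them before running the driver (the engine count and sleep are illustrative assumptions) might be:

# start a controller plus worker engines, then run the driver script
ipcluster start -n 4 &
sleep 30                 # give the engines time to register
python parallel.py
ipcluster stop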
Binary file added savioOverview.jpeg
