-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_once.R
125 lines (92 loc) · 4.92 KB
/
run_once.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
############################################################################################
# Demonstration script for high-level structures and functions associated with
# the Buchholz release code project
# Created by Will Daniels ([email protected])
# Based on MATLAB code written by Peter Simonson ([email protected])
# Version 17-September-2019
############################################################################################
#### WORKSPACE CLEANUP ####
# Start from a clean slate so objects left over from an earlier session cannot
# leak into this run.
# NOTE(review): this deletes every (non-hidden) object that ls() reports in the
# calling environment, including anything created before the script was run.
rm(list = ls()) # remove all objects returned by ls()
cat("\014") # emit a form feed; consoles such as RStudio treat it as "clear screen"
### IMPORT LIBRARIES ####
library(foreach)
library(doParallel)
library(regClimateChem)
library(gtools)
library(here)
#### PATH SETTING ####
# Set the working directory to the directory in which this file is saved.
# The following must be included in the working directory:
# A subdirectory that contains the response and index data
#
# The script location is looked up through the RStudio API, which only works
# inside a running RStudio session with a saved document. The lookup is guarded
# so that other front ends (e.g. Rscript) keep their current working directory
# instead of aborting; in that case launch the script from its own directory.
script.path <- ""
if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  script.path <- rstudioapi::getActiveDocumentContext()$path
}
# An unsaved document or a console context reports an empty path.
if (length(script.path) == 1L && !is.na(script.path) && nzchar(script.path)) {
  setwd(dirname(script.path))
} else {
  message("Script location not available; keeping working directory: ", getwd())
}
rm(script.path)
# Set the output directory to the directory in which the output is saved.
# The output of the run_once.R script is a single .RData file
# NOTE: this is a machine-specific absolute path; edit it before running elsewhere.
output.dir <- "/home/wdaniels/Desktop/test_dir"
# recursive = TRUE also creates any missing parent directories.
dir.create(output.dir, showWarnings = FALSE, recursive = TRUE)
# Fail now rather than at the final saveRDS() call, after all the processing
# has already run.
if (!dir.exists(output.dir)) {
  stop("Cannot create output directory: ", output.dir, call. = FALSE)
}
#### LOAD USER SPECIFICATIONS ####
# Run the user's configuration script (resolved against the working directory).
# It holds the settings that control the linear regression and the lag search.
# NOTE(review): response.data, index.data and model.parameters are used below
# without being created in this script, so they are presumably defined there.
source(file = "single_run_config_file.R")
#### TIMING ####
# Record the start time so the total run time can be reported at the end.
start.time <- Sys.time()
#### DATA PROCESSING: RESPONSE DATA IMPORT ####
# Load the CO (response) data from its configured location, then reformat its
# dates for later use.
response.raw <- populate.response.data(response.data)
response.data <- decon.data.struct.time(response.raw)
#### DATA PROCESSING: EXPLANATORY DATA IMPORT ####
# Load the index (explanatory) data from its configured locations and reformat
# its dates in the same way.
# Note: the two sources are assumed to use different date formats:
# YYYYMM in the CO data and YYYYMMDD in the index data.
index.raw <- populate.index.data(index.data)
index.data <- decon.data.struct.time(index.raw)
# Drop the intermediates so only response.data and index.data remain.
rm(response.raw, index.raw)
#### DATA PROCESSING: INITIALIZE MODEL PARAMETERS STRUCTURE ####
# Determines the temporal extent of each data stream, and computes the lag space that the
# data will support (assuming full utilization of the CO data).
# The existing model.parameters is passed in and replaced by the returned structure.
model.parameters <- initialize.model.parameters(index.data, response.data, model.parameters)
#### DATA PROCESSING: MODEL PARAMETER STRUCTURE COMPLETION ####
# Computes and attaches elements needed for data slicing and regression: masks and
# regression formula.
# NOTE(review): index.data is passed here before the date offset in the next
# section is applied to it -- confirm that this order is intended.
model.parameters <- complete.model.parameters(model.parameters, index.data)
#### DATA PROCESSING: CONSTRUCT AND ATTACH DATE OFFSET ####
# Applies date offset, referenced to the last month of response data present in the stream.
# The helper returns both updated structures in a single object, which is
# unpacked back into response.data and index.data below.
temp.output <- initialize.uniform.date.index(response.data, index.data)
# NOTE(review): on a list, `$` matches names partially, so these two lines also
# succeed if the returned names merely start with "response" / "index".
# Confirm the exact element names before replacing `$` with [[ ]].
response.data <- temp.output$response
index.data <- temp.output$index
rm(temp.output) # the temporary holder is no longer needed
#### DATA PROCESSING: CONSTRUCT A LAG SET MATRIX ####
# Generate the full Cartesian product sets of potential lags for the model search,
# based on the user-specified lag limits.
lag.space.matrix <- generate.lag.space.matrix(model.parameters)
#### DATA PROCESSING: SMOOTH INDEX DATA ####
# Smooth the index data using the smoothing parameter stored in model.parameters.
# The result is stored under a separate name; index.data itself is not reassigned.
# NOTE(review): stats::smooth() has the same name. Presumably regClimateChem
# supplies the smooth() meant here -- confirm which function is actually called.
index.data.smoothed <- smooth(index.data, model.parameters$smoothing.parameter)
#### DATA PROCESSING: LAG SPACE SEARCH ####
# Stepwise regression for every lag set that appears in lag.space.matrix.
# For each lag set, only a model evaluation metric for the final model produced by the
# selection process is preserved.
# The search runs on the smoothed index data. Its result is consumed by the
# packaging step as a flat list read in strides of four
# (adjusted R2, BIC, coefficients, lag set).
model.evaluation.metric.list <- regress.over.lagset(lag.space.matrix, response.data,
index.data.smoothed, model.parameters)
### DATA PACKAGING ###
# Unlist output from regress.over.lagset.
# The flat result list is read in strides of four: positions 1, 5, 9, ... hold
# the adjusted R2 values, 2, 6, 10, ... the BIC values, 3, 7, 11, ... the
# coefficients and 4, 8, 12, ... the lag sets.
# Slots are selected with a position mask rather than seq(k, length, 4), because
# seq() raises an error when the list is shorter than k (e.g. an empty result).
n.results <- length(model.evaluation.metric.list)
if (n.results %% 4L != 0L) {
  # A partial group means the slots below would end up with unequal lengths.
  warning("Expected the regression output length to be a multiple of 4, got ",
          n.results, call. = FALSE)
}
# Slot number (1 to 4) of every position in the flat list.
result.slot <- (seq_len(n.results) - 1L) %% 4L + 1L
adj.r2.list <- unlist(model.evaluation.metric.list[result.slot == 1L])
bic.list <- unlist(model.evaluation.metric.list[result.slot == 2L])
coef.list <- model.evaluation.metric.list[result.slot == 3L]
lagset.list <- model.evaluation.metric.list[result.slot == 4L]
rm(n.results, result.slot)
# Finish timer
# Elapsed time of the run in seconds; [[1]] extracts the single elapsed value.
end.time <- Sys.time()
total.time <- difftime(end.time, start.time, units = "secs")[[1]]
# Put all relevant information about the particular run into an output object.
# The inputs are stored next to the results so a run can be interpreted later.
# Note that index.data is stored here, not the smoothed copy.
run.parameters <- list(
  model.parameters = model.parameters,
  index.data = index.data,
  response.data = response.data
)
output.list <- list(
  adj.r2.list = adj.r2.list,
  bic.list = bic.list,
  coef.list = coef.list,
  lagset.list = lagset.list,
  run.time = total.time,
  run.parameters = run.parameters
)
# The helper is now part of output.list; keep the workspace as before.
rm(run.parameters)
# Cleanup
# The results now live inside output.list; remove the loose copies and the raw
# regression output, then trigger a garbage collection.
rm(list = c("model.evaluation.metric.list", "adj.r2.list", "bic.list",
            "coef.list", "lagset.list"))
gc()
### SAVE OUTPUT ###
# Write the output object to the output directory.
# Despite the .RData extension this file is written by saveRDS(): read it back
# with readRDS(), not load().
saveRDS(output.list, file.path(output.dir, "single_run_output.RData"))