From 4a43c38203f81e1c40a47ddba7d3131bb685cd3d Mon Sep 17 00:00:00 2001 From: milesmedina <44986194+milesmedina@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:47:22 -0400 Subject: [PATCH] small corrections --- docs/eval_paradigm.Rmd | 13 +++++--- docs/eval_paradigm.html | 73 +++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/docs/eval_paradigm.Rmd b/docs/eval_paradigm.Rmd index bfdbdfb..9119488 100644 --- a/docs/eval_paradigm.Rmd +++ b/docs/eval_paradigm.Rmd @@ -67,7 +67,6 @@ library(plyr) library(dplyr) library(tidyr) library(mgcv) -library(here) ``` ## Pinellas County water quality data {#data_pcwq} @@ -84,7 +83,7 @@ The code below loads the raw data from file (`../data-raw/DataDownload_2999342_r ```{r} # load data - pcwq1 <- read.csv( here("data-raw/DataDownload_2999342_row.csv" ))[,1:21] + pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21] # dates pcwq1$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y") # qualifier codes @@ -136,7 +135,7 @@ This section loads the EPC water quality data from file (`../data/epcwq.RData`), ```{r} # load data - load( here("data/epcwq.RData" )) + load( "../data/epcwq.RData" ) # dates epcwq1 <- epcwq |> as.data.frame() epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d") @@ -220,7 +219,7 @@ This section loads the Tampa Bay Estuary Program's monthly external loading esti ```{r} # load data - loads <- read.csv( here("data-raw/TB_loads_monthly.csv" )) + loads <- read.csv( "../data-raw/TB_loads_monthly.csv" ) # subset TN data for OTB segment loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ] # date column @@ -388,6 +387,10 @@ Below, we fit linear models for chlorophyll as a function of cumulative 3-month ) } # // end TN_chl_plot() function +# remove incomplete cases in the data +epc1 <- epc1[ complete.cases(epc1), ] +epc2 <- epc2[ complete.cases(epc2), ] +epc3 <- epc3[ complete.cases(epc3), ] # models ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 ) @@ -401,7 +404,7 @@ TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" ) TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" ) ``` -The diagnostics for each of the above OTB models indicate that the assumption of homoskedastic, Gaussian residuals is questionable. +The diagnostics for each of the above OTB models indicate that the assumption of homoskedastic, Gaussian residuals is not met. ```{r fig.width=7, fig.height=7} par(mfrow=c(2,2)) # North of CC diff --git a/docs/eval_paradigm.html b/docs/eval_paradigm.html index 8a1ee25..6c032ee 100644 --- a/docs/eval_paradigm.html +++ b/docs/eval_paradigm.html @@ -520,8 +520,7 @@
Pinellas County collects water quality data at five strata along the @@ -539,7 +538,7 @@
pcwq2
dataframe for analysis.
# load data
- <- read.csv( here("data-raw/DataDownload_2999342_row.csv" ))[,1:21]
+ pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21]
pcwq1 # dates
$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y")
pcwq1# qualifier codes
@@ -598,7 +597,7 @@ Environmental Protection Commission of Hillsborough County (EPC)
cases, the Secchi depth is not necessarily a reliable proxy for water
clarity (e.g., in shallow areas).
# load data
- load( here("data/epcwq.RData" ))
+ load( "../data/epcwq.RData" )
# dates
<- epcwq |> as.data.frame()
epcwq1 $date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d")
@@ -688,7 +687,7 @@ epcwq1Tampa Bay Estuary Program nitrogen load estimates
etc.), standardizes column names, and creates a new loads
dataframe for analysis.
# load data
- <- read.csv( here("data-raw/TB_loads_monthly.csv" ))
+ loads <- read.csv( "../data-raw/TB_loads_monthly.csv" )
loads # subset TN data for OTB segment
<- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ]
loads # date column
@@ -876,36 +875,40 @@ Nitrogen loads and chlorophyll, with 3-month cumulative loads
)# // end TN_chl_plot() function
}
-
-# models
-<- gam( chla ~ TN_load_3mo, data = epc1 )
- ncmod1 <- gam( chla ~ TN_load_3mo, data = epc2 )
- ncmod2 <- gam( chla ~ TN_load_3mo, data = epc3 )
- ncmod3
-# plots
-par( mfrow=c(1,3) )
-TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" )
-TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" )
-TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
-
+# remove incomplete cases in the data
+<- epc1[ complete.cases(epc1), ]
+ epc1 <- epc2[ complete.cases(epc2), ]
+ epc2 <- epc3[ complete.cases(epc3), ]
+ epc3
+# models
+<- gam( chla ~ TN_load_3mo, data = epc1 )
+ ncmod1 <- gam( chla ~ TN_load_3mo, data = epc2 )
+ ncmod2 <- gam( chla ~ TN_load_3mo, data = epc3 )
+ ncmod3
+# plots
+par( mfrow=c(1,3) )
+TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" )
+TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" )
+TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
+
The diagnostics for each of the above OTB models indicate that the
-assumption of homoskedastic, Gaussian residuals is questionable.
+assumption of homoskedastic, Gaussian residuals is not met.
par(mfrow=c(2,2))
# North of CC
gam.check(ncmod1,rep=1000)
-
+
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 2 / 2
# Between CC and HF
gam.check(ncmod2,rep=1000)
-
+
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 2 / 2
# Between HF and GB
gam.check(ncmod3,rep=1000)
-
+
##
## Method: GCV Optimizer: magic
## Model required no smoothing parameter selectionModel rank = 2 / 2
@@ -1223,7 +1226,7 @@ E1 (Safety Harbor / Mobbly Bay)
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam1, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 22 iterations.
@@ -1235,7 +1238,7 @@ E1 (Safety Harbor / Mobbly Bay)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120 3 1.07 0.82
+## te(decdate,value.x) 120 3 1.07 0.8
summary( gam1 )
##
## Family: gaussian
@@ -1289,7 +1292,7 @@ E2 (Largo Inlet)
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam2, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 14 iterations.
@@ -1301,7 +1304,7 @@ E2 (Largo Inlet)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.00 8.14 1.01 0.47
+## te(decdate,value.x) 120.00 8.14 1.01 0.55
summary( gam2 )
##
## Family: gaussian
@@ -1354,7 +1357,7 @@ E3 (Feather Sound)
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam3, rep = 1000 )
-
+
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 22 iterations.
@@ -1366,7 +1369,7 @@ E3 (Feather Sound)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.00 4.52 0.89 0.045 *
+## te(decdate,value.x) 120.00 4.52 0.89 0.08 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary( gam3 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 40 iterations.
@@ -1482,7 +1485,7 @@ E5 (Weedon Island)
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam5, rep = 1000 )
-
+
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 16 iterations.
@@ -1561,7 +1564,7 @@ North of Courtney Campbell
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam6, rep = 1000 )
-
+
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 10 iterations.
@@ -1573,7 +1576,7 @@ North of Courtney Campbell
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 34.3 1 0.52
+## te(decdate,value.x) 120.0 34.3 1 0.48
summary( gam6 )
##
## Family: gaussian
@@ -1631,7 +1634,7 @@ Between the Frankland Bridge and Courtney Campbell
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam7, rep = 1000 )
-
+
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
@@ -1643,7 +1646,7 @@ Between the Frankland Bridge and Courtney Campbell
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 30.4 1.03 0.68
+## te(decdate,value.x) 120.0 30.4 1.03 0.76
summary( gam7 )
##
## Family: gaussian
@@ -1709,7 +1712,7 @@ Between Gandy Blvd and the Frankland Bridge
# model diagnostics
par(mfrow=c(2,2))
gam.check( gam8, rep = 1000 )
-
+
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
@@ -1721,7 +1724,7 @@ Between Gandy Blvd and the Frankland Bridge
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 22.9 1.05 0.85
+## te(decdate,value.x) 120.0 22.9 1.05 0.84
summary( gam8 )
##
## Family: gaussian