From 4a43c38203f81e1c40a47ddba7d3131bb685cd3d Mon Sep 17 00:00:00 2001 From: milesmedina <44986194+milesmedina@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:47:22 -0400 Subject: [PATCH] small corrections --- docs/eval_paradigm.Rmd | 13 +++++--- docs/eval_paradigm.html | 73 +++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 40 deletions(-) diff --git a/docs/eval_paradigm.Rmd b/docs/eval_paradigm.Rmd index bfdbdfb..9119488 100644 --- a/docs/eval_paradigm.Rmd +++ b/docs/eval_paradigm.Rmd @@ -67,7 +67,6 @@ library(plyr) library(dplyr) library(tidyr) library(mgcv) -library(here) ``` ## Pinellas County water quality data {#data_pcwq} @@ -84,7 +83,7 @@ The code below loads the raw data from file (`../data-raw/DataDownload_2999342_r ```{r} # load data - pcwq1 <- read.csv( here("data-raw/DataDownload_2999342_row.csv" ))[,1:21] + pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21] # dates pcwq1$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y") # qualifier codes @@ -136,7 +135,7 @@ This section loads the EPC water quality data from file (`../data/epcwq.RData`), ```{r} # load data - load( here("data/epcwq.RData" )) + load( "../data/epcwq.RData" ) # dates epcwq1 <- epcwq |> as.data.frame() epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d") @@ -220,7 +219,7 @@ This section loads the Tampa Bay Estuary Program's monthly external loading esti ```{r} # load data - loads <- read.csv( here("data-raw/TB_loads_monthly.csv" )) + loads <- read.csv( "../data-raw/TB_loads_monthly.csv" ) # subset TN data for OTB segment loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ] # date column @@ -388,6 +387,10 @@ Below, we fit linear models for chlorophyll as a function of cumulative 3-month ) } # // end TN_chl_plot() function +# remove incomplete cases in the data +epc1 <- epc1[ complete.cases(epc1), ] +epc2 <- epc2[ complete.cases(epc2), ] +epc3 <- epc3[ complete.cases(epc3), ] # models ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 ) @@ -401,7 +404,7 @@ TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" ) TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" ) ``` -The diagnostics for each of the above OTB models indicate that the assumption of homoskedastic, Gaussian residuals is questionable. +The diagnostics for each of the above OTB models indicate that the assumption of homoskedastic, Gaussian residuals is not met. ```{r fig.width=7, fig.height=7} par(mfrow=c(2,2)) # North of CC diff --git a/docs/eval_paradigm.html b/docs/eval_paradigm.html index 8a1ee25..6c032ee 100644 --- a/docs/eval_paradigm.html +++ b/docs/eval_paradigm.html @@ -520,8 +520,7 @@

Data Processing

library(plyr) library(dplyr) library(tidyr) -library(mgcv) -library(here) +library(mgcv)

Pinellas County water quality data

Pinellas County collects water quality data at five strata along the @@ -539,7 +538,7 @@

Pinellas County water quality data

timeframe (monthly means), and creates a new pcwq2 dataframe for analysis.

  # load data
-    pcwq1 <- read.csv( here("data-raw/DataDownload_2999342_row.csv" ))[,1:21]
+    pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21]
   # dates
     pcwq1$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y")
   # qualifier codes
@@ -598,7 +597,7 @@ 

Environmental Protection Commission of Hillsborough County (EPC) cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas).

  # load data
-    load( here("data/epcwq.RData" ))
+    load( "../data/epcwq.RData" )
   # dates
     epcwq1 <- epcwq |> as.data.frame()
     epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d")
@@ -688,7 +687,7 @@ 

Tampa Bay Estuary Program nitrogen load estimates

etc.), standardizes column names, and creates a new loads dataframe for analysis.

  # load data
-  loads <- read.csv( here("data-raw/TB_loads_monthly.csv" ))
+  loads <- read.csv( "../data-raw/TB_loads_monthly.csv" )
   # subset TN data for OTB segment
   loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ]
   # date column
@@ -876,36 +875,40 @@ 

Nitrogen loads and chlorophyll, with 3-month cumulative loads

) } # // end TN_chl_plot() function - -# models -ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 ) -ncmod2 <- gam( chla ~ TN_load_3mo, data = epc2 ) -ncmod3 <- gam( chla ~ TN_load_3mo, data = epc3 ) - -# plots -par( mfrow=c(1,3) ) -TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" ) -TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" ) -TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
-

+# remove incomplete cases in the data +epc1 <- epc1[ complete.cases(epc1), ] +epc2 <- epc2[ complete.cases(epc2), ] +epc3 <- epc3[ complete.cases(epc3), ] + +# models +ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 ) +ncmod2 <- gam( chla ~ TN_load_3mo, data = epc2 ) +ncmod3 <- gam( chla ~ TN_load_3mo, data = epc3 ) + +# plots +par( mfrow=c(1,3) ) +TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" ) +TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" ) +TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
+

The diagnostics for each of the above OTB models indicate that the -assumption of homoskedastic, Gaussian residuals is questionable.

+assumption of homoskedastic, Gaussian residuals is not met.

par(mfrow=c(2,2))
   # North of CC
   gam.check(ncmod1,rep=1000)
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
  # Between CC and HF
   gam.check(ncmod2,rep=1000)
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
  # Between HF and GB
   gam.check(ncmod3,rep=1000)
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
@@ -1223,7 +1226,7 @@

E1 (Safety Harbor / Mobbly Bay)

# model diagnostics par(mfrow=c(2,2)) gam.check( gam1, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 22 iterations.
@@ -1235,7 +1238,7 @@ 

E1 (Safety Harbor / Mobbly Bay)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120 3 1.07 0.82
+## te(decdate,value.x) 120 3 1.07 0.8
  summary( gam1 )
## 
 ## Family: gaussian 
@@ -1289,7 +1292,7 @@ 

E2 (Largo Inlet)

# model diagnostics par(mfrow=c(2,2)) gam.check( gam2, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 14 iterations.
@@ -1301,7 +1304,7 @@ 

E2 (Largo Inlet)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.00 8.14 1.01 0.47
+## te(decdate,value.x) 120.00 8.14 1.01 0.55
  summary( gam2 )
## 
 ## Family: gaussian 
@@ -1354,7 +1357,7 @@ 

E3 (Feather Sound)

# model diagnostics par(mfrow=c(2,2)) gam.check( gam3, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 22 iterations.
@@ -1366,7 +1369,7 @@ 

E3 (Feather Sound)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.00 4.52 0.89 0.045 * +## te(decdate,value.x) 120.00 4.52 0.89 0.08 . ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  summary( gam3 )
@@ -1419,7 +1422,7 @@

E4 (Gateway)

# model diagnostics par(mfrow=c(2,2)) gam.check( gam4, rep = 1000 ) -

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 40 iterations.
@@ -1482,7 +1485,7 @@ 

E5 (Weedon Island)

# model diagnostics par(mfrow=c(2,2)) gam.check( gam5, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 16 iterations.
@@ -1561,7 +1564,7 @@ 

North of Courtney Campbell

# model diagnostics par(mfrow=c(2,2)) gam.check( gam6, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 10 iterations.
@@ -1573,7 +1576,7 @@ 

North of Courtney Campbell

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.0 34.3 1 0.52
+## te(decdate,value.x) 120.0 34.3 1 0.48
  summary( gam6 )
## 
 ## Family: gaussian 
@@ -1631,7 +1634,7 @@ 

Between the Frankland Bridge and Courtney Campbell

# model diagnostics par(mfrow=c(2,2)) gam.check( gam7, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 6 iterations.
@@ -1643,7 +1646,7 @@ 

Between the Frankland Bridge and Courtney Campbell

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.0 30.4 1.03 0.68
+## te(decdate,value.x) 120.0 30.4 1.03 0.76
 summary( gam7 )
## 
 ## Family: gaussian 
@@ -1709,7 +1712,7 @@ 

Between Gandy Blvd and the Frankland Bridge

# model diagnostics par(mfrow=c(2,2)) gam.check( gam8, rep = 1000 )
-

+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 6 iterations.
@@ -1721,7 +1724,7 @@ 

Between Gandy Blvd and the Frankland Bridge

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.0 22.9 1.05 0.85
+## te(decdate,value.x) 120.0 22.9 1.05 0.84
  summary( gam8 )
## 
 ## Family: gaussian