From 0acb514e5d11bc6a8619b363df15580d0197a65a Mon Sep 17 00:00:00 2001 From: milesmedina <44986194+milesmedina@users.noreply.github.com> Date: Thu, 25 Apr 2024 13:05:19 -0400 Subject: [PATCH] update file paths in narrative --- docs/eval_paradigm.Rmd | 4 +- docs/eval_paradigm.html | 1188 +++++++++++++++++++-------------------- 2 files changed, 591 insertions(+), 601 deletions(-) diff --git a/docs/eval_paradigm.Rmd b/docs/eval_paradigm.Rmd index 8a044f1..bfdbdfb 100644 --- a/docs/eval_paradigm.Rmd +++ b/docs/eval_paradigm.Rmd @@ -56,7 +56,7 @@ The results of generalized additive models (GAMs) indicated that the TN-chloroph # Data Processing {#data} -Water quality and external loading data were assembled by the `./data_proc.R` script. This section processes these data to support the subsequent analyses, including a few basic QA/QC checks. +Water quality and external loading data were assembled by the `../R/data_proc.R` script. This section processes these data to support the subsequent analyses, including a few basic QA/QC checks. We begin by loading packages (see [References](#ref)). @@ -132,7 +132,7 @@ head( pcwq2 ) ## Environmental Protection Commission of Hillsborough County (EPC) water quality data {#data_epc} -This section loads the EPC water quality data from file (`./data/epcwq.RData`), pivots the dataframe from wide to long format, performs QC checks, aggregates the data to a monthly timeframe (monthly means), and creates a new `epcwq2` dataframe for analysis. As indicated in output below, a substantial number of Secchi records indicate that reported Secchi depth equals the total depth (i.e., the Secchi depth was visible at bottom); in these cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas). 
+This section loads the EPC water quality data from file (`../data/epcwq.RData`), pivots the dataframe from wide to long format, performs QC checks, aggregates the data to a monthly timeframe (monthly means), and creates a new `epcwq2` dataframe for analysis. As indicated in output below, a substantial number of Secchi records indicate that reported Secchi depth equals the total depth (i.e., the Secchi depth was visible at bottom); in these cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas). ```{r} # load data diff --git a/docs/eval_paradigm.html b/docs/eval_paradigm.html index 63b2277..8a1ee25 100644 --- a/docs/eval_paradigm.html +++ b/docs/eval_paradigm.html @@ -511,14 +511,17 @@

Overview

Data Processing

Water quality and external loading data were assembled by the -./data_proc.R script. This section processes these data to -support the subsequent analyses, including a few basic QA/QC checks.

+../R/data_proc.R script. This section processes these data +to support the subsequent analyses, including a few basic QA/QC +checks.

We begin by loading packages (see References).

-
  library(lubridate)
-  library(plyr)
-  library(dplyr)
-  library(tidyr)
-  library(mgcv)
+
knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE)
+library(lubridate)
+library(plyr)
+library(dplyr)
+library(tidyr)
+library(mgcv)
+library(here)

Pinellas County water quality data

Pinellas County collects water quality data at five strata along the @@ -536,7 +539,7 @@

Pinellas County water quality data

timeframe (monthly means), and creates a new pcwq2 dataframe for analysis.

  # load data
-    pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21]
+    pcwq1 <- read.csv( here("data-raw/DataDownload_2999342_row.csv" ))[,1:21]
   # dates
     pcwq1$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y")
   # qualifier codes
@@ -567,12 +570,10 @@ 

Pinellas County water quality data

pcwq2 <- pcwq1[, c("Date","Analyte","Result_Value","Unit","StationID") ] colnames( pcwq2 ) <- c("date","param", "value", "unit", "site" ) # aggregate data to monthly timeframe - pcwq2 <- pcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame()
-
## `summarise()` has grouped output by 'date', 'param', 'site'. You can override
-## using the `.groups` argument.
-
    pcwq2$date <- pcwq2$date |> floor_date('month')
+ pcwq2 <- pcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame() + pcwq2$date <- pcwq2$date |> floor_date('month')

The dataset contains 11,808 rows.

-
head( pcwq2 )
+
head( pcwq2 )
##         date param  site unit value
 ## 1 2000-01-01   BOD 62-01  mgl  1.00
 ## 2 2000-01-01  Chla 62-01  ugl  2.60
@@ -588,7 +589,7 @@ 

Pinellas County water quality data

Environmental Protection Commission of Hillsborough County (EPC) water quality data

This section loads the EPC water quality data from file -(./data/epcwq.RData), pivots the dataframe from wide to +(../data/epcwq.RData), pivots the dataframe from wide to long format, performs QC checks, aggregates the data to a monthly timeframe (monthly means), and creates a new epcwq2 dataframe for analysis. As indicated in output below, a substantial @@ -596,88 +597,77 @@

Environmental Protection Commission of Hillsborough County (EPC) total depth (i.e., the Secchi depth was visible at bottom); in these cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas).

-
  # load data
-    load( "../data/epcwq.RData" )
-  # dates
-    epcwq1 <- epcwq |> as.data.frame()
-    epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d")
-
## Warning in as.POSIXlt.POSIXct(x, tz = tz): unknown timezone '%Y-%m-%d'
-
  # subset columns (data and 'Q' columns must be listed in consecutive pairs!)
-    epcwq1 <- select( epcwq1, date, StationNumber, SampleDepth,
-                              Total_Nitrogen, Total_NitrogenQ,
-                              Chlorophyll_a, Chlorophyll_aQ,
-                              SecchiDepth, Secchi_Q,
-                              Total_Suspended_Solids, Total_Suspended_SolidsQ,
-                              Turbidity, TurbidityQ )
-  # subset OTB stations
-    epcwq1 <- epcwq1[ which( epcwq1$StationNumber %in% c(36,  38,  40,  41,  42,  46,  47,
-                                                         50,  51,  60,  61,  62,  63,  64,
-                                                         65,  66,  67,  68) ), ]
-  # subset by date
-    epcwq1 <- epcwq1[ which( year(epcwq1$date) >= 2000 ), ]
-  # loop through data and QC columns to pivot from wide to long format
-    datacols <- seq( 4, 12, 2 )
-    epcwq2 <- data.frame( matrix(ncol=7,nrow=0) )
-    for( i in datacols ){
-      temp <- epcwq1[ ,1:3 ]
-      temp$Analyte <- colnames(epcwq1)[i]
-      temp$value <- epcwq1[,i] |> as.numeric()
-      temp$unit <- NA
-      temp$QA <- epcwq1[,i+1]
-      temp <- temp[ complete.cases( temp$value ), ]
-      epcwq2 <- rbind( epcwq2, temp )
-    }
-
## Warning: NAs introduced by coercion
-
## Warning: NAs introduced by coercion
-
-## Warning: NAs introduced by coercion
-
-## Warning: NAs introduced by coercion
-
-## Warning: NAs introduced by coercion
-
  # standardize parameter names
-    param.names.old <- epcwq2$Analyte |> unique() |> sort()
-    param.names.new <- c( "Chla", "Secchi", "TN", "TSS", "Turbidity" )
-    epcwq2$param <- mapvalues( epcwq2$Analyte, param.names.old, param.names.new )
-  # units
-    units <- c( "ug/l", "m", "mg/l", "mg/l", "NTU" )  # listed in same order as param.names.new, above
-    epcwq2$unit <- mapvalues( epcwq2$param, param.names.new, units )
-  # qualifier codes
-    # replace "NULL" with NA
-    epcwq2$QA[ which(epcwq2$QA=="NULL") ] <- NA
-    # specify codes
-    fatal.codes <- c("*","?","A","B","G","H","J","K",
-                     "L","N","O","Q","T","V","Y","Z")
-    # define function to locate fatal records
-    find.fatal <- function( QUALIFIER, FATAL=fatal.codes ){  # QUALIFER arg is a string
-      input.code <- QUALIFIER |> strsplit(split='') |>
-        unlist() |> toupper()  # parse string into single characters
-      fatal <- input.code %in% FATAL |> any()  # check if any characters match FATAL
-      return( fatal )  # return TRUE or FALSE
-    }  # // end find.fatal()
-    # apply function to locate fatal records
-    rm.fatal.idx <- apply( matrix(epcwq2$QA,ncol=1), 1, find.fatal ) |> which()
-    epcwq2 <- epcwq2[ -rm.fatal.idx ]
-    # acknowledge Secchi records visible on bottom
-    epcwq2[ which( epcwq2$param=="Secchi" & epcwq2$QA==">"), ] |> nrow() # number of VOB Secchi records
+
  # load data
+    load( here("data/epcwq.RData" ))
+  # dates
+    epcwq1 <- epcwq |> as.data.frame()
+    epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d")
+  # subset columns (data and 'Q' columns must be listed in consecutive pairs!)
+    epcwq1 <- select( epcwq1, date, StationNumber, SampleDepth,
+                              Total_Nitrogen, Total_NitrogenQ,
+                              Chlorophyll_a, Chlorophyll_aQ,
+                              SecchiDepth, Secchi_Q,
+                              Total_Suspended_Solids, Total_Suspended_SolidsQ,
+                              Turbidity, TurbidityQ )
+  # subset OTB stations
+    epcwq1 <- epcwq1[ which( epcwq1$StationNumber %in% c(36,  38,  40,  41,  42,  46,  47,
+                                                         50,  51,  60,  61,  62,  63,  64,
+                                                         65,  66,  67,  68) ), ]
+  # subset by date
+    epcwq1 <- epcwq1[ which( year(epcwq1$date) >= 2000 ), ]
+  # loop through data and QC columns to pivot from wide to long format
+    datacols <- seq( 4, 12, 2 )
+    epcwq2 <- data.frame( matrix(ncol=7,nrow=0) )
+    for( i in datacols ){
+      temp <- epcwq1[ ,1:3 ]
+      temp$Analyte <- colnames(epcwq1)[i]
+      temp$value <- epcwq1[,i] |> as.numeric()
+      temp$unit <- NA
+      temp$QA <- epcwq1[,i+1]
+      temp <- temp[ complete.cases( temp$value ), ]
+      epcwq2 <- rbind( epcwq2, temp )
+    }
+  # standardize parameter names
+    param.names.old <- epcwq2$Analyte |> unique() |> sort()
+    param.names.new <- c( "Chla", "Secchi", "TN", "TSS", "Turbidity" )
+    epcwq2$param <- mapvalues( epcwq2$Analyte, param.names.old, param.names.new )
+  # units
+    units <- c( "ug/l", "m", "mg/l", "mg/l", "NTU" )  # listed in same order as param.names.new, above
+    epcwq2$unit <- mapvalues( epcwq2$param, param.names.new, units )
+  # qualifier codes
+    # replace "NULL" with NA
+    epcwq2$QA[ which(epcwq2$QA=="NULL") ] <- NA
+    # specify codes
+    fatal.codes <- c("*","?","A","B","G","H","J","K",
+                     "L","N","O","Q","T","V","Y","Z")
+    # define function to locate fatal records
+    find.fatal <- function( QUALIFIER, FATAL=fatal.codes ){  # QUALIFER arg is a string
+      input.code <- QUALIFIER |> strsplit(split='') |>
+        unlist() |> toupper()  # parse string into single characters
+      fatal <- input.code %in% FATAL |> any()  # check if any characters match FATAL
+      return( fatal )  # return TRUE or FALSE
+    }  # // end find.fatal()
+    # apply function to locate fatal records
+    rm.fatal.idx <- apply( matrix(epcwq2$QA,ncol=1), 1, find.fatal ) |> which()
+    epcwq2 <- epcwq2[ -rm.fatal.idx ]
+    # acknowledge Secchi records visible on bottom
+    epcwq2[ which( epcwq2$param=="Secchi" & epcwq2$QA==">"), ] |> nrow() # number of VOB Secchi records
## [1] 1333
-
    epcwq2[ which( epcwq2$param=="Secchi"), ] |> nrow()  # total number of Secchi records
+
    epcwq2[ which( epcwq2$param=="Secchi"), ] |> nrow()  # total number of Secchi records
## [1] 5092
-
  # coerce depth to numeric
-    epcwq2$SampleDepth <- epcwq2$SampleDepth |> as.numeric()
-  # check for duplicates (confirm FALSE)
-    epcwq2 |> duplicated() |> any()
+
  # coerce depth to numeric
+    epcwq2$SampleDepth <- epcwq2$SampleDepth |> as.numeric()
+  # check for duplicates (confirm FALSE)
+    epcwq2 |> duplicated() |> any()
## [1] FALSE
-
  # subset and rename columns
-    epcwq2 <- epcwq2[, c("date","param","value","unit","StationNumber") ]
-    colnames( epcwq2 ) <- c("date","param", "value", "unit", "site" )
-  # aggregate data to monthly timeframe
-    epcwq2 <- epcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame()
-
## `summarise()` has grouped output by 'date', 'param', 'site'. You can override
-## using the `.groups` argument.
-
    epcwq2$date <- epcwq2$date |> floor_date('month')
+
  # subset and rename columns
+    epcwq2 <- epcwq2[, c("date","param","value","unit","StationNumber") ]
+    colnames( epcwq2 ) <- c("date","param", "value", "unit", "site" )
+  # aggregate data to monthly timeframe
+    epcwq2 <- epcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame()
+    epcwq2$date <- epcwq2$date |> floor_date('month')

The dataset contains 20,906 rows.

-
head( epcwq2 )
+
head( epcwq2 )
##         date param site unit value
 ## 1 2000-01-01  Chla   36 ug/l 3.563
 ## 2 2000-01-01  Chla   38 ug/l 3.161
@@ -697,24 +687,24 @@ 

Tampa Bay Estuary Program nitrogen load estimates

TN loads by summing over TN load sources to OTB (e.g., AD, NPS, DPS, etc.), standardizes column names, and creates a new loads dataframe for analysis.

-
  # load data
-  loads <- read.csv( "../data-raw/TB_loads_monthly.csv" )
-  # subset TN data for OTB segment
-  loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ]
-  # date column
-  loads$date <- as.Date( paste0(loads$year,"-",loads$month,"-",01), format="%Y-%m-%d" )
-  # aggregate total TN load across sources
-  loads <- loads |> group_by(date) |> summarise(tn_load=sum(tn_load)) |> as.data.frame()
-  # create columns
-  loads$param <- "TN load"
-  loads$site <- "OTB watershed"
-  loads$unit <- 'tons'
-  # rename columns
-  colnames( loads ) <- c('date','value','param','site','unit')
-  # re-order columns
-  loads <- select( loads, date, param, value, unit, site )
+
  # load data
+  loads <- read.csv( here("data-raw/TB_loads_monthly.csv" ))
+  # subset TN data for OTB segment
+  loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ]
+  # date column
+  loads$date <- as.Date( paste0(loads$year,"-",loads$month,"-",01), format="%Y-%m-%d" )
+  # aggregate total TN load across sources
+  loads <- loads |> group_by(date) |> summarise(tn_load=sum(tn_load)) |> as.data.frame()
+  # create columns
+  loads$param <- "TN load"
+  loads$site <- "OTB watershed"
+  loads$unit <- 'tons'
+  # rename columns
+  colnames( loads ) <- c('date','value','param','site','unit')
+  # re-order columns
+  loads <- select( loads, date, param, value, unit, site )

The dataset contains 324 rows.

-
head( loads )
+
head( loads )
##         date   param    value unit          site
 ## 1 1995-01-01 TN load 20.78062 tons OTB watershed
 ## 2 1995-02-01 TN load 19.80657 tons OTB watershed
@@ -751,71 +741,71 @@ 

Nitrogen loads and chlorophyll, with 3-month cumulative loads

and include the columns date, chla, TN_load (monthly load values), and TN_load_3mo (cumulative 3-month loads).

-
# Subset and join load and chl-a data for OTB sub-segments
-  # Sub-segment north of Courtney Campbell
-  epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]  # subset EPC data by station
-  epc1 <- epc1[which(epc1$param=="Chla"),] |> group_by(date) |>  # aggregate chl data to monthly means
-            summarise(value=mean(value)) |> as.data.frame()
-  epc1 <- left_join( epc1, loads[,c('date','value')], by='date' )  # value.x is chl; value.y is load
-  colnames(epc1) <- c("date","chla","TN_load")
-  # Sub-segment between Courtney Campbell and Frankland
-  epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
-  epc2 <- epc2[which(epc2$param=="Chla"),] |> group_by(date) |> 
-          summarise(value=mean(value)) |> as.data.frame()
-  epc2 <- left_join( epc2, loads[,c('date','value')], by='date' ) 
-  colnames(epc2) <- c("date","chla","TN_load")
-  # Sub-segment between Frankland and Gandy
-  epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
-  epc3 <- epc3[which(epc3$param=="Chla"),] |> group_by(date) |> 
-        summarise(value=mean(value)) |> as.data.frame()
-  epc3 <- left_join( epc3, loads[,c('date','value')], by='date' )
-  colnames(epc3) <- c("date","chla","TN_load")
-  
-# Compute 3-month TN loads
-  # Define function to sum load values over a specified window (`win`)
-  # The function assumes there are no NAs 
-  sum3mo <- function( x, win = 3 ){
-    out <- rep(NA,length=(win-1))
-    for( i in win:length(x) ){
-      out[i] <- sum( x[i:(i-win+1)] )
-    }
-    return( out )
-  }
-  # Add a 3-month TN load column to each dataset
-  epc1$TN_load_3mo <- sum3mo( epc1$TN_load )
-  epc2$TN_load_3mo <- sum3mo( epc2$TN_load )
-  epc3$TN_load_3mo <- sum3mo( epc3$TN_load )
+
# Subset and join load and chl-a data for OTB sub-segments
+  # Sub-segment north of Courtney Campbell
+  epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]  # subset EPC data by station
+  epc1 <- epc1[which(epc1$param=="Chla"),] |> group_by(date) |>  # aggregate chl data to monthly means
+            summarise(value=mean(value)) |> as.data.frame()
+  epc1 <- left_join( epc1, loads[,c('date','value')], by='date' )  # value.x is chl; value.y is load
+  colnames(epc1) <- c("date","chla","TN_load")
+  # Sub-segment between Courtney Campbell and Frankland
+  epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
+  epc2 <- epc2[which(epc2$param=="Chla"),] |> group_by(date) |> 
+          summarise(value=mean(value)) |> as.data.frame()
+  epc2 <- left_join( epc2, loads[,c('date','value')], by='date' ) 
+  colnames(epc2) <- c("date","chla","TN_load")
+  # Sub-segment between Frankland and Gandy
+  epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
+  epc3 <- epc3[which(epc3$param=="Chla"),] |> group_by(date) |> 
+        summarise(value=mean(value)) |> as.data.frame()
+  epc3 <- left_join( epc3, loads[,c('date','value')], by='date' )
+  colnames(epc3) <- c("date","chla","TN_load")
+  
+# Compute 3-month TN loads
+  # Define function to sum load values over a specified window (`win`)
+  # The function assumes there are no NAs 
+  sum3mo <- function( x, win = 3 ){
+    out <- rep(NA,length=(win-1))
+    for( i in win:length(x) ){
+      out[i] <- sum( x[i:(i-win+1)] )
+    }
+    return( out )
+  }
+  # Add a 3-month TN load column to each dataset
+  epc1$TN_load_3mo <- sum3mo( epc1$TN_load )
+  epc2$TN_load_3mo <- sum3mo( epc2$TN_load )
+  epc3$TN_load_3mo <- sum3mo( epc3$TN_load )

The monthly sub-segment chlorophyll-a data and segment-wide TN load data are visualized below.

-
par( mfrow=c(2,2) )
-plot( chla ~ date, data = epc1, pch = 16, col = rgb(0,0.4,0.8,0.7),
-      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
-      main = "Chlorophyll-a, OTB north of Courtney Campbell" )
-  lines( chla ~ date, data = epc1, col = rgb(0,0.4,0.8,0.4) )
-  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
-          col = rgb(0,0,0,0.2) )
-  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
-plot( chla ~ date, data = epc2, pch = 16, col = rgb(0,0.4,0.8,0.7),
-      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
-      main = "Chlorophyll-a, OTB between Frankland & Courtney Campbell" )
-  lines( chla ~ date, data = epc2, col = rgb(0,0.4,0.8,0.4) )
-  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
-          col = rgb(0,0,0,0.2) )
-  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
-plot( chla ~ date, data = epc3, pch = 16, col = rgb(0,0.4,0.8,0.7),
-      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
-      main = "Chlorophyll-a, OTB between Gandy & Frankland" )
-  lines( chla ~ date, data = epc3, col = rgb(0,0.4,0.8,0.4) )
-  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
-          col = rgb(0,0,0,0.2) )
-  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
-plot( TN_load ~ date, data = epc3, pch = 16, col = rgb(0.2,0.2,0.2,0.7),
-      ylab = "TN load (tons)", xlab = "", las = 1,
-      main = "Monthly TN load, Old Tampa Bay" )
-  lines( TN_load ~ date, data = epc3, col = rgb(0.2,0.2,0.2,0.4) )
-  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
-          col = rgb(0,0,0,0.2) )
-  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+
par( mfrow=c(2,2) )
+plot( chla ~ date, data = epc1, pch = 16, col = rgb(0,0.4,0.8,0.7),
+      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
+      main = "Chlorophyll-a, OTB north of Courtney Campbell" )
+  lines( chla ~ date, data = epc1, col = rgb(0,0.4,0.8,0.4) )
+  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
+          col = rgb(0,0,0,0.2) )
+  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+plot( chla ~ date, data = epc2, pch = 16, col = rgb(0,0.4,0.8,0.7),
+      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
+      main = "Chlorophyll-a, OTB between Frankland & Courtney Campbell" )
+  lines( chla ~ date, data = epc2, col = rgb(0,0.4,0.8,0.4) )
+  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
+          col = rgb(0,0,0,0.2) )
+  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+plot( chla ~ date, data = epc3, pch = 16, col = rgb(0,0.4,0.8,0.7),
+      ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
+      main = "Chlorophyll-a, OTB between Gandy & Frankland" )
+  lines( chla ~ date, data = epc3, col = rgb(0,0.4,0.8,0.4) )
+  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
+          col = rgb(0,0,0,0.2) )
+  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+plot( TN_load ~ date, data = epc3, pch = 16, col = rgb(0.2,0.2,0.2,0.7),
+      ylab = "TN load (tons)", xlab = "", las = 1,
+      main = "Monthly TN load, Old Tampa Bay" )
+  lines( TN_load ~ date, data = epc3, col = rgb(0.2,0.2,0.2,0.4) )
+  abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
+          col = rgb(0,0,0,0.2) )
+  abline( h = axTicks(2), col = rgb(0,0,0,0.2) )

Below, we fit linear models for chlorophyll as a function of cumulative 3-month TN loads at each sub-segment and visualize the @@ -830,92 +820,92 @@

Nitrogen loads and chlorophyll, with 3-month cumulative loads

nor Gaussian (diagnostic visualizations follow); experimentation with alternative model specifications that include a month-of-year term did not improve the model diagnostics.

-
# Define plotting function for data and model
-  TN_chl_plot <- function( dat, mod, lab.main ){
-     # 95% CI
-     pred.seq <- seq( min( dat$TN_load_3mo, na.rm=TRUE ),
-                      max( dat$TN_load_3mo, na.rm=TRUE ), length.out=100 )
-     pred <- predict.gam( mod,
-                          newdata = data.frame( TN_load_3mo = pred.seq ),
-                          se.fit=TRUE )
-     
-     # P value
-      pval <- summary(mod)$p.table[2,4]
-      pcol1 <- rgb(1,0,0,0.4)
-      pcol2 <- rgb(1,0,0,0.15)
-      if( pval < 0.001 ){
-        pstring <- "P < 0.001"
-      } else if( pval < 0.01 ){
-        pstring <- "P < 0.01"
-      } else if( pval < 0.05 ){
-        pstring <- "P < 0.05"
-      } else {
-        pstring <- "P > 0.05"
-        pcol1 <- rgb(0.2,0.3,0.9,0.4)
-        pcol2 <- rgb(0,0.1,0.6,0.15)
-      }
-      
-      # scatterplot data
-      plot( chla ~ TN_load_3mo, data = dat,
-            col = rgb(0,0,0,0.6), cex = 1.4,
-            main = paste( lab.main,"\n",
-                          min(dat$date), " - ", max(dat$date) ),
-            cex.main = 1.6, cex.lab = 1.4,
-            xlim = range( c(0,dat$TN_load_3mo,pred.seq), na.rm=TRUE ),
-            ylim = range( c(0,dat$chla,pred$fit-pred$se.fit*1.96,
-                            pred$fit+pred$se.fit*1.96)*1.2 ),
-            xlab = "3-month TN load (tons)", cex.axis = 1.4,
-            ylab = "Chlorophyll-a (ug/l)", las = 1
-      )
-      abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
-      abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
-      abline( v = 0, col = rgb(0,0,0,0.5) )
-      abline( h = 0, col = rgb(0,0,0,0.5) )
-      
-      # plot model fit
-      lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
-      polygon( x = c( pred.seq, rev(pred.seq) ),
-               y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
-               col = pcol2, border = rgb(0,0,0,0) )
-      # write model R2 and pval
-      legend( 'top', horiz = TRUE, bty = 'n',
-              border = rgb(0,0,0,0), cex = 1.4,
-              legend = paste0( "N = ", nrow(dat), "   ",
-                               "R2 = ", round(summary(mod)$r.sq,3),
-                               "   ", pstring )
-              )
-  }  # // end TN_chl_plot() function
-
-
-# models
-ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 )
-ncmod2 <- gam( chla ~ TN_load_3mo, data = epc2 )
-ncmod3 <- gam( chla ~ TN_load_3mo, data = epc3 )
-
-# plots
-par( mfrow=c(1,3) )
-TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" )
-TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" )
-TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
+
# Define plotting function for data and model
+  TN_chl_plot <- function( dat, mod, lab.main ){
+     # 95% CI
+     pred.seq <- seq( min( dat$TN_load_3mo, na.rm=TRUE ),
+                      max( dat$TN_load_3mo, na.rm=TRUE ), length.out=100 )
+     pred <- predict.gam( mod,
+                          newdata = data.frame( TN_load_3mo = pred.seq ),
+                          se.fit=TRUE )
+     
+     # P value
+      pval <- summary(mod)$p.table[2,4]
+      pcol1 <- rgb(1,0,0,0.4)
+      pcol2 <- rgb(1,0,0,0.15)
+      if( pval < 0.001 ){
+        pstring <- "P < 0.001"
+      } else if( pval < 0.01 ){
+        pstring <- "P < 0.01"
+      } else if( pval < 0.05 ){
+        pstring <- "P < 0.05"
+      } else {
+        pstring <- "P > 0.05"
+        pcol1 <- rgb(0.2,0.3,0.9,0.4)
+        pcol2 <- rgb(0,0.1,0.6,0.15)
+      }
+      
+      # scatterplot data
+      plot( chla ~ TN_load_3mo, data = dat,
+            col = rgb(0,0,0,0.6), cex = 1.4,
+            main = paste( lab.main,"\n",
+                          min(dat$date), " - ", max(dat$date) ),
+            cex.main = 1.6, cex.lab = 1.4,
+            xlim = range( c(0,dat$TN_load_3mo,pred.seq), na.rm=TRUE ),
+            ylim = range( c(0,dat$chla,pred$fit-pred$se.fit*1.96,
+                            pred$fit+pred$se.fit*1.96)*1.2 ),
+            xlab = "3-month TN load (tons)", cex.axis = 1.4,
+            ylab = "Chlorophyll-a (ug/l)", las = 1
+      )
+      abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
+      abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+      abline( v = 0, col = rgb(0,0,0,0.5) )
+      abline( h = 0, col = rgb(0,0,0,0.5) )
+      
+      # plot model fit
+      lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
+      polygon( x = c( pred.seq, rev(pred.seq) ),
+               y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
+               col = pcol2, border = rgb(0,0,0,0) )
+      # write model R2 and pval
+      legend( 'top', horiz = TRUE, bty = 'n',
+              border = rgb(0,0,0,0), cex = 1.4,
+              legend = paste0( "N = ", nrow(dat), "   ",
+                               "R2 = ", round(summary(mod)$r.sq,3),
+                               "   ", pstring )
+              )
+  }  # // end TN_chl_plot() function
+
+
+# models
+ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 )
+ncmod2 <- gam( chla ~ TN_load_3mo, data = epc2 )
+ncmod3 <- gam( chla ~ TN_load_3mo, data = epc3 )
+
+# plots
+par( mfrow=c(1,3) )
+TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" )
+TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" )
+TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )

The diagnostics for each of the above OTB models indicate that the assumption of homoskedastic, Gaussian residuals is questionable.

-
par(mfrow=c(2,2))
-  # North of CC
-  gam.check(ncmod1,rep=1000)
-

+
par(mfrow=c(2,2))
+  # North of CC
+  gam.check(ncmod1,rep=1000)
+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
-
  # Between CC and HF
-  gam.check(ncmod2,rep=1000)
-

+
  # Between CC and HF
+  gam.check(ncmod2,rep=1000)
+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
-
  # Between HF and GB
-  gam.check(ncmod3,rep=1000)
-

+
  # Between HF and GB
+  gam.check(ncmod3,rep=1000)
+

## 
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selectionModel rank =  2 / 2
@@ -949,107 +939,107 @@

Nitrogen loads and chlorophyll, with delayed loads

line indicates whether the estimated slope is statistically significant (red) or not significant (gray) at alpha = 0.05. The function returns the input data and the model object.

-
  # define regression/plot function
-  # x, y      dataframes for explanatory (x) and response (y) variables,
-  #           each containing columns date, param, value, unit, 
-  # param.x,  water quality parameters in x$param, y$param (strings)
-  #  param.y
-  # site.x,   location labels in x$site, y$site (string)
-  #  site.y
-  # log       logical, if TRUE log10-transform the x and y data. FALSE by default
-  # delay     number of time steps to delay param.x (non-negative integer)
-  OTB_corr <- function( x, y,
-                        param.x, param.y,
-                        site.x, site.y = site.x,
-                        log = FALSE,
-                        delay = 0 ){
-    
-    # units
-    unit.x <- x$unit[ which( x$param==param.x ) ] |> unique()
-    unit.y <- y$unit[ which( y$param==param.y ) ] |> unique()
-    
-    # assemble data for x and y in wide format
-    dat.x <- x[ which( x$param==param.x & x$site==site.x ), ]
-    dat.x <- dat.x |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
-    dat.y <- y[ which( y$param==param.y & y$site==site.y ), ]
-    dat.y <- dat.y |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
-    dat <- dplyr::inner_join( dat.x, dat.y, by = 'date' )
-    
-    # log10-transform
-    if( log == TRUE ){
-      dat$value.x[ which( dat$value.x != 0) ] <- dat$value.x |> log10()
-      dat$value.y[ which( dat$value.y != 0) ] <- dat$value.y |> log10()
-      unit.x <- paste( "log10", unit.x )
-      unit.y <- paste( "log10", unit.y )
-    }
-    
-    # delay
-    if( delay > 0 ){
-      dateseq <- data.frame( date = seq.Date( min(dat$date), max(dat$date), 'month' ) )
-      dat <- dplyr::left_join( dateseq, dat, by = 'date' )
-      dat$value.x <- dplyr::lag( dat$value.x, n = delay )
-      dat <- na.omit( dat )
-    } else if ( delay < 0 ){
-      stop("Negative delay specified.")
-    }
-    
-    # fit linear model
-    # fit model
-    mod1 <- gam( value.y ~ value.x, data = dat )
-    # plot fit with 95% CI
-    pred.seq <- seq( min( dat$value.x ), max( dat$value.x ), length.out=100 )
-    pred <- predict.gam( mod1,
-                         newdata = data.frame( value.x = pred.seq ),
-                         se.fit=TRUE )
-    # P value
-    pval <- summary(mod1)$p.table[2,4]
-    pcol1 <- rgb(1,0,0,0.4)
-    pcol2 <- rgb(1,0,0,0.15)
-    if( pval < 0.001 ){
-      pstring <- "P < 0.001"
-    } else if( pval < 0.01 ){
-      pstring <- "P < 0.01"
-    } else if( pval < 0.05 ){
-      pstring <- "P < 0.05"
-    } else {
-      pstring <- "P > 0.05"
-      pcol1 <- rgb(0.2,0.3,0.9,0.4)
-      pcol2 <- rgb(0,0.1,0.6,0.15)
-    }
-    
-    # scatterplot data
-    plot( value.y ~ value.x, data = dat,
-          col = rgb(0,0,0,0.6), cex = 1.4,
-          main = paste( param.y, "at", site.y, " vs. ", param.x, "at", site.x,"\n",
-                        min(dat$date), " - ", max(dat$date) ),
-          cex.main = 1.6, cex.lab = 1.4,
-          xlim = range( c(0,dat$value.x,pred.seq) ),
-          ylim = range( c(0,dat$value.y,pred$fit-pred$se.fit*1.96,
-                          pred$fit+pred$se.fit*1.96)*1.2 ),
-          xlab = paste0( param.x, " (", unit.x, ")" ), cex.axis = 1.4,
-          ylab = paste0( param.y, " (", unit.y, ")" ), las = 1
-    )
-    abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
-    abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
-    abline( v = 0, col = rgb(0,0,0,0.5) )
-    abline( h = 0, col = rgb(0,0,0,0.5) )
-    
-    # plot model fit
-    lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
-    polygon( x = c( pred.seq, rev(pred.seq) ),
-             y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
-             col = pcol2, border = rgb(0,0,0,0) )
-    # write model R2 and pval
-    legend( 'top', horiz = TRUE, bty = 'n',
-            border = rgb(0,0,0,0), cex = 1.4,
-            legend = paste0( "N = ", nrow(dat), "   ",
-                             "R2 = ", round(summary(mod1)$r.sq,3),
-                             "   ", pstring, "   delay = ", delay )
-    )
-    
-    return( list( data = dat, model = mod1 ) )
-    
-  }  # // end OTB_corr()
+
  # define regression/plot function
+  # x, y      dataframes for explanatory (x) and response (y) variables,
+  #           each containing columns date, param, value, unit, 
+  # param.x,  water quality parameters in x$param, y$param (strings)
+  #  param.y
+  # site.x,   location labels in x$site, y$site (string)
+  #  site.y
+  # log       logical, if TRUE log10-transform the x and y data. FALSE by default
+  # delay     number of time steps to delay param.x (non-negative integer)
+  OTB_corr <- function( x, y,
+                        param.x, param.y,
+                        site.x, site.y = site.x,
+                        log = FALSE,
+                        delay = 0 ){
+    
+    # units
+    unit.x <- x$unit[ which( x$param==param.x ) ] |> unique()
+    unit.y <- y$unit[ which( y$param==param.y ) ] |> unique()
+    
+    # assemble data for x and y in wide format
+    dat.x <- x[ which( x$param==param.x & x$site==site.x ), ]
+    dat.x <- dat.x |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
+    dat.y <- y[ which( y$param==param.y & y$site==site.y ), ]
+    dat.y <- dat.y |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
+    dat <- dplyr::inner_join( dat.x, dat.y, by = 'date' )
+    
+    # log10-transform
+    if( log == TRUE ){
+      dat$value.x[ which( dat$value.x != 0) ] <- dat$value.x |> log10()
+      dat$value.y[ which( dat$value.y != 0) ] <- dat$value.y |> log10()
+      unit.x <- paste( "log10", unit.x )
+      unit.y <- paste( "log10", unit.y )
+    }
+    
+    # delay
+    if( delay > 0 ){
+      dateseq <- data.frame( date = seq.Date( min(dat$date), max(dat$date), 'month' ) )
+      dat <- dplyr::left_join( dateseq, dat, by = 'date' )
+      dat$value.x <- dplyr::lag( dat$value.x, n = delay )
+      dat <- na.omit( dat )
+    } else if ( delay < 0 ){
+      stop("Negative delay specified.")
+    }
+    
+    # fit linear model
+    # (gam() with only a linear term is equivalent to an ordinary linear model)
+    mod1 <- gam( value.y ~ value.x, data = dat )
+    # plot fit with 95% CI
+    pred.seq <- seq( min( dat$value.x ), max( dat$value.x ), length.out=100 )
+    pred <- predict.gam( mod1,
+                         newdata = data.frame( value.x = pred.seq ),
+                         se.fit=TRUE )
+    # P value
+    pval <- summary(mod1)$p.table[2,4]
+    pcol1 <- rgb(1,0,0,0.4)
+    pcol2 <- rgb(1,0,0,0.15)
+    if( pval < 0.001 ){
+      pstring <- "P < 0.001"
+    } else if( pval < 0.01 ){
+      pstring <- "P < 0.01"
+    } else if( pval < 0.05 ){
+      pstring <- "P < 0.05"
+    } else {
+      pstring <- "P > 0.05"
+      pcol1 <- rgb(0.2,0.3,0.9,0.4)
+      pcol2 <- rgb(0,0.1,0.6,0.15)
+    }
+    
+    # scatterplot data
+    plot( value.y ~ value.x, data = dat,
+          col = rgb(0,0,0,0.6), cex = 1.4,
+          main = paste( param.y, "at", site.y, " vs. ", param.x, "at", site.x,"\n",
+                        min(dat$date), " - ", max(dat$date) ),
+          cex.main = 1.6, cex.lab = 1.4,
+          xlim = range( c(0,dat$value.x,pred.seq) ),
+          ylim = range( c(0,dat$value.y,pred$fit-pred$se.fit*1.96,
+                          pred$fit+pred$se.fit*1.96)*1.2 ),
+          xlab = paste0( param.x, " (", unit.x, ")" ), cex.axis = 1.4,
+          ylab = paste0( param.y, " (", unit.y, ")" ), las = 1
+    )
+    abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
+    abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
+    abline( v = 0, col = rgb(0,0,0,0.5) )
+    abline( h = 0, col = rgb(0,0,0,0.5) )
+    
+    # plot model fit
+    lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
+    polygon( x = c( pred.seq, rev(pred.seq) ),
+             y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
+             col = pcol2, border = rgb(0,0,0,0) )
+    # write model R2 and pval
+    legend( 'top', horiz = TRUE, bty = 'n',
+            border = rgb(0,0,0,0), cex = 1.4,
+            legend = paste0( "N = ", nrow(dat), "   ",
+                             "R2 = ", round(summary(mod1)$r.sq,3),
+                             "   ", pstring, "   delay = ", delay )
+    )
+    
+    return( list( data = dat, model = mod1 ) )
+    
+  }  # // end OTB_corr()

The plots below show relationships between log10-transformed external TN loads and chlorophyll-a concentrations within the three OTB sub-segments compartmentalized by the Courtney Campbell, Howard @@ -1062,39 +1052,39 @@

Nitrogen loads and chlorophyll, with delayed loads

scatter (unexplained variance) around the regression line. Although not visualized below, the diagnostics for each model indicate that the residuals are homoskedastic and approximately Gaussian.

-
  par(mfrow=c(3,3))
-  # assemble and re-label data for locations north of Courtney Campbell
-  epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]
-  epc1$site <- "North CC"
-  # models
-  epc1_load_Chla_0 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla', 
-                                'OTB watershed', 'North CC', log = TRUE, delay = 0 )
-  epc1_load_Chla_1 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
-                                'OTB watershed', 'North CC', log = TRUE, delay = 1 )
-  epc1_load_Chla_2 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
-                                'OTB watershed', 'North CC', log = TRUE, delay = 2 )
-  
-  # assemble and re-label data for locations between Frankland & Campbell
-  epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
-  epc2$site <- "HF-CC"
-  # models
-  epc2_load_Chla_0 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
-                                'OTB watershed', 'HF-CC', log = TRUE, delay = 0 )
-  epc2_load_Chla_1 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
-                                'OTB watershed', 'HF-CC', log = TRUE, delay = 1 )
-  epc2_load_Chla_2 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
-                                'OTB watershed', 'HF-CC', log = TRUE, delay = 2 )
-  
-  # assemble and re-label data for locations between Gandy & Frankland
-  epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
-  epc3$site <- "GB-HF"
-  # models
-  epc3_load_Chla_0 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
-                                'OTB watershed', 'GB-HF', log = TRUE, delay = 0 )
-  epc3_load_Chla_1 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
-                                'OTB watershed', 'GB-HF', log = TRUE, delay = 1 )
-  epc3_load_Chla_2 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
-                                'OTB watershed', 'GB-HF', log = TRUE, delay = 2 )
+
  par(mfrow=c(3,3))
+  # assemble and re-label data for locations north of Courtney Campbell
+  epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]
+  epc1$site <- "North CC"
+  # models
+  epc1_load_Chla_0 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla', 
+                                'OTB watershed', 'North CC', log = TRUE, delay = 0 )
+  epc1_load_Chla_1 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
+                                'OTB watershed', 'North CC', log = TRUE, delay = 1 )
+  epc1_load_Chla_2 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
+                                'OTB watershed', 'North CC', log = TRUE, delay = 2 )
+  
+  # assemble and re-label data for locations between Frankland & Campbell
+  epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
+  epc2$site <- "HF-CC"
+  # models
+  epc2_load_Chla_0 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
+                                'OTB watershed', 'HF-CC', log = TRUE, delay = 0 )
+  epc2_load_Chla_1 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
+                                'OTB watershed', 'HF-CC', log = TRUE, delay = 1 )
+  epc2_load_Chla_2 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
+                                'OTB watershed', 'HF-CC', log = TRUE, delay = 2 )
+  
+  # assemble and re-label data for locations between Gandy & Frankland
+  epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
+  epc3$site <- "GB-HF"
+  # models
+  epc3_load_Chla_0 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
+                                'OTB watershed', 'GB-HF', log = TRUE, delay = 0 )
+  epc3_load_Chla_1 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
+                                'OTB watershed', 'GB-HF', log = TRUE, delay = 1 )
+  epc3_load_Chla_2 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
+                                'OTB watershed', 'GB-HF', log = TRUE, delay = 2 )

The plots below show relationships between log10-transformed TN loads and chlorophyll-a concentrations within Pinellas County strata E1–E5 @@ -1107,37 +1097,37 @@

Nitrogen loads and chlorophyll, with delayed loads

although the R2 values are substantially lower. Although not visualized below, the diagnostics for each model indicate that the residuals are homoskedastic and approximately Gaussian.

-
  par(mfrow=c(5,3))
-  E1_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
-                              log = TRUE, delay = 0 )
-  E1_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
-                              log = TRUE, delay = 1 )
-  E1_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
-                              log = TRUE, delay = 2 )
-  E2_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
-                              log = TRUE, delay = 0 )
-  E2_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
-                              log = TRUE, delay = 1 )
-  E2_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
-                              log = TRUE, delay = 2 )
-  E3_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
-                              log = TRUE, delay = 0 )
-  E3_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
-                              log = TRUE, delay = 1 )
-  E3_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
-                              log = TRUE, delay = 2 )
-  E4_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
-                              log = TRUE, delay = 0 )
-  E4_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
-                              log = TRUE, delay = 1 )
-  E4_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
-                              log = TRUE, delay = 2 )
-  E5_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
-                              log = TRUE, delay = 0 )
-  E5_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
-                              log = TRUE, delay = 1 )
-  E5_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
-                              log = TRUE, delay = 2 )
+
  par(mfrow=c(5,3))
+  E1_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
+                              log = TRUE, delay = 0 )
+  E1_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
+                              log = TRUE, delay = 1 )
+  E1_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
+                              log = TRUE, delay = 2 )
+  E2_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
+                              log = TRUE, delay = 0 )
+  E2_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
+                              log = TRUE, delay = 1 )
+  E2_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
+                              log = TRUE, delay = 2 )
+  E3_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
+                              log = TRUE, delay = 0 )
+  E3_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
+                              log = TRUE, delay = 1 )
+  E3_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
+                              log = TRUE, delay = 2 )
+  E4_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
+                              log = TRUE, delay = 0 )
+  E4_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
+                              log = TRUE, delay = 1 )
+  E4_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
+                              log = TRUE, delay = 2 )
+  E5_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
+                              log = TRUE, delay = 0 )
+  E5_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
+                              log = TRUE, delay = 1 )
+  E5_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
+                              log = TRUE, delay = 2 )

top


@@ -1168,31 +1158,31 @@

Chlorophyll and light

correlation. Although not visualized below, the diagnostics for each model indicate that the residuals are homoskedastic and approximately Gaussian.

-
  par(mfrow=c(3,3))
-
-  # models
-  epc1_Chla_Secchi_0 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
-                                'North CC', log = TRUE, delay = 0 )
-  epc1_Chla_Secchi_1 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
-                                'North CC', log = TRUE, delay = 1 )
-  epc1_Chla_Secchi_2 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
-                                'North CC', log = TRUE, delay = 2 )
-  
-  # models
-  epc2_Chla_Secchi_0 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
-                                'HF-CC', log = TRUE, delay = 0 )
-  epc2_Chla_Secchi_1 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
-                                'HF-CC', log = TRUE, delay = 1 )
-  epc2_Chla_Secchi_2 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
-                                'HF-CC', log = TRUE, delay = 2 )
-  
-  # models
-  epc3_Chla_Secchi_0 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
-                                'GB-HF', log = TRUE, delay = 0 )
-  epc3_Chla_Secchi_1 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
-                                'GB-HF', log = TRUE, delay = 1 )
-  epc3_Chla_Secchi_2 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
-                                'GB-HF', log = TRUE, delay = 2 )
+
  par(mfrow=c(3,3))
+
+  # models
+  epc1_Chla_Secchi_0 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
+                                'North CC', log = TRUE, delay = 0 )
+  epc1_Chla_Secchi_1 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
+                                'North CC', log = TRUE, delay = 1 )
+  epc1_Chla_Secchi_2 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
+                                'North CC', log = TRUE, delay = 2 )
+  
+  # models
+  epc2_Chla_Secchi_0 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
+                                'HF-CC', log = TRUE, delay = 0 )
+  epc2_Chla_Secchi_1 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
+                                'HF-CC', log = TRUE, delay = 1 )
+  epc2_Chla_Secchi_2 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
+                                'HF-CC', log = TRUE, delay = 2 )
+  
+  # models
+  epc3_Chla_Secchi_0 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
+                                'GB-HF', log = TRUE, delay = 0 )
+  epc3_Chla_Secchi_1 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
+                                'GB-HF', log = TRUE, delay = 1 )
+  epc3_Chla_Secchi_2 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
+                                'GB-HF', log = TRUE, delay = 2 )

top


@@ -1219,21 +1209,21 @@

E1 (Safety Harbor / Mobbly Bay)

(value.x), and their tensor product interaction. Model diagnostics indicate that the residuals are approximately Gaussian (QQ plot) and that the model had sufficient basis dimensions (k).

-
  # join TN load data to chlorophyll data
-  gamdat_E1 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E1" ), ],
-                           y = loads,
-                           by = 'date'
-                          )
-  nrow(gamdat_E1)
+
  # join TN load data to chlorophyll data
+  gamdat_E1 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E1" ), ],
+                           y = loads,
+                           by = 'date'
+                          )
+  nrow(gamdat_E1)
## [1] 128
-
  # decimal date
-  gamdat_E1$decdate <- gamdat_E1$date |> decimal_date()
-  # fit gam
-  gam1 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E1 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam1, rep = 1000 )
-

+
  # decimal date
+  gamdat_E1$decdate <- gamdat_E1$date |> decimal_date()
+  # fit gam
+  gam1 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E1 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam1, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 22 iterations.
@@ -1245,8 +1235,8 @@ 

E1 (Safety Harbor / Mobbly Bay)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120 3 1.07 0.78
-
  summary( gam1 )
+## te(decdate,value.x) 120 3 1.07 0.82
+
  summary( gam1 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1274,10 +1264,10 @@ 

E1 (Safety Harbor / Mobbly Bay)

changed over time (decdate). In the heatmap image, colors range from red (relatively low magnitude) to yellow (higher magnitude). White patches indicate gaps in the input space.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam1, scheme = 1)
-  plot.gam( gam1, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam1, scheme = 1)
+  plot.gam( gam1, scheme = 2)

top

@@ -1285,21 +1275,21 @@

E1 (Safety Harbor / Mobbly Bay)

E2 (Largo Inlet)

The model for stratum E2 (Largo Inlet) shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  gamdat_E2 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E2" ), ],
-                           y = loads,
-                           by = 'date'
-                          )
-  nrow(gamdat_E2)
+
  # join TN load data to chlorophyll data
+  gamdat_E2 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E2" ), ],
+                           y = loads,
+                           by = 'date'
+                          )
+  nrow(gamdat_E2)
## [1] 133
-
  # decimal date
-  gamdat_E2$decdate <- gamdat_E2$date |> decimal_date()
-  # fit gam
-  gam2 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E2 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam2, rep = 1000 )
-

+
  # decimal date
+  gamdat_E2$decdate <- gamdat_E2$date |> decimal_date()
+  # fit gam
+  gam2 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E2 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam2, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 14 iterations.
@@ -1311,8 +1301,8 @@ 

E2 (Largo Inlet)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.00 8.14 1.01 0.58
-
  summary( gam2 )
+## te(decdate,value.x) 120.00 8.14 1.01 0.47
+
  summary( gam2 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1339,10 +1329,10 @@ 

E2 (Largo Inlet)

substantially over time (decdate) at Largo Inlet, with TN loads (value.x) most strongly associated with chlorophyll concentrations between about 2010 and 2015.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam2, scheme = 1)
-  plot.gam( gam2, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam2, scheme = 1)
+  plot.gam( gam2, scheme = 2)

top

@@ -1350,21 +1340,21 @@

E2 (Largo Inlet)

E3 (Feather Sound)

The model for stratum E3 (Feather Sound) shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  gamdat_E3 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E3" ), ],
-                           y = loads,
-                           by = 'date'
-                          )
-  nrow(gamdat_E3)
+
  # join TN load data to chlorophyll data
+  gamdat_E3 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E3" ), ],
+                           y = loads,
+                           by = 'date'
+                          )
+  nrow(gamdat_E3)
## [1] 132
-
  # decimal date
-  gamdat_E3$decdate <- gamdat_E3$date |> decimal_date()
-  # fit gam
-  gam3 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E3 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam3, rep = 1000 )
-

+
  # decimal date
+  gamdat_E3$decdate <- gamdat_E3$date |> decimal_date()
+  # fit gam
+  gam3 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E3 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam3, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 22 iterations.
@@ -1379,7 +1369,7 @@ 

E3 (Feather Sound)

## te(decdate,value.x) 120.00 4.52 0.89 0.045 * ## --- ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-
  summary( gam3 )
+
  summary( gam3 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1404,10 +1394,10 @@ 

E3 (Feather Sound)

The plots below suggest that the association between chlorophyll concentrations and TN load (value.x) has not changed substantially over time (decdate) at Feather Sound.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam3, scheme = 1)
-  plot.gam( gam3, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam3, scheme = 1)
+  plot.gam( gam3, scheme = 2)

top

@@ -1415,21 +1405,21 @@

E3 (Feather Sound)

E4 (Gateway)

The model for stratum E4 (Gateway) shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  gamdat_E4 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E4" ), ],
-                           y = loads,
-                           by = 'date'
-                          )
-  nrow(gamdat_E4)
+
  # join TN load data to chlorophyll data
+  gamdat_E4 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E4" ), ],
+                           y = loads,
+                           by = 'date'
+                          )
+  nrow(gamdat_E4)
## [1] 126
-
  # decimal date
-  gamdat_E4$decdate <- gamdat_E4$date |> decimal_date()
-  # fit gam
-  gam4 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E4 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam4, rep = 1000 )
-

+
  # decimal date
+  gamdat_E4$decdate <- gamdat_E4$date |> decimal_date()
+  # fit gam
+  gam4 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E4 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam4, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 40 iterations.
@@ -1442,7 +1432,7 @@ 

E4 (Gateway)

## ## k' edf k-index p-value ## te(decdate,value.x) 120.00 5.71 0.97 0.28
-
  summary( gam4 )
+
  summary( gam4 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1467,10 +1457,10 @@ 

E4 (Gateway)

The plots below suggest that the association between chlorophyll concentrations and TN load (value.x) has not changed substantially over time (decdate) at Gateway.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam4, scheme = 1)
-  plot.gam( gam4, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam4, scheme = 1)
+  plot.gam( gam4, scheme = 2)

top

@@ -1478,21 +1468,21 @@

E4 (Gateway)

E5 (Weedon Island)

The model for stratum E5 (Weedon Island) shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  gamdat_E5 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E5" ), ],
-                           y = loads,
-                           by = 'date'
-                          )
-  nrow(gamdat_E5)
+
  # join TN load data to chlorophyll data
+  gamdat_E5 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E5" ), ],
+                           y = loads,
+                           by = 'date'
+                          )
+  nrow(gamdat_E5)
## [1] 129
-
  # decimal date
-  gamdat_E5$decdate <- gamdat_E5$date |> decimal_date()
-  # fit gam
-  gam5 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E5 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam5, rep = 1000 )
-

+
  # decimal date
+  gamdat_E5$decdate <- gamdat_E5$date |> decimal_date()
+  # fit gam
+  gam5 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E5 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam5, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 16 iterations.
@@ -1504,8 +1494,8 @@ 

E5 (Weedon Island)

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value -## te(decdate,value.x) 120.0 12.2 1.21 0.99
-
  summary( gam5 )
+## te(decdate,value.x) 120.0 12.2 1.21 1
+
  summary( gam5 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1531,10 +1521,10 @@ 

E5 (Weedon Island)

between chlorophyll concentrations and TN load (value.x) at Weedon Island over time (decdate), for moderate to high levels of TN load.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam5, scheme = 1)
-  plot.gam( gam5, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam5, scheme = 1)
+  plot.gam( gam5, scheme = 2)

top


@@ -1556,22 +1546,22 @@

North of Courtney Campbell

the linear model, we aggregate the data into monthly averages across EPC monitoring sites north of the Courtney Campbell causeway. The model shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  epc1.chl <- epc1[ which( epc1$param=="Chla" ), ]
-  epc1.chl <- epc1.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
-  epc1.sec <- epc1[ which( epc1$param=="Secchi" ), ]
-  epc1.sec <- epc1.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
-  gamdat_epc1 <- inner_join( x = epc1.chl, y = epc1.sec, by = 'date' )
-  nrow(gamdat_E2)
+
+   # join chlorophyll data to Secchi data
+  epc1.chl <- epc1[ which( epc1$param=="Chla" ), ]
+  epc1.chl <- epc1.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+  epc1.sec <- epc1[ which( epc1$param=="Secchi" ), ]
+  epc1.sec <- epc1.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+  gamdat_epc1 <- inner_join( x = epc1.chl, y = epc1.sec, by = 'date' )
+  nrow(gamdat_epc1)  # NOTE(review): was nrow(gamdat_E2) (copy-paste bug); re-render to refresh the output below
## [1] 133
-
  # decimal date
-  gamdat_epc1$decdate <- gamdat_epc1$date |> decimal_date()
-  # fit gam
-  gam6 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc1 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam6, rep = 1000 )
-

+
  # decimal date
+  gamdat_epc1$decdate <- gamdat_epc1$date |> decimal_date()
+  # fit gam
+  gam6 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc1 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam6, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 10 iterations.
@@ -1584,7 +1574,7 @@ 

North of Courtney Campbell

## ## k' edf k-index p-value ## te(decdate,value.x) 120.0 34.3 1 0.52
-
  summary( gam6 )
+
  summary( gam6 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1612,10 +1602,10 @@ 

North of Courtney Campbell

substantially over time (decdate). Redder areas in the heatmap indicate time periods when increased chlorophyll concentrations were more strongly associated with reduced water clarity.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam6, scheme = 1)
-  plot.gam( gam6, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam6, scheme = 1)
+  plot.gam( gam6, scheme = 2)

top

@@ -1626,22 +1616,22 @@

Between the Frankland Bridge and Courtney Campbell

the linear model, we aggregate the data into monthly averages across EPC monitoring sites between the Frankland Bridge and the Courtney Campbell causeway. The model shows good diagnostics.

-
 # join TN load data to chlorophyll data
- epc2.chl <- epc2[ which( epc2$param=="Chla" ), ]
- epc2.chl <- epc2.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
- epc2.sec <- epc2[ which( epc2$param=="Secchi" ), ]
- epc2.sec <- epc2.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
- gamdat_epc2 <- inner_join( x = epc2.chl, y = epc2.sec, by = 'date' )
- nrow(gamdat_E2)
+
 # join Secchi data to chlorophyll data
+ epc2.chl <- epc2[ which( epc2$param=="Chla" ), ]
+ epc2.chl <- epc2.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+ epc2.sec <- epc2[ which( epc2$param=="Secchi" ), ]
+ epc2.sec <- epc2.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+ gamdat_epc2 <- inner_join( x = epc2.chl, y = epc2.sec, by = 'date' )
+ nrow(gamdat_E2)
## [1] 133
-
 # decimal date
- gamdat_epc2$decdate <- gamdat_epc2$date |> decimal_date()
- # fit gam
- gam7 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc2 )
- # model diagnostics
- par(mfrow=c(2,2))
- gam.check( gam7, rep = 1000 )
-

+
 # decimal date
+ gamdat_epc2$decdate <- gamdat_epc2$date |> decimal_date()
+ # fit gam
+ gam7 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc2 )
+ # model diagnostics
+ par(mfrow=c(2,2))
+ gam.check( gam7, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 6 iterations.
@@ -1653,8 +1643,8 @@ 

Between the Frankland Bridge and Courtney Campbell

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value
-## te(decdate,value.x) 120.0 30.4 1.03 0.78
-
 summary( gam7 )
+## te(decdate,value.x) 120.0 30.4 1.03 0.68
+
 summary( gam7 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1690,10 +1680,10 @@ 

Between the Frankland Bridge and Courtney Campbell

summary(...) outputs), as do the scales of the axes of the coefficient surface plots. Further interpretation and comparison of these results can be pursued at a later phase of the project.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam7, scheme = 1)
-  plot.gam( gam7, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam7, scheme = 1)
+  plot.gam( gam7, scheme = 2)

top

@@ -1704,22 +1694,22 @@

Between Gandy Blvd and the Frankland Bridge

the linear model, we aggregate the data into monthly averages across EPC monitoring sites between Gandy Blvd and the Frankland Bridge. The model shows good diagnostics.

-
  # join TN load data to chlorophyll data
-  epc3.chl <- epc3[ which( epc3$param=="Chla" ), ]
-  epc3.chl <- epc3.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
-  epc3.sec <- epc3[ which( epc3$param=="Secchi" ), ]
-  epc3.sec <- epc3.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
-  gamdat_epc3 <- inner_join( x = epc3.chl, y = epc3.sec, by = 'date' )
-  nrow(gamdat_E2)
+
  # join Secchi data to chlorophyll data
+  epc3.chl <- epc3[ which( epc3$param=="Chla" ), ]
+  epc3.chl <- epc3.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+  epc3.sec <- epc3[ which( epc3$param=="Secchi" ), ]
+  epc3.sec <- epc3.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
+  gamdat_epc3 <- inner_join( x = epc3.chl, y = epc3.sec, by = 'date' )
+  nrow(gamdat_E2)
## [1] 133
-
  # decimal date
-  gamdat_epc3$decdate <- gamdat_epc3$date |> decimal_date()
-  # fit gam
-  gam8 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc3 )
-  # model diagnostics
-  par(mfrow=c(2,2))
-  gam.check( gam8, rep = 1000 )
-

+
  # decimal date
+  gamdat_epc3$decdate <- gamdat_epc3$date |> decimal_date()
+  # fit gam
+  gam8 <- gam(  log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc3 )
+  # model diagnostics
+  par(mfrow=c(2,2))
+  gam.check( gam8, rep = 1000 )
+

## 
 ## Method: GCV   Optimizer: magic
 ## Smoothing parameter selection converged after 6 iterations.
@@ -1731,8 +1721,8 @@ 

Between Gandy Blvd and the Frankland Bridge

## indicate that k is too low, especially if edf is close to k'. ## ## k' edf k-index p-value
-## te(decdate,value.x) 120.0 22.9 1.05 0.86
-
  summary( gam8 )
+## te(decdate,value.x) 120.0 22.9 1.05 0.85
+
  summary( gam8 )
## 
 ## Family: gaussian 
 ## Link function: identity 
@@ -1760,10 +1750,10 @@ 

Between Gandy Blvd and the Frankland Bridge

Comparisons of these results with those of previous models (gam6 and gam7) are not straightforward and can be pursued during a later phase of the project.

-
  # plot smooth
-  par(mfrow=c(1,2))
-  plot.gam( gam8, scheme = 1)
-  plot.gam( gam8, scheme = 2)
+
  # plot smooth
+  par(mfrow=c(1,2))
+  plot.gam( gam8, scheme = 1)
+  plot.gam( gam8, scheme = 2)

top