diff --git a/docs/eval_paradigm.Rmd b/docs/eval_paradigm.Rmd
index 8a044f1..bfdbdfb 100644
--- a/docs/eval_paradigm.Rmd
+++ b/docs/eval_paradigm.Rmd
@@ -56,7 +56,7 @@ The results of generalized additive models (GAMs) indicated that the TN-chlorophyll
 
 # Data Processing {#data}
 
-Water quality and external loading data were assembled by the `./data_proc.R` script. This section processes these data to support the subsequent analyses, including a few basic QA/QC checks.
+Water quality and external loading data were assembled by the `../R/data_proc.R` script. This section processes these data to support the subsequent analyses, including a few basic QA/QC checks.
 
 We begin by loading packages (see [References](#ref)).
 
@@ -132,7 +132,7 @@ head( pcwq2 )
 
 ## Environmental Protection Commission of Hillsborough County (EPC) water quality data {#data_epc}
 
-This section loads the EPC water quality data from file (`./data/epcwq.RData`), pivots the dataframe from wide to long format, performs QC checks, aggregates the data to a monthly timeframe (monthly means), and creates a new `epcwq2` dataframe for analysis. As indicated in output below, a substantial number of Secchi records indicate that reported Secchi depth equals the total depth (i.e., the Secchi depth was visible at bottom); in these cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas).
+This section loads the EPC water quality data from file (`../data/epcwq.RData`), pivots the dataframe from wide to long format, performs QC checks, aggregates the data to a monthly timeframe (monthly means), and creates a new `epcwq2` dataframe for analysis. As indicated in output below, a substantial number of Secchi records indicate that reported Secchi depth equals the total depth (i.e., the Secchi depth was visible at bottom); in these cases, the Secchi depth is not necessarily a reliable proxy for water clarity (e.g., in shallow areas).
 
 ```{r}
 # load data
diff --git a/docs/eval_paradigm.html b/docs/eval_paradigm.html
index 63b2277..8a1ee25 100644
--- a/docs/eval_paradigm.html
+++ b/docs/eval_paradigm.html
@@ -511,14 +511,17 @@
 Water quality and external loading data were assembled by the
-./data_proc.R
-script. This section processes these data to
-support the subsequent analyses, including a few basic QA/QC checks.
+../R/data_proc.R
+script. This section processes these data
+to support the subsequent analyses, including a few basic QA/QC
+checks.
 We begin by loading packages (see References).
+knitr::opts_chunk$set(echo=TRUE, message=FALSE, warning=FALSE)
 library(lubridate)
 library(plyr)
 library(dplyr)
 library(tidyr)
 library(mgcv)
+library(here)
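
As a quick illustration of why this chunk adds `library(here)` and the chunks below switch from `"../..."` paths to `here(...)`: `here()` builds paths from the project root, so the document knits the same way regardless of the working directory. A minimal sketch (the root path and file name shown are hypothetical):

```r
library(here)

here()                            # project root, e.g. "/home/user/tbep-otb" (hypothetical)
here("data-raw", "example.csv")   # root-anchored path, independent of getwd()

# the relative-path equivalent depends on where the .Rmd is knitted from:
# read.csv("../data-raw/example.csv")
```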
 Pinellas County collects water quality data at five strata along the
@@ -536,7 +539,7 @@
 pcwq2
 dataframe for analysis.
 # load data
-pcwq1 <- read.csv( "../data-raw/DataDownload_2999342_row.csv" )[,1:21]
+pcwq1 <- read.csv( here("data-raw/DataDownload_2999342_row.csv") )[,1:21]
 # dates
 pcwq1$Date <- pcwq1$SampleDate |> as.Date("%m/%d/%Y")
 # qualifier codes
@@ -567,12 +570,10 @@ Pinellas County water quality data
 pcwq2 <- pcwq1[, c("Date","Analyte","Result_Value","Unit","StationID") ]
 colnames( pcwq2 ) <- c("date","param", "value", "unit", "site" )
 # aggregate data to monthly timeframe
 pcwq2 <- pcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame()
-## `summarise()` has grouped output by 'date', 'param', 'site'. You can override
-## using the `.groups` argument.
 pcwq2$date <- pcwq2$date |> floor_date('month')
 The dataset contains 11,808 rows.
 head( pcwq2 )
 ##         date param  site unit value
 ## 1 2000-01-01   BOD 62-01  mgl  1.00
 ## 2 2000-01-01  Chla 62-01  ugl  2.60
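
For readers unfamiliar with the aggregation idiom in this chunk, a minimal sketch of the monthly-mean step using made-up values (`lubridate::floor_date()` collapses each sample date to the first of its month before averaging):

```r
library(dplyr)
library(lubridate)

# made-up records for one site and parameter
wq <- data.frame(
  date  = as.Date(c("2000-01-05", "2000-01-20", "2000-02-03", "2000-02-18")),
  param = "Chla", site = "62-01", unit = "ugl",
  value = c(2.1, 3.3, 4.0, 2.8)
)

wq |>
  mutate(date = floor_date(date, "month")) |>        # relabel each sample to its month
  group_by(date, param, site, unit) |>
  summarise(value = mean(value), .groups = "drop")   # one monthly mean per site/parameter
```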
@@ -588,7 +589,7 @@ Pinellas County water quality data
 Environmental Protection Commission of Hillsborough County (EPC)
 water quality data
 This section loads the EPC water quality data from file
-(./data/epcwq.RData), pivots the dataframe from wide to
+(../data/epcwq.RData), pivots the dataframe from wide to
 long format, performs QC checks, aggregates the data to a monthly
 timeframe (monthly means), and creates a new epcwq2
 dataframe for analysis. As indicated in output below, a substantial
@@ -596,88 +597,77 @@
Environmental Protection Commission of Hillsborough County (EPC)
total depth (i.e., the Secchi depth was visible at bottom); in these
cases, the Secchi depth is not necessarily a reliable proxy for water
clarity (e.g., in shallow areas).
 # load data
-load( "../data/epcwq.RData" )
+load( here("data/epcwq.RData") )
 # dates
 epcwq1 <- epcwq |> as.data.frame()
 epcwq1$date <- epcwq1$SampleTime |> as.Date("%Y-%m-%d")
-## Warning in as.POSIXlt.POSIXct(x, tz = tz): unknown timezone '%Y-%m-%d'
 # subset columns (data and 'Q' columns must be listed in consecutive pairs!)
 epcwq1 <- select( epcwq1, date, StationNumber, SampleDepth,
                   Total_Nitrogen, Total_NitrogenQ,
                   Chlorophyll_a, Chlorophyll_aQ,
                   SecchiDepth, Secchi_Q,
                   Total_Suspended_Solids, Total_Suspended_SolidsQ,
                   Turbidity, TurbidityQ )
 # subset OTB stations
 epcwq1 <- epcwq1[ which( epcwq1$StationNumber %in% c(36, 38, 40, 41, 42, 46, 47,
                                                      50, 51, 60, 61, 62, 63, 64,
                                                      65, 66, 67, 68) ), ]
 # subset by date
 epcwq1 <- epcwq1[ which( year(epcwq1$date) >= 2000 ), ]
 # loop through data and QC columns to pivot from wide to long format
 datacols <- seq( 4, 12, 2 )
 epcwq2 <- data.frame( matrix(ncol=7,nrow=0) )
 for( i in datacols ){
   temp <- epcwq1[ ,1:3 ]
   temp$Analyte <- colnames(epcwq1)[i]
   temp$value <- epcwq1[,i] |> as.numeric()
   temp$unit <- NA
   temp$QA <- epcwq1[,i+1]
   temp <- temp[ complete.cases( temp$value ), ]
   epcwq2 <- rbind( epcwq2, temp )
 }
-## Warning: NAs introduced by coercion
-## Warning: NAs introduced by coercion
-## Warning: NAs introduced by coercion
-## Warning: NAs introduced by coercion
-## Warning: NAs introduced by coercion
 # standardize parameter names
 param.names.old <- epcwq2$Analyte |> unique() |> sort()
 param.names.new <- c( "Chla", "Secchi", "TN", "TSS", "Turbidity" )
 epcwq2$param <- mapvalues( epcwq2$Analyte, param.names.old, param.names.new )
 # units
 units <- c( "ug/l", "m", "mg/l", "mg/l", "NTU" )  # listed in same order as param.names.new, above
 epcwq2$unit <- mapvalues( epcwq2$param, param.names.new, units )
 # qualifier codes
 # replace "NULL" with NA
 epcwq2$QA[ which(epcwq2$QA=="NULL") ] <- NA
 # specify codes
 fatal.codes <- c("*","?","A","B","G","H","J","K",
                  "L","N","O","Q","T","V","Y","Z")
 # define function to locate fatal records
 find.fatal <- function( QUALIFIER, FATAL=fatal.codes ){  # QUALIFIER arg is a string
   input.code <- QUALIFIER |> strsplit(split='') |>
     unlist() |> toupper()                # parse string into single characters
   fatal <- input.code %in% FATAL |> any()  # check if any characters match FATAL
   return( fatal )                        # return TRUE or FALSE
 }  # // end find.fatal()
 # apply function to locate fatal records
 rm.fatal.idx <- apply( matrix(epcwq2$QA,ncol=1), 1, find.fatal ) |> which()
 epcwq2 <- epcwq2[ -rm.fatal.idx, ]
 # acknowledge Secchi records visible on bottom
 epcwq2[ which( epcwq2$param=="Secchi" & epcwq2$QA==">"), ] |> nrow()  # number of VOB Secchi records
 ## [1] 1333
 epcwq2[ which( epcwq2$param=="Secchi"), ] |> nrow()  # total number of Secchi records
 ## [1] 5092
 # coerce depth to numeric
 epcwq2$SampleDepth <- epcwq2$SampleDepth |> as.numeric()
 # check for duplicates (confirm FALSE)
 epcwq2 |> duplicated() |> any()
 ## [1] FALSE
 # subset and rename columns
 epcwq2 <- epcwq2[, c("date","param","value","unit","StationNumber") ]
 colnames( epcwq2 ) <- c("date","param", "value", "unit", "site" )
 # aggregate data to monthly timeframe
 epcwq2 <- epcwq2 |> group_by(date,param,site,unit) |> summarise(value=mean(value)) |> as.data.frame()
-## `summarise()` has grouped output by 'date', 'param', 'site'. You can override
-## using the `.groups` argument.
 epcwq2$date <- epcwq2$date |> floor_date('month')
 The dataset contains 20,906 rows.
 head( epcwq2 )
## date param site unit value
## 1 2000-01-01 Chla 36 ug/l 3.563
## 2 2000-01-01 Chla 38 ug/l 3.161
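
To make the qualifier screen above concrete, here is a small restatement of `find.fatal()` applied to a few made-up qualifier strings (equivalent logic, written without the pipe):

```r
fatal.codes <- c("*","?","A","B","G","H","J","K",
                 "L","N","O","Q","T","V","Y","Z")

find.fatal <- function( QUALIFIER, FATAL = fatal.codes ){
  input.code <- toupper( unlist( strsplit( QUALIFIER, split = '' ) ) )
  any( input.code %in% FATAL )   # TRUE if any character is a fatal code
}

find.fatal("U")    # FALSE: "U" is not on the fatal list
find.fatal("sq")   # TRUE: contains "Q" after upper-casing
find.fatal(">")    # FALSE: the visible-on-bottom flag is retained
```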
@@ -697,24 +687,24 @@ Tampa Bay Estuary Program nitrogen load estimates
TN loads by summing over TN load sources to OTB (e.g., AD, NPS, DPS,
etc.), standardizes column names, and creates a new loads
dataframe for analysis.
 # load data
-loads <- read.csv( "../data-raw/TB_loads_monthly.csv" )
+loads <- read.csv( here("data-raw/TB_loads_monthly.csv") )
 # subset TN data for OTB segment
 loads <- loads[ which( loads$bay_segment=="Old Tampa Bay"), c('year','month','tn_load') ]
 # date column
 loads$date <- as.Date( paste0(loads$year,"-",loads$month,"-",01), format="%Y-%m-%d" )
 # aggregate total TN load across sources
 loads <- loads |> group_by(date) |> summarise(tn_load=sum(tn_load)) |> as.data.frame()
 # create columns
 loads$param <- "TN load"
 loads$site <- "OTB watershed"
 loads$unit <- 'tons'
 # rename columns
 colnames( loads ) <- c('date','value','param','site','unit')
 # re-order columns
 loads <- select( loads, date, param, value, unit, site )
The dataset contains 324 rows.
 head( loads )
## date param value unit site
## 1 1995-01-01 TN load 20.78062 tons OTB watershed
## 2 1995-02-01 TN load 19.80657 tons OTB watershed
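
A small sketch of the two steps in this chunk, building a month-start date from the year/month columns and summing loads across sources (source names and values below are invented):

```r
library(dplyr)

toy <- data.frame(
  year    = c(1995, 1995, 1995, 1995),
  month   = c(1, 1, 2, 2),
  source  = c("NPS", "DPS", "NPS", "DPS"),
  tn_load = c(12.3, 8.5, 11.1, 8.7)
)

toy$date <- as.Date( paste0(toy$year, "-", toy$month, "-", 1), format = "%Y-%m-%d" )
toy |> group_by(date) |> summarise(tn_load = sum(tn_load))   # total TN load per month
```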
@@ -751,71 +741,71 @@ Nitrogen loads and chlorophyll, with 3-month cumulative loads
 and include the columns date, chla,
 TN_load (monthly load values), and TN_load_3mo
 (cumulative 3-month loads).
 # Subset and join load and chl-a data for OTB sub-segments
 # Sub-segment north of Courtney Campbell
 epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]  # subset EPC data by station
 epc1 <- epc1[which(epc1$param=="Chla"),] |> group_by(date) |>      # aggregate chl data to monthly means
   summarise(value=mean(value)) |> as.data.frame()
 epc1 <- left_join( epc1, loads[,c('date','value')], by='date' )    # value.x is chl; value.y is load
 colnames(epc1) <- c("date","chla","TN_load")
 # Sub-segment between Courtney Campbell and Frankland
 epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
 epc2 <- epc2[which(epc2$param=="Chla"),] |> group_by(date) |>
   summarise(value=mean(value)) |> as.data.frame()
 epc2 <- left_join( epc2, loads[,c('date','value')], by='date' )
 colnames(epc2) <- c("date","chla","TN_load")
 # Sub-segment between Frankland and Gandy
 epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
 epc3 <- epc3[which(epc3$param=="Chla"),] |> group_by(date) |>
   summarise(value=mean(value)) |> as.data.frame()
 epc3 <- left_join( epc3, loads[,c('date','value')], by='date' )
 colnames(epc3) <- c("date","chla","TN_load")
 
 # Compute 3-month TN loads
 # Define function to sum load values over a specified window (`win`)
 # The function assumes there are no NAs
 sum3mo <- function( x, win = 3 ){
   out <- rep(NA,length=(win-1))
   for( i in win:length(x) ){
     out[i] <- sum( x[i:(i-win+1)] )
   }
   return( out )
 }
 # Add a 3-month TN load column to each dataset
 epc1$TN_load_3mo <- sum3mo( epc1$TN_load )
 epc2$TN_load_3mo <- sum3mo( epc2$TN_load )
 epc3$TN_load_3mo <- sum3mo( epc3$TN_load )
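
The `sum3mo()` helper above is a trailing (right-aligned) rolling sum: each month is paired with the load delivered over that month and the two preceding months, with NA for the first `win - 1` months. A quick check on a made-up series, alongside an equivalent vectorized form:

```r
loads_toy <- c(10, 20, 30, 40, 50)   # made-up monthly TN loads

sum3mo( loads_toy )                                              # NA NA 60 90 120
as.numeric( stats::filter( loads_toy, rep(1, 3), sides = 1 ) )   # same result
```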
The monthly sub-segment chlorophyll-a data and segment-wide TN load
data are visualized below.
 par( mfrow=c(2,2) )
 plot( chla ~ date, data = epc1, pch = 16, col = rgb(0,0.4,0.8,0.7),
       ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
       main = "Chlorophyll-a, OTB north of Courtney Campbell" )
 lines( chla ~ date, data = epc1, col = rgb(0,0.4,0.8,0.4) )
 abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
         col = rgb(0,0,0,0.2) )
 abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
 plot( chla ~ date, data = epc2, pch = 16, col = rgb(0,0.4,0.8,0.7),
       ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
       main = "Chlorophyll-a, OTB between Frankland & Courtney Campbell" )
 lines( chla ~ date, data = epc2, col = rgb(0,0.4,0.8,0.4) )
 abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
         col = rgb(0,0,0,0.2) )
 abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
 plot( chla ~ date, data = epc3, pch = 16, col = rgb(0,0.4,0.8,0.7),
       ylim = c(0,80), ylab = "Chlorophyll-a (ug/l)", xlab = "", las = 1,
       main = "Chlorophyll-a, OTB between Gandy & Frankland" )
 lines( chla ~ date, data = epc3, col = rgb(0,0.4,0.8,0.4) )
 abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
         col = rgb(0,0,0,0.2) )
 abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
 plot( TN_load ~ date, data = epc3, pch = 16, col = rgb(0.2,0.2,0.2,0.7),
       ylab = "TN load (tons)", xlab = "", las = 1,
       main = "Monthly TN load, Old Tampa Bay" )
 lines( TN_load ~ date, data = epc3, col = rgb(0.2,0.2,0.2,0.4) )
 abline( v = seq.Date(as.Date("2000-01-01"),as.Date("2024-01-01"),'year'),
         col = rgb(0,0,0,0.2) )
 abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
Below, we fit linear models for chlorophyll as a function of
cumulative 3-month TN loads at each sub-segment and visualize the
@@ -830,92 +820,92 @@
Nitrogen loads and chlorophyll, with 3-month cumulative loads
nor Gaussian (diagnostic visualizations follow); experimentation with
alternative model specifications that include a month-of-year term did
not improve the model diagnostics.
 # Define plotting function for data and model
 TN_chl_plot <- function( dat, mod, lab.main ){
   # 95% CI
   pred.seq <- seq( min( dat$TN_load_3mo, na.rm=TRUE ),
                    max( dat$TN_load_3mo, na.rm=TRUE ), length.out=100 )
   pred <- predict.gam( mod,
                        newdata = data.frame( TN_load_3mo = pred.seq ),
                        se.fit=TRUE )
 
   # P value
   pval <- summary(mod)$p.table[2,4]
   pcol1 <- rgb(1,0,0,0.4)
   pcol2 <- rgb(1,0,0,0.15)
   if( pval < 0.001 ){
     pstring <- "P < 0.001"
   } else if( pval < 0.01 ){
     pstring <- "P < 0.01"
   } else if( pval < 0.05 ){
     pstring <- "P < 0.05"
   } else {
     pstring <- "P > 0.05"
     pcol1 <- rgb(0.2,0.3,0.9,0.4)
     pcol2 <- rgb(0,0.1,0.6,0.15)
   }
   # scatterplot data
   plot( chla ~ TN_load_3mo, data = dat,
         col = rgb(0,0,0,0.6), cex = 1.4,
         main = paste( lab.main,"\n",
                       min(dat$date), " - ", max(dat$date) ),
         cex.main = 1.6, cex.lab = 1.4,
         xlim = range( c(0,dat$TN_load_3mo,pred.seq), na.rm=TRUE ),
         ylim = range( c(0,dat$chla,pred$fit-pred$se.fit*1.96,
                         pred$fit+pred$se.fit*1.96)*1.2 ),
         xlab = "3-month TN load (tons)", cex.axis = 1.4,
         ylab = "Chlorophyll-a (ug/l)", las = 1
   )
   abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
   abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
   abline( v = 0, col = rgb(0,0,0,0.5) )
   abline( h = 0, col = rgb(0,0,0,0.5) )
 
   # plot model fit
   lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
   polygon( x = c( pred.seq, rev(pred.seq) ),
            y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
            col = pcol2, border = rgb(0,0,0,0) )
   # write model R2 and pval
   legend( 'top', horiz = TRUE, bty = 'n',
           border = rgb(0,0,0,0), cex = 1.4,
           legend = paste0( "N = ", nrow(dat), " ",
                            "R2 = ", round(summary(mod)$r.sq,3),
                            " ", pstring )
   )
 }  # // end TN_chl_plot() function
 
 # models
 ncmod1 <- gam( chla ~ TN_load_3mo, data = epc1 )
 ncmod2 <- gam( chla ~ TN_load_3mo, data = epc2 )
 ncmod3 <- gam( chla ~ TN_load_3mo, data = epc3 )
 
 # plots
 par( mfrow=c(1,3) )
 TN_chl_plot( epc1, ncmod1, "Chla north of CC vs. 3mo TN load (OTB)" )
 TN_chl_plot( epc2, ncmod2, "Chla bw HF-CC vs. 3mo TN load (OTB)" )
 TN_chl_plot( epc3, ncmod3, "Chla bw GB-HF vs. 3mo TN load (OTB)" )
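
A side note on the model calls above: with no smooth terms in the formula, `gam()` fits an ordinary linear regression, so the slope, R2, and p-value reported in the plot legend match `lm()`. A toy check (synthetic data; names are illustrative only):

```r
library(mgcv)

set.seed(1)
toy <- data.frame( TN_load_3mo = runif(60, 50, 250) )
toy$chla <- 5 + 0.05 * toy$TN_load_3mo + rnorm(60, sd = 2)

m_gam <- gam( chla ~ TN_load_3mo, data = toy )
m_lm  <- lm( chla ~ TN_load_3mo, data = toy )

coef(m_gam)                    # identical to coef(m_lm)
summary(m_gam)$p.table[2, 4]   # slope p-value, as used in the plot legend
```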
The diagnostics for each of the above OTB models indicate that the
assumption of homoskedastic, Gaussian residuals is questionable.
 par(mfrow=c(2,2))
 # North of CC
 gam.check(ncmod1,rep=1000)
 ##
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selection
 ## Model rank =  2 / 2
 # Between CC and HF
 gam.check(ncmod2,rep=1000)
 ##
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selection
 ## Model rank =  2 / 2
 # Between HF and GB
 gam.check(ncmod3,rep=1000)
 ##
 ## Method: GCV   Optimizer: magic
 ## Model required no smoothing parameter selection
 ## Model rank =  2 / 2
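
One form the month-of-year experiments mentioned above could take is a cyclic smooth of month added alongside the linear load term; a sketch (this is not necessarily the exact specification that was tried):

```r
library(mgcv)
library(lubridate)

# add a month-of-year term to the north-of-CC model as one candidate specification
epc1$month <- month(epc1$date)
ncmod1_moy <- gam( chla ~ TN_load_3mo + s(month, bs = "cc"), data = epc1 )
gam.check( ncmod1_moy, rep = 1000 )
```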
@@ -949,107 +939,107 @@ Nitrogen loads and chlorophyll, with delayed loads
line indicates whether the estimated slope is statistically significant
(red) or not significant (gray) at alpha = 0.05. The function returns
the input data and the model object.
 # define regression/plot function
 #   x, y      dataframes for explanatory (x) and response (y) variables,
 #             each containing columns date, param, value, unit, site
 #   param.x,  water quality parameters in x$param, y$param (strings)
 #   param.y
 #   site.x,   location labels in x$site, y$site (string)
 #   site.y
 #   log       logical, if TRUE log10-transform the x and y data. FALSE by default
 #   delay     number of time steps to delay param.x (non-negative integer)
 OTB_corr <- function( x, y,
                       param.x, param.y,
                       site.x, site.y = site.x,
                       log = FALSE,
                       delay = 0 ){
 
   # units
   unit.x <- x$unit[ which( x$param==param.x ) ] |> unique()
   unit.y <- y$unit[ which( y$param==param.y ) ] |> unique()
 
   # assemble data for x and y in wide format
   dat.x <- x[ which( x$param==param.x & x$site==site.x ), ]
   dat.x <- dat.x |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
   dat.y <- y[ which( y$param==param.y & y$site==site.y ), ]
   dat.y <- dat.y |> group_by(date) |> summarise(value=mean(value)) |> as.data.frame()
   dat <- dplyr::inner_join( dat.x, dat.y, by = 'date' )
 
   # log10-transform
   if( log == TRUE ){
     dat$value.x[ which( dat$value.x != 0) ] <- dat$value.x |> log10()
     dat$value.y[ which( dat$value.y != 0) ] <- dat$value.y |> log10()
     unit.x <- paste( "log10", unit.x )
     unit.y <- paste( "log10", unit.y )
   }
   # delay
   if( delay > 0 ){
     dateseq <- data.frame( date = seq.Date( min(dat$date), max(dat$date), 'month' ) )
     dat <- dplyr::left_join( dateseq, dat, by = 'date' )
     dat$value.x <- dplyr::lag( dat$value.x, n = delay )
     dat <- na.omit( dat )
   } else if ( delay < 0 ){
     stop("Negative delay specified.")
   }
   # fit linear model
   # fit model
   mod1 <- gam( value.y ~ value.x, data = dat )
   # plot fit with 95% CI
   pred.seq <- seq( min( dat$value.x ), max( dat$value.x ), length.out=100 )
   pred <- predict.gam( mod1,
                        newdata = data.frame( value.x = pred.seq ),
                        se.fit=TRUE )
   # P value
   pval <- summary(mod1)$p.table[2,4]
   pcol1 <- rgb(1,0,0,0.4)
   pcol2 <- rgb(1,0,0,0.15)
   if( pval < 0.001 ){
     pstring <- "P < 0.001"
   } else if( pval < 0.01 ){
     pstring <- "P < 0.01"
   } else if( pval < 0.05 ){
     pstring <- "P < 0.05"
   } else {
     pstring <- "P > 0.05"
     pcol1 <- rgb(0.2,0.3,0.9,0.4)
     pcol2 <- rgb(0,0.1,0.6,0.15)
   }
   # scatterplot data
   plot( value.y ~ value.x, data = dat,
         col = rgb(0,0,0,0.6), cex = 1.4,
         main = paste( param.y, "at", site.y, " vs. ", param.x, "at", site.x,"\n",
                       min(dat$date), " - ", max(dat$date) ),
         cex.main = 1.6, cex.lab = 1.4,
         xlim = range( c(0,dat$value.x,pred.seq) ),
         ylim = range( c(0,dat$value.y,pred$fit-pred$se.fit*1.96,
                         pred$fit+pred$se.fit*1.96)*1.2 ),
         xlab = paste0( param.x, " (", unit.x, ")" ), cex.axis = 1.4,
         ylab = paste0( param.y, " (", unit.y, ")" ), las = 1
   )
   abline( v = axTicks(1), col = rgb(0,0,0,0.2) )
   abline( h = axTicks(2), col = rgb(0,0,0,0.2) )
   abline( v = 0, col = rgb(0,0,0,0.5) )
   abline( h = 0, col = rgb(0,0,0,0.5) )
 
   # plot model fit
   lines( pred$fit ~ pred.seq, col = pcol1, lwd = 2, lty = 2 )
   polygon( x = c( pred.seq, rev(pred.seq) ),
            y = c( (pred$fit+pred$se.fit*1.96), rev((pred$fit-pred$se.fit*1.96)) ),
            col = pcol2, border = rgb(0,0,0,0) )
   # write model R2 and pval
   legend( 'top', horiz = TRUE, bty = 'n',
           border = rgb(0,0,0,0), cex = 1.4,
           legend = paste0( "N = ", nrow(dat), " ",
                            "R2 = ", round(summary(mod1)$r.sq,3),
                            " ", pstring, " delay = ", delay )
   )
   return( list( data = dat, model = mod1 ) )
 
 }  # // end OTB_corr()
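
To clarify the `delay` mechanism inside `OTB_corr()`: the explanatory series is shifted with `dplyr::lag()` after the join, so each response month is paired with the explanatory value from `delay` months earlier. A standalone sketch with made-up numbers:

```r
library(dplyr)

dat <- data.frame(
  date    = seq.Date( as.Date("2000-01-01"), by = "month", length.out = 5 ),
  value.x = c(10, 20, 30, 40, 50),      # e.g. monthly TN load
  value.y = c(1.0, 1.2, 1.5, 1.1, 1.3)  # e.g. monthly chlorophyll-a
)

dat$value.x <- dplyr::lag( dat$value.x, n = 1 )   # delay = 1: Feb chl pairs with Jan load
na.omit( dat )
```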
The plots below show relationships between log10-transformed external
TN loads and chlorophyll-a concentrations within the three OTB
sub-segments compartmentalized by the Courtney Campbell, Howard
@@ -1062,39 +1052,39 @@
Nitrogen loads and chlorophyll, with delayed loads
scatter (unexplained variance) around the regression line. Although not
visualized below, the diagnostics for each model indicate that the
residuals are homoskedastic and approximately Gaussian.
 par(mfrow=c(3,3))
 # assemble and re-label data for locations north of Courtney Campbell
 epc1 <- epcwq2[ which( epcwq2$site %in% c(46,47,60,61,62,64) ), ]
 epc1$site <- "North CC"
 # models
 epc1_load_Chla_0 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
                               'OTB watershed', 'North CC', log = TRUE, delay = 0 )
 epc1_load_Chla_1 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
                               'OTB watershed', 'North CC', log = TRUE, delay = 1 )
 epc1_load_Chla_2 <- OTB_corr( x = loads, y = epc1, 'TN load', 'Chla',
                               'OTB watershed', 'North CC', log = TRUE, delay = 2 )
 
 # assemble and re-label data for locations between Frankland & Campbell
 epc2 <- epcwq2[ which( epcwq2$site %in% c(40,41,42,63,65,66) ), ]
 epc2$site <- "HF-CC"
 # models
 epc2_load_Chla_0 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
                               'OTB watershed', 'HF-CC', log = TRUE, delay = 0 )
 epc2_load_Chla_1 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
                               'OTB watershed', 'HF-CC', log = TRUE, delay = 1 )
 epc2_load_Chla_2 <- OTB_corr( x = loads, y = epc2, 'TN load', 'Chla',
                               'OTB watershed', 'HF-CC', log = TRUE, delay = 2 )
 
 # assemble and re-label data for locations between Gandy & Frankland
 epc3 <- epcwq2[ which( epcwq2$site %in% c(38,50,51,67) ), ]
 epc3$site <- "GB-HF"
 # models
 epc3_load_Chla_0 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
                               'OTB watershed', 'GB-HF', log = TRUE, delay = 0 )
 epc3_load_Chla_1 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
                               'OTB watershed', 'GB-HF', log = TRUE, delay = 1 )
 epc3_load_Chla_2 <- OTB_corr( x = loads, y = epc3, 'TN load', 'Chla',
                               'OTB watershed', 'GB-HF', log = TRUE, delay = 2 )
The plots below show relationships between log10-transformed TN loads
and chlorophyll-a concentrations within Pinellas County strata E1–E5
@@ -1107,37 +1097,37 @@
Nitrogen loads and chlorophyll, with delayed loads
although the R2 values are substantially lower. Although not visualized
below, the diagnostics for each model indicate that the residuals are
homoskedastic and approximately Gaussian.
 par(mfrow=c(5,3))
 E1_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
                             log = TRUE, delay = 0 )
 E1_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
                             log = TRUE, delay = 1 )
 E1_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E1',
                             log = TRUE, delay = 2 )
 E2_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
                             log = TRUE, delay = 0 )
 E2_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
                             log = TRUE, delay = 1 )
 E2_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E2',
                             log = TRUE, delay = 2 )
 E3_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
                             log = TRUE, delay = 0 )
 E3_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
                             log = TRUE, delay = 1 )
 E3_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E3',
                             log = TRUE, delay = 2 )
 E4_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
                             log = TRUE, delay = 0 )
 E4_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
                             log = TRUE, delay = 1 )
 E4_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E4',
                             log = TRUE, delay = 2 )
 E5_load_Chla_0 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
                             log = TRUE, delay = 0 )
 E5_load_Chla_1 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
                             log = TRUE, delay = 1 )
 E5_load_Chla_2 <- OTB_corr( x = loads, y = pcwq2, 'TN load', 'Chla', 'OTB watershed', 'E5',
                             log = TRUE, delay = 2 )
@@ -1168,31 +1158,31 @@
Chlorophyll and light
correlation. Although not visualized below, the diagnostics for each
model indicate that the residuals are homoskedastic and approximately
Gaussian.
 par(mfrow=c(3,3))
 
 # models
 epc1_Chla_Secchi_0 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
                                 'North CC', log = TRUE, delay = 0 )
 epc1_Chla_Secchi_1 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
                                 'North CC', log = TRUE, delay = 1 )
 epc1_Chla_Secchi_2 <- OTB_corr( x = epc1, y = epc1, 'Chla', 'Secchi',
                                 'North CC', log = TRUE, delay = 2 )
 
 # models
 epc2_Chla_Secchi_0 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
                                 'HF-CC', log = TRUE, delay = 0 )
 epc2_Chla_Secchi_1 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
                                 'HF-CC', log = TRUE, delay = 1 )
 epc2_Chla_Secchi_2 <- OTB_corr( x = epc2, y = epc2, 'Chla', 'Secchi',
                                 'HF-CC', log = TRUE, delay = 2 )
 
 # models
 epc3_Chla_Secchi_0 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
                                 'GB-HF', log = TRUE, delay = 0 )
 epc3_Chla_Secchi_1 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
                                 'GB-HF', log = TRUE, delay = 1 )
 epc3_Chla_Secchi_2 <- OTB_corr( x = epc3, y = epc3, 'Chla', 'Secchi',
                                 'GB-HF', log = TRUE, delay = 2 )
@@ -1219,21 +1209,21 @@
E1 (Safety Harbor / Mobbly Bay)
(value.x
), and their tensor product interaction. Model
diagnostics indicate that the residuals are approximately Gaussian (QQ
plot) and that the model had sufficient basis dimensions (k).
 # join TN load data to chlorophyll data
 gamdat_E1 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E1" ), ],
                          y = loads,
                          by = 'date' )
 nrow(gamdat_E1)
 ## [1] 128
 # decimal date
 gamdat_E1$decdate <- gamdat_E1$date |> decimal_date()
 # fit gam
 gam1 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E1 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam1, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 22 iterations.
@@ -1245,8 +1235,8 @@ E1 (Safety Harbor / Mobbly Bay)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120 3 1.07 0.78
-summary( gam1 )
+## te(decdate,value.x) 120 3 1.07 0.82
+summary( gam1 )
##
## Family: gaussian
## Link function: identity
@@ -1274,10 +1264,10 @@ E1 (Safety Harbor / Mobbly Bay)
changed over time (decdate
). In the heatmap image, colors
range from red (relatively low magnitude) to yellow (higher magnitude).
White patches indicate gaps in the input space.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam1, scheme = 1)
 plot.gam( gam1, scheme = 2)
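
Another way to read the fitted `te(decdate, value.x)` surface, beyond the two `plot.gam()` views above, is to predict across the observed load range at a few fixed dates and compare the implied load-chlorophyll profiles; the years chosen below are arbitrary:

```r
load_grid <- seq( min(gamdat_E1$value.x), max(gamdat_E1$value.x), length.out = 50 )
yrs  <- c(2005, 2012, 2019)
fits <- sapply( yrs, function(yr)
  predict( gam1, newdata = data.frame( decdate = yr, value.x = load_grid ) ) )

matplot( load_grid, fits, type = "l", lty = 1, las = 1,
         xlab = "TN load (tons)", ylab = "Predicted log10 chlorophyll-a" )
legend( "topleft", legend = yrs, lty = 1, col = 1:3, bty = "n" )
```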
@@ -1285,21 +1275,21 @@ E1 (Safety Harbor / Mobbly Bay)
E2 (Largo Inlet)
The model for stratum E2 (Largo Inlet) shows good diagnostics.
 # join TN load data to chlorophyll data
 gamdat_E2 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E2" ), ],
                          y = loads,
                          by = 'date' )
 nrow(gamdat_E2)
 ## [1] 133
 # decimal date
 gamdat_E2$decdate <- gamdat_E2$date |> decimal_date()
 # fit gam
 gam2 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E2 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam2, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 14 iterations.
@@ -1311,8 +1301,8 @@ E2 (Largo Inlet)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.00 8.14 1.01 0.58
-summary( gam2 )
+## te(decdate,value.x) 120.00 8.14 1.01 0.47
+summary( gam2 )
##
## Family: gaussian
## Link function: identity
@@ -1339,10 +1329,10 @@ E2 (Largo Inlet)
substantially over time (decdate
) at Largo Inlet, with TN
loads (value.x
) most strongly associated with chlorophyll
concentrations between about 2010 and 2015.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam2, scheme = 1)
 plot.gam( gam2, scheme = 2)
@@ -1350,21 +1340,21 @@ E2 (Largo Inlet)
E3 (Feather Sound)
The model for stratum E3 (Feather Sound) shows good diagnostics.
 # join TN load data to chlorophyll data
 gamdat_E3 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E3" ), ],
                          y = loads,
                          by = 'date' )
 nrow(gamdat_E3)
 ## [1] 132
 # decimal date
 gamdat_E3$decdate <- gamdat_E3$date |> decimal_date()
 # fit gam
 gam3 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E3 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam3, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 22 iterations.
@@ -1379,7 +1369,7 @@ E3 (Feather Sound)
## te(decdate,value.x) 120.00 4.52 0.89 0.045 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
-summary( gam3 )
+summary( gam3 )
##
## Family: gaussian
## Link function: identity
@@ -1404,10 +1394,10 @@ E3 (Feather Sound)
The plots below suggest that the association between chlorophyll
concentrations and TN load (value.x
) has not changed
substantially over time (decdate
) at Feather Sound.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam3, scheme = 1)
 plot.gam( gam3, scheme = 2)
@@ -1415,21 +1405,21 @@ E3 (Feather Sound)
E4 (Gateway)
The model for stratum E4 (Gateway) shows good diagnostics.
 # join TN load data to chlorophyll data
 gamdat_E4 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E4" ), ],
                          y = loads,
                          by = 'date' )
 nrow(gamdat_E4)
 ## [1] 126
 # decimal date
 gamdat_E4$decdate <- gamdat_E4$date |> decimal_date()
 # fit gam
 gam4 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E4 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam4, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 40 iterations.
@@ -1442,7 +1432,7 @@ E4 (Gateway)
##
## k' edf k-index p-value
## te(decdate,value.x) 120.00 5.71 0.97 0.28
-summary( gam4 )
+summary( gam4 )
##
## Family: gaussian
## Link function: identity
@@ -1467,10 +1457,10 @@ E4 (Gateway)
The plots below suggest that the association between chlorophyll
concentrations and TN load (value.x
) has not changed
substantially over time (decdate
) at Gateway.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam4, scheme = 1)
 plot.gam( gam4, scheme = 2)
@@ -1478,21 +1468,21 @@ E4 (Gateway)
E5 (Weedon Island)
The model for stratum E5 (Weedon Island) shows good diagnostics.
 # join TN load data to chlorophyll data
 gamdat_E5 <- inner_join( x = pcwq2[ which( pcwq2$param=='Chla' & pcwq2$site=="E5" ), ],
                          y = loads,
                          by = 'date' )
 nrow(gamdat_E5)
 ## [1] 129
 # decimal date
 gamdat_E5$decdate <- gamdat_E5$date |> decimal_date()
 # fit gam
 gam5 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_E5 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam5, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 16 iterations.
@@ -1504,8 +1494,8 @@ E5 (Weedon Island)
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 12.2 1.21 0.99
-summary( gam5 )
+## te(decdate,value.x) 120.0 12.2 1.21 1
+summary( gam5 )
##
## Family: gaussian
## Link function: identity
@@ -1531,10 +1521,10 @@ E5 (Weedon Island)
between chlorophyll concentrations and TN load (value.x
) at
Weedon Island over time (decdate
), for moderate to high
levels of TN load.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam5, scheme = 1)
 plot.gam( gam5, scheme = 2)
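
Because the E5 interpretation is limited to moderate-to-high loads, it can help to blank parts of the decdate x load plane that are far from any observation when drawing the heatmap; `plot.gam()`'s `too.far` argument does this (the value below is an arbitrary, stricter-than-default choice):

```r
# redraw the E5 smooth, masking grid cells far from the observed data
par( mfrow = c(1, 2) )
plot.gam( gam5, scheme = 2, too.far = 0.05 )
plot.gam( gam5, scheme = 1 )
```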
@@ -1556,22 +1546,22 @@
North of Courtney Campbell
the linear model, we aggregate the data into monthly averages across EPC
monitoring sites north of the Courtney Campbell causeway. The model
shows good diagnostics.
 # join chlorophyll data to Secchi data
 epc1.chl <- epc1[ which( epc1$param=="Chla" ), ]
 epc1.chl <- epc1.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 epc1.sec <- epc1[ which( epc1$param=="Secchi" ), ]
 epc1.sec <- epc1.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 gamdat_epc1 <- inner_join( x = epc1.chl, y = epc1.sec, by = 'date' )
 nrow(gamdat_E2)
 ## [1] 133
 # decimal date
 gamdat_epc1$decdate <- gamdat_epc1$date |> decimal_date()
 # fit gam
 gam6 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc1 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam6, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 10 iterations.
@@ -1584,7 +1574,7 @@ North of Courtney Campbell
##
## k' edf k-index p-value
## te(decdate,value.x) 120.0 34.3 1 0.52
-summary( gam6 )
+summary( gam6 )
##
## Family: gaussian
## Link function: identity
@@ -1612,10 +1602,10 @@ North of Courtney Campbell
substantially over time (decdate
). Redder areas in the
heatmap indicate time periods when increased chlorophyll concentrations
were more strongly associated with reduced water clarity.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam6, scheme = 1)
 plot.gam( gam6, scheme = 2)
@@ -1626,22 +1616,22 @@ Between the Frankland Bridge and Courtney Campbell
the linear model, we aggregate the data into monthly averages across EPC
monitoring sites between the Frankland bridge and the Courtney Campbell
causeway. The model shows good diagnostics.
 # join chlorophyll data to Secchi data
 epc2.chl <- epc2[ which( epc2$param=="Chla" ), ]
 epc2.chl <- epc2.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 epc2.sec <- epc2[ which( epc2$param=="Secchi" ), ]
 epc2.sec <- epc2.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 gamdat_epc2 <- inner_join( x = epc2.chl, y = epc2.sec, by = 'date' )
 nrow(gamdat_E2)
 ## [1] 133
 # decimal date
 gamdat_epc2$decdate <- gamdat_epc2$date |> decimal_date()
 # fit gam
 gam7 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc2 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam7, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
@@ -1653,8 +1643,8 @@ Between the Frankland Bridge and Courtney Campbell
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 30.4 1.03 0.78
-summary( gam7 )
+## te(decdate,value.x) 120.0 30.4 1.03 0.68
+summary( gam7 )
##
## Family: gaussian
## Link function: identity
@@ -1690,10 +1680,10 @@ Between the Frankland Bridge and Courtney Campbell
summary(...)
outputs), as do the scales of the axes of the
coefficient surface plots. Further interpretation and comparison of
these results can be pursued at a later phase of the project.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam7, scheme = 1)
 plot.gam( gam7, scheme = 2)
@@ -1704,22 +1694,22 @@ Between Gandy Blvd and the Frankland Bridge
the linear model, we aggregate the data into monthly averages across EPC
monitoring sites between Gandy Blvd and the Frankland Bridge. The model
shows good diagnostics.
 # join chlorophyll data to Secchi data
 epc3.chl <- epc3[ which( epc3$param=="Chla" ), ]
 epc3.chl <- epc3.chl |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 epc3.sec <- epc3[ which( epc3$param=="Secchi" ), ]
 epc3.sec <- epc3.sec |> group_by(date) |> summarize(value=mean(value)) |> as.data.frame()
 gamdat_epc3 <- inner_join( x = epc3.chl, y = epc3.sec, by = 'date' )
 nrow(gamdat_E2)
 ## [1] 133
 # decimal date
 gamdat_epc3$decdate <- gamdat_epc3$date |> decimal_date()
 # fit gam
 gam8 <- gam( log10(value.y) ~ te( decdate, value.x, k=11 ), data = gamdat_epc3 )
 # model diagnostics
 par(mfrow=c(2,2))
 gam.check( gam8, rep = 1000 )
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
@@ -1731,8 +1721,8 @@ Between Gandy Blvd and the Frankland Bridge
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
-## te(decdate,value.x) 120.0 22.9 1.05 0.86
-summary( gam8 )
+## te(decdate,value.x) 120.0 22.9 1.05 0.85
+summary( gam8 )
##
## Family: gaussian
## Link function: identity
@@ -1760,10 +1750,10 @@ Between Gandy Blvd and the Frankland Bridge
Comparisons of these results with those of previous models
(gam6
and gam7
) are not straightforward and
can be pursued during a later phase of the project.
 # plot smooth
 par(mfrow=c(1,2))
 plot.gam( gam8, scheme = 1)
 plot.gam( gam8, scheme = 2)