Skip to content

Commit

Permalink
Merge pull request #173 from r-transit/dev/route-frequencies
Browse files Browse the repository at this point in the history
restore get_route_frequency and vignette
  • Loading branch information
polettif authored Nov 22, 2021
2 parents 0440831 + a997c6a commit d738c16
Show file tree
Hide file tree
Showing 12 changed files with 583 additions and 50 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ export(filter_feed_by_trips)
export(filter_stop_times)
export(filter_stops)
export(get_feedlist)
export(get_route_frequency)
export(get_route_geometry)
export(get_stop_frequency)
export(get_trip_geometry)
Expand Down
106 changes: 86 additions & 20 deletions R/frequencies.R
Original file line number Diff line number Diff line change
@@ -1,18 +1,24 @@
#' Get Stop Frequency
#'
#' Note that some GTFS feeds contain a frequency data frame already.
#' Calculate the number of departures and mean headways for all stops within a
#' given timespan and for given service_ids.
#'
#' @note Some GTFS feeds contain a frequency data frame already.
#' Consider using this instead, as it will be more accurate than what
#' tidytransit calculates.
#' tidytransit calculates.
#'
#' @param gtfs_obj a list of gtfs dataframes as read by [read_gtfs()].
#' @param start_hour (optional) an integer indicating the start hour (default 6)
#' @param end_hour (optional) an integer indicating the end hour (default 22)
#' @param service_ids (optional) a set of service_ids from the calendar dataframe
#' identifying a particular service id. If not provided the service_id
#' with the most departures is used
#' @param by_route default TRUE, if FALSE then calculate headway for any line coming through the stop in the same direction on the same schedule.
#' @param start_time analysis start time, can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#' @param end_time analysis perdiod end time, can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#' @param service_ids A set of service_ids from the calendar dataframe
#' identifying a particular service id. If not provided, the service_id
#' with the most departures is used.
#' @param by_route Default TRUE, if FALSE then calculate headway for any line coming
#' through the stop in the same direction on the same schedule.
#' @return dataframe of stops with the number of departures and the headway
#' (departures divided by timespan) as columns.
#' (departures divided by timespan) in seconds as columns
#'
#' @importFrom dplyr %>%
#' @importFrom rlang .data !! quo enquo
Expand All @@ -24,11 +30,16 @@
#' x <- order(stop_frequency$mean_headway)
#' head(stop_frequency[x,])
get_stop_frequency <- function(gtfs_obj,
start_hour = 6,
end_hour = 22,
start_time = "06:00:00",
end_time = "22:00:00",
service_ids = NULL,
by_route = FALSE) {
by_route = TRUE) {
n_deps <- direction_id <- NULL

if(is.character(start_time)) start_time <- hhmmss_to_seconds(start_time)
if(is.character(end_time)) end_time <- hhmmss_to_seconds(end_time)

# get service id with most departures
if(is.null(service_ids)) {
dep_per_trip = gtfs_obj$stop_times %>%
dplyr::group_by(trip_id) %>% dplyr::count(name = "n_deps") %>%
Expand All @@ -39,27 +50,82 @@ get_stop_frequency <- function(gtfs_obj,
dplyr::arrange(dplyr::desc(n_deps))
service_ids = dep_per_service_id$service_id[1]
}

# filter stop_times to service_ids and start/end_time
trips = gtfs_obj$trips %>% filter(service_id %in% service_ids)

# TODO change times to hms or strings
stop_times = gtfs_obj$stop_times %>%
filter(trip_id %in% trips$trip_id) %>%
filter(departure_time >= start_hour*3600 & arrival_time <= end_hour*3600) %>%
filter(departure_time >= start_time & arrival_time <= end_time) %>%
left_join(trips[c("trip_id", "route_id", "direction_id", "service_id")], "trip_id")

# find number of departure per stop_id, route_id, service_id
# find number of departure per stop_id (route_id, direction_id, service_id)
if(by_route) {
freq = stop_times %>%
dplyr::group_by(stop_id, route_id, direction_id, service_id) %>%
dplyr::count(name = "n_departures") %>% dplyr::ungroup()
freq = stop_times %>%
dplyr::group_by(stop_id, route_id, direction_id, service_id) %>%
dplyr::count(name = "n_departures") %>% dplyr::ungroup()
} else {
freq = stop_times %>%
dplyr::group_by(stop_id, service_id) %>%
dplyr::count(name = "n_departures") %>% dplyr::ungroup()
}

# calculate average headway
duration = (end_hour-start_hour)*3600
duration = as.numeric(end_time-start_time)
freq$mean_headway <- round(duration / freq$n_departures)

freq
}

#' Get Route Frequency
#'
#' Calculate the number of departures and mean headways for routes within a given timespan
#' and for given service_ids.
#'
#' @note Some GTFS feeds contain a frequency data frame already.
#' Consider using this instead, as it will be more accurate than what
#' tidytransit calculates.
#'
#' @param gtfs_obj a list of gtfs dataframes as read by the trread package.
#' @param start_time analysis start time, can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#' @param end_time analysis perdiod end time, can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#' @param service_ids A set of service_ids from the calendar dataframe
#' identifying a particular service id. If not provided, the service_id
#' with the most departures is used.
#' @return a dataframe of routes with variables or headway/frequency in seconds for a route
#' within a given time frame
#' @export
#' @examples
#' data(gtfs_duke)
#' routes_frequency <- get_route_frequency(gtfs_duke)
#' x <- order(routes_frequency$median_headways)
#' head(routes_frequency[x,])
get_route_frequency <- function(gtfs_obj,
start_time = "06:00:00",
end_time = "22:00:00",
service_ids = NULL) {
total_departures <- median_headways <- mean_headways <- NULL
n_departures <- mean_headway <- st_dev_headways <- stop_count <- NULL
if(feed_contains(gtfs_obj, "frequencies") && nrow(gtfs_obj$frequencies) > 0) {
message("A pre-calculated frequencies dataframe exists for this feed already,
consider using that.")
}
departures_per_stop = get_stop_frequency(gtfs_obj, start_time, end_time,
service_ids, by_route = TRUE)

if(dim(departures_per_stop)[[1]] != 0) {
routes_frequency = departures_per_stop %>%
group_by(route_id) %>%
summarise(total_departures = sum(n_departures),
median_headways = round(median(mean_headway)),
mean_headways = round(mean(mean_headway)),
st_dev_headways = round(sd(mean_headway), 2),
stop_count = dplyr::n())
} else {
warning("Failed to calculate frequency, try passing a service_id from calendar_df.")
}

return(routes_frequency)
}
2 changes: 1 addition & 1 deletion R/joins.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ filter_feed_by_stops = function(gtfs_obj, stop_ids = NULL, stop_names = NULL) {
#' \code{\link{filter_feed_by_trips}}, \code{\link{filter_feed_by_date}}
#' @export
filter_feed_by_date = function(gtfs_obj, extract_date,
min_departure_time = "00:00:00", max_arrival_time = "48:00:00") {
min_departure_time, max_arrival_time) {
st = filter_stop_times(gtfs_obj, extract_date, min_departure_time, max_arrival_time)
st <- dplyr::as_tibble(st)
attributes(st)$stops <- NULL
Expand Down
14 changes: 9 additions & 5 deletions R/raptor.R
Original file line number Diff line number Diff line change
Expand Up @@ -440,10 +440,10 @@ travel_times = function(filtered_stop_times,
#'
#' @param gtfs_obj a gtfs feed
#' @param extract_date date to extract trips from this day (Date or "YYYY-MM-DD" string)
#' @param min_departure_time The earliest departure time. Can be given as "HH:MM:SS",
#' @param min_departure_time (optional) The earliest departure time. Can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#' @param max_arrival_time The latest arrival time. Can be given as "HH:MM:SS",
#' hms object or numeric value in seconds
#' @param max_arrival_time (optional) The latest arrival time. Can be given as "HH:MM:SS",
#' hms object or numeric value in seconds.
#'
#' @return Filtered `stop_times` data.table for [travel_times()] and [raptor()].
#'
Expand All @@ -462,10 +462,14 @@ filter_stop_times = function(gtfs_obj,
if(is.character(extract_date)) {
extract_date <- as.Date(extract_date)
}
if(is.character(min_departure_time)) {
if(missing(min_departure_time)) {
min_departure_time <- 0
} else if(is.character(min_departure_time)) {
min_departure_time <- hhmmss_to_seconds(min_departure_time)
}
if(is.character(max_arrival_time)) {
if(missing(max_arrival_time)) {
max_arrival_time <- max(gtfs_obj$stop_times$arrival_time)+1
} else if(is.character(max_arrival_time)) {
max_arrival_time <- hhmmss_to_seconds(max_arrival_time)
}
min_departure_time <- as.numeric(min_departure_time)
Expand Down
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ status](https://github.com/r-transit/tidytransit/workflows/R-CMD-check/badge.svg
# tidytransit

Use tidytransit to map transit stops and routes, calculate travel times and transit
frequencies, and validate transit feeds. tidytransit reads the
frequencies, and validate transit feeds. Tidytransit reads the
[General Transit Feed Specification](http://gtfs.org/) into
[tidyverse](https://tibble.tidyverse.org/) and
[simple features](https://en.wikipedia.org/wiki/Simple_Features) data frames.
Expand All @@ -23,8 +23,9 @@ Tidytransit can be used to:
Have a look at the following vignettes to see how tidytransit can be used to analyse a feed:

- [the tutorial](http://tidytransit.r-transit.org/articles/introduction.html)
- [introduction to service patterns](http://tidytransit.r-transit.org/articles/servicepatterns.html)
- [introduction to time tables](http://tidytransit.r-transit.org/articles/timetable.html)
- [calendar and service patterns](http://tidytransit.r-transit.org/articles/servicepatterns.html)
- [create time tables for stops](http://tidytransit.r-transit.org/articles/timetable.html)
- [frequency and headway calculations](http://tidytransit.r-transit.org/articles/frequency.html)

## Installation

Expand Down
10 changes: 5 additions & 5 deletions man/filter_feed_by_date.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 3 additions & 3 deletions man/filter_stop_times.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

45 changes: 45 additions & 0 deletions man/get_route_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

29 changes: 18 additions & 11 deletions man/get_stop_frequency.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 14 additions & 2 deletions tests/testthat/test-headways.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ context("Frequencies are calculated correctly")

# TODO rewrite with synthesized sample data
test_that("Stop frequencies (headways) for included data are as expected", {
expect_equal(nrow(get_stop_frequency(gtfs_duke)), 47)
expect_equal(nrow(get_stop_frequency(gtfs_duke, start_hour = 10, end_hour = 11)), 41)
expect_equal(nrow(get_stop_frequency(gtfs_duke, by_route = FALSE)), 47)
expect_equal(nrow(get_stop_frequency(gtfs_duke, start_time = 10*3600, end_time = 11*3600, by_route = FALSE)), 41)

stops_frequency <- get_stop_frequency(gtfs_duke, service_ids = "c_853_b_19828_d_64")
ex_address <- stops_frequency$mean_headway[stops_frequency$stop_id==778058]
Expand All @@ -16,3 +16,15 @@ test_that("Stop frequencies (headways) for included data are as expected", {
colnames(stops_frequency_by_route),
c("stop_id", "route_id", "direction_id", "service_id", "n_departures", "mean_headway"))
})

test_that("Route frequencies (headways)", {
# TODO rewrite with synthesized sample data
routes_frequency <- get_route_frequency(gtfs_duke)
expect_equal(routes_frequency[routes_frequency$route_id == 1679, ]$median_headways, 24*60)
})

test_that("Route frequencies (headways) w/ service id", {
# TODO rewrite with synthesized sample data
routes_frequency <- get_route_frequency(gtfs_duke, service_id = "c_883_b_21967_d_31")
expect_equal(routes_frequency[routes_frequency$route_id == 1680, ]$median_headways, (53+1/3)*60)
})
6 changes: 6 additions & 0 deletions tests/testthat/test-raptor.R
Original file line number Diff line number Diff line change
Expand Up @@ -345,3 +345,9 @@ test_that("raptor with filtered feed", {
stop_name = "One", time_range = 3600)
expect_equal(tt1, tt2)
})

test_that("filter feed without min/max time", {
st.1 = filter_stop_times(g, "2018-10-01")
st.2 = filter_stop_times(g, "2018-10-01", "00:00:00", 999*3600)
expect_true(all(st.1 == st.2))
})
Loading

0 comments on commit d738c16

Please sign in to comment.