-
Notifications
You must be signed in to change notification settings - Fork 0
/
updated_election_scraper_2024.R
50 lines (40 loc) · 2.64 KB
/
updated_election_scraper_2024.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
library(tidyverse)
library(rvest)
# Scrape plan: one row per output CSV. `site` is the results page to fetch and
# `x_path` is the XPath of the table to pull from that page.
scraper <- local({
  results_root <- paste0(
    "https://elections.maryland.gov/",
    "elections/2024/general_results/"
  )
  table_xpath <- '//*[@id="primary_right_col"]/div/div[%d]/table'

  # Eight district tables sit on one page at every second div (8, 10, ..., 22);
  # the Senate and ballot-question tables are each at div[8] of their own page.
  district_ids <- 1:8
  div_index <- c(seq(8L, 22L, by = 2L), 8L, 8L)
  page_file <- c(
    rep("gen_results_2024_3.html", length(district_ids)),
    "gen_results_2024_2.html",
    "gen_qresults_2024_1.html"
  )

  data.frame(
    file_name = c(
      paste0("district_", district_ids, ".csv"),
      "senate.csv",
      "question_1.csv"
    ),
    x_path = sprintf(table_xpath, div_index),
    site = paste0(results_root, page_file)
  )
})
# Scrape every table listed in `scraper` and save each one as a CSV.
#
# For each row of `scraper` this fetches `site`, takes the first table matched
# by `x_path`, strips commas from the `Total` column, drops the final (totals)
# row and writes the result to `file_name`. Several rows share a URL, so each
# distinct page is downloaded and parsed only once.
pages <- list()
for (row_number in seq_len(nrow(scraper))) {
  each_row <- scraper %>% slice(row_number)
  site <- as.character(each_row$site)
  x_path <- as.character(each_row$x_path)
  file_name <- as.character(each_row$file_name)

  # Reuse the parsed page when an earlier row already fetched this URL
  if (is.null(pages[[site]])) {
    pages[[site]] <- read_html(site)
  }

  tables <- pages[[site]] %>%
    html_nodes(xpath = x_path) %>%
    html_table()
  # Fail with a useful message instead of "subscript out of bounds"
  if (length(tables) == 0) {
    stop("No table found at ", x_path, " on ", site, call. = FALSE)
  }

  # Convert the tibble to a data frame (first match only)
  results_table <- as.data.frame(tables[[1]])
  if (!"Total" %in% colnames(results_table)) {
    stop("Table for ", file_name, " has no 'Total' column", call. = FALSE)
  }

  # Remove the commas from the Total column
  results_table[["Total"]] <- gsub(
    ",", "", results_table[["Total"]], fixed = TRUE
  )
  # Remove totals row (the last row of the table)
  results_table <- head(results_table, -1)

  # Fiddle with Senate so it doesn't name write-in candidates: keep the first
  # three rows and fold every later row into a single "Other Write-Ins" row.
  if (file_name == "senate.csv") {
    results_table[["Total"]] <- as.numeric(results_table[["Total"]])
    write_in_votes <- sum(results_table[["Total"]][-(1:3)])

    # Build the summary row from the table's own columns rather than assuming
    # a fixed column count or that Total sits in a particular position.
    write_in_row <- data.frame(
      matrix(NA, nrow = 1, ncol = ncol(results_table))
    )
    colnames(write_in_row) <- colnames(results_table)
    write_in_row[[1]] <- "Other Write-Ins"
    write_in_row[["Total"]] <- write_in_votes

    results_table <- head(results_table, 3) %>% bind_rows(write_in_row)
  }

  # Write the CSV
  write_csv(results_table, file_name)
}