-
Notifications
You must be signed in to change notification settings - Fork 0
/
stack_scrap.R
96 lines (66 loc) · 2.07 KB
/
stack_scrap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Navegando como em webbrowser - StackOverflow
library(tidyverse)
library(rvest)
# Open a browsing session on Stack Overflow and submit its search form with
# the query "R"; `session` then points at the results page.
# NOTE(review): assumes the first form on the landing page is the search box
# and that its query field is named `q` -- confirm against the page markup.
# NOTE(review): rvest 1.0.0 renamed html_session()/set_values()/submit_form()
# to session()/html_form_set()/session_submit() -- confirm the installed
# version before migrating.
url <- "https://stackoverflow.com"
session <- url %>% html_session()
form <- session %>% html_form()
# Fill a copy so `form` keeps the forms exactly as they were downloaded
filled_form <- form
filled_form[[1]] <- form[[1]] %>% set_values(q = "R")
session <- session %>% submit_form(form = filled_form[[1]])
# Questions on the page: the `id` attribute of every `div.question-summary`
# element found in the current session page
question_id <- html_attr(html_nodes(session, "div.question-summary"), "id")
# Per-question fields, read from the first question listed on the page.
# The question is picked from the scraped `question_id` vector instead of a
# fixed id, so the selectors keep working when the search results change.
stopifnot(length(question_id) > 0, !is.na(question_id[[1]]))
question_xpath <- sprintf('//*[@id="%s"]', question_id[[1]])
question_node <- session %>%
  html_nodes(xpath = question_xpath)

# Upvotes: text of the vote counter inside the votes box, as a number
question_node %>%
  html_nodes("div.votes") %>%
  html_nodes("span.vote-count-post") %>%
  html_text() %>%
  as.numeric()

# Answers: the <strong> text inside the status box, queried once per status
# class (the first query returns text, the second converts it to a number).
# NOTE(review): presumably only one of the two matches for a given
# question -- confirm against the page markup.
question_node %>%
  html_node("div.status.unanswered") %>%
  html_nodes("strong") %>%
  html_text()
question_node %>%
  html_node("div.status.answered") %>%
  html_nodes("strong") %>%
  html_text() %>%
  as.numeric()

# Tags: text of the links under the positional path div[2]/div[2] of the
# question summary
session %>%
  html_nodes(xpath = paste0(question_xpath, "/div[2]/div[2]")) %>%
  html_nodes("a") %>%
  html_text()

# Question title (kept in `question`; it is used later to follow the link)
question <- question_node %>%
  html_nodes("h3") %>%
  html_text()

# Question link: href of the links inside the title heading
question_node %>%
  html_nodes("h3") %>%
  html_nodes("a") %>%
  html_attr("href")

# User: text of the first link inside each user-details box
question_node %>%
  html_nodes("div.user-details") %>%
  html_node("a") %>%
  html_text()
# Open the question page by following the link selected by `question`
# (the title text scraped earlier)
session <- follow_link(session, question)
# Post body: text of every paragraph inside the first `div.post-text`,
# glued into a single string with no separator (printed, not stored)
paste(
  html_text(html_nodes(html_node(session, "div.post-text"), "p")),
  collapse = ""
)
# Step back to the page the session was on before following the link
session <- back(session)
# Move to another results page: take the href of the first link found in the
# pager and point the session at it.
# NOTE(review): assumes the first pager link is the next page -- confirm.
next_page <- html_attr(
  html_nodes(html_nodes(session, "div.pager.fl"), "a"),
  "href"
)[[1]]
session <- jump_to(session, next_page)