-
Notifications
You must be signed in to change notification settings - Fork 1
/
fz.go
149 lines (122 loc) · 3.35 KB
/
fz.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
package zenfo
import (
"errors"
"fmt"
"regexp"
"strings"
"time"
"github.com/PuerkitoBio/goquery"
)
// Fz crawls floatingzendo.org, satisfies Worker interface
// http://www.floatingzendo.org/events/
type Fz struct {
// venueMap holds the known venues, keyed by the lower-case strings that
// Init registers (a street address and "jikoji").
venueMap map[string]*Venue
// client is the HTTP client handed to Init; Events uses it to fetch the page.
client *Client
// log receives the progress messages that Events emits while crawling.
log chan string
}
// Name reports the human-friendly label used for this worker in logs.
func (f *Fz) Name() string {
	const label = "Floating Zendo (floatingzendo.org)"
	return label
}
// Init stores the HTTP client and the log channel on the worker and
// (re)builds the internal venue map. It always returns nil.
//
// Venue map keys are lower-case because Events looks venues up by the
// cleaned text of the venue column.
func (f *Fz) Init(client *Client, log chan string) error {
	f.client = client
	f.log = log

	f.venueMap = map[string]*Venue{
		"1041 morse st., san jose": &Venue{
			Name:  "Floating Zendo - San Jose Friends Meeting House",
			Addr:  "1041 Morse St, San Jose, CA 95126",
			Email: "[email protected]",
			Lat:   37.341372,
			// West longitudes are negative; the positive value used previously
			// put this San Jose address in the eastern hemisphere.
			Lng:     -121.928258,
			Website: "http://www.floatingzendo.org",
		},
		"jikoji": &Venue{
			Name:    "Jikoji Zen Center",
			Addr:    "12100 Skyline Blvd, Los Gatos, CA",
			Phone:   "+1 (408) 741-9562",
			Email:   "[email protected]",
			Lat:     37.2728165,
			Lng:     -122.1466097,
			Website: "https://www.jikoji.org",
		},
	}

	return nil
}
// Desc gives the description of the website this worker crawls.
func (f *Fz) Desc() string {
	const description = "Floating Zendo (floatingzendo.org)"
	return description
}
// Events fetches the Floating Zendo events page and turns each row of the
// schedule table into an Event.
// http://www.floatingzendo.org/events/
//
// Every table row must contain exactly three cells (date range, name, venue),
// the date cell must split into two items on '-' or '–', both items must
// parse as a time, and the venue must resolve through the venue map. The
// first row that violates any of these stops the crawl; the events collected
// up to that point are returned together with the error describing the row.
func (f *Fz) Events() ([]*Event, error) {
	// Go layouts are written in terms of the reference time
	// (Mon Jan 2 15:04:05 MST 2006). This one matches values such as
	// "april 1 9pm 2019".
	const layout = "January 2 3pm 2006"

	u := "http://www.floatingzendo.org/events/"
	resp, err := f.client.Get(u)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, err
	}

	// NOTE(review): the year is pinned and the page-based lookup is disabled,
	// so the emptiness guard below cannot currently trigger. Parsed times are
	// only logged, not stored on the event — confirm the intended source of
	// the year before relying on them.
	year := "2019" // doc.Find("div.entry-content blockquote em strong").First().Text()
	if year == "" {
		return nil, errors.New("Failed to extract year")
	}

	var (
		domErr error
		events []*Event
		// Matches parenthesised annotations, which are dropped from the date cell.
		re = regexp.MustCompile(`\([^)]+\)`)
	)

	doc.Find("div.entry-content table tr").EachWithBreak(func(_ int, div *goquery.Selection) bool {
		parts := div.Find("td")
		if parts.Length() != 3 {
			domErr = fmt.Errorf("Expected 3 parts to event, found %d", parts.Length())
			return false
		}

		partSlice := parts.Map(func(_ int, s *goquery.Selection) string {
			return s.Text()
		})

		date := clean(re.ReplaceAllString(partSlice[0], ""))
		name := partSlice[1]
		venue := partSlice[2]
		desc := strings.Join([]string{name, venue}, "\n")

		// The range separator may be an ASCII hyphen or an en dash.
		d := strings.FieldsFunc(date, func(r rune) bool {
			return r == '-' || r == '–'
		})
		if len(d) != 2 {
			domErr = fmt.Errorf("Expected 2 date items, got %d: %s", len(d), date)
			return false
		}

		for _, dt := range d {
			dt = clean(dt)
			// Month names parse case-insensitively, but the "pm" layout token
			// only accepts lower case, and literal spaces must match exactly;
			// normalise both before parsing.
			stamp := strings.Join(strings.Fields(strings.ToLower(dt)), " ") + " " + year
			t, err := time.Parse(layout, stamp)
			if err != nil {
				domErr = fmt.Errorf("Failed to parse date: %s: %s", dt, err)
				return false
			}
			f.log <- fmt.Sprintf("t=%s\n", t)
		}

		// Exact match on the cleaned venue text first; otherwise any mention
		// of "jikoji" maps to the Jikoji entry.
		key := clean(venue)
		v, ok := f.venueMap[key]
		if !ok {
			if !strings.Contains(key, "jikoji") {
				domErr = fmt.Errorf("Failed to match venue for: %s", venue)
				return false
			}
			v = f.venueMap["jikoji"]
		}

		e := &Event{
			URL:   u,
			Name:  name,
			Desc:  desc,
			Venue: v,
		}
		events = append(events, e)

		f.log <- fmt.Sprintf("Found event: %s | %s | %s\n", date, name, venue)
		return true
	})

	f.log <- fmt.Sprintf("Found %d total events", len(events))
	return events, domErr
}