-
Notifications
You must be signed in to change notification settings - Fork 6
/
fullrun.sh
executable file
·279 lines (254 loc) · 9.9 KB
/
fullrun.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
#!/bin/bash
##-1. Sanity Check Environment.
# Pre-flight check. Terminates the script with status 1 unless
#   * the zmap binary named by the "zmap" config key lists a probe module
#     matching "udp_multi", and
#   * cluster_correlation/correlation-distr/bin64/chainedSolvers exists.
# Side effect: sets the globals zmapPath and zmapModules.
sanityCheck() {
  zmapPath=$(node util/config.js zmap)
  # $zmapPath is intentionally left unquoted so it expands exactly as it
  # always has (an empty value simply yields no module list).
  zmapModules=$($zmapPath --list-probe-modules | grep udp_multi)
  if [[ -z $zmapModules ]]; then
    echo "Your zmap doesn't have the udp_multi probe module built."
    exit 1
  fi

  # Everything is in place once the compiled solver is present.
  if [[ -f cluster_correlation/correlation-distr/bin64/chainedSolvers ]]; then
    return 0
  fi
  echo "You haven't compiled the clustering dependency."
  echo "Run cluster_correlation/correlation-distr/build-distr.sh"
  exit 1
}
##0. Notify and wait.
# Announces the upcoming scan and gives operators a window to cancel it.
# A scan_pending.lock file is created, the script sleeps for the configured
# "notification_delay", and the run only proceeds if the lock file is still
# there afterwards. If it has been removed the script exits with status 1.
notify() {
  echo "Sending Notification..."
  node util/notify.js
  touch scan_pending.lock
  # Unquoted on purpose: the configured delay is passed to sleep as-is.
  sleep $(node util/config.js notification_delay)

  # A missing lock file means somebody vetoed the scan while we slept.
  if [[ ! -f scan_pending.lock ]]; then
    echo "Scan Canceled."
    exit 1
  fi
  echo "Beginning."
  rm scan_pending.lock
}
##1. Get the Alexa top sites.
# Builds temp/domains.txt.
#   1. Downloads and unpacks the Alexa top-1m list inside temp/ and keeps the
#      domain column of its first 10000 rows.
#   2. If the "domainlist" config key is non-empty, appends that list too:
#      a value starting with "/" is read as a local file, anything else is
#      fetched with curl.
# The working directory is restored before returning.
# Exits with status 1 if temp/ cannot be entered.
getTopSites() {
  local extraList

  echo "Getting top Sites..."
  # Without this guard a missing temp/ would let the download, the rm calls
  # and the final `cd ..` all run relative to the wrong directory.
  cd temp || { echo "Unable to enter the temp directory." >&2; exit 1; }

  curl -O http://s3.amazonaws.com/alexa-static/top-1m.csv.zip
  unzip top-1m.csv.zip
  rm top-1m.csv.zip
  cut -d "," -f 2 top-1m.csv | head -n 10000 > domains.txt

  # Query the config once and reuse the answer; quoting keeps paths or URLs
  # containing whitespace intact.
  extraList=$(node ../util/config.js domainlist)
  if [[ -n $extraList ]]; then
    if [[ $extraList == /* ]]; then
      cat -- "$extraList" >> domains.txt
    else
      curl -s "$extraList" >> domains.txt
    fi
  fi

  rm top-1m.csv
  cd ..
}
##2. Add in redirects
# Extends temp/domains.txt with redirect targets.
# dns/find-redirects.js is run with the domain list and the output prefix
# temp/extra; temp/extra-uniqRedirects.txt (presumably produced by that
# script) is then appended to the domain list and deleted.
addRedirects() {
  local domainFile=temp/domains.txt
  local redirectFile=temp/extra-uniqRedirects.txt

  echo "Learning Redirects..."
  node dns/find-redirects.js "$domainFile" temp/extra
  cat "$redirectFile" >> "$domainFile"
  rm "$redirectFile"
}
##3. Get the blacklist.
# Installs the scan blacklist at temp/blacklist.conf.
# The source comes from the "blacklist" config key: a value starting with "/"
# is copied as a local file, any other non-empty value is fetched with curl.
# Exits with status 1 when no blacklist is configured or when the resulting
# file is missing or empty.
getBlacklist() {
  local blacklistSource

  echo "Getting Blacklist..."
  # Ask the config once; quoting the value below keeps paths that contain
  # whitespace in one piece.
  blacklistSource=$(node util/config.js blacklist)
  if [[ -z $blacklistSource ]]; then
    echo "No blacklist set for satellite."
    echo "Configure this setting in the config.json file."
    echo "You'll need to set up your own blacklist or get in touch with us"
    echo "in order to use ours."
    exit 1
  fi

  if [[ $blacklistSource == /* ]]; then
    cp -- "$blacklistSource" temp/blacklist.conf
  else
    curl -s "$blacklistSource" > temp/blacklist.conf
  fi

  # Refuse to continue without any exclusions.
  if [[ ! -s temp/blacklist.conf ]]; then
    echo "blacklist is empty. that's bad."
    exit 1
  fi
  echo "blacklist set"
}
##4. Create output for run.
# Starts a new run: stores today's date (MM-DD-YYYY) in the global thisRun
# and creates the matching runs/<date> directory that later steps write to.
generateRun() {
  echo "Starting new run..."
  thisRun="$(date '+%m-%d-%Y')"
  mkdir "runs/${thisRun}"
}
##5. Find active servers
# Runs the initial zmap UDP scan on port 53.
#   * records the configured local_ip in runs/<run>/local.csv.ip
#   * has dns/mkpkt.js build temp/query.pkt for the configured local_address
#   * runs the configured zmap binary with that packet as probe payload,
#     writing successful responses as CSV to runs/<run>/local.csv
# Reads the global thisRun.
getActiveResolvers() {
  local runDir="runs/$thisRun"
  local scanner
  local -a scanArgs

  # The bare substitutions below stay unquoted so the configured values are
  # passed through exactly as before.
  echo $(node util/config.js local_ip) > "$runDir/local.csv.ip"
  node dns/mkpkt.js temp/query.pkt $(node util/config.js local_address)

  echo "Running initial scan..."
  scanner=$(node util/config.js zmap)
  scanArgs=(
    -p 53
    -o "$runDir/local.csv"
    -b temp/blacklist.conf
    -c 300
    -r $(node util/config.js rate)
    --output-module=csv
    -f saddr,timestamp_str,data
    --output-filter="success = 1"
    -M udp
    --probe-args=file:temp/query.pkt
  )
  $scanner "${scanArgs[@]}"
}
##6. extract good hosts
# Runs dns/filter.js twice over the initial scan results
# (runs/<run>/local.csv): once to produce temp/dns_servers.txt and once,
# with the extra "json" argument, to produce runs/<run>/whitelist.json.
# Reads the global thisRun.
getGoodHosts() {
  local scanCsv="runs/$thisRun/local.csv"

  echo "Generating IP list..."
  node dns/filter.js "$scanCsv" temp/dns_servers.txt
  node dns/filter.js "$scanCsv" "runs/$thisRun/whitelist.json" json
}
##7. Do it!
# Main DNS scan. Snapshots the domain and resolver lists into the run
# directory (domains.txt / servers.txt), creates runs/<run>/zmap and hands
# both lists plus that output directory to dns/managedscans.js.
# Reads the global thisRun.
runTopSites() {
  local runDir="runs/${thisRun}"

  echo "Scanning all domains..."
  cp temp/domains.txt "$runDir/domains.txt"
  cp temp/dns_servers.txt "$runDir/servers.txt"
  mkdir "$runDir/zmap"
  node dns/managedscans.js temp/domains.txt temp/dns_servers.txt "$runDir/zmap"
}
##8. Record IP Ownership.
# Runs asn_aggregation/makemap.js (with an 8 GB old-space heap limit) for the
# current run, directing its output to runs/<run>/lookup.json.
# Reads the global thisRun.
recordLookupTable() {
  local lookupFile="runs/$thisRun/lookup.json"

  echo "Building ASN tables..."
  # $thisRun is passed bare, as before, so an empty run name adds no argument.
  node --max-old-space-size=8192 asn_aggregation/makemap.js $thisRun "$lookupFile"
}
##9. Run HTTP Scans.
# Optional step: creates runs/<run>/http and passes it to
# http/managedscans.js as the output directory.
# Reads the global thisRun.
runHTTPScans() {
  local httpDir="runs/$thisRun/http"

  echo "Scanning HTTP(s)..."
  mkdir "$httpDir"
  node http/managedscans.js "$httpDir"
}
##10. Archive
# Archives the raw scan output of the current run.
#   * runs/<run>/zmap is packed into runs/<run>/zmap.tgz
#   * the SHA-1 digest of that archive is stored in runs/<run>/zmap.tgz.sig
#   * compat/generateStudyMetadata.js is run from inside compat/
# Reads the global thisRun. The caller's working directory is never changed.
makeArchive() {
  local runDir="runs/$thisRun"

  echo "Archiving..."
  tar -czf "$runDir/zmap.tgz" "$runDir/zmap"
  # Keep only the digest column of sha1sum's "<digest>  <file>" output.
  sha1sum "$runDir/zmap.tgz" | awk '{print $1}' > "$runDir/zmap.tgz.sig"

  # Run in a subshell: if compat/ is missing the script is skipped, and in
  # every case the working directory of the remaining steps stays intact
  # (a bare "cd compat; ...; cd .." would climb out of the project root).
  ( cd compat && node ./generateStudyMetadata.js )
}
##11. Aggregate
# Aggregates the raw scan output of the current run.
#   1. util/plelSplit.js is started with the configured number of
#      aggregation_processes and a dns/aggregator.js command template; the
#      resulting runs/<run>/asn.json.* pieces are appended to
#      runs/<run>/asn.json and then removed.
#   2. Two asn_aggregation collapse scripts derive the runs/<run>/aggregate*
#      files from asn.json.
# Reads the global thisRun; sets the global plel.
aggregateRun() {
  local runDir="runs/$thisRun"
  local heapFlag=--max-old-space-size=8192

  echo "Aggregating..."
  plel=$(node util/config.js aggregation_processes)
  # The last argument is one single string; "#1" and "#2" are passed through
  # literally (presumably placeholders filled in by plelSplit.js).
  node util/plelSplit.js $plel "$runDir/zmap" "$runDir/asn.json" \
    "node ./dns/aggregator.js #1 ./runs/$thisRun/lookup.json #2 ./runs/$thisRun/whitelist.json"
  cat "$runDir"/asn.json.* >> "$runDir/asn.json"
  rm "$runDir"/asn.json.*

  echo "Aggregating IP-Domain Counts..."
  node "$heapFlag" asn_aggregation/asn_collapse-classC_domains.js \
    "$runDir/asn.json" "$runDir/aggregate"
  node "$heapFlag" asn_aggregation/asn_collapse-ip_domains.js \
    "$runDir/asn.json" "$runDir/aggregate.ip-domain.json" "$runDir/aggregate.domain-ip.json"
}
##11 (alt). OONI aggregation.
# Variant of the aggregation step that also assembles runs/<run>/ooni.json.
#   1. compat/ooni.js is asked for ooni.header / ooni.footer.
#   2. util/plelSplit.js runs the aggregator; its asn.json.* pieces are split
#      into the regular ones (appended to asn.json) and the *.ooni ones
#      (wrapped in header/footer and appended to ooni.json).
#   3. All pieces plus header/footer are deleted and the two collapse scripts
#      are run on asn.json.
# Reads the global thisRun; sets the global plel.
aggregateRunWithOoni() {
  local runDir="runs/$thisRun"
  local heapFlag=--max-old-space-size=8192

  echo "Aggregating..."
  plel=$(node util/config.js aggregation_processes)
  node "$heapFlag" compat/ooni.js $thisRun "$runDir/ooni.header" "$runDir/ooni.footer"
  node util/plelSplit.js $plel "$runDir/zmap" "$runDir/asn.json" \
    "node ./dns/aggregator.js #1 ./runs/$thisRun/lookup.json #2 ./runs/$thisRun/whitelist.json"

  # [!ooni] is a bracket expression, not a suffix test: it selects pieces
  # whose LAST character is not 'o', 'n' or 'i'. That is what keeps the
  # *.ooni pieces out of asn.json.
  cat "$runDir"/asn.json.*[!ooni] >> "$runDir/asn.json"
  cat "$runDir/ooni.header" "$runDir"/asn.json.*.ooni "$runDir/ooni.footer" >> "$runDir/ooni.json"
  rm "$runDir"/asn.json.* "$runDir/ooni.header" "$runDir/ooni.footer"

  echo "Aggregating IP-Domain Counts..."
  node "$heapFlag" asn_aggregation/asn_collapse-classC_domains.js \
    "$runDir/asn.json" "$runDir/aggregate"
  # NOTE(review): aggregateRun passes these two output paths in the opposite
  # order (ip-domain first) - confirm which order the script expects.
  node "$heapFlag" asn_aggregation/asn_collapse-ip_domains.js \
    "$runDir/asn.json" "$runDir/aggregate.domain-ip.json" "$runDir/aggregate.ip-domain.json"
}
##12. Get Reverse Lookups of IPs.
# Collects per-IP metadata for the current run.
#   * util/jsonkeystofile.js turns aggregate.ip-domain.json into allIPs.txt
#   * dns/find-ptrs.js reads that list and is given ptrs.json as output
#   * http/find-server.js reads it and is given serverheaders.json as output
# All files live in runs/<run>/. Reads the global thisRun.
reverseLookup() {
  local runDir="runs/$thisRun"
  local ipList="$runDir/allIPs.txt"

  echo "Looking up PTR Records..."
  node --max-old-space-size=8192 util/jsonkeystofile.js \
    "$runDir/aggregate.ip-domain.json" "$ipList"
  node dns/find-ptrs.js "$ipList" "$runDir/ptrs.json"

  echo "Looking up Server headers..."
  node --max-old-space-size=8192 http/find-server.js \
    "$ipList" "$runDir/serverheaders.json"

  # WHOIS lookups are disabled; kept for reference.
  #echo "Looking up WHOIS Records..."
  #node dns/find-whois.js runs/$thisRun/allIPs.txt runs/$thisRun/whois.json
}
##13. Favicons
# Optional favicon scan for the current run. Everything is written below
# runs/<run>/favicon:
#   * ignorelist.json is seeded with an empty JSON object
#   * favicon/original.js is given temp/domains.txt -> locally-resolved.json
#   * favicon/favicon.js is given aggregate.ip-domain.json and the ignore
#     list -> favicons.jsonlines
#   * favicon/compare.js is given both results -> validation.jsonlines
# Reads the global thisRun.
favicon() {
  # TODO : Randomize domains and parallelize
  # No spaces around "=": with them the shell runs a command called "fp"
  # instead of assigning, leaving $fp empty and every path rooted at "/".
  local fp="runs/$thisRun/favicon"

  echo "Starting Favicons..."
  mkdir "$fp"
  echo "{}" > "$fp/ignorelist.json"
  node favicon/original.js temp/domains.txt "$fp/locally-resolved.json"
  node favicon/favicon.js "runs/$thisRun/aggregate.ip-domain.json" \
    "$fp/ignorelist.json" "$fp/favicons.jsonlines"
  node favicon/compare.js "$fp/locally-resolved.json" \
    "$fp/favicons.jsonlines" "$fp/validation.jsonlines"
}
##14. Build CDN Mapping
# Builds the domain/IP clustering artefacts for the current run.
#   1. An initial similarity matrix (similarity01) is generated, then six
#      rounds each produce reweight0<i>.json from similarity0<i> and a new
#      similarity0<i+1> from that reweighting table.
#   2. Domains are assigned to clusters from similarity06, IPs from
#      similarity07.
#   3. Clusters are merged using ptrs.json (threshold argument 0.8) into
#      clusters.merged.json.
#   4. A country-country lookup is built from lookup.json and asn.json.
# All files live in runs/<run>/. Reads the global thisRun.
buildMatrices() {
  local runDir="runs/$thisRun"
  local heapFlag=--max-old-space-size=8192
  local domainToC="$runDir/aggregate.domain-classC.json"
  local cToDomain="$runDir/aggregate.classC-domain.json"
  local i

  echo "Generating initial Similarity matrix..."
  node "$heapFlag" cluster_correlation/correlation-matrix.js \
    "$domainToC" "$runDir/similarity01"

  for i in {1..6}; do
    echo "Regenerating matrix (iteration $i of 6)..."
    node "$heapFlag" cluster_correlation/reweighting-table.js \
      "$domainToC" "$cToDomain" "$runDir/similarity0$i" "$runDir/reweight0$i.json"
    node "$heapFlag" cluster_correlation/correlation-matrix.js \
      "$domainToC" "$runDir/reweight0$i.json" "$runDir/similarity0$((i + 1))"
  done

  echo "Assigning Domains to clusters..."
  node "$heapFlag" cluster_correlation/correlation-distr/run-distr.js \
    "$runDir/similarity06" "$runDir/clusters.json"

  echo "Assigning IPs to clusters..."
  node "$heapFlag" cluster_correlation/cluster-footprint.js \
    "$runDir/clusters.json" "$cToDomain" "$runDir/similarity07" "$runDir/clusters.ips.json"

  echo "Secondary Signal Aggregation [ptrs]"
  # The output path previously used the misspelled variable $thisRuns, which
  # is unset, so the merged clusters landed in runs//clusters.merged.json.
  node "$heapFlag" cluster_correlation/merge_on_metadata.js \
    "$runDir/clusters.json" "$runDir/clusters.ips.json" "$runDir/ptrs.json" 0.8 \
    "$runDir/clusters.merged.json"

  echo "Building Country-Country Lookup..."
  node "$heapFlag" asn_aggregation/asn_asn-to-country_country.js \
    "$runDir/lookup.json" "$runDir/asn.json" "$runDir/country-country.json"
}
##15. Signatures / Anomalies
# Computes per-domain signatures and then extracts anomalies for the current
# run.
#   * cluster_correlation/domain_signature.js is given asn.json, lookup.json,
#     ptrs.json and serverheaders.json -> domainsigs.json
#   * interference/extract_anomalies.js is given asn.json, domainsigs.json,
#     lookup.json, ptrs.json and serverheaders.json -> anomalies.json
# All files live in runs/<run>/. Reads the global thisRun.
makeSignatures() {
  local runDir="runs/$thisRun"
  # The heap option is spelled --max-old-space-size, as in every other node
  # call in this script; the shorter "--max-old-space" is not that option.
  local heapFlag=--max-old-space-size=8192

  echo "Calculating signatures..."
  node "$heapFlag" cluster_correlation/domain_signature.js \
    "$runDir/asn.json" "$runDir/lookup.json" "$runDir/ptrs.json" \
    "$runDir/serverheaders.json" "$runDir/domainsigs.json"

  echo "Calculating Outliers..."
  node "$heapFlag" interference/extract_anomalies.js \
    "$runDir/asn.json" "$runDir/domainsigs.json" "$runDir/lookup.json" \
    "$runDir/ptrs.json" "$runDir/serverheaders.json" "$runDir/anomalies.json"
}
##16. Clean up
# Deletes intermediate files once a run is complete: the resolver list in
# temp/, the raw runs/<run>/zmap directory, the similarity02-06 matrix files
# and the reweight01-05 tables. similarity01 and similarity07 are left alone.
# Reads the global thisRun.
cleanup() {
  local runDir="runs/$thisRun"

  echo "Cleaning up..."
  rm temp/dns_servers.txt
  rm -r "$runDir/zmap"
  rm "$runDir"/similarity0{2..6}.*
  rm "$runDir"/reweight0{1..5}.json
}
# Entry point.
#   fullrun.sh                   run the whole pipeline, start to finish
#   fullrun.sh <step> <run>      run one step function against an existing
#                                run name (stored in thisRun)
if (( $# > 0 )); then
  # Single-step mode: $2 names the run, $1 names the function to call.
  thisRun=${2}
  ${1}
else
  sanityCheck         # abort early if zmap or the clustering binary is unusable
  notify              # announce the scan, then wait out the cancel window
  getTopSites         # build temp/domains.txt from the Alexa list
  addRedirects        # append discovered redirect targets to the domain list
  getBlacklist        # install temp/blacklist.conf
  generateRun         # create the date-named runs/ folder and set thisRun
  getActiveResolvers  # initial port-53 scan with the configured probe query
  getGoodHosts        # derive dns_servers.txt and whitelist.json from that scan
  runTopSites         # scan every domain against the resolver list
  recordLookupTable   # write lookup.json for this run
  #runHTTPScans       # HTTP(s) scans - not part of the default run
  makeArchive         # tar up the raw zmap output and record its checksum
  aggregateRun        # collapse raw output into asn.json and aggregate files
  reverseLookup       # PTR records and server headers for all seen IPs
  #favicon            # favicon scan and compare - not part of the default run
  buildMatrices       # similarity matrices and clusters
  makeSignatures      # domain signatures and anomalies
  cleanup             # remove intermediate files

  # Optional site-specific hook, executed in this shell.
  if [[ -f postrun.sh ]]; then
    source postrun.sh
  fi
  node util/notify.js finished
fi