-
Notifications
You must be signed in to change notification settings - Fork 2
/
website_analytics.sql
68 lines (63 loc) · 1.58 KB
/
website_analytics.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
-- Create User table
CREATE TABLE users (
id STRING PRIMARY KEY
) WITH (
kafka_topic='users',
value_format='AVRO'
);
-- Create Pageviews stream
CREATE STREAM pageviews WITH (
kafka_topic='pageviews',
value_format='AVRO'
);
-- Rekey Pageviews stream to use userid
CREATE STREAM pageviews_rekeyed WITH (
kafka_topic='pageviews-rekeyed',
value_format='AVRO'
) AS
SELECT
*
FROM PAGEVIEWS
PARTITION BY USERID
EMIT CHANGES;
-- Filter out information from Pageviews
CREATE STREAM pageviews_filtered WITH (
kafka_topic='pageviews-filtered',
value_format='AVRO'
) AS
SELECT
ROWTIME AS pageviewtime,
*
FROM PAGEVIEWS_REKEYED
WHERE USERID != 'User_1'
EMIT CHANGES;
-- Enrich Pageviews with User data
CREATE STREAM pageviews_enriched WITH (
kafka_topic='pageviews-enriched',
value_format='AVRO'
) AS
SELECT
pageviews.viewtime AS viewtime,
FROM_UNIXTIME(pageviews.pageviewtime) AS pageviewtime,
pageviews.pageid AS pageid,
pageviews.userid AS userid,
users.regionid AS regionid,
users.gender AS gender,
FROM_UNIXTIME(users.registertime) AS registertime
FROM pageviews_filtered AS pageviews
INNER JOIN users
ON pageviews.userid = users.id
EMIT CHANGES;
-- Conduct regional analysis
CREATE TABLE pageview_regional_analysis WITH (
kafka_topic='pageview-regional-analysis',
format='AVRO'
) AS
SELECT
regionid,
COUNT(*) AS views,
AVG(viewtime) AS viewtme_avg
FROM pageviews_enriched
WINDOW TUMBLING (SIZE 30 MINUTES)
GROUP BY regionid
EMIT FINAL;