Survey responses ksqlDB recipe #1447

Merged · 5 commits · Nov 8, 2022
4 changes: 4 additions & 0 deletions .semaphore/semaphore.yml
@@ -353,6 +353,10 @@ blocks:
commands:
- make -C _includes/tutorials/logistics/ksql-test/code tutorial

- name: Survey Responses
commands:
- make -C _includes/tutorials/survey-responses/ksql-test/code tutorial

- name: Mainframe offload
commands:
- make -C _includes/tutorials/mainframe-offload/ksql-test/code tutorial
114 changes: 114 additions & 0 deletions _data/harnesses/survey-responses/ksql-test.yml
@@ -0,0 +1,114 @@
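# Each ksqlDB statement file runs in its own CLI session, followed by a short sleep
# so the objects it creates exist before the next step depends on them.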
test:
steps:
- title: ksqlDB code
content:
- action: make_file
file: docker-compose.yml
render:
skip: true

- action: execute_async
file: docker-compose-up.sh
render:
skip: true

- action: execute
file: wait-for-containers.sh
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: ../../ksql/code/tutorial-steps/dev/table_stream.sql

- name: wait for streams to be created
action: sleep
ms: 5000
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: ../../ksql/code/tutorial-steps/dev/manual.sql

- name: wait for table to be populated
action: sleep
ms: 5000
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: ../../ksql/code/tutorial-steps/dev/mask.sql

- name: wait for stream to be masked
action: sleep
ms: 5000
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: ../../ksql/code/tutorial-steps/dev/enrich.sql

- name: wait for stream to be enriched
action: sleep
ms: 5000
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: ../../ksql/code/tutorial-steps/dev/process.sql

- name: wait for stream to be processed
action: sleep
ms: 5000
render:
skip: true

- action: docker_ksql_cli_session
container: ksqldb-cli
docker_bootup_file: start-cli.sh
stdout:
directory: tutorial-steps/test/outputs
column_width: 75
render:
skip: true
stdin:
- file: tutorial-steps/test/validate.sql
49 changes: 49 additions & 0 deletions _data/harnesses/survey-responses/ksql.yml
@@ -0,0 +1,49 @@
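# Rendering-only harness: every step skips execution and renders the referenced
# AsciiDoc markup for the hosted tutorial page.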
answer:
steps:
- title:
content:
- action: skip
render:
file: tutorials/survey-responses/ksql/markup/dev/answer-short.adoc

dev:
steps:
- title: Setup your environment
content:
- action: skip
render:
file: shared/markup/ccloud/provision-cluster.adoc

- action: skip
render:
file: shared/markup/ccloud/ccloud_setup.adoc

- title: Execute ksqlDB code
content:
- action: skip
render:
file: shared/markup/ccloud/connect.adoc

- action: skip
render:
file: shared/markup/ccloud/manual_insert.adoc

- action: skip
render:
file: tutorials/survey-responses/ksql/markup/dev/ksqlDB.adoc

- title: Test with mock data
content:
- action: skip
render:
file: shared/markup/ccloud/manual_cue.adoc

- action: skip
render:
file: tutorials/survey-responses/ksql/markup/dev/manual.adoc

- title: Cleanup
content:
- action: skip
render:
file: shared/markup/ccloud/cleanup.adoc
8 changes: 8 additions & 0 deletions _data/tutorials.yaml
@@ -1102,3 +1102,11 @@ omnichannel-commerce:
introduction: "Today's retail customers browse and purchase products across many channels — from brick-and-mortar, to web, to mobile applications — and they expect a seamless shopping experience bridging these channels. The omnichannel retail setting is often rife with technical challenges stemming from data residing in disparate data sources. For example, web clickstream, mobile application user activity, and in-store purchase data often reside in different data stores or SaaS platforms. This tutorial enables you to correlate customers' in-store purchase activity with online user behavior, which can then feed downstream omnichannel analytics or improved customer experience (e.g., more relevant product recommendations online based on customers' in-store activity)."
status:
confluent: enabled

survey-responses:
title: "Analyze survey responses in real time"
meta-description: "Analyze survey responses in real time with ksqlDB"
slug: "/survey-responses"
introduction: "'Please rate your service today.' 'How likely are you to recommend us to a friend?' Regardless of the questions being asked, surveys are great ways for businesses to capture insights from their customers and even their employees. But these insights go stale and lose value the longer they take to be analyzed. This recipe makes survey analysis real time, allowing you to see results as survey responses happen."
status:
confluent: disabled
12 changes: 12 additions & 0 deletions _includes/tutorials/survey-responses/ksql-test/code/Makefile
@@ -0,0 +1,12 @@
STEPS_DIR := tutorial-steps
TEST_OUTPUTS_DIR := $(STEPS_DIR)/test/outputs
TEMP_DIR := $(shell mktemp -d)
SEQUENCE := "test"

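# Run the full harness, then verify that the live survey results contain the expected histogram output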
tutorial:
rm -r $(TEST_OUTPUTS_DIR) || true
mkdir -p $(TEST_OUTPUTS_DIR)
harness-runner ../../../../../_data/harnesses/survey-responses/ksql-test.yml $(TEMP_DIR) $(SEQUENCE)
# Look at survey outputs
grep Dish $(TEST_OUTPUTS_DIR)/validate/output-0.log || { cat $(TEST_OUTPUTS_DIR)/validate/output-0.log ; false; }
reset
@@ -0,0 +1 @@
docker-compose up -d
@@ -0,0 +1,59 @@
---
version: '2'

services:
zookeeper:
image: confluentinc/cp-zookeeper:7.1.0
hostname: zookeeper
container_name: zookeeper
ports:
- "2181:2181"
environment:
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_TICK_TIME: 2000

broker:
image: confluentinc/cp-kafka:7.1.0
hostname: broker
container_name: broker
depends_on:
- zookeeper
ports:
- "29092:29092"
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181'
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:9092,PLAINTEXT_HOST://localhost:29092
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1
KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1
KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0

ksqldb-server:
image: confluentinc/ksqldb-server:0.24.0
hostname: ksqldb-server
container_name: ksqldb-server
depends_on:
- broker
ports:
- "8088:8088"
environment:
KSQL_CONFIG_DIR: "/etc/ksqldb"
KSQL_LOG4J_OPTS: "-Dlog4j.configuration=file:/etc/ksqldb/log4j.properties"
KSQL_BOOTSTRAP_SERVERS: "broker:9092"
KSQL_HOST_NAME: ksqldb-server
KSQL_LISTENERS: "http://0.0.0.0:8088"
KSQL_CACHE_MAX_BYTES_BUFFERING: 0
KSQL_KSQL_STREAMS_AUTO_OFFSET_RESET: "earliest"

ksqldb-cli:
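    # Kept alive with a tty so the test harness can attach interactive ksql CLI sessions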
image: confluentinc/ksqldb-cli:0.24.0
container_name: ksqldb-cli
depends_on:
- broker
- ksqldb-server
entrypoint: /bin/sh
environment:
KSQL_CONFIG_DIR: "/etc/ksqldb"
tty: true
@@ -0,0 +1 @@
docker exec -it ksqldb-cli ksql http://ksqldb-server:8088
@@ -0,0 +1,6 @@
+-----------------------------------------------------------------------+---------------------------------------------------------------------------+
|SURVEY_QUESTION |RESULTS |
+-----------------------------------------------------------------------+---------------------------------------------------------------------------+
|What is your Favorite Thanksgiving Dish? |{Cranberry Jelly=1, Turkey=3, Green Beans=1, Stuffing=3} |
|What is your Favorite Thanksgiving Pie? |{Pumpkin Pie=3, Pecan Pie=1, Apple Pie=4} |
Query terminated
@@ -0,0 +1 @@
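-- Query the materialized results table; the harness captures this output for the Makefile's grep check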
SELECT * FROM SURVEY_RESULTS_LIVE;
@@ -0,0 +1,15 @@
#!/bin/bash

# Wait for ksqlDB to become available
while :
do curl_status=$(curl -s -o /dev/null -w %{http_code} http://localhost:8088/info)
echo -e $(date) " Component: ksqlDB \t\t\tHTTP state: " $curl_status "\t(waiting for 200)"
if [ $curl_status -eq 200 ]
then
echo "✅ ksqlDB is ready"
# Attempt to avoid flaky test failures
sleep 1
break
fi
sleep 5
done
@@ -0,0 +1,18 @@
-- Enrich Survey Responses with Respondent Data
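-- Stream-table join: each survey response is joined to the latest masked record for its respondent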
CREATE STREAM SURVEY_RESPONSES_ENRICHED WITH (
KAFKA_TOPIC = 'survey-responses-enriched',
VALUE_FORMAT = 'JSON',
KEY_FORMAT = 'KAFKA',
PARTITIONS = 6
) AS
SELECT
RESPONSES.RESPONDENT_ID AS RESPONDENT_ID,
RESPONSES.SURVEY_ID,
RESPONSES.SURVEY_QUESTION,
RESPONSES.SURVEY_RESPONSE,
RESPONDENTS.NAME,
RESPONDENTS.TEAM
FROM SURVEY_RESPONSES RESPONSES
INNER JOIN SURVEY_RESPONDENTS_MASKED RESPONDENTS
ON RESPONSES.RESPONDENT_ID = RESPONDENTS.RESPONDENT_ID
EMIT CHANGES;
@@ -0,0 +1,27 @@
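-- Mock data for the test harness; the response counts below are what the validate step's histogram output is checked against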
-- Survey Respondents
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('159', 'Penelope Coin', 'DevX', '[email protected]', '183 Maple Drive');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ( '93', 'Theodore Bear', 'Marketing', '[email protected]', '68 El Camino Real');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('184', 'Jack Pepper', 'DevX', '[email protected]', '8299 Skyline Drive');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ( '15', 'John Deer', 'Engineering', '[email protected]', '928 Maple Street');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('282', 'Jane Doe', 'Engineering', '[email protected]', '110 Rocky Road');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('739', 'Monte Wisoky', 'Engineering', '[email protected]', '1 First Street');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('250', 'Tessie Cremin', 'DevX', '[email protected]', '8 B Street Apt A');
INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('301', 'Wilfrid Howe', 'Marketing', '[email protected]', '617 Hamilton Road');

-- Survey Responses
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '184', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '159', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '93', 'What is your Favorite Thanksgiving Pie?', 'Pecan Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '301', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '15', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '282', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '250', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '739', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '15', 'What is your Favorite Thanksgiving Dish?', 'Turkey');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '282', 'What is your Favorite Thanksgiving Dish?', 'Stuffing');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '250', 'What is your Favorite Thanksgiving Dish?', 'Turkey');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '739', 'What is your Favorite Thanksgiving Dish?', 'Stuffing');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '159', 'What is your Favorite Thanksgiving Dish?', 'Cranberry Jelly');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '184', 'What is your Favorite Thanksgiving Dish?', 'Turkey');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '93', 'What is your Favorite Thanksgiving Dish?', 'Stuffing');
INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '301', 'What is your Favorite Thanksgiving Dish?', 'Green Beans');
@@ -0,0 +1,15 @@
-- Mask Respondent Data
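-- MASK() replaces the characters of EMAIL and ADDRESS with obfuscated placeholders so downstream consumers never see the raw values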
CREATE TABLE SURVEY_RESPONDENTS_MASKED WITH (
KAFKA_TOPIC = 'survey-respondents-masked',
VALUE_FORMAT = 'JSON',
KEY_FORMAT = 'KAFKA',
PARTITIONS = 6
) AS
SELECT
RESPONDENT_ID,
NAME,
TEAM,
MASK(EMAIL) AS EMAIL,
MASK(ADDRESS) AS ADDRESS
FROM SURVEY_RESPONDENTS
EMIT CHANGES;
@@ -0,0 +1,28 @@
-- Fetch Results Live
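-- HISTOGRAM() returns a map of each distinct survey response to the number of times it occurs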
CREATE TABLE SURVEY_RESULTS_LIVE WITH (
KAFKA_TOPIC = 'survey-results-live',
VALUE_FORMAT = 'JSON',
KEY_FORMAT = 'KAFKA',
PARTITIONS = 6
) AS
SELECT
SURVEY_QUESTION,
HISTOGRAM(SURVEY_RESPONSE) AS RESULTS
FROM SURVEY_RESPONSES_ENRICHED
GROUP BY SURVEY_QUESTION
EMIT CHANGES;

-- Fetch Results Windowed Per Team
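-- EMIT FINAL emits one row per question only when each 24-hour tumbling window closes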
CREATE TABLE SURVEY_RESULTS_WINDOWED WITH (
KAFKA_TOPIC = 'survey-results-windowed',
VALUE_FORMAT = 'JSON',
KEY_FORMAT = 'KAFKA',
PARTITIONS = 6
) AS
SELECT
SURVEY_QUESTION,
HISTOGRAM(TEAM) AS TEAMS
FROM SURVEY_RESPONSES_ENRICHED
WINDOW TUMBLING (SIZE 24 HOURS)
GROUP BY SURVEY_QUESTION
EMIT FINAL;