diff --git a/.semaphore/semaphore.yml b/.semaphore/semaphore.yml index 0eba459c71..cfe074dbf6 100644 --- a/.semaphore/semaphore.yml +++ b/.semaphore/semaphore.yml @@ -353,6 +353,10 @@ blocks: commands: - make -C _includes/tutorials/logistics/ksql-test/code tutorial + - name: Survey Responses + commands: + - make -C _includes/tutorials/survey-responses/ksql-test/code tutorial + - name: Mainframe offload commands: - make -C _includes/tutorials/mainframe-offload/ksql-test/code tutorial diff --git a/_data/harnesses/survey-responses/ksql-test.yml b/_data/harnesses/survey-responses/ksql-test.yml new file mode 100644 index 0000000000..edabe96e8d --- /dev/null +++ b/_data/harnesses/survey-responses/ksql-test.yml @@ -0,0 +1,114 @@ +test: + steps: + - title: ksqlDB code + content: + - action: make_file + file: docker-compose.yml + render: + skip: true + + - action: execute_async + file: docker-compose-up.sh + render: + skip: true + + - action: execute + file: wait-for-containers.sh + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: ../../ksql/code/tutorial-steps/dev/table_stream.sql + + - name: wait for streams to be created + action: sleep + ms: 5000 + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: ../../ksql/code/tutorial-steps/dev/manual.sql + + - name: wait for table to be populated + action: sleep + ms: 5000 + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: ../../ksql/code/tutorial-steps/dev/mask.sql + + - name: wait for stream to be 
masked + action: sleep + ms: 5000 + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: ../../ksql/code/tutorial-steps/dev/enrich.sql + + - name: wait stream to be enriched + action: sleep + ms: 5000 + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: ../../ksql/code/tutorial-steps/dev/process.sql + + - name: wait for stream to be processed + action: sleep + ms: 5000 + render: + skip: true + + - action: docker_ksql_cli_session + container: ksqldb-cli + docker_bootup_file: start-cli.sh + stdout: + directory: tutorial-steps/test/outputs + column_width: 75 + render: + skip: true + stdin: + - file: tutorial-steps/test/validate.sql diff --git a/_data/harnesses/survey-responses/ksql.yml b/_data/harnesses/survey-responses/ksql.yml new file mode 100644 index 0000000000..a405ce6c07 --- /dev/null +++ b/_data/harnesses/survey-responses/ksql.yml @@ -0,0 +1,49 @@ +answer: + steps: + - title: + content: + - action: skip + render: + file: tutorials/survey-responses/ksql/markup/dev/answer-short.adoc + +dev: + steps: + - title: Setup your environment + content: + - action: skip + render: + file: shared/markup/ccloud/provision-cluster.adoc + + - action: skip + render: + file: shared/markup/ccloud/ccloud_setup.adoc + + - title: Execute ksqlDB code + content: + - action: skip + render: + file: shared/markup/ccloud/connect.adoc + + - action: skip + render: + file: shared/markup/ccloud/manual_insert.adoc + + - action: skip + render: + file: tutorials/survey-responses/ksql/markup/dev/ksqlDB.adoc + + - title: Test with mock data + content: + - action: skip + render: + file: shared/markup/ccloud/manual_cue.adoc + + - action: skip + 
render: + file: tutorials/survey-responses/ksql/markup/dev/manual.adoc + + - title: Cleanup + content: + - action: skip + render: + file: shared/markup/ccloud/cleanup.adoc diff --git a/_data/tutorials.yaml b/_data/tutorials.yaml index d74c9de15b..92acebc31e 100644 --- a/_data/tutorials.yaml +++ b/_data/tutorials.yaml @@ -1102,3 +1102,11 @@ omnichannel-commerce: introduction: "Today's retail customers browse and purchase products across many channels — from brick-and-mortar, to web, to mobile applications — and they expect a seamless shopping experience bridging these channels. The omnichannel retail setting is often rife with technical challenges stemming from data residing in disparate data sources. For example, web clickstream, mobile application user activity, and in-store purchase data often reside in different data stores or SaaS platforms. This tutorial enables you to correlate customers' in-store purchase activity with online user behavior, which can then feed downstream omnichannel analytics or improved customer experience (e.g., more relevant product recommendations online based on customers' in-store activity)." status: confluent: enabled + +survey-responses: + title: "Analyze survey responses in real time" + meta-description: "analyze survey responses in real time" + slug: "/survey-responses" + introduction: "'Please rate your service today.' 'How likely are you to recommend us to a friend?' Regardless of the questions being asked, surveys are great ways for businesses to capture insights from their customers and even their employees. But these insights go stale and lose value the longer they take to be analyzed. This recipe makes survey analysis real time, allowing you to see results as survey responses happen." 
+ status: + confluent: disabled diff --git a/_includes/tutorials/survey-responses/ksql-test/code/Makefile b/_includes/tutorials/survey-responses/ksql-test/code/Makefile new file mode 100644 index 0000000000..201f5f5d3e --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/Makefile @@ -0,0 +1,12 @@ +STEPS_DIR := tutorial-steps +TEST_OUTPUTS_DIR := $(STEPS_DIR)/test/outputs +TEMP_DIR := $(shell mktemp -d) +SEQUENCE := "test" + +tutorial: + rm -r $(TEST_OUTPUTS_DIR) || true + mkdir -p $(TEST_OUTPUTS_DIR) + harness-runner ../../../../../_data/harnesses/survey-responses/ksql-test.yml $(TEMP_DIR) $(SEQUENCE) + # Look at survey outputs + grep Dish $(TEST_OUTPUTS_DIR)/validate/output-0.log || { cat $(TEST_OUTPUTS_DIR)/validate/output-0.log ; false; } + reset diff --git a/_includes/tutorials/survey-responses/ksql-test/code/docker-compose-up.sh b/_includes/tutorials/survey-responses/ksql-test/code/docker-compose-up.sh new file mode 100755 index 0000000000..a8341a634f --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/docker-compose-up.sh @@ -0,0 +1 @@ +docker-compose up -d diff --git a/_includes/tutorials/survey-responses/ksql-test/code/docker-compose.yml b/_includes/tutorials/survey-responses/ksql-test/code/docker-compose.yml new file mode 100644 index 0000000000..8d1b2e42c4 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/docker-compose.yml @@ -0,0 +1,59 @@ +--- +version: '2' + +services: + zookeeper: + image: confluentinc/cp-zookeeper:7.1.0 + hostname: zookeeper + container_name: zookeeper + ports: + - "2181:2181" + environment: + ZOOKEEPER_CLIENT_PORT: 2181 + ZOOKEEPER_TICK_TIME: 2000 + + broker: + image: confluentinc/cp-kafka:7.1.0 + hostname: broker + container_name: broker + depends_on: + - zookeeper + ports: + - "29092:29092" + environment: + KAFKA_BROKER_ID: 1 + KAFKA_ZOOKEEPER_CONNECT: 'zookeeper:2181' + KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT + 
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://broker:9092,PLAINTEXT_HOST://localhost:29092 + KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1 + KAFKA_TRANSACTION_STATE_LOG_MIN_ISR: 1 + KAFKA_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: 1 + KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 + + ksqldb-server: + image: confluentinc/ksqldb-server:0.24.0 + hostname: ksqldb-server + container_name: ksqldb-server + depends_on: + - broker + ports: + - "8088:8088" + environment: + KSQL_CONFIG_DIR: "/etc/ksqldb" + KSQL_LOG4J_OPTS: "-Dlog4j.configuration=file:/etc/ksqldb/log4j.properties" + KSQL_BOOTSTRAP_SERVERS: "broker:9092" + KSQL_HOST_NAME: ksqldb-server + KSQL_LISTENERS: "http://0.0.0.0:8088" + KSQL_CACHE_MAX_BYTES_BUFFERING: 0 + KSQL_KSQL_STREAMS_AUTO_OFFSET_RESET: "earliest" + + ksqldb-cli: + image: confluentinc/ksqldb-cli:0.24.0 + container_name: ksqldb-cli + depends_on: + - broker + - ksqldb-server + entrypoint: /bin/sh + environment: + KSQL_CONFIG_DIR: "/etc/ksqldb" + tty: true diff --git a/_includes/tutorials/survey-responses/ksql-test/code/start-cli.sh b/_includes/tutorials/survey-responses/ksql-test/code/start-cli.sh new file mode 100755 index 0000000000..b2e4c39746 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/start-cli.sh @@ -0,0 +1 @@ +docker exec -it ksqldb-cli ksql http://ksqldb-server:8088 diff --git a/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/expected-outputs/survey_analysis.log b/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/expected-outputs/survey_analysis.log new file mode 100644 index 0000000000..ab68b5c5fb --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/expected-outputs/survey_analysis.log @@ -0,0 +1,6 @@ ++-----------------------------------------------------------------------+---------------------------------------------------------------------------+ +|SURVEY_QUESTION |RESULTS | 
++-----------------------------------------------------------------------+---------------------------------------------------------------------------+ +|What is your Favorite Thanksgiving Dish? |{Cranberry Jelly=1, Turkey=3, Green Beans=1, Stuffing=3} | +|What is your Favorite Thanksgiving Pie? |{Pumpkin Pie=3, Pecan Pie=1, Apple Pie=4} | +Query terminated diff --git a/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/validate.sql b/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/validate.sql new file mode 100644 index 0000000000..27b12d072c --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/tutorial-steps/test/validate.sql @@ -0,0 +1 @@ +SELECT * FROM SURVEY_RESULTS_LIVE; \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql-test/code/wait-for-containers.sh b/_includes/tutorials/survey-responses/ksql-test/code/wait-for-containers.sh new file mode 100755 index 0000000000..942a8ebe35 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql-test/code/wait-for-containers.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +# Wait for ksqlDB to become available +while : + do curl_status=$(curl -s -o /dev/null -w %{http_code} http://localhost:8088/info) + echo -e $(date) " Component: ksqlDB \t\t\tHTTP state: " $curl_status "\t(waiting for 200)" + if [ $curl_status -eq 200 ] + then + echo "✅ ksqlDB is ready" + # Attempt to avoid flaky test failures + sleep 1 + break + fi + sleep 5 +done diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/enrich.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/enrich.sql new file mode 100644 index 0000000000..a324fecbd3 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/enrich.sql @@ -0,0 +1,18 @@ +-- Enrich Survey Responses with Respondent Data +CREATE STREAM SURVEY_RESPONSES_ENRICHED WITH ( + KAFKA_TOPIC = 'survey-responses-enriched', + VALUE_FORMAT = 'JSON', + 
KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +) AS +SELECT + RESPONSES.RESPONDENT_ID AS RESPONDENT_ID, + RESPONSES.SURVEY_ID, + RESPONSES.SURVEY_QUESTION, + RESPONSES.SURVEY_RESPONSE, + RESPONDENTS.NAME, + RESPONDENTS.TEAM +FROM SURVEY_RESPONSES RESPONSES +INNER JOIN SURVEY_RESPONDENTS_MASKED RESPONDENTS +ON RESPONSES.RESPONDENT_ID = RESPONDENTS.RESPONDENT_ID +EMIT CHANGES; \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/manual.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/manual.sql new file mode 100644 index 0000000000..ad9352aa31 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/manual.sql @@ -0,0 +1,27 @@ +-- Survey Respondents +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('159', 'Penelope Coin', 'DevX', 'pennycoin@email.com', '183 Maple Drive'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ( '93', 'Theodore Bear', 'Marketing', 'teddyb@email.com', '68 El Camino Real'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('184', 'Jack Pepper', 'DevX', 'pepper.jack@email.com', '8299 Skyline Drive'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ( '15', 'John Deer', 'Engineering', 'jdeer15@email.com', '928 Maple Street'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('282', 'Jane Doe', 'Engineering', 'jane.doe@email.com', '110 Rocky Road'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('739', 'Monte Wisoky', 'Engineering', 'wisoky.m@email.com', '1 First Street'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('250', 'Tessie Cremin', 'DevX', 'tcremin@email.com', '8 B Street Apt A'); +INSERT INTO SURVEY_RESPONDENTS (RESPONDENT_ID, NAME, TEAM, EMAIL, ADDRESS) VALUES ('301', 'Wilfrid Howe', 'Marketing', 
'whowe301@email.com', '617 Hamilton Road'); + +-- Survey Responses +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '184', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '159', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '93', 'What is your Favorite Thanksgiving Pie?', 'Pecan Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '301', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '15', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '282', 'What is your Favorite Thanksgiving Pie?', 'Pumpkin Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '250', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('0', '739', 'What is your Favorite Thanksgiving Pie?', 'Apple Pie'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '15', 'What is your Favorite Thanksgiving Dish?', 'Turkey'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '282', 'What is your Favorite Thanksgiving Dish?', 'Stuffing'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '250', 'What is your Favorite Thanksgiving Dish?', 'Turkey'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, 
SURVEY_RESPONSE) VALUES ('1', '739', 'What is your Favorite Thanksgiving Dish?', 'Stuffing'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '159', 'What is your Favorite Thanksgiving Dish?', 'Cranberry Jelly'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '184', 'What is your Favorite Thanksgiving Dish?', 'Turkey'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '93', 'What is your Favorite Thanksgiving Dish?', 'Stuffing'); +INSERT INTO SURVEY_RESPONSES (SURVEY_ID, RESPONDENT_ID, SURVEY_QUESTION, SURVEY_RESPONSE) VALUES ('1', '301', 'What is your Favorite Thanksgiving Dish?', 'Green Beans'); diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/mask.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/mask.sql new file mode 100644 index 0000000000..d6bf3648a9 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/mask.sql @@ -0,0 +1,15 @@ +-- Mask Respondent Data +CREATE TABLE SURVEY_RESPONDENTS_MASKED WITH ( + KAFKA_TOPIC = 'survey-respondents-masked', + VALUE_FORMAT = 'JSON', + KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +) AS +SELECT + RESPONDENT_ID, + NAME, + TEAM, + MASK(EMAIL) AS EMAIL, + MASK(ADDRESS) AS ADDRESS +FROM SURVEY_RESPONDENTS +EMIT CHANGES; \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/process.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/process.sql new file mode 100644 index 0000000000..6caad692e4 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/process.sql @@ -0,0 +1,28 @@ +-- Fetch Results Live +CREATE TABLE SURVEY_RESULTS_LIVE WITH ( + KAFKA_TOPIC = 'survey-results-live', + VALUE_FORMAT = 'JSON', + KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +) AS +SELECT + SURVEY_QUESTION, + 
HISTOGRAM(SURVEY_RESPONSE) AS RESULTS +FROM SURVEY_RESPONSES_ENRICHED +GROUP BY SURVEY_QUESTION +EMIT CHANGES; + +-- Fetch Results Windowed Per Team +CREATE TABLE SURVEY_RESULTS_WINDOWED WITH ( + KAFKA_TOPIC = 'survey-results-windowed', + VALUE_FORMAT = 'JSON', + KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +) AS +SELECT + SURVEY_QUESTION, + HISTOGRAM(TEAM) AS TEAMS +FROM SURVEY_RESPONSES_ENRICHED +WINDOW TUMBLING (SIZE 24 HOURS) +GROUP BY SURVEY_QUESTION +EMIT FINAL; \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.json b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.json new file mode 100644 index 0000000000..5a06880d15 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.json @@ -0,0 +1,29 @@ +{ + "connector.class" : "ServiceNowSource", + "name" : "recipe-servicenow-survey-respondents", + "kafka.auth.mode" : "KAFKA_API_KEY", + "kafka.api.key" : "", + "kafka.api.secret" : "", + "kafka.topic" : "survey-respondents", + "output.data.format" : "JSON", + "servicenow.url" : "my.servicenow.instance", + "servicenow.table" : "respondents", + "servicenow.user" : "servicenowuser", + "servicenow.password": "********", + "tasks.max" : "1" +} + +{ + "connector.class" : "ServiceNowSource", + "name" : "recipe-servicenow-survey-responses", + "kafka.auth.mode" : "KAFKA_API_KEY", + "kafka.api.key" : "", + "kafka.api.secret" : "", + "kafka.topic" : "survey-responses", + "output.data.format" : "JSON", + "servicenow.url" : "my.servicenow.instance", + "servicenow.table" : "surveys-responses", + "servicenow.user" : "servicenowuser", + "servicenow.password": "********", + "tasks.max" : "1" +} \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.sql new file mode 100644 index 0000000000..734c0ee8c3 --- /dev/null +++ 
b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.sql @@ -0,0 +1,27 @@ +CREATE SOURCE CONNECTOR IF NOT EXISTS recipe_servicenow_survey_analysis_respondents WITH ( + 'connector.class' = 'ServiceNowSource', + 'kafka.auth.mode' = 'KAFKA_API_KEY', + 'kafka.api.key' = '', + 'kafka.api.secret' = '', + 'kafka.topic' = 'survey-respondents', + 'output.data.format' = 'JSON', + 'servicenow.url' = 'my.servicenow.instance', + 'servicenow.table' = 'respondents', + 'servicenow.user' = 'servicenowuser', + 'servicenow.password'= '********', + 'tasks.max' = '1' +); + +CREATE SOURCE CONNECTOR IF NOT EXISTS recipe_servicenow_survey_analysis_responses WITH ( + 'connector.class' = 'ServiceNowSource', + 'kafka.auth.mode' = 'KAFKA_API_KEY', + 'kafka.api.key' = '', + 'kafka.api.secret' = '', + 'kafka.topic' = 'survey-responses', + 'output.data.format' = 'JSON', + 'servicenow.url' = 'my.servicenow.instance', + 'servicenow.table' = 'surveys-responses', + 'servicenow.user' = 'servicenowuser', + 'servicenow.password'= '********', + 'tasks.max' = '1' +); diff --git a/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/table_stream.sql b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/table_stream.sql new file mode 100644 index 0000000000..8d173a0375 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/code/tutorial-steps/dev/table_stream.sql @@ -0,0 +1,28 @@ +SET 'auto.offset.reset' = 'earliest'; + +-- Create Table of Survey Respondents +CREATE TABLE SURVEY_RESPONDENTS ( + RESPONDENT_ID VARCHAR PRIMARY KEY, + NAME VARCHAR, + TEAM VARCHAR, + EMAIL VARCHAR, + ADDRESS VARCHAR +) WITH ( + KAFKA_TOPIC = 'survey-respondents', + VALUE_FORMAT = 'JSON', + KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +); + +-- Create Survey Responses Stream +CREATE STREAM SURVEY_RESPONSES ( + SURVEY_ID VARCHAR KEY, + RESPONDENT_ID VARCHAR, + SURVEY_QUESTION VARCHAR, + SURVEY_RESPONSE VARCHAR +) WITH ( + KAFKA_TOPIC = 'survey-responses', + VALUE_FORMAT = 'JSON', 
+ KEY_FORMAT = 'KAFKA', + PARTITIONS = 6 +); \ No newline at end of file diff --git a/_includes/tutorials/survey-responses/ksql/markup/dev/answer-short.adoc b/_includes/tutorials/survey-responses/ksql/markup/dev/answer-short.adoc new file mode 100644 index 0000000000..53198c9473 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/markup/dev/answer-short.adoc @@ -0,0 +1 @@ +Run this tutorial in link:https://www.confluent.io/confluent-cloud/tryfree[Confluent Cloud] diff --git a/_includes/tutorials/survey-responses/ksql/markup/dev/ksqlDB.adoc b/_includes/tutorials/survey-responses/ksql/markup/dev/ksqlDB.adoc new file mode 100644 index 0000000000..f16cf8e053 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/markup/dev/ksqlDB.adoc @@ -0,0 +1,33 @@ +For this tutorial, we'll be using a respondent dataset to reflect the userbase that's taking the surveys as well as a stream of survey responses where respondents answer single-question surveys. Kafka Connect is a great tool that can easily stream in datasets from external sources. If we suppose that we're issuing an internal survey, both the respondent and response datasets might exist in a ServiceNow table; use the following template as a guide for setting up your connectors to extract this information and move it to Kafka. +++++ +

+{% include_raw shared/code/ccloud/comment-connector.sql %}
+{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/source.sql %}
+
+++++ +To begin, we first choose to represent our input datasets as either a `TABLE` or a `STREAM`. A `STREAM` represents unbounded, ongoing events while a `TABLE` shows the latest value for a given key. Survey responses are something that may continue to flow into our system--every survey response is valuable in and of itself, and we should care about each one when we do our analysis. Respondent data is something that could change over time. For example, a user could change their user profile which could trigger a new event on the Kafka topic. Because of this, we'll use a `TABLE` for the respondents and a `STREAM` for the survey responses. +++++ +

+{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/table_stream.sql %}
+
+++++ +As a quick aside: one of the benefits of building a stream processing application is that you can make use of intermediate data streams for different downstream applications. For example, we may want to mask some data from our `SURVEY-RESPONDENTS` stream. Every time a new event flows into the `SURVEY-RESPONDENTS` stream, we can mask the appropriate fields and send the masked data downstream for other applications to tap into. +++++ +

+{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/mask.sql %}
+
+++++ +With our inputs in ksqlDB, we can start to build up a real-time stream processing application. First, we enrich the survey result set with the respondent data--this is done through an inner join. By doing so, we can leverage more of the respondent data in our analysis. +++++ +

+{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/enrich.sql %}
+
+++++ +This application processes survey responses as they're captured in real time. It aggregates survey responses from a given question and outputs the latest results. We've provided a number of queries to be used for analysis. Since we have access to the respondent data, we can get an idea of the distribution of respondents across teams. Or we can focus on the survey results themselves. + +Analyzing the events in real time--as opposed to batch--gives the flexibility to see outcomes as they occur or in a windowed fashion depending on the consuming application. A second query has been provided to show how to window over this output result set and only see the final count of survey results after a given window has closed. +++++ +

+{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/process.sql %}
+
+++++ diff --git a/_includes/tutorials/survey-responses/ksql/markup/dev/manual.adoc b/_includes/tutorials/survey-responses/ksql/markup/dev/manual.adoc new file mode 100644 index 0000000000..9dc8f90c01 --- /dev/null +++ b/_includes/tutorials/survey-responses/ksql/markup/dev/manual.adoc @@ -0,0 +1,15 @@ +++++ +
{% include_raw tutorials/survey-responses/ksql/code/tutorial-steps/dev/manual.sql %}
+++++ + +To validate that this recipe is working, run the following query: + +++++ +
{% include_raw tutorials/survey-responses/ksql-test/code/tutorial-steps/test/validate.sql %}
+++++ + +Your output should resemble: + +++++ +
{% include_raw tutorials/survey-responses/ksql-test/code/tutorial-steps/test/expected-outputs/survey_analysis.log %}
+++++ diff --git a/tutorials/survey-responses/ksql.html b/tutorials/survey-responses/ksql.html new file mode 100644 index 0000000000..45a57f6a7a --- /dev/null +++ b/tutorials/survey-responses/ksql.html @@ -0,0 +1,6 @@ +--- +layout: recipe +permalink: /survey-responses/ksql +stack: ksql +static_data: survey-responses +--- diff --git a/use-cases.html b/use-cases.html index b1fd0ccf49..8b370ca11b 100644 --- a/use-cases.html +++ b/use-cases.html @@ -115,6 +115,7 @@

From Idea to Proof-of-concept: Learn stream processing the
  • Enrich orders with change data capture (CDC)
  • Build a dynamic pricing strategy
  • Assess marketing promotional campaign efficacy
  • +
  • Capture and analyze survey responses