From b861fe9dc57b24d0c45439edf6af8b4706be387d Mon Sep 17 00:00:00 2001
From: Flook Peter
Date: Fri, 16 Aug 2024 14:49:44 +0800
Subject: [PATCH 1/2] Add support for datahub and confluent schema registry

---
 README.md                                     |  93 +++++----
 data/confluent-schema-registry/env/docker.env |  11 +
 data/datahub-actions/env/docker.env           |  38 ++++
 data/datahub-frontend/env/docker.env          |  71 +++++++
 data/datahub-gms/env/docker.env               |  55 +++++
 .../env/docker-without-neo4j.env              |  55 +++++
 data/mysql/data/datahub.sql                   |  43 ++++
 docker-compose.yaml                           | 197 +++++++++++++-----
 8 files changed, 469 insertions(+), 94 deletions(-)
 create mode 100644 data/confluent-schema-registry/env/docker.env
 create mode 100644 data/datahub-actions/env/docker.env
 create mode 100644 data/datahub-frontend/env/docker.env
 create mode 100644 data/datahub-gms/env/docker.env
 create mode 100644 data/datahub-upgrade/env/docker-without-neo4j.env
 create mode 100644 data/mysql/data/datahub.sql

diff --git a/README.md b/README.md
index 98e724c..c06c84f 100644
--- a/README.md
+++ b/README.md
@@ -120,50 +120,51 @@ POSTGRES_USER=my-user POSTGRES_PASSWORD=my-password ./run.sh postgres
 
 ## Services
 
-| Service Type                | Service       | Supported |
-|-----------------------------|---------------|-----------|
-| Change Data Capture         | debezium      | ✅         |
-| Database                    | cassandra     | ✅         |
-| Database                    | cockroachdb   | ✅         |
-| Database                    | elasticsearch | ✅         |
-| Database                    | mariadb       | ✅         |
-| Database                    | mongodb       | ✅         |
-| Database                    | mssql         | ✅         |
-| Database                    | mysql         | ✅         |
-| Database                    | neo4j         | ✅         |
-| Database                    | opensearch    | ✅         |
-| Database                    | postgres      | ✅         |
-| Database                    | spanner       | ✅         |
-| Database                    | sqlite        | ✅         |
-| Data Catalog                | amundsen      | ✅         |
-| Data Catalog                | marquez       | ✅         |
-| Data Catalog                | polaris       | ✅         |
-| Data Catalog                | unitycatalog  | ✅         |
-| Data Catalog                | datahub       | ❌         |
-| Data Catalog                | openmetadata  | ❌         |
-| Distributed Coordination    | zookeeper     | ✅         |
-| Distributed Data Processing | flink         | ✅         |
-| HTTP                        | httpbin       | ✅         |
-| Identity Management         | keycloak      | ✅         |
-| Job Orchestrator            | airflow       | ✅         |
-| Job Orchestrator            | dagster       | ✅         |
-| Job Orchestrator            | mage-ai       | ✅         |
-| Job Orchestrator            | prefect       | ✅         |
-| Messaging                   | activemq      | ✅         |
-| Messaging                   | kafka         | ✅         |
-| Messaging                   | rabbitmq      | ✅         |
-| Messaging                   | solace        | ✅         |
-| Notebook                    | jupyter       | ✅         |
-| Object Storage              | minio         | ✅         |
-| Query Engine                | duckdb        | ✅         |
-| Query Engine                | flight-sql    | ✅         |
-| Query Engine                | presto        | ✅         |
-| Query Engine                | trino         | ✅         |
-| Real-time OLAP              | clickhouse    | ✅         |
-| Real-time OLAP              | doris         | ✅         |
-| Real-time OLAP              | druid         | ✅         |
-| Real-time OLAP              | pinot         | ✅         |
-| Test Data Management        | data-caterer  | ✅         |
-| Workflow                    | maestro       | ✅         |
-| Workflow                    | temporal      | ✅         |
+| Service Type                | Service                   | Supported |
+|-----------------------------|---------------------------|-----------|
+| Change Data Capture         | debezium                  | ✅         |
+| Database                    | cassandra                 | ✅         |
+| Database                    | cockroachdb               | ✅         |
+| Database                    | elasticsearch             | ✅         |
+| Database                    | mariadb                   | ✅         |
+| Database                    | mongodb                   | ✅         |
+| Database                    | mssql                     | ✅         |
+| Database                    | mysql                     | ✅         |
+| Database                    | neo4j                     | ✅         |
+| Database                    | opensearch                | ✅         |
+| Database                    | postgres                  | ✅         |
+| Database                    | spanner                   | ✅         |
+| Database                    | sqlite                    | ✅         |
+| Data Catalog                | amundsen                  | ✅         |
+| Data Catalog                | datahub                   | ✅         |
+| Data Catalog                | marquez                   | ✅         |
+| Data Catalog                | polaris                   | ✅         |
+| Data Catalog                | unitycatalog              | ✅         |
+| Data Catalog                | openmetadata              | ❌         |
+| Distributed Coordination    | zookeeper                 | ✅         |
+| Distributed Data Processing | flink                     | ✅         |
+| HTTP                        | httpbin                   | ✅         |
+| Identity Management         | keycloak                  | ✅         |
+| Job Orchestrator            | airflow                   | ✅         |
+| Job Orchestrator            | dagster                   | ✅         |
+| Job Orchestrator            | mage-ai                   | ✅         |
+| Job Orchestrator            | prefect                   | ✅         |
+| Messaging                   | activemq                  | ✅         |
+| Messaging                   | kafka                     | ✅         |
+| Messaging                   | rabbitmq                  | ✅         |
+| Messaging                   | solace                    | ✅         |
+| Notebook                    | jupyter                   | ✅         |
+| Object Storage              | minio                     | ✅         |
+| Query Engine                | duckdb                    | ✅         |
+| Query Engine                | flight-sql                | ✅         |
+| Query Engine                | presto                    | ✅         |
+| Query Engine                | trino                     | ✅         |
+| Real-time OLAP              | clickhouse                | ✅         |
+| Real-time OLAP              | doris                     | ✅         |
+| Real-time OLAP              | druid                     | ✅         |
+| Real-time OLAP              | pinot                     | ✅         |
+| Schema Registry             | confluent-schema-registry | ✅         |
+| Test Data Management        | data-caterer              | ✅         |
+| Workflow                    | maestro                   | ✅         |
+| Workflow                    | temporal                  | ✅         |
diff --git a/data/confluent-schema-registry/env/docker.env b/data/confluent-schema-registry/env/docker.env
new file mode 100644
index 0000000..42ca4ba
--- /dev/null
+++ b/data/confluent-schema-registry/env/docker.env
@@ -0,0 +1,11 @@
+SCHEMA_REGISTRY_HOST_NAME=schema-registry
+SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
+SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=kafka:29092
+
+# Uncomment to customize the Schema Registry JVM, JMX, or Kafka store settings
+# ZOOKEEPER_SASL_ENABLED=false
+# KAFKA_OPTS=-Xms1g -Xmx1g
+# SCHEMA_REGISTRY_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false
+
+# Uncomment to use schema registry < v5.4.0
+# SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
\ No newline at end of file
diff --git a/data/datahub-actions/env/docker.env b/data/datahub-actions/env/docker.env
new file mode 100644
index 0000000..dcef723
--- /dev/null
+++ b/data/datahub-actions/env/docker.env
@@ -0,0 +1,39 @@
+DATAHUB_GMS_PROTOCOL=http
+DATAHUB_GMS_HOST=datahub-gms
+DATAHUB_GMS_PORT=8080
+
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+SCHEMA_REGISTRY_URL=http://schema-registry:8081
+# SCHEMA_REGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
+
+# System Auth --
+DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
+DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
+
+# Kafka Authentication
+KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
+
+# Uncomment the following if your Kafka deployment requires SSL.
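+# NOTE: the keystore and truststore files referenced below are assumed to be mounted into the container at /mnt/certs.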
+# KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION=/mnt/certs/keystore
+# KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION=/mnt/certs/truststore
+# KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD=keystore_password
+# KAFKA_PROPERTIES_SSL_KEY_PASSWORD=keystore_password
+# KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD=truststore_password
+
+# The following env vars are meant to be passed through from the host system
+# to configure the Slack and Teams Actions
+# _ENABLED flags must be set to "true" (case-sensitive) for the action to be enabled
+DATAHUB_ACTIONS_SLACK_ENABLED
+DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL
+DATAHUB_ACTIONS_SLACK_BOT_TOKEN
+DATAHUB_ACTIONS_SLACK_SIGNING_SECRET
+DATAHUB_ACTIONS_SLACK_CHANNEL
+DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY
+
+DATAHUB_ACTIONS_TEAMS_ENABLED
+DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL
+DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL
+DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY
\ No newline at end of file
diff --git a/data/datahub-frontend/env/docker.env b/data/datahub-frontend/env/docker.env
new file mode 100644
index 0000000..055dda9
--- /dev/null
+++ b/data/datahub-frontend/env/docker.env
@@ -0,0 +1,72 @@
+DATAHUB_GMS_HOST=datahub-gms
+DATAHUB_GMS_PORT=8080
+DATAHUB_SECRET=YouKnowNothing
+DATAHUB_APP_VERSION=1.0
+DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
+JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
+
+# Uncomment and set these to support an SSL connection to GMS
+# NOTE: Currently GMS itself does not offer SSL support; these settings are intended for when there is a proxy in front
+# of GMS that handles SSL, such as an AWS Elastic Load Balancer.
+#DATAHUB_GMS_USE_SSL=true
+#DATAHUB_GMS_SSL_PROTOCOL=
+
+# Uncomment and set custom SSL truststore settings
+# SSL_TRUSTSTORE_FILE=datahub-frontend/conf/truststore.jks
+# SSL_TRUSTSTORE_TYPE=jks
+# SSL_TRUSTSTORE_PASSWORD=MyTruststorePassword
+
+# Uncomment to enable Metadata Service Authentication
+# METADATA_SERVICE_AUTH_ENABLED=true
+
+# Uncomment & populate these configs to enable OIDC SSO in the React application.
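+# NOTE: the sample values below use Google as the identity provider; substitute your own IdP's client ID, secret, and discovery URI.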
+# Required OIDC configs
+# AUTH_OIDC_ENABLED=true
+# AUTH_OIDC_CLIENT_ID=1030786188615-rr9ics9gl8n4acngj9opqbf2mruflqpr.apps.googleusercontent.com
+# AUTH_OIDC_CLIENT_SECRET=acEdaGcnfd7KxvsXRFDD7FNF
+# AUTH_OIDC_DISCOVERY_URI=https://accounts.google.com/.well-known/openid-configuration
+# AUTH_OIDC_BASE_URL=http://localhost:9001
+# Optional OIDC configs
+# AUTH_OIDC_USER_NAME_CLAIM=email
+# AUTH_OIDC_USER_NAME_CLAIM_REGEX=([^@]+)
+# AUTH_OIDC_SCOPE=
+# Optional Provisioning Configs
+# AUTH_OIDC_JIT_PROVISIONING_ENABLED=true
+# AUTH_OIDC_PRE_PROVISIONING_REQUIRED=false
+# AUTH_OIDC_EXTRACT_GROUPS_ENABLED=false
+# AUTH_OIDC_GROUPS_CLAIM=groups
+
+# Uncomment to disable JAAS username / password authentication (enabled by default)
+# AUTH_JAAS_ENABLED=false
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Required Kafka Producer Configs
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
+
+# Required Elastic Client Configuration (Analytics)
+ELASTIC_CLIENT_HOST=elasticsearch
+ELASTIC_CLIENT_PORT=9200
+
+# Optional Elastic Client Configurations
+# ELASTIC_CLIENT_THREAD_COUNT=2
+# ELASTIC_CLIENT_CONNECTION_REQUEST_TIMEOUT=50
+
+# To support SSL connections to Elastic, uncomment and set the following
+# ELASTIC_CLIENT_USE_SSL=true
+# ELASTIC_CLIENT_SSL_PROTOCOL=TLSv1.2
+# ELASTIC_CLIENT_SSL_SECURE_RANDOM_IMPLEMENTATION=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_FILE=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_TYPE=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_PASSWORD=
+# ELASTIC_CLIENT_SSL_KEY_STORE_FILE=
+# ELASTIC_CLIENT_SSL_KEY_STORE_TYPE=
+# ELASTIC_CLIENT_SSL_KEY_STORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS
+# set ELASTIC_CLIENT_USE_SSL=true and uncomment:
+# ELASTIC_CLIENT_USERNAME=
+# ELASTIC_CLIENT_PASSWORD=
\ No newline at end of file
diff --git a/data/datahub-gms/env/docker.env b/data/datahub-gms/env/docker.env
new file mode 100644
index 0000000..f6af280
--- /dev/null
+++ b/data/datahub-gms/env/docker.env
@@ -0,0 +1,55 @@
+DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
+EBEAN_DATASOURCE_USERNAME=datahub
+EBEAN_DATASOURCE_PASSWORD=datahub
+EBEAN_DATASOURCE_HOST=mysql:3306
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
+EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+# KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_PORT=9200
+ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+GRAPH_SERVICE_IMPL=elasticsearch
+JAVA_OPTS=-Xms1g -Xmx1g
+ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
+
+MAE_CONSUMER_ENABLED=true
+MCE_CONSUMER_ENABLED=true
+PE_CONSUMER_ENABLED=true
+UI_INGESTION_ENABLED=true
+ENTITY_SERVICE_ENABLE_RETENTION=true
+
+ELASTIC_ID_HASH_ALGO=MD5
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Uncomment to configure kafka topic names
+# Make sure these names are consistent across the whole deployment
+# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
+# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
+
+# Uncomment and set these to support an SSL connection to Elasticsearch
+# ELASTICSEARCH_USE_SSL=true
+# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
+# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
+# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
+# ELASTICSEARCH_SSL_KEYSTORE_FILE=
+# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
+# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS
+# set ELASTICSEARCH_USE_SSL=true and uncomment:
+# ELASTICSEARCH_USERNAME=
+# ELASTICSEARCH_PASSWORD=
+
+# Uncomment to run a one-time upgrade to migrate legacy default browse path format to latest format
+# More details can be found at https://datahubproject.io/docs/advanced/browse-paths-upgrade
+# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true
\ No newline at end of file
diff --git a/data/datahub-upgrade/env/docker-without-neo4j.env b/data/datahub-upgrade/env/docker-without-neo4j.env
new file mode 100644
index 0000000..f6af280
--- /dev/null
+++ b/data/datahub-upgrade/env/docker-without-neo4j.env
@@ -0,0 +1,55 @@
+DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
+EBEAN_DATASOURCE_USERNAME=datahub
+EBEAN_DATASOURCE_PASSWORD=datahub
+EBEAN_DATASOURCE_HOST=mysql:3306
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
+EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+# KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_PORT=9200
+ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+GRAPH_SERVICE_IMPL=elasticsearch
+JAVA_OPTS=-Xms1g -Xmx1g
+ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
+
+MAE_CONSUMER_ENABLED=true
+MCE_CONSUMER_ENABLED=true
+PE_CONSUMER_ENABLED=true
+UI_INGESTION_ENABLED=true
+ENTITY_SERVICE_ENABLE_RETENTION=true
+
+ELASTIC_ID_HASH_ALGO=MD5
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Uncomment to configure kafka topic names
+# Make sure these names are consistent across the whole deployment
+# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
+# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
+
+# Uncomment and set these to support an SSL connection to Elasticsearch
+# ELASTICSEARCH_USE_SSL=true
+# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
+# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
+# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
+# ELASTICSEARCH_SSL_KEYSTORE_FILE=
+# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
+# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS
+# set ELASTICSEARCH_USE_SSL=true and uncomment:
+# ELASTICSEARCH_USERNAME=
+# ELASTICSEARCH_PASSWORD=
+
+# Uncomment to run a one-time upgrade to migrate legacy default browse path format to latest format
+# More details can be found at https://datahubproject.io/docs/advanced/browse-paths-upgrade
+# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true
\ No newline at end of file
diff --git a/data/mysql/data/datahub.sql b/data/mysql/data/datahub.sql
new file mode 100644
index 0000000..a11217b
--- /dev/null
+++ b/data/mysql/data/datahub.sql
@@ -0,0 +1,43 @@
+-- create datahub database
+CREATE DATABASE IF NOT EXISTS `datahub` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+USE `datahub`;
+
+-- create metadata aspect table
+create table if not exists metadata_aspect_v2
+(
+    urn varchar(500) not null,
+    aspect varchar(200) not null,
+    version bigint(20) not null,
+    metadata longtext not null,
+    systemmetadata longtext,
+    createdon datetime(6) not null,
+    createdby varchar(255) not null,
+    createdfor varchar(255),
+    constraint pk_metadata_aspect_v2 primary key (urn, aspect, version),
+    INDEX timeIndex (createdon)
+) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+
+-- create default records for datahub user if not exists
+DROP TABLE if exists temp_metadata_aspect_v2;
+CREATE TABLE temp_metadata_aspect_v2 LIKE metadata_aspect_v2;
+INSERT INTO temp_metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby)
+VALUES ('urn:li:corpuser:datahub',
+        'corpUserInfo',
+        0,
+        '{"displayName":"Data Hub","active":true,"fullName":"Data Hub","email":"datahub@linkedin.com"}',
+        now(),
+        'urn:li:corpuser:__datahub_system'),
+       ('urn:li:corpuser:datahub',
+        'corpUserEditableInfo',
+        0,
+        '{"skills":[],"teams":[],"pictureLink":"https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png"}',
+        now(),
+        'urn:li:corpuser:__datahub_system');
+-- only add default records if metadata_aspect is empty
+INSERT INTO metadata_aspect_v2
+SELECT *
+FROM temp_metadata_aspect_v2
+WHERE NOT EXISTS (SELECT * from metadata_aspect_v2);
+DROP TABLE temp_metadata_aspect_v2;
+
+DROP TABLE IF EXISTS metadata_index;
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c8e6944..36aa219 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -7,7 +7,7 @@ services:
     healthcheck:
       interval: 15s
       retries: 3
-      test: [CMD-SHELL, "curl -k -f http://localhost:8161/admin"]
+      test: [ CMD-SHELL, "curl -k -f http://localhost:8161/admin" ]
       timeout: 5s
     image: "apache/activemq-artemis:${ACTIVEMQ_VERSION:-2.34.0}"
     ports:
@@ -30,7 +30,7 @@
       interval: 30s
      retries: 5
       start_period: 30s
-      test: [CMD, curl, --fail, "http://localhost:8080/health"]
+      test: [ CMD, curl, --fail, "http://localhost:8080/health" ]
       timeout: 10s
     image: "apache/airflow:${AIRFLOW_VERSION:-2.9.2}"
     ports:
@@ -146,7 +146,7 @@ services:
     ports:
       - "5001:5000"
   cassandra:
-    command: [-c, /tmp/scripts/init.sh]
+    command: [ -c, /tmp/scripts/init.sh ]
     container_name: cassandra-data
     depends_on:
       cassandra-server:
@@ -167,7 +167,7 @@
     healthcheck:
       interval: 30s
       retries: 3
-      test: [CMD-SHELL, "[ $$(nodetool statusgossip) = running ]"]
+      test: [ CMD-SHELL, "[ $$(nodetool statusgossip) = running ]" ]
       timeout: 10s
     image: "datacatering/dse-server:6.8.48"
     ports:
@@ -175,7 +175,7 @@
     ulimits:
       memlock: -1
   clickhouse:
-    command: [/bin/bash, -c, /tmp/scripts/init.sh]
+    command: [ /bin/bash, -c, /tmp/scripts/init.sh ]
     container_name: clickhouse-data
     depends_on:
       clickhouse-server:
@@ -203,7 +203,7 @@
       - "9000:9000"
     user: "101:101"
   cockroachdb:
-    command: [bash, -c, /tmp/scripts/init.sh]
+    command: [ bash, -c, /tmp/scripts/init.sh ]
     container_name: cockroachdb-data
     depends_on:
       cockroachdb-server:
@@ -213,23 +213,38 @@
      - "./data/cockroachdb/init.sh:/tmp/scripts/init.sh"
       - "${COCKROACHDB_DATA:-./data/cockroachdb/data}:/tmp/data"
   cockroachdb-server:
-    command: [start-single-node, --insecure]
+    command: [ start-single-node, --insecure ]
     container_name: cockroachdb
     healthcheck:
       interval: 10s
      retries: 5
-      test: [CMD-SHELL, "curl --fail http://localhost:8080/ || exit 1"]
+      test: [ CMD-SHELL, "curl --fail http://localhost:8080/ || exit 1" ]
       timeout: 5s
     image: "cockroachdb/cockroach:${COCKROACHDB_VERSION:-v24.1.0}"
     ports:
       - "26257:26257"
       - "8080:8080"
+  confluent-schema-registry:
+    container_name: schema-registry
+    hostname: schema-registry
+    image: confluentinc/cp-schema-registry:${CONFLUENT_SCHEMA_REGISTRY_VERSION:-7.4.0}
+    ports:
+      - "8081:8081"
+    env_file: data/confluent-schema-registry/env/docker.env
+    healthcheck:
+      test: "nc -z schema-registry 8081"
+      interval: 10s
+      retries: 5
+      timeout: 5s
+    depends_on:
+      kafka-server:
+        condition: service_healthy
   dagster:
     container_name: dagster
     depends_on:
       postgres:
         condition: service_completed_successfully
-    entrypoint: [dagster-webserver, -h, 0.0.0.0, -p, "3000", -w, /opt/dagster/app/workspace.yaml]
+    entrypoint: [ dagster-webserver, -h, 0.0.0.0, -p, "3000", -w, /opt/dagster/app/workspace.yaml ]
     environment:
       - DAGSTER_POSTGRES_HOST=postgres
       - "DAGSTER_POSTGRES_USER=${POSTGRES_USER:-postgres}"
@@ -241,6 +256,89 @@ services:
       - "3000:3000"
     volumes:
       - "./data/dagster:/opt/dagster/app/"
+  datahub:
+    container_name: datahub
+    hostname: datahub
+    image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head}
+    ports:
+      - "9002:9002"
+    environment:
+      - "ELASTIC_CLIENT_USERNAME=elastic"
+      - "ELASTIC_CLIENT_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
+    env_file: data/datahub-frontend/env/docker.env
+    depends_on:
+      datahub-gms:
+        condition: service_healthy
+  datahub-actions:
+    container_name: datahub-actions
+    hostname: actions
+    image: acryldata/datahub-actions:${DATAHUB_VERSION:-head}
+    env_file: data/datahub-actions/env/docker.env
+    environment:
+      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
+      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
+    depends_on:
+      datahub-gms:
+        condition: service_healthy
+  datahub-gms:
+    container_name: datahub-gms
+    hostname: datahub-gms
+    image: acryldata/datahub-gms:${DATAHUB_VERSION:-head}
+    environment:
+      - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
+      - "EBEAN_DATASOURCE_USERNAME=root"
+      - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}"
+      - "ELASTICSEARCH_USERNAME=elastic"
+      - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
+    env_file: data/datahub-gms/env/docker.env
+    ports:
+      - "8080:8080"
+    healthcheck:
+      test: "curl -sS --fail http://datahub-gms:8080/health"
+      start_period: 90s
+      interval: 1s
+      retries: 3
+      timeout: 5s
+    depends_on:
+      datahub-upgrade:
+        condition: service_completed_successfully
+  datahub-kafka-setup:
+    container_name: datahub-kafka-setup
+    depends_on:
+      kafka-server:
+        condition: service_healthy
+      confluent-schema-registry:
+        condition: service_healthy
+    entrypoint: [ /bin/sh, -c, /tmp/scripts/init.sh ]
+    environment:
+      - "KAFKA_TOPICS=${KAFKA_TOPICS:-MetadataAuditEvent_v4,MetadataChangeEvent_v4,FailedMetadataChangeEvent_v4,MetadataChangeLog_Versioned_v1,MetadataChangeLog_Timeseries_v1,MetadataChangeProposal_v1,FailedMetadataChangeProposal_v1,PlatformEvent_v1,DataHubUpgradeHistory_v1}"
+    image: "confluentinc/confluent-local:${KAFKA_VERSION:-7.6.1}"
+    volumes:
+      - "./data/kafka/init.sh:/tmp/scripts/init.sh"
+  datahub-upgrade:
+    container_name: datahub-upgrade
+    hostname: datahub-upgrade
+    image: acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}
+    command:
+      - -u
+      - SystemUpdate
+    environment:
+      - "EBEAN_DATASOURCE_USERNAME=root"
+      - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}"
+      - "ELASTICSEARCH_USERNAME=elastic"
"ELASTICSEARCH_USERNAME=elastic" + - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" + env_file: data/datahub-upgrade/env/docker-without-neo4j.env + labels: + datahub_setup_job: true + depends_on: + mysql: + condition: service_completed_successfully + elasticsearch: + condition: service_healthy + datahub-kafka-setup: + condition: service_completed_successfully + neo4j: + condition: service_healthy data-caterer: container_name: data-caterer depends_on: @@ -264,7 +362,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:8080"] + test: [ CMD, curl, --fail, "http://localhost:8080" ] timeout: 10s image: "debezium/debezium-ui:${DEBEZIUM_VERSION:-2.1.2.Final}" ports: @@ -286,7 +384,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:8083"] + test: [ CMD, curl, --fail, "http://localhost:8083" ] timeout: 10s image: "debezium/connect:${DEBEZIUM_CONNECT_VERSION:-2.6.2.Final}" ports: @@ -302,7 +400,7 @@ services: - "8040:8040" - "9030:9030" druid: - command: [router] + command: [ router ] container_name: druid depends_on: druid-broker: @@ -330,7 +428,7 @@ services: ports: - "8888:8888" druid-broker: - command: [broker] + command: [ broker ] container_name: druid-broker depends_on: druid-coordinator: @@ -352,7 +450,7 @@ services: ports: - "8082:8082" druid-coordinator: - command: [coordinator] + command: [ coordinator ] container_name: druid-coordinator depends_on: postgres: @@ -372,7 +470,7 @@ services: ports: - "8081:8081" druid-historical: - command: [historical] + command: [ historical ] container_name: druid-historical depends_on: druid-coordinator: @@ -394,7 +492,7 @@ services: ports: - "8083:8083" druid-middlemanager: - command: [middleManager] + command: [ middleManager ] container_name: druid-middlemanager depends_on: druid-coordinator: @@ -421,7 +519,7 @@ services: depends_on: postgres: condition: service_completed_successfully - entrypoint: [tail, -F, anything] + entrypoint: [ tail, -F, anything ] image: "datacatering/duckdb:${DUCKDB_VERSION:-v1.0.0}" volumes: - "./data/duckdb:/opt/data" @@ -432,6 +530,11 @@ services: - ES_JAVA_OPTS=-Xms512m -Xmx512m - "ELASTIC_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" - discovery.type=single-node + healthcheck: + test: "curl -sS --fail http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s" + interval: 10s + retries: 5 + timeout: 5s image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.14.1}" ports: - "9200:9200" @@ -440,7 +543,7 @@ services: volumes: - "./data/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml:ro,Z" flight-sql: - command: [tail, -f, /dev/null] + command: [ tail, -f, /dev/null ] container_name: flight-sql depends_on: - duckdb @@ -464,7 +567,7 @@ services: - 6121 - 6122 image: "flink:${FLINK_VERSION:-1.19.0-scala_2.12-java17}" - links: [] + links: [ ] flink-jobmanager: command: jobmanager container_name: flink-jobmanager @@ -484,7 +587,7 @@ services: ports: - "80:80" jupyter: - command: [jupyter, notebook, --no-browser, "--NotebookApp.token=''", "--NotebookApp.password=''"] + command: [ jupyter, notebook, --no-browser, "--NotebookApp.token=''", "--NotebookApp.password=''" ] container_name: jupyter image: "quay.io/jupyter/minimal-notebook:2024-07-02" ports: @@ -494,7 +597,7 @@ services: depends_on: kafka-server: condition: service_healthy - entrypoint: [/bin/sh, -c, /tmp/scripts/init.sh] + entrypoint: [ /bin/sh, -c, /tmp/scripts/init.sh ] 
environment: - "KAFKA_TOPICS=${KAFKA_TOPICS:-accounts,transactions}" image: "confluentinc/confluent-local:${KAFKA_VERSION:-7.6.1}" @@ -512,13 +615,13 @@ services: healthcheck: interval: 5s retries: 5 - test: [CMD-SHELL, /bin/sh, -c, kafka-topics, --bootstrap-server, "kafka:29092", --list] + test: [ CMD-SHELL, /bin/sh, -c, kafka-topics, --bootstrap-server, "kafka:29092", --list ] timeout: 5s image: "confluentinc/confluent-local:7.7.0" ports: - "9092:9092" keycloak: - command: [start-dev, --import-realm] + command: [ start-dev, --import-realm ] container_name: keycloak depends_on: postgres: @@ -579,7 +682,7 @@ services: ports: - "3001:3000" marquez-data: - command: [-c, /tmp/scripts/init.sh] + command: [ -c, /tmp/scripts/init.sh ] container_name: marquez-data depends_on: marquez-server: @@ -608,7 +711,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:5001/healthcheck"] + test: [ CMD, curl, --fail, "http://localhost:5001/healthcheck" ] timeout: 5s image: "marquezproject/marquez:${MARQUEZ_VERSION:-0.47.0}" ports: @@ -617,7 +720,7 @@ services: volumes: - "./data/marquez/conf:/opt/app" minio: - command: [server, /data, --console-address, ":9001"] + command: [ server, /data, --console-address, ":9001" ] container_name: minio environment: - "MINIO_ROOT_USER=${MINIO_USER:-minioadmin}" @@ -625,14 +728,14 @@ services: healthcheck: interval: 5s retries: 3 - test: [CMD, mc, ready, local] + test: [ CMD, mc, ready, local ] timeout: 5s image: "quay.io/minio/minio:${MINIO_VERSION:-RELEASE.2024-06-04T19-20-08Z}" ports: - "9000:9000" - "9001:9001" mongodb: - command: [/bin/sh, -c, /opt/app/my_data.sh] + command: [ /bin/sh, -c, /opt/app/my_data.sh ] container_name: mongodb-connect depends_on: - mongodb-server @@ -657,7 +760,7 @@ services: healthcheck: interval: 10s retries: 10 - test: [CMD-SHELL, mssql-health-check] + test: [ CMD-SHELL, mssql-health-check ] timeout: 10s image: "mcr.microsoft.com/mssql/server:${MSSQL_VERSION:-2022-latest}" ports: @@ -665,7 +768,7 @@ services: volumes: - "./data/mssql/mssql-health-check:/usr/local/bin/mssql-health-check" mysql: - command: [/bin/bash, -c, /tmp/scripts/init.sh] + command: [ /bin/bash, -c, /tmp/scripts/init.sh ] container_name: mysql-data depends_on: mysql-server: @@ -684,7 +787,7 @@ services: healthcheck: interval: 5s retries: 3 - test: [CMD, mysqladmin, ping, -h, localhost, -u, root, -p$$MYSQL_ROOT_PASSWORD] + test: [ CMD, mysqladmin, ping, -h, localhost, -u, root, -p$$MYSQL_ROOT_PASSWORD ] timeout: 5s image: "mysql:${MYSQL_VERSION:-8.4.0}" ports: @@ -693,12 +796,10 @@ services: container_name: neo4j environment: - NEO4J_AUTH=none - - "NEO4J_dbms_connector_http_advertised__address=localhost:7474" - - "NEO4J_dbms_connector_bolt_advertised__address=localhost:7687" healthcheck: interval: 30s retries: 5 - test: [CMD-SHELL, "cypher-shell -u neo4j -p test 'RETURN 1' || exit 1"] + test: [ CMD-SHELL, "cypher-shell -u neo4j -p test 'RETURN 1' || exit 1" ] timeout: 10s image: "neo4j:${NEO4J_VERSION:-5.20.0}" ports: @@ -712,7 +813,7 @@ services: healthcheck: interval: 10s retries: 5 - test: [CMD, curl, --fail, "https://localhost:9200", -ku, "admin:${OPENSEARCH_PASSWORD:-!BigData#1}"] + test: [ CMD, curl, --fail, "https://localhost:9200", -ku, "admin:${OPENSEARCH_PASSWORD:-!BigData#1}" ] timeout: 5s image: "opensearchproject/opensearch:${OPENSEARCH_VERSION:-2.15.0}" ports: @@ -729,7 +830,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:8098/health/readiness"] + test: [ CMD, 
curl, --fail, "http://localhost:8098/health/readiness" ] timeout: 5s image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}" ports: @@ -746,7 +847,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:8099/health"] + test: [ CMD, curl, --fail, "http://localhost:8099/health" ] timeout: 5s image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}" ports: @@ -763,7 +864,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD, curl, --fail, "http://localhost:9000/pinot-controller/admin"] + test: [ CMD, curl, --fail, "http://localhost:9000/pinot-controller/admin" ] timeout: 5s image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}" ports: @@ -774,14 +875,14 @@ services: healthcheck: interval: 10s retries: 5 - test: [CMD, curl, "http://localhost:8182/healthcheck"] + test: [ CMD, curl, "http://localhost:8182/healthcheck" ] timeout: 10s image: "datacatering/polaris:${POLARIS_VERSION:-1.0.0}" ports: - "8181:8181" - "8182:8182" postgres: - command: [/bin/bash, -c, /tmp/scripts/init.sh] + command: [ /bin/bash, -c, /tmp/scripts/init.sh ] container_name: postgres-data depends_on: postgres-server: @@ -802,7 +903,7 @@ services: healthcheck: interval: 10s retries: 3 - test: [CMD-SHELL, pg_isready] + test: [ CMD-SHELL, pg_isready ] timeout: 5s image: "postgres:${POSTGRES_VERSION:-16.3}" ports: @@ -811,7 +912,7 @@ services: container_name: prefect-data depends_on: - prefect-server - entrypoint: [/opt/prefect/app/start_flows.sh] + entrypoint: [ /opt/prefect/app/start_flows.sh ] environment: - "PREFECT_API_URL=http://host.docker.internal:4200/api" image: "prefecthq/prefect:${PREFECT_VERSION:-2.19.5-python3.11}" @@ -824,7 +925,7 @@ services: depends_on: postgres: condition: service_completed_successfully - entrypoint: [/opt/prefect/entrypoint.sh, prefect, server, start] + entrypoint: [ /opt/prefect/entrypoint.sh, prefect, server, start ] environment: - "PREFECT_UI_URL=http://127.0.0.1:4200/api" - "PREFECT_API_URL=http://127.0.0.1:4200/api" @@ -865,7 +966,7 @@ services: depends_on: solace-server: condition: service_healthy - entrypoint: [/bin/sh, -c, /opt/app/my_data.sh] + entrypoint: [ /bin/sh, -c, /opt/app/my_data.sh ] image: "solace/solace-pubsub-standard:${SOLACE_VERSION:-10.8}" volumes: - "./data/solace:/opt/app" @@ -882,7 +983,7 @@ services: healthcheck: interval: 30s retries: 3 - test: [CMD-SHELL, curl, --output, /dev/null, --silent, --head, --fail, "http://localhost:8080"] + test: [ CMD-SHELL, curl, --output, /dev/null, --silent, --head, --fail, "http://localhost:8080" ] timeout: 5s image: "solace/solace-pubsub-standard:${SOLACE_VERSION:-10.8}" ports: @@ -901,16 +1002,16 @@ services: - "9010:9010" - "9020:9020" sqlite: - command: [tail, -f, /dev/null] + command: [ tail, -f, /dev/null ] container_name: sqlite image: "keinos/sqlite3:3.46.0" volumes: - "./data/sqlite:/opt/data" temporal: - command: [server, start-dev, --db-filename, /opt/data/db/temporal.db, --ip, 0.0.0.0, --metrics-port, "9233"] + command: [ server, start-dev, --db-filename, /opt/data/db/temporal.db, --ip, 0.0.0.0, --metrics-port, "9233" ] container_name: temporal entrypoint: temporal - environment: [] + environment: [ ] expose: - 8233 - 7233 From 5f3e92c9984d827dd84982e749579cb20987f907 Mon Sep 17 00:00:00 2001 From: pflooky Date: Fri, 16 Aug 2024 06:50:30 +0000 Subject: [PATCH 2/2] Format docker compose YAML files --- docker-compose.yaml | 212 ++++++++++++++++++++++---------------------- 1 file changed, 106 insertions(+), 106 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 
36aa219..e157c24 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,7 +7,7 @@ services: healthcheck: interval: 15s retries: 3 - test: [ CMD-SHELL, "curl -k -f http://localhost:8161/admin" ] + test: [CMD-SHELL, "curl -k -f http://localhost:8161/admin"] timeout: 5s image: "apache/activemq-artemis:${ACTIVEMQ_VERSION:-2.34.0}" ports: @@ -30,7 +30,7 @@ services: interval: 30s retries: 5 start_period: 30s - test: [ CMD, curl, --fail, "http://localhost:8080/health" ] + test: [CMD, curl, --fail, "http://localhost:8080/health"] timeout: 10s image: "apache/airflow:${AIRFLOW_VERSION:-2.9.2}" ports: @@ -146,7 +146,7 @@ services: ports: - "5001:5000" cassandra: - command: [ -c, /tmp/scripts/init.sh ] + command: [-c, /tmp/scripts/init.sh] container_name: cassandra-data depends_on: cassandra-server: @@ -167,7 +167,7 @@ services: healthcheck: interval: 30s retries: 3 - test: [ CMD-SHELL, "[ $$(nodetool statusgossip) = running ]" ] + test: [CMD-SHELL, "[ $$(nodetool statusgossip) = running ]"] timeout: 10s image: "datacatering/dse-server:6.8.48" ports: @@ -175,7 +175,7 @@ services: ulimits: memlock: -1 clickhouse: - command: [ /bin/bash, -c, /tmp/scripts/init.sh ] + command: [/bin/bash, -c, /tmp/scripts/init.sh] container_name: clickhouse-data depends_on: clickhouse-server: @@ -203,7 +203,7 @@ services: - "9000:9000" user: "101:101" cockroachdb: - command: [ bash, -c, /tmp/scripts/init.sh ] + command: [bash, -c, /tmp/scripts/init.sh] container_name: cockroachdb-data depends_on: cockroachdb-server: @@ -213,12 +213,12 @@ services: - "./data/cockroachdb/init.sh:/tmp/scripts/init.sh" - "${COCKROACHDB_DATA:-./data/cockroachdb/data}:/tmp/data" cockroachdb-server: - command: [ start-single-node, --insecure ] + command: [start-single-node, --insecure] container_name: cockroachdb healthcheck: interval: 10s retries: 5 - test: [ CMD-SHELL, "curl --fail http://localhost:8080/ || exit 1" ] + test: [CMD-SHELL, "curl --fail http://localhost:8080/ || exit 1"] timeout: 5s image: "cockroachdb/cockroach:${COCKROACHDB_VERSION:-v24.1.0}" ports: @@ -226,25 +226,25 @@ services: - "8080:8080" confluent-schema-registry: container_name: schema-registry - hostname: schema-registry - image: confluentinc/cp-schema-registry:${CONFLUENT_SCHEMA_REGISTRY_VERSION:-7.4.0} - ports: - - "8081:8081" + depends_on: + kafka-server: + condition: service_healthy env_file: data/confluent-schema-registry/env/docker.env healthcheck: - test: "nc -z schema-registry 8081" interval: 10s retries: 5 + test: nc -z schema-registry 8081 timeout: 5s - depends_on: - kafka-server: - condition: service_healthy + hostname: schema-registry + image: confluentinc/cp-schema-registry:${CONFLUENT_SCHEMA_REGISTRY_VERSION:-7.4.0} + ports: + - "8081:8081" dagster: container_name: dagster depends_on: postgres: condition: service_completed_successfully - entrypoint: [ dagster-webserver, -h, 0.0.0.0, -p, "3000", -w, /opt/dagster/app/workspace.yaml ] + entrypoint: [dagster-webserver, -h, 0.0.0.0, -p, "3000", -w, /opt/dagster/app/workspace.yaml] environment: - DAGSTER_POSTGRES_HOST=postgres - "DAGSTER_POSTGRES_USER=${POSTGRES_USER:-postgres}" @@ -256,102 +256,102 @@ services: - "3000:3000" volumes: - "./data/dagster:/opt/dagster/app/" + data-caterer: + container_name: data-caterer + depends_on: + postgres: + condition: service_completed_successfully + environment: + - DEPLOY_MODE=standalone + image: "datacatering/data-caterer-basic:${DATA_CATERER_VERSION:-0.10.10}" + ports: + - "9898:9898" + volumes: + - "./data/data-caterer/connection:/opt/DataCaterer/connection" 
+ - "./data/data-caterer/plan:/opt/DataCaterer/plan" datahub: container_name: datahub + depends_on: + datahub-gms: + condition: service_healthy + env_file: data/datahub-frontend/env/docker.env + environment: + - ELASTIC_CLIENT_USERNAME=elastic + - "ELASTIC_CLIENT_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" hostname: datahub image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head} ports: - "9002:9002" - environment: - - "ELASTIC_CLIENT_USERNAME=elastic" - - "ELASTIC_CLIENT_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" - env_file: data/datahub-frontend/env/docker.env + datahub-actions: + container_name: datahub-actions depends_on: datahub-gms: condition: service_healthy - datahub-actions: - container_name: datahub-actions - hostname: actions - image: acryldata/datahub-actions:${DATAHUB_VERSION:-head} env_file: data/datahub-actions/env/docker.env environment: - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-} - ACTIONS_CONFIG=${ACTIONS_CONFIG:-} - depends_on: - datahub-gms: - condition: service_healthy + hostname: actions + image: acryldata/datahub-actions:${DATAHUB_VERSION:-head} datahub-gms: container_name: datahub-gms - hostname: datahub-gms - image: acryldata/datahub-gms:${DATAHUB_VERSION:-head} + depends_on: + datahub-upgrade: + condition: service_completed_successfully + env_file: data/datahub-upgrade/env/docker-without-neo4j.env environment: - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true} - - "EBEAN_DATASOURCE_USERNAME=root" + - EBEAN_DATASOURCE_USERNAME=root - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}" - - "ELASTICSEARCH_USERNAME=elastic" + - ELASTICSEARCH_USERNAME=elastic - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" - env_file: data/datahub-upgrade/env/docker-without-neo4j.env - ports: - - "8080:8080" healthcheck: - test: "curl -sS --fail http://datahub-gms:8080/health" - start_period: 90s interval: 1s retries: 3 + start_period: 90s + test: "curl -sS --fail http://datahub-gms:8080/health" timeout: 5s - depends_on: - datahub-upgrade: - condition: service_completed_successfully + hostname: datahub-gms + image: acryldata/datahub-gms:${DATAHUB_VERSION:-head} + ports: + - "8080:8080" datahub-kafka-setup: container_name: datahub-kafka-setup depends_on: - kafka-server: - condition: service_healthy confluent-schema-registry: condition: service_healthy - entrypoint: [ /bin/sh, -c, /tmp/scripts/init.sh ] + kafka-server: + condition: service_healthy + entrypoint: [/bin/sh, -c, /tmp/scripts/init.sh] environment: - "KAFKA_TOPICS=${KAFKA_TOPICS:-MetadataAuditEvent_v4,MetadataChangeEvent_v4,FailedMetadataChangeEvent_v4,MetadataChangeLog_Versioned_v1,MetadataChangeLog_Timeseries_v1,MetadataChangeProposal_v1,FailedMetadataChangeProposal_v1,PlatformEvent_v1,DataHubUpgradeHistory_v1}" image: "confluentinc/confluent-local:${KAFKA_VERSION:-7.6.1}" volumes: - "./data/kafka/init.sh:/tmp/scripts/init.sh" datahub-upgrade: - container_name: datahub-upgrade - hostname: datahub-upgrade - image: acryldata/datahub-upgrade:${DATAHUB_VERSION:-head} command: - -u - SystemUpdate - environment: - - "EBEAN_DATASOURCE_USERNAME=root" - - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}" - - "ELASTICSEARCH_USERNAME=elastic" - - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" - env_file: data/datahub-upgrade/env/docker-without-neo4j.env - labels: - datahub_setup_job: true + container_name: datahub-upgrade depends_on: - mysql: + datahub-kafka-setup: condition: service_completed_successfully 
       elasticsearch:
         condition: service_healthy
-      datahub-kafka-setup:
+      mysql:
         condition: service_completed_successfully
       neo4j:
         condition: service_healthy
-  data-caterer:
-    container_name: data-caterer
-    depends_on:
-      postgres:
-        condition: service_completed_successfully
+    env_file: data/datahub-upgrade/env/docker-without-neo4j.env
     environment:
-      - DEPLOY_MODE=standalone
+      - EBEAN_DATASOURCE_USERNAME=root
+      - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}"
+      - ELASTICSEARCH_USERNAME=elastic
+      - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
-    image: "datacatering/data-caterer-basic:${DATA_CATERER_VERSION:-0.10.10}"
-    ports:
-      - "9898:9898"
-    volumes:
-      - "./data/data-caterer/connection:/opt/DataCaterer/connection"
-      - "./data/data-caterer/plan:/opt/DataCaterer/plan"
+    hostname: datahub-upgrade
+    image: acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}
+    labels:
+      datahub_setup_job: true
   debezium:
     container_name: debezium
     depends_on:
@@ -362,7 +362,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:8080" ]
+      test: [CMD, curl, --fail, "http://localhost:8080"]
       timeout: 10s
     image: "debezium/debezium-ui:${DEBEZIUM_VERSION:-2.1.2.Final}"
     ports:
@@ -384,7 +384,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:8083" ]
+      test: [CMD, curl, --fail, "http://localhost:8083"]
       timeout: 10s
     image: "debezium/connect:${DEBEZIUM_CONNECT_VERSION:-2.6.2.Final}"
     ports:
@@ -400,7 +400,7 @@
       - "8040:8040"
       - "9030:9030"
   druid:
-    command: [ router ]
+    command: [router]
     container_name: druid
     depends_on:
       druid-broker:
@@ -428,7 +428,7 @@
     ports:
       - "8888:8888"
   druid-broker:
-    command: [ broker ]
+    command: [broker]
     container_name: druid-broker
     depends_on:
       druid-coordinator:
@@ -450,7 +450,7 @@
     ports:
       - "8082:8082"
   druid-coordinator:
-    command: [ coordinator ]
+    command: [coordinator]
     container_name: druid-coordinator
     depends_on:
       postgres:
@@ -470,7 +470,7 @@
     ports:
       - "8081:8081"
   druid-historical:
-    command: [ historical ]
+    command: [historical]
     container_name: druid-historical
     depends_on:
       druid-coordinator:
@@ -492,7 +492,7 @@
     ports:
       - "8083:8083"
   druid-middlemanager:
-    command: [ middleManager ]
+    command: [middleManager]
     container_name: druid-middlemanager
     depends_on:
       druid-coordinator:
@@ -519,7 +519,7 @@
     depends_on:
       postgres:
         condition: service_completed_successfully
-    entrypoint: [ tail, -F, anything ]
+    entrypoint: [tail, -F, anything]
     image: "datacatering/duckdb:${DUCKDB_VERSION:-v1.0.0}"
     volumes:
       - "./data/duckdb:/opt/data"
@@ -531,9 +531,9 @@
       - ES_JAVA_OPTS=-Xms512m -Xmx512m
       - "ELASTIC_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
       - discovery.type=single-node
     healthcheck:
-      test: "curl -sS --fail http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s"
       interval: 10s
       retries: 5
+      test: "curl -sS --fail http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s"
       timeout: 5s
     image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.14.1}"
     ports:
@@ -543,7 +543,7 @@
     volumes:
       - "./data/elasticsearch/config/elasticsearch.yml:/usr/share/elasticsearch/config/elasticsearch.yml:ro,Z"
   flight-sql:
-    command: [ tail, -f, /dev/null ]
+    command: [tail, -f, /dev/null]
     container_name: flight-sql
     depends_on:
       - duckdb
@@ -567,7 +567,7 @@
       - 6121
       - 6122
     image: "flink:${FLINK_VERSION:-1.19.0-scala_2.12-java17}"
-    links: [ ]
+    links: []
   flink-jobmanager:
     command: jobmanager
     container_name: flink-jobmanager
@@ -587,7 +587,7 @@
     ports:
       - "80:80"
   jupyter:
-    command: [ jupyter, notebook, --no-browser, "--NotebookApp.token=''", "--NotebookApp.password=''" ]
+    command: [jupyter, notebook, --no-browser, "--NotebookApp.token=''", "--NotebookApp.password=''"]
     container_name: jupyter
     image: "quay.io/jupyter/minimal-notebook:2024-07-02"
     ports:
@@ -597,7 +597,7 @@
     depends_on:
       kafka-server:
         condition: service_healthy
-    entrypoint: [ /bin/sh, -c, /tmp/scripts/init.sh ]
+    entrypoint: [/bin/sh, -c, /tmp/scripts/init.sh]
     environment:
       - "KAFKA_TOPICS=${KAFKA_TOPICS:-accounts,transactions}"
     image: "confluentinc/confluent-local:${KAFKA_VERSION:-7.6.1}"
@@ -615,13 +615,13 @@
     healthcheck:
       interval: 5s
       retries: 5
-      test: [ CMD-SHELL, /bin/sh, -c, kafka-topics, --bootstrap-server, "kafka:29092", --list ]
+      test: [CMD-SHELL, /bin/sh, -c, kafka-topics, --bootstrap-server, "kafka:29092", --list]
       timeout: 5s
     image: "confluentinc/confluent-local:7.7.0"
     ports:
       - "9092:9092"
   keycloak:
-    command: [ start-dev, --import-realm ]
+    command: [start-dev, --import-realm]
     container_name: keycloak
     depends_on:
       postgres:
@@ -682,7 +682,7 @@
     ports:
       - "3001:3000"
   marquez-data:
-    command: [ -c, /tmp/scripts/init.sh ]
+    command: [-c, /tmp/scripts/init.sh]
     container_name: marquez-data
     depends_on:
       marquez-server:
@@ -711,7 +711,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:5001/healthcheck" ]
+      test: [CMD, curl, --fail, "http://localhost:5001/healthcheck"]
       timeout: 5s
     image: "marquezproject/marquez:${MARQUEZ_VERSION:-0.47.0}"
     ports:
@@ -720,7 +720,7 @@
     volumes:
       - "./data/marquez/conf:/opt/app"
   minio:
-    command: [ server, /data, --console-address, ":9001" ]
+    command: [server, /data, --console-address, ":9001"]
     container_name: minio
     environment:
       - "MINIO_ROOT_USER=${MINIO_USER:-minioadmin}"
@@ -728,14 +728,14 @@
     healthcheck:
       interval: 5s
       retries: 3
-      test: [ CMD, mc, ready, local ]
+      test: [CMD, mc, ready, local]
       timeout: 5s
     image: "quay.io/minio/minio:${MINIO_VERSION:-RELEASE.2024-06-04T19-20-08Z}"
     ports:
       - "9000:9000"
       - "9001:9001"
   mongodb:
-    command: [ /bin/sh, -c, /opt/app/my_data.sh ]
+    command: [/bin/sh, -c, /opt/app/my_data.sh]
     container_name: mongodb-connect
     depends_on:
       - mongodb-server
@@ -760,7 +760,7 @@
     healthcheck:
       interval: 10s
       retries: 10
-      test: [ CMD-SHELL, mssql-health-check ]
+      test: [CMD-SHELL, mssql-health-check]
       timeout: 10s
     image: "mcr.microsoft.com/mssql/server:${MSSQL_VERSION:-2022-latest}"
     ports:
@@ -768,7 +768,7 @@
     volumes:
       - "./data/mssql/mssql-health-check:/usr/local/bin/mssql-health-check"
   mysql:
-    command: [ /bin/bash, -c, /tmp/scripts/init.sh ]
+    command: [/bin/bash, -c, /tmp/scripts/init.sh]
     container_name: mysql-data
     depends_on:
       mysql-server:
@@ -787,7 +787,7 @@
     healthcheck:
       interval: 5s
       retries: 3
-      test: [ CMD, mysqladmin, ping, -h, localhost, -u, root, -p$$MYSQL_ROOT_PASSWORD ]
+      test: [CMD, mysqladmin, ping, -h, localhost, -u, root, -p$$MYSQL_ROOT_PASSWORD]
       timeout: 5s
     image: "mysql:${MYSQL_VERSION:-8.4.0}"
     ports:
@@ -799,7 +799,7 @@
     healthcheck:
       interval: 30s
       retries: 5
-      test: [ CMD-SHELL, "cypher-shell -u neo4j -p test 'RETURN 1' || exit 1" ]
+      test: [CMD-SHELL, "cypher-shell -u neo4j -p test 'RETURN 1' || exit 1"]
       timeout: 10s
     image: "neo4j:${NEO4J_VERSION:-5.20.0}"
     ports:
@@ -813,7 +813,7 @@
     healthcheck:
       interval: 10s
       retries: 5
-      test: [ CMD, curl, --fail, "https://localhost:9200", -ku, "admin:${OPENSEARCH_PASSWORD:-!BigData#1}" ]
+      test: [CMD, curl, --fail, "https://localhost:9200", -ku, "admin:${OPENSEARCH_PASSWORD:-!BigData#1}"]
       timeout: 5s
     image: "opensearchproject/opensearch:${OPENSEARCH_VERSION:-2.15.0}"
     ports:
@@ -830,7 +830,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:8098/health/readiness" ]
+      test: [CMD, curl, --fail, "http://localhost:8098/health/readiness"]
       timeout: 5s
     image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}"
     ports:
@@ -847,7 +847,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:8099/health" ]
+      test: [CMD, curl, --fail, "http://localhost:8099/health"]
       timeout: 5s
     image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}"
     ports:
@@ -864,7 +864,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD, curl, --fail, "http://localhost:9000/pinot-controller/admin" ]
+      test: [CMD, curl, --fail, "http://localhost:9000/pinot-controller/admin"]
       timeout: 5s
     image: "apachepinot/pinot:${PINOT_VERSION:-1.1.0}"
     ports:
@@ -875,14 +875,14 @@
     healthcheck:
       interval: 10s
       retries: 5
-      test: [ CMD, curl, "http://localhost:8182/healthcheck" ]
+      test: [CMD, curl, "http://localhost:8182/healthcheck"]
       timeout: 10s
     image: "datacatering/polaris:${POLARIS_VERSION:-1.0.0}"
     ports:
       - "8181:8181"
       - "8182:8182"
   postgres:
-    command: [ /bin/bash, -c, /tmp/scripts/init.sh ]
+    command: [/bin/bash, -c, /tmp/scripts/init.sh]
     container_name: postgres-data
     depends_on:
       postgres-server:
@@ -903,7 +903,7 @@
     healthcheck:
       interval: 10s
       retries: 3
-      test: [ CMD-SHELL, pg_isready ]
+      test: [CMD-SHELL, pg_isready]
       timeout: 5s
     image: "postgres:${POSTGRES_VERSION:-16.3}"
     ports:
@@ -912,7 +912,7 @@
     container_name: prefect-data
     depends_on:
       - prefect-server
-    entrypoint: [ /opt/prefect/app/start_flows.sh ]
+    entrypoint: [/opt/prefect/app/start_flows.sh]
     environment:
       - "PREFECT_API_URL=http://host.docker.internal:4200/api"
     image: "prefecthq/prefect:${PREFECT_VERSION:-2.19.5-python3.11}"
@@ -925,7 +925,7 @@
     depends_on:
       postgres:
         condition: service_completed_successfully
-    entrypoint: [ /opt/prefect/entrypoint.sh, prefect, server, start ]
+    entrypoint: [/opt/prefect/entrypoint.sh, prefect, server, start]
     environment:
       - "PREFECT_UI_URL=http://127.0.0.1:4200/api"
       - "PREFECT_API_URL=http://127.0.0.1:4200/api"
@@ -966,7 +966,7 @@
     depends_on:
       solace-server:
         condition: service_healthy
-    entrypoint: [ /bin/sh, -c, /opt/app/my_data.sh ]
+    entrypoint: [/bin/sh, -c, /opt/app/my_data.sh]
     image: "solace/solace-pubsub-standard:${SOLACE_VERSION:-10.8}"
     volumes:
       - "./data/solace:/opt/app"
@@ -983,7 +983,7 @@
     healthcheck:
       interval: 30s
       retries: 3
-      test: [ CMD-SHELL, curl, --output, /dev/null, --silent, --head, --fail, "http://localhost:8080" ]
+      test: [CMD-SHELL, curl, --output, /dev/null, --silent, --head, --fail, "http://localhost:8080"]
       timeout: 5s
     image: "solace/solace-pubsub-standard:${SOLACE_VERSION:-10.8}"
     ports:
@@ -1002,16 +1002,16 @@
       - "9010:9010"
       - "9020:9020"
   sqlite:
-    command: [ tail, -f, /dev/null ]
+    command: [tail, -f, /dev/null]
     container_name: sqlite
     image: "keinos/sqlite3:3.46.0"
     volumes:
       - "./data/sqlite:/opt/data"
   temporal:
-    command: [ server, start-dev, --db-filename, /opt/data/db/temporal.db, --ip, 0.0.0.0, --metrics-port, "9233" ]
+    command: [server, start-dev, --db-filename, /opt/data/db/temporal.db, --ip, 0.0.0.0, --metrics-port, "9233"]
     container_name: temporal
     entrypoint: temporal
-    environment: [ ]
+    environment: []
     expose:
       - 8233
       - 7233