diff --git a/README.md b/README.md
index 98e724c..c06c84f 100644
--- a/README.md
+++ b/README.md
@@ -120,50 +120,51 @@ POSTGRES_USER=my-user POSTGRES_PASSWORD=my-password ./run.sh postgres
 
 ## Services
 
-| Service Type                | Service       | Supported |
-|-----------------------------|---------------|-----------|
-| Change Data Capture         | debezium      | ✅        |
-| Database                    | cassandra     | ✅        |
-| Database                    | cockroachdb   | ✅        |
-| Database                    | elasticsearch | ✅        |
-| Database                    | mariadb       | ✅        |
-| Database                    | mongodb       | ✅        |
-| Database                    | mssql         | ✅        |
-| Database                    | mysql         | ✅        |
-| Database                    | neo4j         | ✅        |
-| Database                    | opensearch    | ✅        |
-| Database                    | postgres      | ✅        |
-| Database                    | spanner       | ✅        |
-| Database                    | sqlite        | ✅        |
-| Data Catalog                | amundsen      | ✅        |
-| Data Catalog                | marquez       | ✅        |
-| Data Catalog                | polaris       | ✅        |
-| Data Catalog                | unitycatalog  | ✅        |
-| Data Catalog                | datahub       | ❌        |
-| Data Catalog                | openmetadata  | ❌        |
-| Distributed Coordination    | zookeeper     | ✅        |
-| Distributed Data Processing | flink         | ✅        |
-| HTTP                        | httpbin       | ✅        |
-| Identity Management         | keycloak      | ✅        |
-| Job Orchestrator            | airflow       | ✅        |
-| Job Orchestrator            | dagster       | ✅        |
-| Job Orchestrator            | mage-ai       | ✅        |
-| Job Orchestrator            | prefect       | ✅        |
-| Messaging                   | activemq      | ✅        |
-| Messaging                   | kafka         | ✅        |
-| Messaging                   | rabbitmq      | ✅        |
-| Messaging                   | solace        | ✅        |
-| Notebook                    | jupyter       | ✅        |
-| Object Storage              | minio         | ✅        |
-| Query Engine                | duckdb        | ✅        |
-| Query Engine                | flight-sql    | ✅        |
-| Query Engine                | presto        | ✅        |
-| Query Engine                | trino         | ✅        |
-| Real-time OLAP              | clickhouse    | ✅        |
-| Real-time OLAP              | doris         | ✅        |
-| Real-time OLAP              | druid         | ✅        |
-| Real-time OLAP              | pinot         | ✅        |
-| Test Data Management        | data-caterer  | ✅        |
-| Workflow                    | maestro       | ✅        |
-| Workflow                    | temporal      | ✅        |
+| Service Type                | Service                   | Supported |
+|-----------------------------|---------------------------|-----------|
+| Change Data Capture         | debezium                  | ✅        |
+| Database                    | cassandra                 | ✅        |
+| Database                    | cockroachdb               | ✅        |
+| Database                    | elasticsearch             | ✅        |
+| Database                    | mariadb                   | ✅        |
+| Database                    | mongodb                   | ✅        |
+| Database                    | mssql                     | ✅        |
+| Database                    | mysql                     | ✅        |
+| Database                    | neo4j                     | ✅        |
+| Database                    | opensearch                | ✅        |
+| Database                    | postgres                  | ✅        |
+| Database                    | spanner                   | ✅        |
+| Database                    | sqlite                    | ✅        |
+| Data Catalog                | amundsen                  | ✅        |
+| Data Catalog                | datahub                   | ✅        |
+| Data Catalog                | marquez                   | ✅        |
+| Data Catalog                | polaris                   | ✅        |
+| Data Catalog                | unitycatalog              | ✅        |
+| Data Catalog                | openmetadata              | ❌        |
+| Distributed Coordination    | zookeeper                 | ✅        |
+| Distributed Data Processing | flink                     | ✅        |
+| HTTP                        | httpbin                   | ✅        |
+| Identity Management         | keycloak                  | ✅        |
+| Job Orchestrator            | airflow                   | ✅        |
+| Job Orchestrator            | dagster                   | ✅        |
+| Job Orchestrator            | mage-ai                   | ✅        |
+| Job Orchestrator            | prefect                   | ✅        |
+| Messaging                   | activemq                  | ✅        |
+| Messaging                   | kafka                     | ✅        |
+| Messaging                   | rabbitmq                  | ✅        |
+| Messaging                   | solace                    | ✅        |
+| Notebook                    | jupyter                   | ✅        |
+| Object Storage              | minio                     | ✅        |
+| Query Engine                | duckdb                    | ✅        |
+| Query Engine                | flight-sql                | ✅        |
+| Query Engine                | presto                    | ✅        |
+| Query Engine                | trino                     | ✅        |
+| Real-time OLAP              | clickhouse                | ✅        |
+| Real-time OLAP              | doris                     | ✅        |
+| Real-time OLAP              | druid                     | ✅        |
+| Real-time OLAP              | pinot                     | ✅        |
+| Schema Registry             | confluent-schema-registry | ✅        |
+| Test Data Management        | data-caterer              | ✅        |
+| Workflow                    | maestro                   | ✅        |
+| Workflow                    | temporal                  | ✅        |
 
diff --git a/data/confluent-schema-registry/env/docker.env b/data/confluent-schema-registry/env/docker.env
new file mode 100644
index 0000000..42ca4ba
--- /dev/null
+++ b/data/confluent-schema-registry/env/docker.env
@@ -0,0 +1,11 @@
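+# Minimal Confluent Schema Registry settings for this compose stack; assumes the
+# Kafka service's internal listener at kafka:29092 (see docker-compose.yaml below).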
+SCHEMA_REGISTRY_HOST_NAME=schema-registry
+SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
+SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=kafka:29092
+
+# Uncomment to customize the Schema Registry Kafka store connection
+# ZOOKEEPER_SASL_ENABLED=false
+# KAFKA_OPTS=-Xms1g -Xmx1g
+# SCHEMA_REGISTRY_JMX_OPTS=-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false
+
+# Uncomment to use a Schema Registry version < v5.4.0
+# SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL=zookeeper:2181
\ No newline at end of file
diff --git a/data/datahub-actions/env/docker.env b/data/datahub-actions/env/docker.env
new file mode 100644
index 0000000..dcef723
--- /dev/null
+++ b/data/datahub-actions/env/docker.env
@@ -0,0 +1,38 @@
+DATAHUB_GMS_PROTOCOL=http
+DATAHUB_GMS_HOST=datahub-gms
+DATAHUB_GMS_PORT=8080
+
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+SCHEMA_REGISTRY_URL=http://schema-registry:8081
+# SCHEMA_REGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+METADATA_CHANGE_LOG_VERSIONED_TOPIC_NAME=MetadataChangeLog_Versioned_v1
+
+# System Auth --
+DATAHUB_SYSTEM_CLIENT_ID=__datahub_system
+DATAHUB_SYSTEM_CLIENT_SECRET=JohnSnowKnowsNothing
+
+# Kafka Authentication
+KAFKA_PROPERTIES_SECURITY_PROTOCOL=PLAINTEXT
+
+# Uncomment the following if your Kafka deployment requires SSL.
+# KAFKA_PROPERTIES_SSL_KEYSTORE_LOCATION=/mnt/certs/keystore
+# KAFKA_PROPERTIES_SSL_TRUSTSTORE_LOCATION=/mnt/certs/truststore
+# KAFKA_PROPERTIES_SSL_KEYSTORE_PASSWORD=keystore_password
+# KAFKA_PROPERTIES_SSL_KEY_PASSWORD=keystore_password
+# KAFKA_PROPERTIES_SSL_TRUSTSTORE_PASSWORD=truststore_password
+
+# The following env vars are meant to be passed through from the host system
+# to configure the Slack and Teams actions. The _ENABLED flags must be set to
+# the exact, case-sensitive string "true" for an action to be enabled.
+DATAHUB_ACTIONS_SLACK_ENABLED
+DATAHUB_ACTIONS_SLACK_DATAHUB_BASE_URL
+DATAHUB_ACTIONS_SLACK_BOT_TOKEN
+DATAHUB_ACTIONS_SLACK_SIGNING_SECRET
+DATAHUB_ACTIONS_SLACK_CHANNEL
+DATAHUB_ACTIONS_SLACK_SUPPRESS_SYSTEM_ACTIVITY
+
+DATAHUB_ACTIONS_TEAMS_ENABLED
+DATAHUB_ACTIONS_TEAMS_DATAHUB_BASE_URL
+DATAHUB_ACTIONS_TEAMS_WEBHOOK_URL
+DATAHUB_ACTIONS_TEAMS_SUPPRESS_SYSTEM_ACTIVITY
\ No newline at end of file
diff --git a/data/datahub-frontend/env/docker.env b/data/datahub-frontend/env/docker.env
new file mode 100644
index 0000000..055dda9
--- /dev/null
+++ b/data/datahub-frontend/env/docker.env
@@ -0,0 +1,71 @@
+DATAHUB_GMS_HOST=datahub-gms
+DATAHUB_GMS_PORT=8080
+DATAHUB_SECRET=YouKnowNothing
+DATAHUB_APP_VERSION=1.0
+DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
+JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
+
+# Uncomment and set these to support SSL connections to GMS.
+# NOTE: GMS itself does not currently offer SSL support; these settings are intended
+# for when a proxy that handles SSL, such as an EC2 load balancer, sits in front of GMS.
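+# Example (illustrative values, assuming a TLS-terminating proxy):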
+#DATAHUB_GMS_USE_SSL=true
+#DATAHUB_GMS_SSL_PROTOCOL=
+
+# Uncomment and set custom SSL truststore settings
+# SSL_TRUSTSTORE_FILE=datahub-frontend/conf/truststore.jks
+# SSL_TRUSTSTORE_TYPE=jks
+# SSL_TRUSTSTORE_PASSWORD=MyTruststorePassword
+
+# Uncomment to enable Metadata Service Authentication
+# METADATA_SERVICE_AUTH_ENABLED=true
+
+# Uncomment & populate these configs to enable OIDC SSO in the React application.
+# Required OIDC configs
+# AUTH_OIDC_ENABLED=true
+# AUTH_OIDC_CLIENT_ID=1030786188615-rr9ics9gl8n4acngj9opqbf2mruflqpr.apps.googleusercontent.com
+# AUTH_OIDC_CLIENT_SECRET=acEdaGcnfd7KxvsXRFDD7FNF
+# AUTH_OIDC_DISCOVERY_URI=https://accounts.google.com/.well-known/openid-configuration
+# AUTH_OIDC_BASE_URL=http://localhost:9001
+# Optional OIDC configs
+# AUTH_OIDC_USER_NAME_CLAIM=email
+# AUTH_OIDC_USER_NAME_CLAIM_REGEX=([^@]+)
+# AUTH_OIDC_SCOPE=
+# Optional provisioning configs
+# AUTH_OIDC_JIT_PROVISIONING_ENABLED=true
+# AUTH_OIDC_PRE_PROVISIONING_REQUIRED=false
+# AUTH_OIDC_EXTRACT_GROUPS_ENABLED=false
+# AUTH_OIDC_GROUPS_CLAIM=groups
+
+# Uncomment to disable JAAS username/password authentication (enabled by default)
+# AUTH_JAAS_ENABLED=false
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Required Kafka producer configs
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
+
+# Required Elastic client configuration (analytics)
+ELASTIC_CLIENT_HOST=elasticsearch
+ELASTIC_CLIENT_PORT=9200
+
+# Optional Elastic client configurations
+# ELASTIC_CLIENT_THREAD_COUNT=2
+# ELASTIC_CLIENT_CONNECTION_REQUEST_TIMEOUT=50
+
+# To support SSL connections to Elasticsearch, uncomment and set the following
+# ELASTIC_CLIENT_USE_SSL=true
+# ELASTIC_CLIENT_SSL_PROTOCOL=TLSv1.2
+# ELASTIC_CLIENT_SSL_SECURE_RANDOM_IMPLEMENTATION=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_FILE=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_TYPE=
+# ELASTIC_CLIENT_SSL_TRUST_STORE_PASSWORD=
+# ELASTIC_CLIENT_SSL_KEY_STORE_FILE=
+# ELASTIC_CLIENT_SSL_KEY_STORE_TYPE=
+# ELASTIC_CLIENT_SSL_KEY_STORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS,
+# set ELASTIC_CLIENT_USE_SSL=true and uncomment:
+# ELASTIC_CLIENT_USERNAME=
+# ELASTIC_CLIENT_PASSWORD=
\ No newline at end of file
diff --git a/data/datahub-gms/env/docker.env b/data/datahub-gms/env/docker.env
new file mode 100644
index 0000000..f6af280
--- /dev/null
+++ b/data/datahub-gms/env/docker.env
@@ -0,0 +1,55 @@
+DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
+EBEAN_DATASOURCE_USERNAME=datahub
+EBEAN_DATASOURCE_PASSWORD=datahub
+EBEAN_DATASOURCE_HOST=mysql:3306
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
+EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+# KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_PORT=9200
+ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+GRAPH_SERVICE_IMPL=elasticsearch
+JAVA_OPTS=-Xms1g -Xmx1g
+ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
+
+MAE_CONSUMER_ENABLED=true
+MCE_CONSUMER_ENABLED=true
+PE_CONSUMER_ENABLED=true
+UI_INGESTION_ENABLED=true
+ENTITY_SERVICE_ENABLE_RETENTION=true
+
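+# Hash algorithm used to derive Elasticsearch document ids from URNs; keep it
+# consistent with the value the indices were originally built with.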
+ELASTIC_ID_HASH_ALGO=MD5
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Uncomment to configure Kafka topic names.
+# Make sure these names are consistent across the whole deployment.
+# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
+# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
+
+# Uncomment and set these to support SSL connections to Elasticsearch
+# ELASTICSEARCH_USE_SSL=true
+# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
+# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
+# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
+# ELASTICSEARCH_SSL_KEYSTORE_FILE=
+# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
+# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS,
+# set ELASTICSEARCH_USE_SSL=true and uncomment:
+# ELASTICSEARCH_USERNAME=
+# ELASTICSEARCH_PASSWORD=
+
+# Uncomment to run a one-time upgrade that migrates the legacy default browse path format to the latest format.
+# More details can be found at https://datahubproject.io/docs/advanced/browse-paths-upgrade
+# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true
\ No newline at end of file
diff --git a/data/datahub-upgrade/env/docker-without-neo4j.env b/data/datahub-upgrade/env/docker-without-neo4j.env
new file mode 100644
index 0000000..f6af280
--- /dev/null
+++ b/data/datahub-upgrade/env/docker-without-neo4j.env
@@ -0,0 +1,55 @@
+DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
+EBEAN_DATASOURCE_USERNAME=datahub
+EBEAN_DATASOURCE_PASSWORD=datahub
+EBEAN_DATASOURCE_HOST=mysql:3306
+EBEAN_DATASOURCE_URL=jdbc:mysql://mysql:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
+EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
+KAFKA_BOOTSTRAP_SERVER=kafka:29092
+KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
+# KAFKA_SCHEMAREGISTRY_URL=http://datahub-gms:8080/schema-registry/api/
+ELASTICSEARCH_HOST=elasticsearch
+ELASTICSEARCH_PORT=9200
+ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
+ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
+ES_BULK_REFRESH_POLICY=WAIT_UNTIL
+GRAPH_SERVICE_DIFF_MODE_ENABLED=true
+GRAPH_SERVICE_IMPL=elasticsearch
+JAVA_OPTS=-Xms1g -Xmx1g
+ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
+
+MAE_CONSUMER_ENABLED=true
+MCE_CONSUMER_ENABLED=true
+PE_CONSUMER_ENABLED=true
+UI_INGESTION_ENABLED=true
+ENTITY_SERVICE_ENABLE_RETENTION=true
+
+ELASTIC_ID_HASH_ALGO=MD5
+
+# Uncomment to disable persistence of client-side analytics events
+# DATAHUB_ANALYTICS_ENABLED=false
+
+# Uncomment to configure Kafka topic names.
+# Make sure these names are consistent across the whole deployment.
+# METADATA_AUDIT_EVENT_NAME=MetadataAuditEvent_v4
+# METADATA_CHANGE_EVENT_NAME=MetadataChangeEvent_v4
+# FAILED_METADATA_CHANGE_EVENT_NAME=FailedMetadataChangeEvent_v4
+
+# Uncomment and set these to support SSL connections to Elasticsearch
+# ELASTICSEARCH_USE_SSL=true
+# ELASTICSEARCH_SSL_PROTOCOL=TLSv1.2
+# ELASTICSEARCH_SSL_SECURE_RANDOM_IMPL=
+# ELASTICSEARCH_SSL_TRUSTSTORE_FILE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_TYPE=
+# ELASTICSEARCH_SSL_TRUSTSTORE_PASSWORD=
+# ELASTICSEARCH_SSL_KEYSTORE_FILE=
+# ELASTICSEARCH_SSL_KEYSTORE_TYPE=
+# ELASTICSEARCH_SSL_KEYSTORE_PASSWORD=
+
+# To use simple username/password authentication to Elasticsearch over HTTPS,
+# set ELASTICSEARCH_USE_SSL=true and uncomment:
+# ELASTICSEARCH_USERNAME=
+# ELASTICSEARCH_PASSWORD=
+
+# Uncomment to run a one-time upgrade that migrates the legacy default browse path format to the latest format.
+# More details can be found at https://datahubproject.io/docs/advanced/browse-paths-upgrade
+# UPGRADE_DEFAULT_BROWSE_PATHS_ENABLED=true
\ No newline at end of file
diff --git a/data/mysql/data/datahub.sql b/data/mysql/data/datahub.sql
new file mode 100644
index 0000000..a11217b
--- /dev/null
+++ b/data/mysql/data/datahub.sql
@@ -0,0 +1,43 @@
+-- create the datahub database
+CREATE DATABASE IF NOT EXISTS `datahub` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+USE `datahub`;
+
+-- create the metadata aspect table
+CREATE TABLE IF NOT EXISTS metadata_aspect_v2
+(
+    urn            varchar(500) not null,
+    aspect         varchar(200) not null,
+    version        bigint(20) not null,
+    metadata       longtext not null,
+    systemmetadata longtext,
+    createdon      datetime(6) not null,
+    createdby      varchar(255) not null,
+    createdfor     varchar(255),
+    constraint pk_metadata_aspect_v2 primary key (urn, aspect, version),
+    INDEX timeIndex (createdon)
+) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
+
+-- create default records for the datahub user if they do not exist yet
+DROP TABLE IF EXISTS temp_metadata_aspect_v2;
+CREATE TABLE temp_metadata_aspect_v2 LIKE metadata_aspect_v2;
+INSERT INTO temp_metadata_aspect_v2 (urn, aspect, version, metadata, createdon, createdby)
+VALUES ('urn:li:corpuser:datahub',
+        'corpUserInfo',
+        0,
+        '{"displayName":"Data Hub","active":true,"fullName":"Data Hub","email":"datahub@linkedin.com"}',
+        now(),
+        'urn:li:corpuser:__datahub_system'),
+       ('urn:li:corpuser:datahub',
+        'corpUserEditableInfo',
+        0,
+        '{"skills":[],"teams":[],"pictureLink":"https://raw.githubusercontent.com/datahub-project/datahub/master/datahub-web-react/src/images/default_avatar.png"}',
+        now(),
+        'urn:li:corpuser:__datahub_system');
+-- only add the default records if metadata_aspect_v2 is empty
+INSERT INTO metadata_aspect_v2
+SELECT *
+FROM temp_metadata_aspect_v2
+WHERE NOT EXISTS (SELECT * FROM metadata_aspect_v2);
+DROP TABLE temp_metadata_aspect_v2;
+
+DROP TABLE IF EXISTS metadata_index;
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c8e6944..e157c24 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -224,6 +224,21 @@ services:
     ports:
       - "26257:26257"
       - "8080:8080"
+  confluent-schema-registry:
+    container_name: schema-registry
+    depends_on:
+      kafka-server:
+        condition: service_healthy
+    env_file: data/confluent-schema-registry/env/docker.env
+    healthcheck:
+      interval: 10s
+      retries: 5
+      test: nc -z schema-registry 8081
+      timeout: 5s
+    hostname: schema-registry
+    image: confluentinc/cp-schema-registry:${CONFLUENT_SCHEMA_REGISTRY_VERSION:-7.4.0}
+    ports:
+      - "8081:8081"
   dagster:
     container_name: dagster
     depends_on:
@@ -254,6 +269,89 @@
     volumes:
       - "./data/data-caterer/connection:/opt/DataCaterer/connection"
       - "./data/data-caterer/plan:/opt/DataCaterer/plan"
+  datahub:
+    container_name: datahub
+    depends_on:
+      datahub-gms:
+        condition: service_healthy
+    env_file: data/datahub-frontend/env/docker.env
+    environment:
+      - ELASTIC_CLIENT_USERNAME=elastic
+      - "ELASTIC_CLIENT_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
+    hostname: datahub
+    image: acryldata/datahub-frontend-react:${DATAHUB_VERSION:-head}
+    ports:
+      - "9002:9002"
+  datahub-actions:
+    container_name: datahub-actions
+    depends_on:
+      datahub-gms:
+        condition: service_healthy
+    env_file: data/datahub-actions/env/docker.env
+    environment:
+      - ACTIONS_EXTRA_PACKAGES=${ACTIONS_EXTRA_PACKAGES:-}
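+      # Optional passthroughs from the host shell; both default to empty. They are
+      # intended for extra Python packages and a custom actions config, respectively.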
+      - ACTIONS_CONFIG=${ACTIONS_CONFIG:-}
+    hostname: actions
+    image: acryldata/datahub-actions:${DATAHUB_VERSION:-head}
+  datahub-gms:
+    container_name: datahub-gms
+    depends_on:
+      datahub-upgrade:
+        condition: service_completed_successfully
+    env_file: data/datahub-gms/env/docker.env
+    environment:
+      - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
+      - EBEAN_DATASOURCE_USERNAME=root
+      - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}"
+      - ELASTICSEARCH_USERNAME=elastic
+      - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
+    healthcheck:
+      interval: 1s
+      retries: 3
+      start_period: 90s
+      test: "curl -sS --fail http://datahub-gms:8080/health"
+      timeout: 5s
+    hostname: datahub-gms
+    image: acryldata/datahub-gms:${DATAHUB_VERSION:-head}
+    ports:
+      - "8080:8080"
+  datahub-kafka-setup:
+    container_name: datahub-kafka-setup
+    depends_on:
+      confluent-schema-registry:
+        condition: service_healthy
+      kafka-server:
+        condition: service_healthy
+    entrypoint: [/bin/sh, -c, /tmp/scripts/init.sh]
+    environment:
+      - "KAFKA_TOPICS=${KAFKA_TOPICS:-MetadataAuditEvent_v4,MetadataChangeEvent_v4,FailedMetadataChangeEvent_v4,MetadataChangeLog_Versioned_v1,MetadataChangeLog_Timeseries_v1,MetadataChangeProposal_v1,FailedMetadataChangeProposal_v1,PlatformEvent_v1,DataHubUpgradeHistory_v1}"
+    image: "confluentinc/confluent-local:${KAFKA_VERSION:-7.6.1}"
+    volumes:
+      - "./data/kafka/init.sh:/tmp/scripts/init.sh"
+  datahub-upgrade:
+    command:
+      - -u
+      - SystemUpdate
+    container_name: datahub-upgrade
+    depends_on:
+      datahub-kafka-setup:
+        condition: service_completed_successfully
+      elasticsearch:
+        condition: service_healthy
+      mysql:
+        condition: service_healthy
+      neo4j:
+        condition: service_healthy
+    env_file: data/datahub-upgrade/env/docker-without-neo4j.env
+    environment:
+      - EBEAN_DATASOURCE_USERNAME=root
+      - "EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD:-root}"
+      - ELASTICSEARCH_USERNAME=elastic
+      - "ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
+    hostname: datahub-upgrade
+    image: acryldata/datahub-upgrade:${DATAHUB_VERSION:-head}
+    labels:
+      datahub_setup_job: "true"
   debezium:
     container_name: debezium
     depends_on:
@@ -432,6 +530,11 @@
       - ES_JAVA_OPTS=-Xms512m -Xmx512m
       - "ELASTIC_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}"
       - discovery.type=single-node
+    healthcheck:
+      interval: 10s
+      retries: 5
+      test: "curl -sS --fail 'http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s'"
+      timeout: 5s
     image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.14.1}"
     ports:
       - "9200:9200"
@@ -693,8 +796,6 @@
     container_name: neo4j
     environment:
       - NEO4J_AUTH=none
-      - "NEO4J_dbms_connector_http_advertised__address=localhost:7474"
-      - "NEO4J_dbms_connector_bolt_advertised__address=localhost:7687"
     healthcheck:
       interval: 30s
       retries: 5
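A quick smoke test for the new services once the stack is up. This is a sketch: it assumes `./run.sh` accepts a service name the same way the README examples above use it, and the default ports published in docker-compose.yaml.

```sh
# Start DataHub; compose resolves the dependency chain (kafka-server,
# confluent-schema-registry, mysql, elasticsearch, neo4j, datahub-gms, ...).
./run.sh datahub

# Schema Registry REST API; an empty subject list ([]) means the registry is up.
curl -sS http://localhost:8081/subjects

# Same endpoint the datahub-gms healthcheck polls.
curl -sS --fail http://localhost:8080/health

# The DataHub UI should then be reachable at http://localhost:9002.
```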