diff --git a/README.md b/README.md index c06c84f..993487f 100644 --- a/README.md +++ b/README.md @@ -141,9 +141,10 @@ POSTGRES_USER=my-user POSTGRES_PASSWORD=my-password ./run.sh postgres | Data Catalog | polaris | ✅ | | Data Catalog | unitycatalog | ✅ | | Data Catalog | openmetadata | ❌ | +| Data Collector | fluentd | ✅ | +| Data Collector | logstash | ✅ | | Distributed Coordination | zookeeper | ✅ | | Distributed Data Processing | flink | ✅ | -| HTTP | httpbin | ✅ | | Identity Management | keycloak | ✅ | | Job Orchestrator | airflow | ✅ | | Job Orchestrator | dagster | ✅ | @@ -165,6 +166,8 @@ POSTGRES_USER=my-user POSTGRES_PASSWORD=my-password ./run.sh postgres | Real-time OLAP | pinot | ✅ | | Schema Registry | confluent-schema-registry | ✅ | | Test Data Management | data-caterer | ✅ | -| Workflow | maestro | ✅ | +| Web Server | httpbin | ✅ | +| Web Server | httpd | ✅ | +| Workflow | maestro | ✅ | | Workflow | temporal | ✅ | diff --git a/data/elasticsearch/data/entrypoint.sh b/data/elasticsearch/data/entrypoint.sh new file mode 100755 index 0000000..441c203 --- /dev/null +++ b/data/elasticsearch/data/entrypoint.sh @@ -0,0 +1,121 @@ +#!/usr/bin/env bash + +# From ELK repo: https://github.com/deviantony/docker-elk + +set -eu +set -o pipefail + +source "${BASH_SOURCE[0]%/*}"/lib.sh + + +# -------------------------------------------------------- +# Users declarations + +declare -A users_passwords +users_passwords=( + [logstash_internal]="${LOGSTASH_INTERNAL_PASSWORD:-}" + [kibana_system]="${KIBANA_SYSTEM_PASSWORD:-}" + [metricbeat_internal]="${METRICBEAT_INTERNAL_PASSWORD:-}" + [filebeat_internal]="${FILEBEAT_INTERNAL_PASSWORD:-}" + [heartbeat_internal]="${HEARTBEAT_INTERNAL_PASSWORD:-}" + [monitoring_internal]="${MONITORING_INTERNAL_PASSWORD:-}" + [beats_system]="${BEATS_SYSTEM_PASSWORD:-}" +) + +declare -A users_roles +users_roles=( + [logstash_internal]='logstash_writer' + [metricbeat_internal]='metricbeat_writer' + [filebeat_internal]='filebeat_writer' + 
[heartbeat_internal]='heartbeat_writer' + [monitoring_internal]='remote_monitoring_collector' +) + +# -------------------------------------------------------- +# Roles declarations + +declare -A roles_files +roles_files=( + [logstash_writer]='logstash_writer.json' + [metricbeat_writer]='metricbeat_writer.json' + [filebeat_writer]='filebeat_writer.json' + [heartbeat_writer]='heartbeat_writer.json' +) + +# -------------------------------------------------------- + + +log 'Waiting for availability of Elasticsearch. This can take several minutes.' + +declare -i exit_code=0 +wait_for_elasticsearch || exit_code=$? + +if ((exit_code)); then + case $exit_code in + 6) + suberr 'Could not resolve host. Is Elasticsearch running?' + ;; + 7) + suberr 'Failed to connect to host. Is Elasticsearch healthy?' + ;; + 28) + suberr 'Timeout connecting to host. Is Elasticsearch healthy?' + ;; + *) + suberr "Connection to Elasticsearch failed. Exit code: ${exit_code}" + ;; + esac + + exit $exit_code +fi + +sublog 'Elasticsearch is running' + +log 'Waiting for initialization of built-in users' + +wait_for_builtin_users || exit_code=$? + +if ((exit_code)); then + suberr 'Timed out waiting for condition' + exit $exit_code +fi + +sublog 'Built-in users were initialized' + +for role in "${!roles_files[@]}"; do + log "Role '$role'" + + declare body_file + body_file="${BASH_SOURCE[0]%/*}/roles/${roles_files[$role]:-}" + if [[ ! 
-f "${body_file:-}" ]]; then + sublog "No role body found at '${body_file}', skipping" + continue + fi + + sublog 'Creating/updating' + ensure_role "$role" "$(<"${body_file}")" +done + +for user in "${!users_passwords[@]}"; do + log "User '$user'" + if [[ -z "${users_passwords[$user]:-}" ]]; then + sublog 'No password defined, skipping' + continue + fi + + declare -i user_exists=0 + user_exists="$(check_user_exists "$user")" + + if ((user_exists)); then + sublog 'User exists, setting password' + set_user_password "$user" "${users_passwords[$user]}" + else + if [[ -z "${users_roles[$user]:-}" ]]; then + suberr ' No role defined, skipping creation' + continue + fi + + sublog 'User does not exist, creating' + create_user "$user" "${users_passwords[$user]}" "${users_roles[$user]}" + fi +done \ No newline at end of file diff --git a/data/elasticsearch/data/lib.sh b/data/elasticsearch/data/lib.sh new file mode 100755 index 0000000..c1616b2 --- /dev/null +++ b/data/elasticsearch/data/lib.sh @@ -0,0 +1,242 @@ +#!/usr/bin/env bash + +# From ELK repo: https://github.com/deviantony/docker-elk + +# Log a message. +function log { + echo "[+] $1" +} + +# Log a message at a sub-level. +function sublog { + echo " ⠿ $1" +} + +# Log an error. +function err { + echo "[x] $1" >&2 +} + +# Log an error at a sub-level. +function suberr { + echo " ⠍ $1" >&2 +} + +# Poll the 'elasticsearch' service until it responds with HTTP code 200. +function wait_for_elasticsearch { + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' "http://${elasticsearch_host}:9200/" ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local output + + # retry for max 300s (60*5s) + for _ in $(seq 1 60); do + local -i exit_code=0 + output="$(curl "${args[@]}")" || exit_code=$? 
+ + if ((exit_code)); then + result=$exit_code + fi + + if [[ "${output: -3}" -eq 200 ]]; then + result=0 + break + fi + + sleep 5 + done + + if ((result)) && [[ "${output: -3}" -ne 000 ]]; then + echo -e "\n${output::-3}" + fi + + return $result +} + +# Poll the Elasticsearch users API until it returns users. +function wait_for_builtin_users { + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' "http://${elasticsearch_host}:9200/_security/user?pretty" ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + + local line + local -i exit_code + local -i num_users + + # retry for max 30s (30*1s) + for _ in $(seq 1 30); do + num_users=0 + + # read exits with a non-zero code if the last read input doesn't end + # with a newline character. The printf without newline that follows the + # curl command ensures that the final input not only contains curl's + # exit code, but causes read to fail so we can capture the return value. + # Ref. https://unix.stackexchange.com/a/176703/152409 + while IFS= read -r line || ! exit_code="$line"; do + if [[ "$line" =~ _reserved.+true ]]; then + (( num_users++ )) + fi + done < <(curl "${args[@]}"; printf '%s' "$?") + + if ((exit_code)); then + result=$exit_code + fi + + # we expect more than just the 'elastic' user in the result + if (( num_users > 1 )); then + result=0 + break + fi + + sleep 1 + done + + return $result +} + +# Verify that the given Elasticsearch user exists. 
+function check_user_exists { + local username=$1 + + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' + "http://${elasticsearch_host}:9200/_security/user/${username}" + ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local -i exists=0 + local output + + output="$(curl "${args[@]}")" + if [[ "${output: -3}" -eq 200 || "${output: -3}" -eq 404 ]]; then + result=0 + fi + if [[ "${output: -3}" -eq 200 ]]; then + exists=1 + fi + + if ((result)); then + echo -e "\n${output::-3}" + else + echo "$exists" + fi + + return $result +} + +# Set password of a given Elasticsearch user. +function set_user_password { + local username=$1 + local password=$2 + + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' + "http://${elasticsearch_host}:9200/_security/user/${username}/_password" + '-X' 'POST' + '-H' 'Content-Type: application/json' + '-d' "{\"password\" : \"${password}\"}" + ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local output + + output="$(curl "${args[@]}")" + if [[ "${output: -3}" -eq 200 ]]; then + result=0 + fi + + if ((result)); then + echo -e "\n${output::-3}\n" + fi + + return $result +} + +# Create the given Elasticsearch user. 
+function create_user { + local username=$1 + local password=$2 + local role=$3 + + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' + "http://${elasticsearch_host}:9200/_security/user/${username}" + '-X' 'POST' + '-H' 'Content-Type: application/json' + '-d' "{\"password\":\"${password}\",\"roles\":[\"${role}\"]}" + ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local output + + output="$(curl "${args[@]}")" + if [[ "${output: -3}" -eq 200 ]]; then + result=0 + fi + + if ((result)); then + echo -e "\n${output::-3}\n" + fi + + return $result +} + +# Ensure that the given Elasticsearch role is up-to-date, create it if required. +function ensure_role { + local name=$1 + local body=$2 + + local elasticsearch_host="${ELASTICSEARCH_HOST:-elasticsearch}" + + local -a args=( '-s' '-D-' '-m15' '-w' '%{http_code}' + "http://${elasticsearch_host}:9200/_security/role/${name}" + '-X' 'POST' + '-H' 'Content-Type: application/json' + '-d' "$body" + ) + + if [[ -n "${ELASTIC_PASSWORD:-}" ]]; then + args+=( '-u' "elastic:${ELASTIC_PASSWORD}" ) + fi + + local -i result=1 + local output + + output="$(curl "${args[@]}")" + if [[ "${output: -3}" -eq 200 ]]; then + result=0 + fi + + if ((result)); then + echo -e "\n${output::-3}\n" + fi + + return $result +} \ No newline at end of file diff --git a/data/elasticsearch/data/roles/filebeat_writer.json b/data/elasticsearch/data/roles/filebeat_writer.json new file mode 100644 index 0000000..aff2b70 --- /dev/null +++ b/data/elasticsearch/data/roles/filebeat_writer.json @@ -0,0 +1,20 @@ +{ + "cluster": [ + "manage_ilm", + "manage_index_templates", + "manage_ingest_pipelines", + "monitor", + "read_pipeline" + ], + "indices": [ + { + "names": [ + "filebeat-*" + ], + "privileges": [ + "create_doc", + "manage" + ] + } + ] +} \ No newline at end of file diff --git 
a/data/elasticsearch/data/roles/heartbeat_writer.json b/data/elasticsearch/data/roles/heartbeat_writer.json new file mode 100644 index 0000000..bd569c0 --- /dev/null +++ b/data/elasticsearch/data/roles/heartbeat_writer.json @@ -0,0 +1,18 @@ +{ + "cluster": [ + "manage_ilm", + "manage_index_templates", + "monitor" + ], + "indices": [ + { + "names": [ + "heartbeat-*" + ], + "privileges": [ + "create_doc", + "manage" + ] + } + ] +} \ No newline at end of file diff --git a/data/elasticsearch/data/roles/logstash_writer.json b/data/elasticsearch/data/roles/logstash_writer.json new file mode 100644 index 0000000..7261b5c --- /dev/null +++ b/data/elasticsearch/data/roles/logstash_writer.json @@ -0,0 +1,33 @@ +{ + "cluster": [ + "manage_index_templates", + "monitor", + "manage_ilm" + ], + "indices": [ + { + "names": [ + "logs-generic-default", + "logstash-*", + "ecs-logstash-*" + ], + "privileges": [ + "write", + "create", + "create_index", + "manage", + "manage_ilm" + ] + }, + { + "names": [ + "logstash", + "ecs-logstash" + ], + "privileges": [ + "write", + "manage" + ] + } + ] +} \ No newline at end of file diff --git a/data/elasticsearch/data/roles/metricbeat_writer.json b/data/elasticsearch/data/roles/metricbeat_writer.json new file mode 100644 index 0000000..741e208 --- /dev/null +++ b/data/elasticsearch/data/roles/metricbeat_writer.json @@ -0,0 +1,19 @@ +{ + "cluster": [ + "manage_ilm", + "manage_index_templates", + "monitor" + ], + "indices": [ + { + "names": [ + ".monitoring-*-mb", + "metricbeat-*" + ], + "privileges": [ + "create_doc", + "manage" + ] + } + ] +} \ No newline at end of file diff --git a/data/fluentd/etc/fluent.conf b/data/fluentd/etc/fluent.conf new file mode 100644 index 0000000..c30d323 --- /dev/null +++ b/data/fluentd/etc/fluent.conf @@ -0,0 +1,30 @@ +# fluentd/conf/fluent.conf + +<source> + @type forward + port 24224 + bind 0.0.0.0 +</source> + +<match *.**> + @type copy + + <store> + @type elasticsearch + host elasticsearch + port 9200 + user elastic + password elasticsearch + 
logstash_format true + logstash_prefix fluentd + logstash_dateformat %Y%m%d + include_tag_key true + type_name access_log + tag_key @log_name + flush_interval 1s + </store> + + <store> + @type stdout + </store> +</match> \ No newline at end of file diff --git a/data/kibana/config/kibana.yml b/data/kibana/config/kibana.yml new file mode 100644 index 0000000..fa5fed7 --- /dev/null +++ b/data/kibana/config/kibana.yml @@ -0,0 +1,99 @@ +--- +## Default Kibana configuration from Kibana base image. +## https://github.com/elastic/kibana/blob/main/src/dev/build/tasks/os_packages/docker_generator/templates/kibana_yml.template.ts +# +server.name: kibana +server.host: 0.0.0.0 +elasticsearch.hosts: [ http://elasticsearch:9200 ] + +monitoring.ui.container.elasticsearch.enabled: true +monitoring.ui.container.logstash.enabled: true + +## X-Pack security credentials +# +elasticsearch.username: kibana_system +elasticsearch.password: ${KIBANA_SYSTEM_PASSWORD} + +## Encryption keys (optional but highly recommended) +## +## Generate with either +## $ docker container run --rm docker.elastic.co/kibana/kibana:8.6.2 bin/kibana-encryption-keys generate +## $ openssl rand -hex 32 +## +## https://www.elastic.co/guide/en/kibana/current/using-kibana-with-security.html +## https://www.elastic.co/guide/en/kibana/current/kibana-encryption-keys.html +# +#xpack.security.encryptionKey: +#xpack.encryptedSavedObjects.encryptionKey: +#xpack.reporting.encryptionKey: + +## Fleet +## https://www.elastic.co/guide/en/kibana/current/fleet-settings-kb.html +# +xpack.fleet.agents.fleet_server.hosts: [ http://fleet-server:8220 ] + +xpack.fleet.outputs: + - id: fleet-default-output + name: default + type: elasticsearch + hosts: [ http://elasticsearch:9200 ] + is_default: true + is_default_monitoring: true + +xpack.fleet.packages: + - name: fleet_server + version: latest + - name: system + version: latest + - name: elastic_agent + version: latest + - name: docker + version: latest + - name: apm + version: latest + +xpack.fleet.agentPolicies: + - 
name: Fleet Server Policy + id: fleet-server-policy + description: Static agent policy for Fleet Server + monitoring_enabled: + - logs + - metrics + package_policies: + - name: fleet_server-1 + package: + name: fleet_server + - name: system-1 + package: + name: system + - name: elastic_agent-1 + package: + name: elastic_agent + - name: docker-1 + package: + name: docker + - name: Agent Policy APM Server + id: agent-policy-apm-server + description: Static agent policy for the APM Server integration + monitoring_enabled: + - logs + - metrics + package_policies: + - name: system-1 + package: + name: system + - name: elastic_agent-1 + package: + name: elastic_agent + - name: apm-1 + package: + name: apm + # See the APM package manifest for a list of possible inputs. + # https://github.com/elastic/apm-server/blob/v8.5.0/apmpackage/apm/manifest.yml#L41-L168 + inputs: + - type: apm + vars: + - name: host + value: 0.0.0.0:8200 + - name: url + value: http://apm-server:8200 \ No newline at end of file diff --git a/data/logstash/config/logstash.yml b/data/logstash/config/logstash.yml new file mode 100644 index 0000000..88182e8 --- /dev/null +++ b/data/logstash/config/logstash.yml @@ -0,0 +1,7 @@ +--- +## Default Logstash configuration from Logstash base image. 
+## https://github.com/elastic/logstash/blob/main/docker/data/logstash/config/logstash-full.yml +# +http.host: 0.0.0.0 + +node.name: logstash \ No newline at end of file diff --git a/data/logstash/pipeline/logstash.conf b/data/logstash/pipeline/logstash.conf new file mode 100644 index 0000000..8c2bd20 --- /dev/null +++ b/data/logstash/pipeline/logstash.conf @@ -0,0 +1,19 @@ +input { + beats { + port => 5044 + } + + tcp { + port => 50000 + } +} + +## Add your filters / logstash plugins configuration here + +output { + elasticsearch { + hosts => "elasticsearch:9200" + user => "logstash_internal" + password => "${LOGSTASH_INTERNAL_PASSWORD}" + } +} \ No newline at end of file diff --git a/docker-compose.yaml b/docker-compose.yaml index e157c24..cc8164a 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -524,18 +524,39 @@ services: volumes: - "./data/duckdb:/opt/data" elasticsearch: + container_name: elasticsearch-data + depends_on: + elasticsearch-server: + condition: service_healthy + entrypoint: /tmp/entrypoint.sh + environment: + - "ELASTIC_PASSWORD=${ELASTIC_PASSWORD:-elasticsearch}" + - "LOGSTASH_INTERNAL_PASSWORD=${LOGSTASH_INTERNAL_PASSWORD:-password}" + - "KIBANA_SYSTEM_PASSWORD=${KIBANA_SYSTEM_PASSWORD:-password}" + - "METRICBEAT_INTERNAL_PASSWORD=${METRICBEAT_INTERNAL_PASSWORD:-}" + - "FILEBEAT_INTERNAL_PASSWORD=${FILEBEAT_INTERNAL_PASSWORD:-}" + - "HEARTBEAT_INTERNAL_PASSWORD=${HEARTBEAT_INTERNAL_PASSWORD:-}" + - "MONITORING_INTERNAL_PASSWORD=${MONITORING_INTERNAL_PASSWORD:-}" + - "BEATS_SYSTEM_PASSWORD=${BEATS_SYSTEM_PASSWORD:-}" + image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.15.0}" + volumes: + - "./data/elasticsearch/data/entrypoint.sh:/tmp/entrypoint.sh" + - "./data/elasticsearch/data/lib.sh:/tmp/lib.sh" + - "./data/elasticsearch/data/roles:/tmp/roles" + elasticsearch-server: container_name: elasticsearch environment: - node.name=elasticsearch - ES_JAVA_OPTS=-Xms512m -Xmx512m - 
"ELASTIC_PASSWORD=${ELASTICSEARCH_PASSWORD:-elasticsearch}" + - XPACK_SECURITY_ENABLED=true - discovery.type=single-node healthcheck: interval: 10s retries: 5 test: "curl -sS --fail http://elasticsearch:9200/_cluster/health?wait_for_status=yellow&timeout=0s" timeout: 5s - image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.14.1}" + image: "docker.elastic.co/elasticsearch/elasticsearch:${ELASTICSEARCH_VERSION:-8.15.0}" ports: - "9200:9200" - "9300:9300" @@ -579,6 +600,17 @@ services: image: "flink:${FLINK_VERSION:-1.19.0-scala_2.12-java17}" ports: - "8081:8081" + fluentd: + container_name: fluentd + depends_on: + elasticsearch: + condition: service_completed_successfully + image: "datacatering/fluentd-elasticsearch:${FLUENTD_VERSION:-v1.17.0-debian-1.0}" + ports: + - "24224:24224" + - "24224:24224/udp" + volumes: + - "./data/fluentd/etc/fluent.conf:/fluentd/etc/fluent.conf" httpbin: container_name: http environment: @@ -586,6 +618,19 @@ services: image: "kennethreitz/httpbin:${HTTPBIN_VERSION:-latest}" ports: - "80:80" + httpd: + container_name: httpd + depends_on: + - fluentd + image: "httpd:${HTTPD_VERSION:-2.4.62}" + logging: + driver: fluentd + options: + fluentd-address: localhost:24224 + fluentd-async-connect: "true" + tag: httpd.access + ports: + - "80:80" jupyter: command: [jupyter, notebook, --no-browser, "--NotebookApp.token=''", "--NotebookApp.password=''"] container_name: jupyter @@ -640,6 +685,49 @@ services: restart: unless-stopped volumes: - "./data/keycloak/realm.json:/opt/keycloak/data/import/realm.json:ro" + kibana: + container_name: kibana + depends_on: + elasticsearch: + condition: service_completed_successfully + environment: + - ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + - ELASTICSEARCH_USERNAME=kibana_system + - "KIBANA_SYSTEM_PASSWORD=${KIBANA_SYSTEM_PASSWORD:-password}" + healthcheck: + interval: 10s + retries: 3 + test: [ CMD, curl, --fail, "http://localhost:5601/api/status" ] + timeout: 5s + image: 
"docker.elastic.co/kibana/kibana:${KIBANA_VERSION:-8.15.0}" + ports: + - "5601:5601" + restart: always + volumes: + - "./data/kibana/config/kibana.yml:/usr/share/kibana/config/kibana.yml" + logstash: + container_name: logstash + depends_on: + elasticsearch: + condition: service_completed_successfully + environment: + LOGSTASH_INTERNAL_PASSWORD: "${LOGSTASH_INTERNAL_PASSWORD:-password}" + LS_JAVA_OPTS: "-Xms256m -Xmx256m" + healthcheck: + interval: 10s + retries: 3 + test: [ CMD, curl, --fail, "http://localhost:9600" ] + timeout: 5s + image: "docker.elastic.co/logstash/logstash:${LOGSTASH_VERSION:-8.15.0}" + ports: + - "5044:5044" + - "50000:50000/tcp" + - "50000:50000/udp" + - "9600:9600" + restart: unless-stopped + volumes: + - "./data/logstash/config/logstash.yml:/usr/share/logstash/config/logstash.yml" + - "./data/logstash/pipeline:/usr/share/logstash/pipeline" maestro: container_name: maestro depends_on: