From 8f0d52570047b697f7ac321e2a900d45b33c2c26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 29 May 2024 08:11:59 +0000 Subject: [PATCH 01/64] [DOP-16270] Bump version --- onetl/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onetl/VERSION b/onetl/VERSION index af88ba82..bc859cbd 100644 --- a/onetl/VERSION +++ b/onetl/VERSION @@ -1 +1 @@ -0.11.1 +0.11.2 From f50f008a132ed9619a83c0368815aebf1125ef4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 4 Jun 2024 12:19:41 +0000 Subject: [PATCH 02/64] [DOP-13853] Update MongoDB example --- .../db_connection/mongodb/types.rst | 86 +++++++++---------- 1 file changed, 43 insertions(+), 43 deletions(-) diff --git a/docs/connection/db_connection/mongodb/types.rst b/docs/connection/db_connection/mongodb/types.rst index 2a023164..4b22b7cb 100644 --- a/docs/connection/db_connection/mongodb/types.rst +++ b/docs/connection/db_connection/mongodb/types.rst @@ -14,49 +14,49 @@ MongoDB is, by design, __schemaless__. So there are 2 ways how this can be handl .. dropdown:: See example - .. code-block:: python - - from onetl.connection import MongoDB - from onetl.db import DBReader - - from pyspark.sql.types import ( - StructType, - StructField, - IntegerType, - StringType, - TimestampType, - ) - - mongodb = MongoDB(...) - - df_schema = StructType( - [ - StructField("_id", StringType()), - StructField("some", StringType()), - StructField( - "field", - StructType( - [ - StructField("nested", IntegerType()), - ] - ), - ), - ] - ) - - reader = DBReader( - connection=mongodb, - source="some_collection", - df_schema=df_schema, - ) - df = reader.run() - - # or - - df = mongodb.pipeline( - collection="some_collection", - df_schema=df_schema, - ) + .. code-block:: python + + from onetl.connection import MongoDB + from onetl.db import DBReader + + from pyspark.sql.types import ( + StructType, + StructField, + IntegerType, + StringType, + TimestampType, + ) + + mongodb = MongoDB(...) 
+ + df_schema = StructType( + [ + StructField("_id", StringType()), + StructField("some", StringType()), + StructField( + "field", + StructType( + [ + StructField("nested", IntegerType()), + ] + ), + ), + ] + ) + + reader = DBReader( + connection=mongodb, + source="some_collection", + df_schema=df_schema, + ) + df = reader.run() + + # or + + df = mongodb.pipeline( + collection="some_collection", + df_schema=df_schema, + ) * Rely on MongoDB connector schema infer: From a015b80e851a2395c08d864dd03021f3410f7336 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 4 Jun 2024 12:36:39 +0000 Subject: [PATCH 03/64] [DOP-14063] Drop onetl._internal module --- .github/workflows/data/core/tracked.txt | 1 - .github/workflows/test-core.yml | 2 +- onetl/_internal.py | 172 ------------------ onetl/_util/file.py | 34 +++- onetl/_util/spark.py | 63 +++++++ onetl/_util/sql.py | 25 +++ .../db_connection/hive/connection.py | 2 +- .../jdbc_connection/connection.py | 2 +- .../db_connection/jdbc_connection/options.py | 26 ++- .../db_connection/jdbc_mixin/connection.py | 4 +- .../db_connection/kafka/connection.py | 3 +- .../kafka/kafka_kerberos_auth.py | 2 +- .../db_connection/kafka/kafka_scram_auth.py | 2 +- .../db_connection/kafka/kafka_ssl_protocol.py | 2 +- .../db_connection/oracle/connection.py | 2 +- .../db_connection/teradata/connection.py | 2 +- .../file_df_connection/spark_s3/connection.py | 3 +- onetl/file/file_downloader/file_downloader.py | 2 +- onetl/file/file_uploader/file_uploader.py | 2 +- onetl/file/format/csv.py | 3 +- onetl/file/format/json.py | 2 +- .../test_generate_temp_path.py | 5 +- .../test_jdbc_options_unit.py | 4 +- 23 files changed, 153 insertions(+), 212 deletions(-) delete mode 100644 onetl/_internal.py create mode 100644 onetl/_util/sql.py diff --git a/.github/workflows/data/core/tracked.txt b/.github/workflows/data/core/tracked.txt index 5b2a3ca4..855cb884 100644 --- a/.github/workflows/data/core/tracked.txt +++ b/.github/workflows/data/core/tracked.txt @@ -3,6 +3,5 @@ onetl/plugins/** onetl/impl/** onetl/hwm/** onetl/_util/** -onetl/_internal.py onetl/log.py .github/workflows/data/core/** diff --git a/.github/workflows/test-core.yml b/.github/workflows/test-core.yml index 65d681dc..6008f925 100644 --- a/.github/workflows/test-core.yml +++ b/.github/workflows/test-core.yml @@ -72,7 +72,7 @@ jobs: - name: Run tests run: | ./run_tests.sh -m 'not connection' - ./run_tests.sh onetl/_util onetl/_internal.py onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py + ./run_tests.sh onetl/_util onetl/hooks onetl/file/filter onetl/file/limit onetl/hwm/store/hwm_class_registry.py - name: Upload coverage results uses: actions/upload-artifact@v4 diff --git a/onetl/_internal.py b/onetl/_internal.py deleted file mode 100644 index 361bb3e8..00000000 --- a/onetl/_internal.py +++ /dev/null @@ -1,172 +0,0 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) -# SPDX-License-Identifier: Apache-2.0 -""" - Helpers -""" - -from __future__ import annotations - -import os -from datetime import datetime -from typing import TYPE_CHECKING, Any - -try: - from pydantic.v1 import SecretStr -except (ImportError, AttributeError): - from pydantic import SecretStr # type: ignore[no-redef, assignment] - -if TYPE_CHECKING: - from pathlib import PurePath - -# e.g. 
20230524122150 -DATETIME_FORMAT = "%Y%m%d%H%M%S" - - -def clear_statement(statement: str) -> str: - """ - Clear unnecessary spaces and semicolons at the statement end. - - Oracle-specific: adds semicolon after END statement. - - Examples - -------- - - >>> clear_statement("SELECT * FROM mytable") - 'SELECT * FROM mytable' - >>> clear_statement("SELECT * FROM mytable ; ") - 'SELECT * FROM mytable' - >>> clear_statement("CREATE TABLE mytable (id NUMBER)") - 'CREATE TABLE mytable (id NUMBER)' - >>> clear_statement("BEGIN ... END") - 'BEGIN ... END;' - """ - - statement = statement.rstrip().lstrip("\n\r").rstrip(";").rstrip() - if statement.lower().endswith("end"): - statement += ";" - return statement - - -def uniq_ignore_case(orig_list: list[str]) -> list[str]: - """ - Return only uniq values from a list, case ignore. - - Examples - -------- - - >>> uniq_ignore_case(["a", "c"]) - ['a', 'c'] - >>> uniq_ignore_case(["A", "a", "c"]) - ['A', 'c'] - >>> uniq_ignore_case(["a", "A", "c"]) - ['a', 'c'] - """ - - result: list[str] = [] - already_visited: set[str] = set() - - for orig_value in orig_list: - if orig_value.casefold() not in already_visited: - result.append(orig_value) - already_visited.add(orig_value.casefold()) - - return result - - -def stringify(value: Any, quote: bool = False) -> Any: # noqa: WPS212 - """ - Convert values to strings. - - Values ``True``, ``False`` and ``None`` become ``"true"``, ``"false"`` and ``"null"``. - - If input is dict, return dict with stringified values and keys (recursive). - - If input is list, return list with stringified values (recursive). - - If ``quote=True``, wrap string values with double quotes. - - Examples - -------- - - >>> stringify(1) - '1' - >>> stringify(True) - 'true' - >>> stringify(False) - 'false' - >>> stringify(None) - 'null' - >>> stringify("string") - 'string' - >>> stringify("string", quote=True) - '"string"' - >>> stringify({"abc": 1}) - {'abc': '1'} - >>> stringify([1, True, False, None, "string"]) - ['1', 'true', 'false', 'null', 'string'] - """ - - if isinstance(value, dict): - return {stringify(k): stringify(v, quote) for k, v in value.items()} - - if isinstance(value, list): - return [stringify(v, quote) for v in value] - - if value is None: - return "null" - - if isinstance(value, bool): - return "true" if value else "false" - - if isinstance(value, SecretStr): - value = value.get_secret_value() - - if isinstance(value, os.PathLike): - value = os.fspath(value) - - if isinstance(value, str): - return f'"{value}"' if quote else value - - return str(value) - - -def to_camel(string: str) -> str: - """ - Convert ``snake_case`` strings to ``camelCase`` (with first symbol in lowercase) - - Examples - -------- - - >>> to_camel("some_value") - 'someValue' - """ - - return "".join(word.capitalize() if index > 0 else word for index, word in enumerate(string.split("_"))) - - -def generate_temp_path(root: PurePath) -> PurePath: - """ - Returns prefix which will be used for creating temp directory - - Returns - ------- - RemotePath - Temp path, containing current host name, process name and datetime - - Examples - -------- - - >>> from etl_entities.process import Process - >>> from pathlib import Path - >>> generate_temp_path(Path("/tmp")) # doctest: +SKIP - Path("/tmp/onetl/currenthost/myprocess/20230524122150") - >>> with Process(dag="mydag", task="mytask"): # doctest: +SKIP - ... 
generate_temp_path(Path("/abc")) - Path("/abc/onetl/currenthost/mydag.mytask.myprocess/20230524122150") - """ - - from etl_entities.process import ProcessStackManager - - current_process = ProcessStackManager.get_current() - current_dt = datetime.now().strftime(DATETIME_FORMAT) - return root / "onetl" / current_process.host / current_process.full_name / current_dt diff --git a/onetl/_util/file.py b/onetl/_util/file.py index 06e6ef04..ee27c57f 100644 --- a/onetl/_util/file.py +++ b/onetl/_util/file.py @@ -5,11 +5,15 @@ import hashlib import io import os -from pathlib import Path +from datetime import datetime +from pathlib import Path, PurePath from onetl.exception import NotAFileError from onetl.impl import path_repr +# e.g. 20230524122150 +DATETIME_FORMAT = "%Y%m%d%H%M%S" + def get_file_hash( path: os.PathLike | str, @@ -41,3 +45,31 @@ def is_file_readable(path: str | os.PathLike) -> Path: raise OSError(f"No read access to file {path_repr(path)}") return path + + +def generate_temp_path(root: PurePath) -> PurePath: + """ + Returns prefix which will be used for creating temp directory + + Returns + ------- + RemotePath + Temp path, containing current host name, process name and datetime + + Examples + -------- + + >>> from etl_entities.process import Process + >>> from pathlib import Path + >>> generate_temp_path(Path("/tmp")) # doctest: +SKIP + Path("/tmp/onetl/currenthost/myprocess/20230524122150") + >>> with Process(dag="mydag", task="mytask"): # doctest: +SKIP + ... generate_temp_path(Path("/abc")) + Path("/abc/onetl/currenthost/mydag.mytask.myprocess/20230524122150") + """ + + from etl_entities.process import ProcessStackManager + + current_process = ProcessStackManager.get_current() + current_dt = datetime.now().strftime(DATETIME_FORMAT) + return root / "onetl" / current_process.host / current_process.full_name / current_dt diff --git a/onetl/_util/spark.py b/onetl/_util/spark.py index 230abe80..f172b1c9 100644 --- a/onetl/_util/spark.py +++ b/onetl/_util/spark.py @@ -2,6 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import os import textwrap from contextlib import contextmanager from math import inf @@ -9,11 +10,73 @@ from onetl._util.version import Version +try: + from pydantic.v1 import SecretStr +except (ImportError, AttributeError): + from pydantic import SecretStr # type: ignore[no-redef, assignment] + if TYPE_CHECKING: from pyspark.sql import SparkSession from pyspark.sql.conf import RuntimeConfig +def stringify(value: Any, quote: bool = False) -> Any: # noqa: WPS212 + """ + Convert values to strings. + + Values ``True``, ``False`` and ``None`` become ``"true"``, ``"false"`` and ``"null"``. + + If input is dict, return dict with stringified values and keys (recursive). + + If input is list, return list with stringified values (recursive). + + If ``quote=True``, wrap string values with double quotes. 
+ + Examples + -------- + + >>> stringify(1) + '1' + >>> stringify(True) + 'true' + >>> stringify(False) + 'false' + >>> stringify(None) + 'null' + >>> stringify("string") + 'string' + >>> stringify("string", quote=True) + '"string"' + >>> stringify({"abc": 1}) + {'abc': '1'} + >>> stringify([1, True, False, None, "string"]) + ['1', 'true', 'false', 'null', 'string'] + """ + + if isinstance(value, dict): + return {stringify(k): stringify(v, quote) for k, v in value.items()} + + if isinstance(value, list): + return [stringify(v, quote) for v in value] + + if value is None: + return "null" + + if isinstance(value, bool): + return "true" if value else "false" + + if isinstance(value, SecretStr): + value = value.get_secret_value() + + if isinstance(value, os.PathLike): + value = os.fspath(value) + + if isinstance(value, str): + return f'"{value}"' if quote else value + + return str(value) + + @contextmanager def inject_spark_param(conf: RuntimeConfig, name: str, value: Any): """ diff --git a/onetl/_util/sql.py b/onetl/_util/sql.py new file mode 100644 index 00000000..37aa09a7 --- /dev/null +++ b/onetl/_util/sql.py @@ -0,0 +1,25 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +def clear_statement(statement: str) -> str: + """ + Clear unnecessary spaces and semicolons at the statement end. + + Oracle-specific: adds semicolon after END statement. + + Examples + -------- + + >>> clear_statement("SELECT * FROM mytable") + 'SELECT * FROM mytable' + >>> clear_statement("SELECT * FROM mytable ; ") + 'SELECT * FROM mytable' + >>> clear_statement("CREATE TABLE mytable (id NUMBER)") + 'CREATE TABLE mytable (id NUMBER)' + >>> clear_statement("BEGIN ... END") + 'BEGIN ... END;' + """ + + statement = statement.rstrip().lstrip("\n\r").rstrip(";").rstrip() + if statement.lower().endswith("end"): + statement += ";" + return statement diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index fbedebef..7fcb4dce 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -13,8 +13,8 @@ except (ImportError, AttributeError): from pydantic import validator # type: ignore[no-redef, assignment] -from onetl._internal import clear_statement from onetl._util.spark import inject_spark_param +from onetl._util.sql import clear_statement from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.hive.dialect import HiveDialect from onetl.connection.db_connection.hive.options import ( diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index e6716ae5..5b0aebeb 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -7,7 +7,7 @@ import warnings from typing import TYPE_CHECKING, Any -from onetl._internal import clear_statement +from onetl._util.sql import clear_statement from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect from onetl.connection.db_connection.jdbc_connection.options import ( diff --git a/onetl/connection/db_connection/jdbc_connection/options.py b/onetl/connection/db_connection/jdbc_connection/options.py index cd4538f2..a2aa39ad 100644 --- a/onetl/connection/db_connection/jdbc_connection/options.py +++ 
b/onetl/connection/db_connection/jdbc_connection/options.py @@ -15,7 +15,6 @@ from typing_extensions import deprecated -from onetl._internal import to_camel from onetl.impl import GenericOptions # options from spark.read.jdbc which are populated by JDBCConnection methods @@ -144,10 +143,9 @@ class Config: known_options = READ_OPTIONS | READ_WRITE_OPTIONS prohibited_options = GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS extra = "allow" - alias_generator = to_camel # Options in DataFrameWriter.jdbc() method - partition_column: Optional[str] = None + partition_column: Optional[str] = Field(default=None, alias="partitionColumn") """Column used to parallelize reading from a table. .. warning:: @@ -164,17 +162,17 @@ class Config: See documentation for :obj:`~partitioning_mode` for more details""" - num_partitions: PositiveInt = 1 + num_partitions: PositiveInt = Field(default=1, alias="numPartitions") """Number of jobs created by Spark to read the table content in parallel. See documentation for :obj:`~partitioning_mode` for more details""" - lower_bound: Optional[int] = None + lower_bound: Optional[int] = Field(default=None, alias="lowerBound") """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 - upper_bound: Optional[int] = None + upper_bound: Optional[int] = Field(default=None, alias="upperBound") """See documentation for :obj:`~partitioning_mode` for more details""" # noqa: WPS322 - session_init_statement: Optional[str] = None + session_init_statement: Optional[str] = Field(default=None, alias="sessionInitStatement") '''After each database session is opened to the remote DB and before starting to read data, this option executes a custom SQL statement (or a PL/SQL block). @@ -423,7 +421,6 @@ class Config: known_options = WRITE_OPTIONS | READ_WRITE_OPTIONS prohibited_options = GENERIC_PROHIBITED_OPTIONS | READ_OPTIONS extra = "allow" - alias_generator = to_camel if_exists: JDBCTableExistBehavior = Field(default=JDBCTableExistBehavior.APPEND, alias="mode") """Behavior of writing data into existing table. @@ -528,7 +525,7 @@ class Config: Changed default value from 1000 to 20_000 """ - isolation_level: str = "READ_UNCOMMITTED" + isolation_level: str = Field(default="READ_UNCOMMITTED", alias="isolationLevel") """The transaction isolation level, which applies to current connection. Possible values: @@ -571,7 +568,7 @@ class JDBCSQLOptions(GenericOptions): Split up ``ReadOptions`` to ``SQLOptions`` """ - partition_column: Optional[str] = None + partition_column: Optional[str] = Field(default=None, alias="partitionColumn") """Column used to partition data across multiple executors for parallel query processing. .. warning:: @@ -600,16 +597,16 @@ class JDBCSQLOptions(GenericOptions): -- Where ``stride`` is calculated as ``(upper_bound - lower_bound) / num_partitions``. """ - num_partitions: Optional[int] = None + num_partitions: Optional[int] = Field(default=None, alias="numPartitions") """Number of jobs created by Spark to read the table content in parallel.""" # noqa: WPS322 - lower_bound: Optional[int] = None + lower_bound: Optional[int] = Field(default=None, alias="lowerBound") """Defines the starting boundary for partitioning the query's data. Mandatory if :obj:`~partition_column` is set""" # noqa: WPS322 - upper_bound: Optional[int] = None + upper_bound: Optional[int] = Field(default=None, alias="upperBound") """Sets the ending boundary for data partitioning. 
Mandatory if :obj:`~partition_column` is set""" # noqa: WPS322 - session_init_statement: Optional[str] = None + session_init_statement: Optional[str] = Field(default=None, alias="sessionInitStatement") '''After each database session is opened to the remote DB and before starting to read data, this option executes a custom SQL statement (or a PL/SQL block). @@ -658,7 +655,6 @@ class Config: known_options = READ_OPTIONS - {"partitioning_mode"} prohibited_options = GENERIC_PROHIBITED_OPTIONS | WRITE_OPTIONS | {"partitioning_mode"} extra = "allow" - alias_generator = to_camel @root_validator(pre=True) def _check_partition_fields(cls, values): diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index 8ab43075..e8c19e38 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -16,9 +16,9 @@ except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, SecretStr, validator # type: ignore[no-redef, assignment] -from onetl._internal import clear_statement, stringify from onetl._util.java import get_java_gateway, try_import_java_class -from onetl._util.spark import get_spark_version +from onetl._util.spark import get_spark_version, stringify +from onetl._util.sql import clear_statement from onetl._util.version import Version from onetl.connection.db_connection.jdbc_mixin.options import ( JDBCExecuteOptions, diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index b64fff14..ce3829e4 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -14,10 +14,9 @@ except (ImportError, AttributeError): from pydantic import root_validator, validator # type: ignore[no-redef, assignment] -from onetl._internal import stringify from onetl._util.java import try_import_java_class from onetl._util.scala import get_default_scala_version -from onetl._util.spark import get_spark_version +from onetl._util.spark import get_spark_version, stringify from onetl._util.version import Version from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.kafka.dialect import KafkaDialect diff --git a/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py b/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py index 6a20a31a..40e9aa55 100644 --- a/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py @@ -12,8 +12,8 @@ except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, root_validator, validator # type: ignore[no-redef, assignment] -from onetl._internal import stringify from onetl._util.file import get_file_hash, is_file_readable +from onetl._util.spark import stringify from onetl.connection.db_connection.kafka.kafka_auth import KafkaAuth from onetl.impl import GenericOptions, LocalPath, path_repr diff --git a/onetl/connection/db_connection/kafka/kafka_scram_auth.py b/onetl/connection/db_connection/kafka/kafka_scram_auth.py index add09f34..823d0f82 100644 --- a/onetl/connection/db_connection/kafka/kafka_scram_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_scram_auth.py @@ -11,7 +11,7 @@ from typing_extensions import Literal -from onetl._internal import stringify +from onetl._util.spark import stringify from onetl.connection.db_connection.kafka.kafka_auth import KafkaAuth from 
onetl.impl import GenericOptions diff --git a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py index 6149f5aa..24dd52f6 100644 --- a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py +++ b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py @@ -10,8 +10,8 @@ except (ImportError, AttributeError): from pydantic import Field, SecretStr, validator # type: ignore[no-redef, assignment] -from onetl._internal import stringify from onetl._util.file import is_file_readable +from onetl._util.spark import stringify from onetl.impl import GenericOptions, LocalPath if TYPE_CHECKING: diff --git a/onetl/connection/db_connection/oracle/connection.py b/onetl/connection/db_connection/oracle/connection.py index 8ca1b6ef..04398950 100644 --- a/onetl/connection/db_connection/oracle/connection.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -19,8 +19,8 @@ from etl_entities.instance import Host -from onetl._internal import clear_statement from onetl._util.classproperty import classproperty +from onetl._util.sql import clear_statement from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection from onetl.connection.db_connection.jdbc_connection.options import JDBCReadOptions diff --git a/onetl/connection/db_connection/teradata/connection.py b/onetl/connection/db_connection/teradata/connection.py index d6ea76ac..6ef2637b 100644 --- a/onetl/connection/db_connection/teradata/connection.py +++ b/onetl/connection/db_connection/teradata/connection.py @@ -7,8 +7,8 @@ from etl_entities.instance import Host -from onetl._internal import stringify from onetl._util.classproperty import classproperty +from onetl._util.spark import stringify from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection from onetl.connection.db_connection.teradata.dialect import TeradataDialect diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index 04da89e0..1efe39d4 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py @@ -16,11 +16,10 @@ from typing_extensions import Literal -from onetl._internal import stringify from onetl._util.hadoop import get_hadoop_config, get_hadoop_version from onetl._util.java import try_import_java_class from onetl._util.scala import get_default_scala_version -from onetl._util.spark import get_spark_version +from onetl._util.spark import get_spark_version, stringify from onetl._util.version import Version from onetl.base import ( BaseReadableFileFormat, diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index 3fe45ff4..069f8c69 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py @@ -22,7 +22,7 @@ except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, root_validator, validator # type: ignore[no-redef, assignment] -from onetl._internal import generate_temp_path +from onetl._util.file import generate_temp_path from onetl.base import BaseFileConnection, BaseFileFilter, BaseFileLimit from onetl.base.path_protocol import PathProtocol from onetl.file.file_downloader.options import FileDownloaderOptions diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index 
9ab5f088..fc6709ce 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -15,7 +15,7 @@ except (ImportError, AttributeError): from pydantic import PrivateAttr, validator # type: ignore[no-redef, assignment] -from onetl._internal import generate_temp_path +from onetl._util.file import generate_temp_path from onetl.base import BaseFileConnection from onetl.base.path_protocol import PathWithStatsProtocol from onetl.base.pure_path_protocol import PurePathProtocol diff --git a/onetl/file/format/csv.py b/onetl/file/format/csv.py index 353a8e98..1c4442fd 100644 --- a/onetl/file/format/csv.py +++ b/onetl/file/format/csv.py @@ -10,8 +10,7 @@ except (ImportError, AttributeError): from pydantic import Field # type: ignore[no-redef, assignment] -from onetl._internal import stringify -from onetl._util.spark import get_spark_version +from onetl._util.spark import get_spark_version, stringify from onetl.file.format.file_format import ReadWriteFileFormat from onetl.hooks import slot, support_hooks diff --git a/onetl/file/format/json.py b/onetl/file/format/json.py index 69887442..085d125e 100644 --- a/onetl/file/format/json.py +++ b/onetl/file/format/json.py @@ -6,7 +6,7 @@ from typing_extensions import Literal -from onetl._internal import stringify +from onetl._util.spark import stringify from onetl.file.format.file_format import ReadOnlyFileFormat from onetl.hooks import slot, support_hooks diff --git a/tests/tests_unit/test_internal_unit/test_generate_temp_path.py b/tests/tests_unit/test_internal_unit/test_generate_temp_path.py index faad170f..0b8f9885 100644 --- a/tests/tests_unit/test_internal_unit/test_generate_temp_path.py +++ b/tests/tests_unit/test_internal_unit/test_generate_temp_path.py @@ -3,13 +3,14 @@ from pathlib import PurePath import pytest -from etl_entities.process import Process -from onetl._internal import generate_temp_path +from onetl._util.file import generate_temp_path @pytest.mark.flaky(reruns=5) def test_generate_temp_path(): + from etl_entities.process import Process + root = PurePath("/path") dt_prefix = datetime.now().strftime("%Y%m%d%H%M") # up to minutes, not seconds diff --git a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py index 69983888..6d3ff132 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_jdbc_options_unit.py @@ -2,7 +2,6 @@ import pytest -from onetl._internal import to_camel from onetl.connection import MSSQL, Clickhouse, MySQL, Oracle, Postgres, Teradata from onetl.connection.db_connection.jdbc_connection import JDBCTableExistBehavior @@ -181,7 +180,8 @@ def test_jdbc_old_options_allowed_but_deprecated(arg, value): with pytest.warns(UserWarning, match=warning_msg): options = Postgres.Options.parse({arg: value}) - assert options.dict(by_alias=True)[to_camel(arg)] == value + parsed_value = options.dict().get(arg) or options.dict(by_alias=True).get(arg) + assert parsed_value == value @pytest.mark.parametrize( From 0fff2b5e1c46ac59705f8d94fafbd6d787c326b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 6 Jun 2024 08:28:39 +0000 Subject: [PATCH 04/64] [DOP-16555] Add notes about local Greenplum tests --- CONTRIBUTING.rst | 8 ++++++++ docker-compose.yml | 4 +++- 
tests/fixtures/spark.py | 7 +++---- .../test_clickhouse_writer_integration.py | 2 +- .../test_greenplum_writer_integration.py | 2 +- .../test_kafka_writer_integration.py | 2 +- .../test_mongodb_writer_integration.py | 2 +- .../test_mssql_writer_integration.py | 2 +- .../test_mysql_writer_integration.py | 2 +- .../test_oracle_writer_integration.py | 2 +- .../test_postgres_writer_integration.py | 6 +++--- 11 files changed, 24 insertions(+), 15 deletions(-) diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index e7a60fc1..7a70dbac 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -176,6 +176,14 @@ Without docker-compose * Download `VMware Greenplum connector for Spark `_ * Either move it to ``~/.ivy2/jars/``, or pass file path to ``CLASSPATH`` + * Set environment variable ``ONETL_GP_PACKAGE_VERSION=local``. + * On Linux, you may have to set environment variable ``SPARK_EXTERNAL_IP`` to IP of ``onetl_onetl`` network gateway: + + .. code:: bash + + export SPARK_EXTERNAL_IP=$(docker network inspect onetl_onetl --format '{{ (index .IPAM.Config 0).Gateway }}') + + This is because in some cases Spark does not properly detect hsot machine IP address, so Greenplum segments cannot connect to Spark executors. Start all containers with dependencies: diff --git a/docker-compose.yml b/docker-compose.yml index 3a61170e..34f2c4fe 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -24,13 +24,15 @@ services: # no dependencies from other containers to allow running limited set of tests instead of all greenplum: - image: ${GREENPLUM_IMAGE:-andruche/greenplum:7.0.0} + image: ${GREENPLUM_IMAGE:-andruche/greenplum:6} restart: unless-stopped env_file: .env.dependencies ports: - 5433:5432 networks: - onetl + extra_hosts: + - host.docker.internal:host-gateway sysctls: - net.ipv6.conf.all.disable_ipv6=1 diff --git a/tests/fixtures/spark.py b/tests/fixtures/spark.py index dbc03ba3..e7248e84 100644 --- a/tests/fixtures/spark.py +++ b/tests/fixtures/spark.py @@ -73,11 +73,12 @@ def maven_packages(request): if "teradata" in markers: packages.extend(Teradata.get_packages()) - if "greenplum" in markers: + gp_package_version = os.getenv("ONETL_GP_PACKAGE_VERSION") + if "greenplum" in markers and gp_package_version != "local": packages.extend( Greenplum.get_packages( spark_version=str(pyspark_version), - package_version=os.getenv("ONETL_GP_PACKAGE_VERSION") or None, + package_version=gp_package_version, ), ) @@ -139,8 +140,6 @@ def get_spark_session(warehouse_dir, spark_metastore_dir, ivysettings_path, mave .config("spark.driver.memory", "1g") .config("spark.driver.maxResultSize", "1g") .config("spark.executor.cores", "1") - .config("spark.driver.bindAddress", "127.0.0.1") # prevent Spark from unreachable network connection - .config("spark.driver.host", "127.0.0.1") .config("spark.executor.memory", "1g") .config("spark.executor.allowSparkContext", "true") # Greenplum uses SparkContext on executor if master==local .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_clickhouse_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_clickhouse_writer_integration.py index 459794b3..884cd015 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_clickhouse_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_clickhouse_writer_integration.py @@ -6,7 +6,7 @@ 
pytestmark = pytest.mark.clickhouse -def test_clickhouse_writer_snapshot(spark, processing, prepare_schema_table): +def test_clickhouse_writer(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) clickhouse = Clickhouse( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py index 338de0c6..d25d38f8 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_greenplum_writer_integration.py @@ -16,7 +16,7 @@ {"if_exists": "ignore"}, ], ) -def test_greenplum_writer_snapshot(spark, processing, get_schema_table, options): +def test_greenplum_writer(spark, processing, get_schema_table, options): df = processing.create_spark_df(spark=spark) greenplum = Greenplum( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py index cf045b31..dd4c045f 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_kafka_writer_integration.py @@ -62,7 +62,7 @@ def kafka_spark_df(spark, kafka_processing): return spark.createDataFrame(data, schema=schema) -def test_kafka_writer_snapshot(spark, kafka_processing, kafka_spark_df): +def test_kafka_writer(spark, kafka_processing, kafka_spark_df): from pyspark.sql.functions import lit if get_spark_version(spark).major < 3: diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mongodb_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mongodb_writer_integration.py index 503d2ee2..edfd2151 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mongodb_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mongodb_writer_integration.py @@ -19,7 +19,7 @@ {"if_exists": "ignore"}, ], ) -def test_mongodb_writer_snapshot(spark, processing, get_schema_table, options, caplog): +def test_mongodb_writer(spark, processing, get_schema_table, options, caplog): df = processing.create_spark_df(spark=spark) mongo = MongoDB( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mssql_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mssql_writer_integration.py index 3e6cf35b..6f79f5fa 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mssql_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mssql_writer_integration.py @@ -6,7 +6,7 @@ pytestmark = pytest.mark.mssql -def test_mssql_writer_snapshot(spark, processing, prepare_schema_table): +def test_mssql_writer(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) mssql = MSSQL( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mysql_writer_integration.py 
b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mysql_writer_integration.py index 86bc7cbb..5a345971 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mysql_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_mysql_writer_integration.py @@ -6,7 +6,7 @@ pytestmark = pytest.mark.mysql -def test_mysql_writer_snapshot(spark, processing, prepare_schema_table): +def test_mysql_writer(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) mysql = MySQL( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_oracle_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_oracle_writer_integration.py index f5083bab..779fc2e9 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_oracle_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_oracle_writer_integration.py @@ -6,7 +6,7 @@ pytestmark = pytest.mark.oracle -def test_oracle_writer_snapshot(spark, processing, prepare_schema_table): +def test_oracle_writer(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) oracle = Oracle( diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_postgres_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_postgres_writer_integration.py index ed651948..57483da0 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_postgres_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_postgres_writer_integration.py @@ -16,7 +16,7 @@ {"if_exists": "ignore"}, ], ) -def test_postgres_writer_snapshot(spark, processing, get_schema_table, options): +def test_postgres_writer(spark, processing, get_schema_table, options): df = processing.create_spark_df(spark=spark) postgres = Postgres( @@ -44,7 +44,7 @@ def test_postgres_writer_snapshot(spark, processing, get_schema_table, options): ) -def test_postgres_writer_snapshot_with_dict_options(spark, processing, prepare_schema_table): +def test_postgres_writer_with_dict_options(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) postgres = Postgres( @@ -72,7 +72,7 @@ def test_postgres_writer_snapshot_with_dict_options(spark, processing, prepare_s ) -def test_postgres_writer_snapshot_with_pydantic_options(spark, processing, prepare_schema_table): +def test_postgres_writer_with_pydantic_options(spark, processing, prepare_schema_table): df = processing.create_spark_df(spark=spark) postgres = Postgres( From 287eddf0acf556dbe94b88c92e625a4914752fb6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 10 Jun 2024 09:54:40 +0000 Subject: [PATCH 05/64] Update README --- README.rst | 99 ++++++++++++++++++++++++------------------------------ 1 file changed, 43 insertions(+), 56 deletions(-) diff --git a/README.rst b/README.rst index 4f97ce08..0a4cbc97 100644 --- a/README.rst +++ b/README.rst @@ -65,62 +65,49 @@ Supported storages Database ~~~~~~~~ 
-+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Storage | Powered by | -+==============+=========================================================================================================================+ -| Clickhouse | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| MSSQL | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| MySQL | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Postgres | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Oracle | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Teradata | Apache Spark `JDBC Data Source `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Hive | Apache Spark `Hive integration `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Kafka | Apache Spark `Kafka integration `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| Greenplum | VMware `Greenplum Spark connector `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ -| MongoDB | `MongoDB Spark connector `_ | -+--------------+-------------------------------------------------------------------------------------------------------------------------+ - -File -~~~~ -+--------------+--------------------------------------------------------------------+ -| Storage | Powered by | -+==============+====================================================================+ -| HDFS | `HDFS Python client `_ | -+--------------+--------------------------------------------------------------------+ -| S3 | `minio-py client `_ | -+--------------+--------------------------------------------------------------------+ -| SFTP | `Paramiko library `_ | -+--------------+--------------------------------------------------------------------+ -| FTP | `FTPUtil library `_ | -+--------------+--------------------------------------------------------------------+ -| FTPS | `FTPUtil library `_ | -+--------------+--------------------------------------------------------------------+ -| WebDAV | `WebdavClient3 library `_ | -+--------------+--------------------------------------------------------------------+ -| Samba | `pysmb library `_ | -+--------------+--------------------------------------------------------------------+ - -Files as DataFrame -~~~~~~~~~~~~~~~~~~ - -+--------------+---------------------------------------------------------------------------------------------------------------+ -| Storage | Powered by | 
-+==============+===============================================================================================================+ -| SparkLocalFS | Apache Spark `File Data Source `_ | -+--------------+---------------------------------------------------------------------------------------------------------------+ -| SparkHDFS | Apache Spark `File Data Source `_ | -+--------------+---------------------------------------------------------------------------------------------------------------+ -| SparkS3 | `Hadoop AWS `_ library | -+--------------+---------------------------------------------------------------------------------------------------------------+ ++--------------------+--------------+-------------------------------------------------------------------------------------------------------------------------+ +| Type | Storage | Powered by | ++====================+==============+=========================================================================================================================+ +| Database | Clickhouse | Apache Spark `JDBC Data Source `_ | ++ +--------------+ + +| | MSSQL | | ++ +--------------+ + +| | MySQL | | ++ +--------------+ + +| | Postgres | | ++ +--------------+ + +| | Oracle | | ++ +--------------+ + +| | Teradata | | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | Hive | Apache Spark `Hive integration `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | Kafka | Apache Spark `Kafka integration `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | Greenplum | VMware `Greenplum Spark connector `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | MongoDB | `MongoDB Spark connector `_ | ++--------------------+--------------+-------------------------------------------------------------------------------------------------------------------------+ +| File | HDFS | `HDFS Python client `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | S3 | `minio-py client `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | SFTP | `Paramiko library `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | FTP | `FTPUtil library `_ | ++ +--------------+ + +| | FTPS | | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | WebDAV | `WebdavClient3 library `_ | ++ +--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | Samba | `pysmb library `_ | ++--------------------+--------------+-------------------------------------------------------------------------------------------------------------------------+ +| Files as DataFrame | SparkLocalFS | Apache Spark `File Data Source `_ | +| +--------------+ + +| | SparkHDFS | | +| 
+--------------+-------------------------------------------------------------------------------------------------------------------------+ +| | SparkS3 | `Hadoop AWS `_ library | ++--------------------+--------------+-------------------------------------------------------------------------------------------------------------------------+ .. documentation From 3211aaf26764ba77665fc21757f7b2ab7527656b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 10 Jun 2024 21:13:14 +0000 Subject: [PATCH 06/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.15.2 → v3.16.0](https://github.com/asottile/pyupgrade/compare/v3.15.2...v3.16.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index baa40c29..202ed4f5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -90,7 +90,7 @@ repos: - id: text-unicode-replacement-char - repo: https://github.com/asottile/pyupgrade - rev: v3.15.2 + rev: v3.16.0 hooks: - id: pyupgrade args: [--py37-plus, --keep-runtime-typing] From ce5a13526b8940bc2933b9ee7df4fc42f3bfd3ca Mon Sep 17 00:00:00 2001 From: maxim-lixakov Date: Thu, 13 Jun 2024 16:44:37 +0300 Subject: [PATCH 07/64] [DOP-16103] - add class format to HiveWriteOptions --- docs/changelog/next_release/292.feature.rst | 1 + .../db_connection/hive/connection.py | 27 ++++++++++---- .../connection/db_connection/hive/options.py | 35 +++++++++++++++++-- .../test_hive_writer_integration.py | 18 ++++++---- 4 files changed, 67 insertions(+), 14 deletions(-) create mode 100644 docs/changelog/next_release/292.feature.rst diff --git a/docs/changelog/next_release/292.feature.rst b/docs/changelog/next_release/292.feature.rst new file mode 100644 index 00000000..e50a5fcd --- /dev/null +++ b/docs/changelog/next_release/292.feature.rst @@ -0,0 +1 @@ +Add support for specifying file formats (``ORC``, ``Parquet``, ``CSV``, etc.) in ``HiveWriteOptions.format``: ``Hive.WriteOptions(format=ORC(compression="snappy"))``. diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 7fcb4dce..857f8836 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -23,6 +23,7 @@ HiveWriteOptions, ) from onetl.connection.db_connection.hive.slots import HiveSlots +from onetl.file.format.file_format import ReadWriteFileFormat from onetl.hooks import slot, support_hooks from onetl.hwm import Window from onetl.log import log_lines, log_with_indent @@ -423,7 +424,12 @@ def _insert_into( ) -> None: write_options = self.WriteOptions.parse(options) - unsupported_options = write_options.dict(by_alias=True, exclude_unset=True, exclude={"if_exists"}) + unsupported_options = write_options.dict( + by_alias=True, + exclude_unset=True, + exclude_defaults=True, + exclude={"if_exists"}, + ) if unsupported_options: log.warning( "|%s| User-specified options %r are ignored while inserting into existing table. 
" @@ -458,17 +464,26 @@ def _save_as_table( write_options = self.WriteOptions.parse(options) writer = df.write - for method, value in write_options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"}).items(): - # is the arguments that will be passed to the - # format orc, parquet methods and format simultaneously + for method, value in write_options.dict( # noqa: WPS352 + by_alias=True, + exclude_none=True, + exclude={"if_exists", "format"}, + ).items(): if hasattr(writer, method): if isinstance(value, Iterable) and not isinstance(value, str): - writer = getattr(writer, method)(*value) # noqa: WPS220 + writer = getattr(writer, method)(*value) else: - writer = getattr(writer, method)(value) # noqa: WPS220 + writer = getattr(writer, method)(value) else: writer = writer.option(method, value) + # deserialize passed OCR(), Parquet(), CSV(), etc. file formats + if isinstance(write_options.format, ReadWriteFileFormat): + writer = writer.format(write_options.format.name) + writer = writer.options(**write_options.format.dict()) + elif isinstance(write_options.format, str): + writer = writer.format(write_options.format) + mode = "append" if write_options.if_exists == HiveTableExistBehavior.APPEND else "overwrite" log.info("|%s| Saving data to a table %r ...", self.__class__.__name__, table) diff --git a/onetl/connection/db_connection/hive/options.py b/onetl/connection/db_connection/hive/options.py index a196487a..f6b5fde8 100644 --- a/onetl/connection/db_connection/hive/options.py +++ b/onetl/connection/db_connection/hive/options.py @@ -13,6 +13,7 @@ from typing_extensions import deprecated +from onetl.file.format.file_format import ReadWriteFileFormat from onetl.impl import GenericOptions @@ -198,10 +199,30 @@ class Config: does not affect behavior. """ - format: str = "orc" + format: Union[str, ReadWriteFileFormat] = "orc" """Format of files which should be used for storing table data. - Examples: ``orc`` (default), ``parquet``, ``csv`` (NOT recommended) + Examples + -------- + + - string format: ``"orc"`` (default), ``"parquet"``, ``"csv"`` (NOT recommended). + - format class instance: ``ORC(compression="snappy")``, ``Parquet()``, ``CSV(header=True, delimiter=",")``. + + .. code:: + + options = Hive.WriteOptions( + if_exists="append", + partition_by="reg_id", + format="orc", + ) + + # or using an ORC format class instance: + + options = Hive.WriteOptions( + if_exists="append", + partition_by="reg_id", + format=ORC(compression="snappy"), + ) .. 
note:: @@ -285,6 +306,16 @@ class Config: Used **only** while **creating new table**, or in case of ``if_exists=replace_entire_table`` """ + def dict(self, **kwargs): + d = super().dict(**kwargs) + if isinstance(self.format, ReadWriteFileFormat): + if self.format.name != self.__fields__["format"].default: + d["format"] = self.format.name + elif "format" in d: + d.pop("format") + d.update(self.format.dict(exclude={"name"})) + return d + @validator("sort_by") def _sort_by_cannot_be_used_without_bucket_by(cls, sort_by, values): options = values.copy() diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py index a841f09f..20e2dc94 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py @@ -7,6 +7,7 @@ from onetl._util.spark import get_spark_version from onetl.connection import Hive from onetl.db import DBWriter +from onetl.file.format import CSV, ORC, Parquet pytestmark = pytest.mark.hive @@ -69,14 +70,17 @@ def test_hive_writer_with_options(spark, processing, get_schema_table, options): @pytest.mark.parametrize( - "options, fmt", + "options, format", [ - (Hive.WriteOptions(format="orc"), "orc"), (Hive.WriteOptions(), "orc"), # default + (Hive.WriteOptions(format="orc"), "orc"), (Hive.WriteOptions(format="parquet"), "parquet"), + (Hive.WriteOptions(format=ORC(compression="snappy")), "orc"), + (Hive.WriteOptions(format=CSV(sep=",", encoding="utf-8", inferSchema=True, compression="gzip")), "csv"), + (Hive.WriteOptions(format=Parquet(compression="snappy")), "parquet"), ], ) -def test_hive_writer_with_format(spark, processing, get_schema_table, options, fmt): +def test_hive_writer_with_format(spark, processing, get_schema_table, options, format): df = processing.create_spark_df(spark) hive = Hive(cluster="rnd-dwh", spark=spark) @@ -90,7 +94,7 @@ def test_hive_writer_with_format(spark, processing, get_schema_table, options, f response = hive.sql(f"SHOW CREATE TABLE {get_schema_table.full_name}") response = response.collect()[0][0] - assert f"USING {fmt}" in response + assert f"USING {format}" in response @pytest.mark.parametrize( @@ -264,11 +268,13 @@ def test_hive_writer_create_table_if_exists(spark, processing, get_schema_table, Hive.WriteOptions(bucketBy=(5, "id_int"), sortBy="hwm_int"), "{'bucketBy': (5, 'id_int'), 'sortBy': 'hwm_int'}", ), - (Hive.WriteOptions(compression="snappy"), "{'compression': 'snappy'}"), (Hive.WriteOptions(format="parquet"), "{'format': 'parquet'}"), + (Hive.WriteOptions(format=Parquet()), "{'format': 'parquet'}"), + (Hive.WriteOptions(compression="snappy"), "{'compression': 'snappy'}"), + (Hive.WriteOptions(format=ORC(compression="snappy")), "{'compression': 'snappy'}"), ], ) -def test_hive_writer_insert_into_with_options(spark, processing, get_schema_table, options, option_kv, caplog): +def test_hive_writer_insert_into_with_options_ignored(spark, processing, get_schema_table, options, option_kv, caplog): df = processing.create_spark_df(spark) hive = Hive(cluster="rnd-dwh", spark=spark) From 539da6a6ab123795de1215166e9746cfcd94889f Mon Sep 17 00:00:00 2001 From: maxim-lixakov Date: Fri, 14 Jun 2024 11:31:13 +0300 Subject: [PATCH 08/64] [DOP-16103] - move format logic to _format_write_options method --- 
.../db_connection/hive/connection.py | 33 ++++++++++++------- .../connection/db_connection/hive/options.py | 12 ++----- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 857f8836..0a2b0ffa 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -23,7 +23,7 @@ HiveWriteOptions, ) from onetl.connection.db_connection.hive.slots import HiveSlots -from onetl.file.format.file_format import ReadWriteFileFormat +from onetl.file.format.file_format import WriteOnlyFileFormat from onetl.hooks import slot, support_hooks from onetl.hwm import Window from onetl.log import log_lines, log_with_indent @@ -423,13 +423,7 @@ def _insert_into( options: HiveWriteOptions | dict | None = None, ) -> None: write_options = self.WriteOptions.parse(options) - - unsupported_options = write_options.dict( - by_alias=True, - exclude_unset=True, - exclude_defaults=True, - exclude={"if_exists"}, - ) + unsupported_options = self._format_write_options(write_options) if unsupported_options: log.warning( "|%s| User-specified options %r are ignored while inserting into existing table. " @@ -455,6 +449,24 @@ def _insert_into( log.info("|%s| Data is successfully inserted into table %r.", self.__class__.__name__, table) + def _format_write_options(self, write_options: HiveWriteOptions) -> dict: + options_dict = write_options.dict( + by_alias=True, + exclude_unset=True, + exclude_defaults=True, + exclude={"if_exists"}, + ) + + if isinstance(write_options.format, WriteOnlyFileFormat): + if write_options.format.name != HiveWriteOptions.__fields__["format"].default: + options_dict["format"] = write_options.format.name + elif "format" in options_dict: + options_dict.pop("format") # remove format key if it matches the default + + options_dict.update(write_options.format.dict(exclude={"name"})) + + return options_dict + def _save_as_table( self, df: DataFrame, @@ -478,9 +490,8 @@ def _save_as_table( writer = writer.option(method, value) # deserialize passed OCR(), Parquet(), CSV(), etc. 
file formats - if isinstance(write_options.format, ReadWriteFileFormat): - writer = writer.format(write_options.format.name) - writer = writer.options(**write_options.format.dict()) + if isinstance(write_options.format, WriteOnlyFileFormat): + writer = write_options.format.apply_to_writer(writer) elif isinstance(write_options.format, str): writer = writer.format(write_options.format) diff --git a/onetl/connection/db_connection/hive/options.py b/onetl/connection/db_connection/hive/options.py index f6b5fde8..9b059a83 100644 --- a/onetl/connection/db_connection/hive/options.py +++ b/onetl/connection/db_connection/hive/options.py @@ -218,6 +218,8 @@ class Config: # or using an ORC format class instance: + from onetl.file.format import ORC + options = Hive.WriteOptions( if_exists="append", partition_by="reg_id", @@ -306,16 +308,6 @@ class Config: Used **only** while **creating new table**, or in case of ``if_exists=replace_entire_table`` """ - def dict(self, **kwargs): - d = super().dict(**kwargs) - if isinstance(self.format, ReadWriteFileFormat): - if self.format.name != self.__fields__["format"].default: - d["format"] = self.format.name - elif "format" in d: - d.pop("format") - d.update(self.format.dict(exclude={"name"})) - return d - @validator("sort_by") def _sort_by_cannot_be_used_without_bucket_by(cls, sort_by, values): options = values.copy() From dc052288b945c3f3787975bb5aa14066929a98bf Mon Sep 17 00:00:00 2001 From: maxim-lixakov Date: Fri, 14 Jun 2024 13:28:57 +0300 Subject: [PATCH 09/64] [DOP-16103] - improve logging --- docs/connection/db_connection/hive/write.rst | 4 ++++ onetl/connection/db_connection/hive/connection.py | 7 +------ .../test_hive_writer_integration.py | 3 ++- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/connection/db_connection/hive/write.rst b/docs/connection/db_connection/hive/write.rst index 95e54a5a..6b7ca91e 100644 --- a/docs/connection/db_connection/hive/write.rst +++ b/docs/connection/db_connection/hive/write.rst @@ -5,6 +5,10 @@ Writing to Hive using ``DBWriter`` For writing data to Hive, use :obj:`DBWriter `. +.. warning:: + When using ``DBWriter`` with ``Hive.WriteOptions``, the default spark data format configured in ``spark.sql.sources.default`` is overridden to use ``orc`` by default. This may affect performance and storage characteristics. 
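A short illustration of the behaviour this warning describes, assuming the ``Hive.WriteOptions`` API exercised in the tests above (comments state the expected outcome of writing with these options, not return values of the calls):

.. code-block:: python

    from onetl.connection import Hive

    # spark.sql.sources.default (e.g. "parquet") is not consulted here:
    Hive.WriteOptions(if_exists="append")  # new table is created as ORC
    Hive.WriteOptions(if_exists="append", format="parquet")  # explicit format wins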
+ + Examples -------- diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 0a2b0ffa..81c50e87 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -453,16 +453,11 @@ def _format_write_options(self, write_options: HiveWriteOptions) -> dict: options_dict = write_options.dict( by_alias=True, exclude_unset=True, - exclude_defaults=True, exclude={"if_exists"}, ) if isinstance(write_options.format, WriteOnlyFileFormat): - if write_options.format.name != HiveWriteOptions.__fields__["format"].default: - options_dict["format"] = write_options.format.name - elif "format" in options_dict: - options_dict.pop("format") # remove format key if it matches the default - + options_dict["format"] = write_options.format.name options_dict.update(write_options.format.dict(exclude={"name"})) return options_dict diff --git a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py index 20e2dc94..97f774f7 100644 --- a/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py +++ b/tests/tests_integration/tests_core_integration/tests_db_writer_integration/test_hive_writer_integration.py @@ -271,7 +271,8 @@ def test_hive_writer_create_table_if_exists(spark, processing, get_schema_table, (Hive.WriteOptions(format="parquet"), "{'format': 'parquet'}"), (Hive.WriteOptions(format=Parquet()), "{'format': 'parquet'}"), (Hive.WriteOptions(compression="snappy"), "{'compression': 'snappy'}"), - (Hive.WriteOptions(format=ORC(compression="snappy")), "{'compression': 'snappy'}"), + (Hive.WriteOptions(format="orc"), "{'format': 'orc'}"), + (Hive.WriteOptions(format=ORC(compression="snappy")), "{'format': 'orc', 'compression': 'snappy'}"), ], ) def test_hive_writer_insert_into_with_options_ignored(spark, processing, get_schema_table, options, option_kv, caplog): From 4e2c681bcd4a323646694d5315296213fedce2c8 Mon Sep 17 00:00:00 2001 From: Maxim Liksakov <67663774+maxim-lixakov@users.noreply.github.com> Date: Fri, 14 Jun 2024 13:42:44 +0300 Subject: [PATCH 10/64] Update docs/connection/db_connection/hive/write.rst Co-authored-by: Maxim Martynov --- docs/connection/db_connection/hive/write.rst | 10 +++++----- onetl/connection/db_connection/hive/options.py | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/connection/db_connection/hive/write.rst b/docs/connection/db_connection/hive/write.rst index 6b7ca91e..cd707fae 100644 --- a/docs/connection/db_connection/hive/write.rst +++ b/docs/connection/db_connection/hive/write.rst @@ -5,9 +5,6 @@ Writing to Hive using ``DBWriter`` For writing data to Hive, use :obj:`DBWriter `. -.. warning:: - When using ``DBWriter`` with ``Hive.WriteOptions``, the default spark data format configured in ``spark.sql.sources.default`` is overridden to use ``orc`` by default. This may affect performance and storage characteristics. - Examples -------- @@ -55,13 +52,16 @@ Use column-based write formats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Prefer these write formats: - * `ORC `_ + * `ORC `_ (**default**) * `Parquet `_ * `Iceberg `_ * `Hudi `_ * `Delta `_ -For colum-based write formats, each file contains separated sections there column data is stored. The file footer contains +.. 
warning:: + When using ``DBWriter``, the default spark data format configured in ``spark.sql.sources.default`` is ignored, as ``Hive.WriteOptions(format=...)`` default value is explicitly set to ``orc``. + +For column-based write formats, each file contains separated sections where column data is stored. The file footer contains location of each column section/group. Spark can use this information to load only sections required by specific query, e.g. only selected columns, to drastically speed up the query. diff --git a/onetl/connection/db_connection/hive/options.py b/onetl/connection/db_connection/hive/options.py index 9b059a83..16d21a0e 100644 --- a/onetl/connection/db_connection/hive/options.py +++ b/onetl/connection/db_connection/hive/options.py @@ -13,7 +13,7 @@ from typing_extensions import deprecated -from onetl.file.format.file_format import ReadWriteFileFormat +from onetl.file.format.file_format import WriteOnlyFileFormat from onetl.impl import GenericOptions @@ -199,7 +199,7 @@ class Config: does not affect behavior. """ - format: Union[str, ReadWriteFileFormat] = "orc" + format: Union[str, WriteOnlyFileFormat] = "orc" """Format of files which should be used for storing table data. Examples From a0442768b51f8aeb0681d8a0a7de370821a27d2c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 17 Jun 2024 06:26:56 +0000 Subject: [PATCH 11/64] Bump mikefarah/yq from 4.44.1 to 4.44.2 in the github-actions group Bumps the github-actions group with 1 update: [mikefarah/yq](https://github.com/mikefarah/yq). Updates `mikefarah/yq` from 4.44.1 to 4.44.2 - [Release notes](https://github.com/mikefarah/yq/releases) - [Changelog](https://github.com/mikefarah/yq/blob/master/release_notes.txt) - [Commits](https://github.com/mikefarah/yq/compare/v4.44.1...v4.44.2) --- updated-dependencies: - dependency-name: mikefarah/yq dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-actions ... 
Signed-off-by: dependabot[bot] --- .github/workflows/get-matrix.yml | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/get-matrix.yml b/.github/workflows/get-matrix.yml index 8f024cf8..5963ff9c 100644 --- a/.github/workflows/get-matrix.yml +++ b/.github/workflows/get-matrix.yml @@ -154,7 +154,7 @@ jobs: - name: Get Core matrix id: matrix-core - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/core/matrix.yml @@ -184,7 +184,7 @@ jobs: - name: Get Clickhouse matrix id: matrix-clickhouse - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/clickhouse/matrix.yml @@ -214,7 +214,7 @@ jobs: - name: Get Greenplum matrix id: matrix-greenplum - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/greenplum/matrix.yml @@ -244,7 +244,7 @@ jobs: - name: Get Hive matrix id: matrix-hive - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/hive/matrix.yml @@ -274,7 +274,7 @@ jobs: - name: Get Kafka matrix id: matrix-kafka - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/kafka/matrix.yml @@ -304,7 +304,7 @@ jobs: - name: Get LocalFS matrix id: matrix-local-fs - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/local-fs/matrix.yml @@ -334,7 +334,7 @@ jobs: - name: Get MongoDB matrix id: matrix-mongodb - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/mongodb/matrix.yml @@ -364,7 +364,7 @@ jobs: - name: Get MSSQL matrix id: matrix-mssql - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/mssql/matrix.yml @@ -394,7 +394,7 @@ jobs: - name: Get MySQL matrix id: matrix-mysql - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/mysql/matrix.yml @@ -424,7 +424,7 @@ jobs: - name: Get Oracle matrix id: matrix-oracle - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/oracle/matrix.yml @@ -454,7 +454,7 @@ jobs: - name: Get Postgres matrix id: matrix-postgres - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/postgres/matrix.yml @@ -484,7 +484,7 @@ jobs: - name: Get Teradata matrix id: matrix-teradata - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/teradata/matrix.yml @@ -514,7 +514,7 @@ jobs: - name: Get FTP matrix id: matrix-ftp - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/ftp/matrix.yml @@ -544,7 +544,7 @@ jobs: - name: Get FTPS matrix id: matrix-ftps - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/ftps/matrix.yml @@ -574,7 +574,7 @@ jobs: - name: Get HDFS matrix id: matrix-hdfs - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/hdfs/matrix.yml @@ -604,7 +604,7 @@ jobs: - name: Get S3 matrix id: matrix-s3 - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/s3/matrix.yml @@ 
-634,7 +634,7 @@ jobs: - name: Get SFTP matrix id: matrix-sftp - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/sftp/matrix.yml @@ -664,7 +664,7 @@ jobs: - name: Get Samba matrix id: matrix-samba - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/samba/matrix.yml @@ -694,6 +694,6 @@ jobs: - name: Get WebDAV matrix id: matrix-webdav - uses: mikefarah/yq@v4.44.1 + uses: mikefarah/yq@v4.44.2 with: cmd: yq -o=json '.matrix' .github/workflows/data/webdav/matrix.yml From 86714c9f7a99e640e98a99625ba291de80819571 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 17 Jun 2024 08:42:01 +0000 Subject: [PATCH 12/64] Add Spark metastore & warehouse to .gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 49d22c6e..31223eb6 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,6 @@ dmypy.json # Local stuff docker-compose*override* !docker-compose.override.sample.yml + +metastore_db/ +spark-warehouse/ From d9c44c5fbb7a6054cd67776152b23e38215437c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 17 Jun 2024 09:07:31 +0000 Subject: [PATCH 13/64] [DOP-16923] Disable IPv6 for Greenplum container --- .github/workflows/test-greenplum.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index d54e9697..1bbc0658 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -45,6 +45,7 @@ jobs: TZ: UTC ports: - 5433:5432 + options: --sysctl net.ipv6.conf.all.disable_ipv6=1 steps: - name: Checkout code From 6b1582c8b24a1484f681cca0c38c9b2e5fcb6ba1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 17 Jun 2024 09:23:24 +0000 Subject: [PATCH 14/64] [DOP-16923] Update MySQL image minimal version --- .github/workflows/data/mysql/matrix.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/data/mysql/matrix.yml b/.github/workflows/data/mysql/matrix.yml index 8e46b42e..39061bde 100644 --- a/.github/workflows/data/mysql/matrix.yml +++ b/.github/workflows/data/mysql/matrix.yml @@ -24,14 +24,14 @@ matrix: - mysql-version: 8.4.0 <<: *max full: - # Min supported version by JDBC driver is 5.7 - - mysql-version: 5.7.6 + # Tags 5.7.6-5.6.12 cannot be downloaded since Docker v26: + # "Docker Image Format v1 and Docker Image manifest version 2, schema 1 support is disabled by default" + - mysql-version: 5.7.13 <<: *min - # Max supported version by JDBC driver is 8.3 - mysql-version: 8.4.0 <<: *max nightly: - - mysql-version: 5.7.6 + - mysql-version: 5.7.13 <<: *min - mysql-version: latest <<: *latest From 11e27fdcff3fb817aefbc0604107a21861fdea4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 17 Jun 2024 10:08:30 
+0000 Subject: [PATCH 15/64] [DOP-16923] Disable IPv6 for Greenplum container --- .github/workflows/test-greenplum.yml | 1 + docker-compose.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 1bbc0658..4c1b4045 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -45,6 +45,7 @@ jobs: TZ: UTC ports: - 5433:5432 + # TODO: remove after https://github.com/andruche/docker-greenplum/pull/2 options: --sysctl net.ipv6.conf.all.disable_ipv6=1 steps: diff --git a/docker-compose.yml b/docker-compose.yml index 34f2c4fe..5316891a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -33,6 +33,7 @@ services: - onetl extra_hosts: - host.docker.internal:host-gateway + # TODO: remove after https://github.com/andruche/docker-greenplum/pull/2 sysctls: - net.ipv6.conf.all.disable_ipv6=1 From 75c44b9189a1e7b507e9eb16aefaa0259fa5b436 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 17 Jun 2024 11:45:54 +0000 Subject: [PATCH 16/64] Fix CREATE TABLE examples --- docs/connection/db_connection/clickhouse/execute.rst | 2 +- docs/connection/db_connection/clickhouse/types.rst | 6 +++--- docs/connection/db_connection/greenplum/execute.rst | 2 +- docs/connection/db_connection/greenplum/types.rst | 2 +- docs/connection/db_connection/hive/execute.rst | 2 +- docs/connection/db_connection/mssql/execute.rst | 2 +- docs/connection/db_connection/mssql/types.rst | 4 ++-- docs/connection/db_connection/mysql/execute.rst | 2 +- docs/connection/db_connection/mysql/types.rst | 2 +- docs/connection/db_connection/oracle/execute.rst | 2 +- docs/connection/db_connection/oracle/types.rst | 2 +- docs/connection/db_connection/postgres/execute.rst | 2 +- docs/connection/db_connection/postgres/types.rst | 2 +- 13 files changed, 16 insertions(+), 16 deletions(-) diff --git a/docs/connection/db_connection/clickhouse/execute.rst b/docs/connection/db_connection/clickhouse/execute.rst index f33369c5..9232710a 100644 --- a/docs/connection/db_connection/clickhouse/execute.rst +++ b/docs/connection/db_connection/clickhouse/execute.rst @@ -90,7 +90,7 @@ Examples clickhouse.execute("DROP TABLE schema.table") clickhouse.execute( """ - CREATE TABLE schema.table AS ( + CREATE TABLE schema.table ( id UInt8, key String, value Float32 diff --git a/docs/connection/db_connection/clickhouse/types.rst b/docs/connection/db_connection/clickhouse/types.rst index 00a71551..21ddf0ba 100644 --- a/docs/connection/db_connection/clickhouse/types.rst +++ b/docs/connection/db_connection/clickhouse/types.rst @@ -84,7 +84,7 @@ Always prefer creating tables with specific types **BEFORE WRITING DATA**: clickhouse.execute( """ - CREATE TABLE default.target_tbl AS ( + CREATE TABLE default.target_tbl ( id UInt8, value DateTime64(6) -- specific type and precision ) @@ -398,7 +398,7 @@ For writing JSON data to ClickHouse, use the :obj:`JSON.serialize_column Date: Mon, 17 Jun 2024 21:12:21 +0000 Subject: [PATCH 17/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pycqa/bandit: 1.7.8 → 1.7.9](https://github.com/pycqa/bandit/compare/1.7.8...1.7.9) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml index 202ed4f5..15f38046 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -114,7 +114,7 @@ repos: - black==24.4.2 - repo: https://github.com/pycqa/bandit - rev: 1.7.8 + rev: 1.7.9 hooks: - id: bandit args: From 6b3f0d1635277b010f0fe96036cedeb04b69bbb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 19 Jun 2024 15:17:44 +0000 Subject: [PATCH 18/64] Update coverage download step --- .github/workflows/test-core.yml | 2 +- .github/workflows/test-ftp.yml | 2 +- .github/workflows/test-ftps.yml | 2 +- .github/workflows/test-greenplum.yml | 2 +- .github/workflows/test-hdfs.yml | 2 +- .github/workflows/test-hive.yml | 2 +- .github/workflows/test-kafka.yml | 2 +- .github/workflows/test-local-fs.yml | 2 +- .github/workflows/test-mongodb.yml | 2 +- .github/workflows/test-mssql.yml | 2 +- .github/workflows/test-mysql.yml | 2 +- .github/workflows/test-oracle.yml | 2 +- .github/workflows/test-postgres.yml | 2 +- .github/workflows/test-s3.yml | 2 +- .github/workflows/test-samba.yml | 2 +- .github/workflows/test-sftp.yml | 2 +- .github/workflows/test-teradata.yml | 2 +- .github/workflows/test-webdav.yml | 2 +- .github/workflows/tests.yml | 7 +++---- 19 files changed, 21 insertions(+), 22 deletions(-) diff --git a/.github/workflows/test-core.yml b/.github/workflows/test-core.yml index 6008f925..8a0b3b7a 100644 --- a/.github/workflows/test-core.yml +++ b/.github/workflows/test-core.yml @@ -77,5 +77,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: core-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-core-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-ftp.yml b/.github/workflows/test-ftp.yml index e41e1f3e..dee06115 100644 --- a/.github/workflows/test-ftp.yml +++ b/.github/workflows/test-ftp.yml @@ -75,5 +75,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: ftp-${{ inputs.ftp-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-ftp-${{ inputs.ftp-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-ftps.yml b/.github/workflows/test-ftps.yml index 4fb9c623..fadb4406 100644 --- a/.github/workflows/test-ftps.yml +++ b/.github/workflows/test-ftps.yml @@ -75,5 +75,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: ftps-${{ inputs.ftps-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-ftps-${{ inputs.ftps-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 4c1b4045..9bc79fdd 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -121,5 +121,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: greenplum-${{ inputs.greenplum-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-greenplum-${{ inputs.greenplum-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git 
a/.github/workflows/test-hdfs.yml b/.github/workflows/test-hdfs.yml index 6e52a5df..41c339dc 100644 --- a/.github/workflows/test-hdfs.yml +++ b/.github/workflows/test-hdfs.yml @@ -98,5 +98,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: hdfs-${{ inputs.hadoop-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-hdfs-${{ inputs.hadoop-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-hive.yml b/.github/workflows/test-hive.yml index 893348ab..17af1d93 100644 --- a/.github/workflows/test-hive.yml +++ b/.github/workflows/test-hive.yml @@ -79,5 +79,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: hive-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-hive-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-kafka.yml b/.github/workflows/test-kafka.yml index 34c2894a..31f5e6e1 100644 --- a/.github/workflows/test-kafka.yml +++ b/.github/workflows/test-kafka.yml @@ -119,5 +119,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: kafka-${{ inputs.kafka-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-kafka-${{ inputs.kafka-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-local-fs.yml b/.github/workflows/test-local-fs.yml index f4b37c45..2672afe4 100644 --- a/.github/workflows/test-local-fs.yml +++ b/.github/workflows/test-local-fs.yml @@ -79,5 +79,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: local-fs-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-local-fs-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-mongodb.yml b/.github/workflows/test-mongodb.yml index a617450b..ec9e7fa4 100644 --- a/.github/workflows/test-mongodb.yml +++ b/.github/workflows/test-mongodb.yml @@ -91,5 +91,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: mongodb-${{ inputs.mongodb-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-mongodb-${{ inputs.mongodb-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-mssql.yml b/.github/workflows/test-mssql.yml index 0819887a..037a2c13 100644 --- a/.github/workflows/test-mssql.yml +++ b/.github/workflows/test-mssql.yml @@ -96,5 +96,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: mssql-${{ inputs.mssql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-mssql-${{ inputs.mssql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-mysql.yml b/.github/workflows/test-mysql.yml index e2035cfc..97691a6d 100644 --- a/.github/workflows/test-mysql.yml +++ b/.github/workflows/test-mysql.yml @@ -93,5 
+93,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: mysql-${{ inputs.mysql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-mysql-${{ inputs.mysql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-oracle.yml b/.github/workflows/test-oracle.yml index e11a57b8..2ba490c1 100644 --- a/.github/workflows/test-oracle.yml +++ b/.github/workflows/test-oracle.yml @@ -113,5 +113,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: oracle-${{ inputs.oracle-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-oracle-${{ inputs.oracle-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-postgres.yml b/.github/workflows/test-postgres.yml index ef31a037..7a9022b3 100644 --- a/.github/workflows/test-postgres.yml +++ b/.github/workflows/test-postgres.yml @@ -92,5 +92,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: postgres-${{ inputs.postgres-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-postgres-${{ inputs.postgres-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-s3.yml b/.github/workflows/test-s3.yml index 8da4540c..3179002c 100644 --- a/.github/workflows/test-s3.yml +++ b/.github/workflows/test-s3.yml @@ -93,5 +93,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: s3-${{ inputs.minio-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-s3-${{ inputs.minio-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-samba.yml b/.github/workflows/test-samba.yml index 58db08b8..4a2e30d1 100644 --- a/.github/workflows/test-samba.yml +++ b/.github/workflows/test-samba.yml @@ -73,5 +73,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: samba-${{ inputs.server-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-samba-${{ inputs.server-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-sftp.yml b/.github/workflows/test-sftp.yml index ffbf786f..eaa5e5a4 100644 --- a/.github/workflows/test-sftp.yml +++ b/.github/workflows/test-sftp.yml @@ -70,5 +70,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: sftp-${{ inputs.openssh-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-sftp-${{ inputs.openssh-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-teradata.yml b/.github/workflows/test-teradata.yml index b348da5f..8ba3ff60 100644 --- a/.github/workflows/test-teradata.yml +++ b/.github/workflows/test-teradata.yml @@ -79,5 +79,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: teradata-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: 
coverage-teradata-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/test-webdav.yml b/.github/workflows/test-webdav.yml index 47251964..34a94326 100644 --- a/.github/workflows/test-webdav.yml +++ b/.github/workflows/test-webdav.yml @@ -75,5 +75,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: webdav-${{ inputs.webdav-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-webdav-${{ inputs.webdav-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index f7a7cf07..fcd6352a 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -384,10 +384,9 @@ jobs: - name: Download all coverage reports uses: actions/download-artifact@v4 with: - path: reports - - - name: Move coverage data to the root folder - run: find reports -type f -exec mv '{}' reports \; + path: reports/ + pattern: coverage-* + merge-multiple: true - name: Generate coverate reports run: ./combine_coverage.sh From 06b561a7a7e642893c1feb500109a83de741207c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 19 Jun 2024 15:21:31 +0000 Subject: [PATCH 19/64] Update coverage download step --- .github/workflows/test-clickhouse.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-clickhouse.yml b/.github/workflows/test-clickhouse.yml index 4f8d436e..ba3bc21d 100644 --- a/.github/workflows/test-clickhouse.yml +++ b/.github/workflows/test-clickhouse.yml @@ -93,5 +93,5 @@ jobs: - name: Upload coverage results uses: actions/upload-artifact@v4 with: - name: clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + name: coverage-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* From 59af63dcd09078d5e9f5a51e2faf8d629242c9d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 20 Jun 2024 14:04:26 +0000 Subject: [PATCH 20/64] Add 'Last updated at' fields to build documentation --- .readthedocs.yml | 3 +++ docs/conf.py | 4 ++++ .../file_df_connection/spark_s3/troubleshooting.rst | 2 +- requirements/docs.txt | 1 + .../test_file_format_integration/test_json_integration.py | 5 +---- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index efb1a83c..3e5f91d2 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,6 +4,9 @@ build: os: ubuntu-22.04 tools: python: "3.12" + jobs: + post_checkout: + - git fetch --unshallow || true # TODO: remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 commands: - python -m virtualenv $READTHEDOCS_VIRTUALENV_PATH diff --git a/docs/conf.py b/docs/conf.py index 9427e190..3cee9a25 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -59,6 +59,7 @@ "sphinx.ext.extlinks", "sphinx_favicon", "sphinxcontrib.autodoc_pydantic", + "sphinx_last_updated_by_git", ] numpydoc_show_class_members = False autodoc_pydantic_model_show_config = False @@ 
-80,6 +81,9 @@ towncrier_draft_include_empty = False towncrier_draft_working_directory = PROJECT_ROOT_DIR +# TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77 +git_exclude_patterns = ["docs/_static/logo_wide.svg"] + github_username = "MobileTeleSystems" github_repository = "onetl" diff --git a/docs/connection/file_df_connection/spark_s3/troubleshooting.rst b/docs/connection/file_df_connection/spark_s3/troubleshooting.rst index 20b3b989..97d846de 100644 --- a/docs/connection/file_df_connection/spark_s3/troubleshooting.rst +++ b/docs/connection/file_df_connection/spark_s3/troubleshooting.rst @@ -38,7 +38,7 @@ How to determine reason Make logging more verbose ^^^^^^^^^^^^^^^^^^^^^^^^^ -Change Spark session log level to :ref:`DEBUG ` to print result of each attempt. +Change Spark session log level to :ref:`DEBUG ` to print result of each attempt. Resulting logs will look like this .. dropdown:: See log diff --git a/requirements/docs.txt b/requirements/docs.txt index 3776dbb0..154dbd31 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -7,6 +7,7 @@ sphinx sphinx-copybutton sphinx-design sphinx-favicon +sphinx-last-updated-by-git # TODO: uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4 # sphinx-plantuml sphinx-tabs diff --git a/tests/tests_integration/test_file_format_integration/test_json_integration.py b/tests/tests_integration/test_file_format_integration/test_json_integration.py index dcdbbc03..46fbc8c9 100644 --- a/tests/tests_integration/test_file_format_integration/test_json_integration.py +++ b/tests/tests_integration/test_file_format_integration/test_json_integration.py @@ -22,13 +22,10 @@ StructField, StructType, ) -except ImportError: - pytest.skip("Missing pyspark", allow_module_level=True) -try: from tests.util.assert_df import assert_equal_df except ImportError: - pytest.skip("Missing pandas", allow_module_level=True) + pytest.skip("Missing pandas or pyspark", allow_module_level=True) pytestmark = [pytest.mark.local_fs, pytest.mark.file_df_connection, pytest.mark.connection, pytest.mark.json] From dc589618cdc3f9effac9d2076ac9c18400440e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 20 Jun 2024 14:07:11 +0000 Subject: [PATCH 21/64] Add 'Last updated at' fields to build documentation --- .readthedocs.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index 3e5f91d2..ad825a0f 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,11 +4,9 @@ build: os: ubuntu-22.04 tools: python: "3.12" - jobs: - post_checkout: - - git fetch --unshallow || true - # TODO: remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 commands: + - git fetch --unshallow || true + # TODO: remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 - python -m virtualenv $READTHEDOCS_VIRTUALENV_PATH - python -m pip install --upgrade --no-cache-dir pip setuptools wheel - python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext From 5bc200488d9b92bab9b59dd71d8e3f670431af46 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 20 Jun 2024 14:27:19 +0000 Subject: [PATCH 22/64] Update ReadTheDocs config --- 
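For reference, before the ReadTheDocs build configuration is reworked below, the ``docs/conf.py`` pieces introduced by the documentation patch above amount roughly to the following sketch (every other setting in the real file is omitted):

.. code-block:: python

    # docs/conf.py (fragment)
    extensions = [
        # ... other Sphinx extensions already present in the file ...
        "sphinxcontrib.autodoc_pydantic",
        "sphinx_last_updated_by_git",  # renders "Last updated at" fields from git history
    ]

    # TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77
    git_exclude_patterns = ["docs/_static/logo_wide.svg"]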
.readthedocs.yml | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index ad825a0f..a0fc204d 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -4,17 +4,13 @@ build: os: ubuntu-22.04 tools: python: "3.12" - commands: - - git fetch --unshallow || true - # TODO: remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 - - python -m virtualenv $READTHEDOCS_VIRTUALENV_PATH - - python -m pip install --upgrade --no-cache-dir pip setuptools wheel - - python -m pip install --upgrade --no-cache-dir sphinx readthedocs-sphinx-ext - - python -m pip install --exists-action=w --no-cache-dir -r requirements/docs.txt - - python -m pip install --exists-action=w --no-cache-dir --no-deps sphinx-plantuml - - python -m pip install --exists-action=w --upgrade --upgrade-strategy only-if-needed --no-cache-dir .[ftp,ftps,hdfs,samba,s3,sftp,webdav,spark] - - cat docs/conf.py - - cd docs && python -m sphinx -T -E -b html -d _build/doctrees -D language=en . $READTHEDOCS_OUTPUT/html + jobs: + post_checkout: + - git fetch --unshallow || true + post_create_environment: + - python -m pip install --exists-action=w --no-cache-dir --no-deps sphinx-plantuml # remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 + - python -m pip install --exists-action=w --no-cache-dir -r requirements/docs.txt + - python -m pip install --exists-action=w --upgrade --upgrade-strategy only-if-needed --no-cache-dir .[ftp,ftps,hdfs,samba,s3,sftp,webdav,spark] # TODO: uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4 #python: From 69fce1b75abbc75bc7688aed4922926be64db8e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 20 Jun 2024 14:43:52 +0000 Subject: [PATCH 23/64] Update ReadTheDocs config --- docs/conf.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 3cee9a25..f781dddd 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -81,9 +81,6 @@ towncrier_draft_include_empty = False towncrier_draft_working_directory = PROJECT_ROOT_DIR -# TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77 -git_exclude_patterns = ["docs/_static/logo_wide.svg"] - github_username = "MobileTeleSystems" github_repository = "onetl" @@ -122,6 +119,10 @@ favicons = [ {"rel": "icon", "href": "icon.svg", "type": "image/svg+xml"}, ] + +# TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77 +git_exclude_patterns = ["docs/_static/logo_wide.svg"] + # The master toctree document. 
master_doc = "index" From 26c5fed031f1b01991295c168b4e409080369a26 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 2 Jul 2024 01:46:37 +0000 Subject: [PATCH 24/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/blacken-docs: 1.16.0 → 1.18.0](https://github.com/asottile/blacken-docs/compare/1.16.0...1.18.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 15f38046..ccab6bca 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -107,7 +107,7 @@ repos: language_version: python3 - repo: https://github.com/asottile/blacken-docs - rev: 1.16.0 + rev: 1.18.0 hooks: - id: blacken-docs additional_dependencies: From b65732247ba76d82fe2a509c2877642eddc31692 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 15 Jul 2024 21:17:59 +0000 Subject: [PATCH 25/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/macisamuele/language-formatters-pre-commit-hooks: v2.13.0 → v2.14.0](https://github.com/macisamuele/language-formatters-pre-commit-hooks/compare/v2.13.0...v2.14.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ccab6bca..f0c94a33 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -58,7 +58,7 @@ repos: args: [-w] - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks - rev: v2.13.0 + rev: v2.14.0 hooks: - id: pretty-format-yaml args: [--autofix, --indent, '2', --preserve-quotes, --offset, '2'] From c757b7089f4930811f3bf2dc0a356c7a3e9673f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 11:09:11 +0000 Subject: [PATCH 26/64] Fix nightly tests --- docker/mssql/configure-db.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/mssql/configure-db.sh b/docker/mssql/configure-db.sh index 51b39ae3..38e224e3 100755 --- a/docker/mssql/configure-db.sh +++ b/docker/mssql/configure-db.sh @@ -19,7 +19,7 @@ while true; do exit 1 fi - DBSTATUS=$(/opt/mssql-tools/bin/sqlcmd -h -1 -t 1 -U sa -P ${MSSQL_SA_PASSWORD} -Q "SET NOCOUNT ON; Select SUM(state) from sys.databases" 2>/dev/null | sed -e 's/^[[:space:]]*//') + DBSTATUS=$(sqlcmd -h -1 -t 1 -U sa -P ${MSSQL_SA_PASSWORD} -Q "SET NOCOUNT ON; Select SUM(state) from sys.databases" 2>/dev/null | sed -e 's/^[[:space:]]*//') ERRCODE=$? if [[ "$DBSTATUS" -eq "0" && "$ERRCODE" -eq "0" ]]; then echo "INFO: Database ready." 
@@ -32,5 +32,5 @@ done # Run the setup script to create the DB and the schema in the DB echo "Running setup.sql"; -/opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P $MSSQL_SA_PASSWORD -d master -i /usr/config/setup.sql; +sqlcmd -S localhost -U sa -P $MSSQL_SA_PASSWORD -d master -i /usr/config/setup.sql; echo "Success"; From df869d0e3ede22157c88e7b46ae3d050488b4d6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 11:31:13 +0000 Subject: [PATCH 27/64] Fix MSSQL tests --- .env.dependencies | 3 +++ .github/workflows/data/mssql/matrix.yml | 4 ++-- docker-compose.yml | 9 +++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/.env.dependencies b/.env.dependencies index 5fccfa15..ec75df37 100644 --- a/.env.dependencies +++ b/.env.dependencies @@ -21,6 +21,9 @@ MONGO_INITDB_ROOT_PASSWORD=E4j7h!9A # MSSQL ACCEPT_EULA=Y MSSQL_SA_PASSWORD=2astazeY +MSSQL_DATABASE=onetl +MSSQL_USER=onetl +MSSQL_PASSWORD=7ellowEl7akey # MySQL MYSQL_ROOT_PASSWORD=ohbuz9Eochaj9saibooK3thooGa5aesh diff --git a/.github/workflows/data/mssql/matrix.yml b/.github/workflows/data/mssql/matrix.yml index c46d98d0..a074ed7a 100644 --- a/.github/workflows/data/mssql/matrix.yml +++ b/.github/workflows/data/mssql/matrix.yml @@ -21,12 +21,12 @@ latest: &latest matrix: small: - - mssql-version: 2022-CU12-ubuntu-22.04 + - mssql-version: 2022-CU14-ubuntu-22.04 <<: *max full: - mssql-version: 2017-GA-ubuntu <<: *min - - mssql-version: 2022-CU12-ubuntu-22.04 + - mssql-version: 2022-CU14-ubuntu-22.04 <<: *max nightly: - mssql-version: 2017-GA-ubuntu diff --git a/docker-compose.yml b/docker-compose.yml index 5316891a..41d8af80 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -91,6 +91,9 @@ services: image: ${MSSQL_IMAGE:-mcr.microsoft.com/mssql/server:latest} restart: unless-stopped env_file: .env.dependencies + environment: + # fix for https://github.com/microsoft/mssql-docker/issues/892 + PATH: /opt/mssql-tools18/bin:/opt/mssql-tools/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ports: - 1433:1433 volumes: @@ -99,6 +102,12 @@ services: networks: - onetl platform: linux/amd64 + healthcheck: + # Container is healthy only when database is created + test: ["CMD-SHELL", "sqlcmd -S localhost -d $$MSSQL_DATABASE -U $$MSSQL_USER -P $$MSSQL_PASSWORD -Q 'SELECT 1'"] + interval: 10s + timeout: 5s + retries: 5 mysql: image: ${MYSQL_IMAGE:-mysql:latest} From 267cc50562d16417907b81c91dbc8520345910c9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 12:01:05 +0000 Subject: [PATCH 28/64] Fix MSSQL tests --- .github/workflows/test-clickhouse.yml | 14 ++++++++++++++ .github/workflows/test-ftp.yml | 14 ++++++++++++++ .github/workflows/test-ftps.yml | 14 ++++++++++++++ .github/workflows/test-greenplum.yml | 14 ++++++++++++++ .github/workflows/test-hdfs.yml | 14 ++++++++++++++ .github/workflows/test-kafka.yml | 14 ++++++++++++++ .github/workflows/test-mongodb.yml | 14 ++++++++++++++ .github/workflows/test-mssql.yml | 14 ++++++++++++++ .github/workflows/test-mysql.yml | 14 ++++++++++++++ .github/workflows/test-oracle.yml | 16 +++++++++++++++- .github/workflows/test-postgres.yml | 14 ++++++++++++++ .github/workflows/test-s3.yml | 14 ++++++++++++++ 
.github/workflows/test-samba.yml | 14 ++++++++++++++ .github/workflows/test-sftp.yml | 14 ++++++++++++++ .github/workflows/test-webdav.yml | 14 ++++++++++++++ docker-compose.yml | 6 ++---- docker/mssql/configure-db.sh | 11 +++++++++-- 17 files changed, 222 insertions(+), 7 deletions(-) diff --git a/.github/workflows/test-clickhouse.yml b/.github/workflows/test-clickhouse.yml index ba3bc21d..db05402d 100644 --- a/.github/workflows/test-clickhouse.yml +++ b/.github/workflows/test-clickhouse.yml @@ -95,3 +95,17 @@ jobs: with: name: coverage-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + + - name: Dump Clickhouse logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: ${{ inputs.clickhouse-image }} + dest: ./logs + + - name: Upload Clickhouse logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }} + path: logs/* diff --git a/.github/workflows/test-ftp.yml b/.github/workflows/test-ftp.yml index dee06115..8e45ec32 100644 --- a/.github/workflows/test-ftp.yml +++ b/.github/workflows/test-ftp.yml @@ -67,6 +67,20 @@ jobs: source ./env ./pytest_runner.sh -m ftp + - name: Dump FTP logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: chonjay21/ftps + dest: ./logs + + - name: Upload FTP logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-ftp-${{ inputs.ftp-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown FTP if: always() run: | diff --git a/.github/workflows/test-ftps.yml b/.github/workflows/test-ftps.yml index fadb4406..dfe8ffed 100644 --- a/.github/workflows/test-ftps.yml +++ b/.github/workflows/test-ftps.yml @@ -67,6 +67,20 @@ jobs: source ./env ./pytest_runner.sh -m ftps + - name: Dump FTPS logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: chonjay21/ftps + dest: ./logs + + - name: Upload FTPS logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-ftps-${{ inputs.ftps-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown FTPS if: always() run: | diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 9bc79fdd..5f24f779 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -118,6 +118,20 @@ jobs: GREENPLUM_PACKAGES_USER: ${{ secrets.GREENPLUM_PACKAGES_USER }} GREENPLUM_PACKAGES_PASSWORD: ${{ secrets.GREENPLUM_PACKAGES_PASSWORD }} + - name: Dump Greenplum logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: andruche/greenplum + dest: ./logs + + - name: Upload Greenplum logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-greenplum-${{ inputs.greenplum-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-hdfs.yml b/.github/workflows/test-hdfs.yml index 41c339dc..e06f01ed 100644 --- a/.github/workflows/test-hdfs.yml +++ b/.github/workflows/test-hdfs.yml @@ -90,6 +90,20 @@ jobs: echo "127.0.0.1 hdfs" | sudo tee -a /etc/hosts ./pytest_runner.sh -m hdfs + - name: Dump HDFS logs on failure + if: failure() + uses: 
jwalton/gh-docker-logs@v2 + with: + images: mtsrus/hadoop + dest: ./logs + + - name: Upload HDFS logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-hdfs-${{ inputs.hadoop-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown HDFS if: always() run: | diff --git a/.github/workflows/test-kafka.yml b/.github/workflows/test-kafka.yml index 31f5e6e1..b1f06552 100644 --- a/.github/workflows/test-kafka.yml +++ b/.github/workflows/test-kafka.yml @@ -116,6 +116,20 @@ jobs: source ./env ./pytest_runner.sh -m kafka + - name: Dump Kafka logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: bitnami/kafka + dest: ./logs + + - name: Upload Kafka logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-kafka-${{ inputs.kafka-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-mongodb.yml b/.github/workflows/test-mongodb.yml index ec9e7fa4..334bbfc1 100644 --- a/.github/workflows/test-mongodb.yml +++ b/.github/workflows/test-mongodb.yml @@ -88,6 +88,20 @@ jobs: source ./env ./pytest_runner.sh -m mongodb + - name: Dump MongoDB logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mongo + dest: ./logs + + - name: Upload MongoDB logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-mongodb-${{ inputs.mongodb-version }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-mssql.yml b/.github/workflows/test-mssql.yml index 037a2c13..0865492a 100644 --- a/.github/workflows/test-mssql.yml +++ b/.github/workflows/test-mssql.yml @@ -88,6 +88,20 @@ jobs: source ./env ./pytest_runner.sh -m mssql + - name: Dump MSSQL logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mcr.microsoft.com/mssql/server + dest: ./logs + + - name: Upload MSSQL logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-mssql-${{ inputs.mssql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown MSSQL if: always() run: | diff --git a/.github/workflows/test-mysql.yml b/.github/workflows/test-mysql.yml index 97691a6d..e305af6d 100644 --- a/.github/workflows/test-mysql.yml +++ b/.github/workflows/test-mysql.yml @@ -90,6 +90,20 @@ jobs: source ./env ./pytest_runner.sh -m mysql + - name: Dump MySQL logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: mysql + dest: ./logs + + - name: Upload MySQL logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-mysql-${{ inputs.mysql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-oracle.yml b/.github/workflows/test-oracle.yml index 2ba490c1..38a21daf 100644 --- a/.github/workflows/test-oracle.yml +++ b/.github/workflows/test-oracle.yml @@ -38,7 +38,7 @@ jobs: runs-on: ${{ inputs.os }} services: oracle: - image: "${{ inputs.oracle-image }}:${{ inputs.oracle-version }}" + image: ${{ inputs.oracle-image }}:${{ inputs.oracle-version }} env: TZ: UTC ORACLE_PASSWORD: 
maaxohmiGe9eep5x @@ -110,6 +110,20 @@ jobs: export "ONETL_ORA_SERVICE_NAME=${{ inputs.db-name }}" ./pytest_runner.sh -m oracle + - name: Dump Oracle logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: ${{ inputs.oracle-image }} + dest: ./logs + + - name: Upload Oracle logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-oracle-${{ inputs.oracle-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-postgres.yml b/.github/workflows/test-postgres.yml index 7a9022b3..68236134 100644 --- a/.github/workflows/test-postgres.yml +++ b/.github/workflows/test-postgres.yml @@ -89,6 +89,20 @@ jobs: source ./env ./pytest_runner.sh -m postgres + - name: Dump Postgres logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: postgres + dest: ./logs + + - name: Upload Postgres logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-postgres-${{ inputs.postgres-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-s3.yml b/.github/workflows/test-s3.yml index 3179002c..1ef595e6 100644 --- a/.github/workflows/test-s3.yml +++ b/.github/workflows/test-s3.yml @@ -90,6 +90,20 @@ jobs: source ./env ./pytest_runner.sh -m s3 + - name: Dump S3 logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: bitnami/minio + dest: ./logs + + - name: Upload S3 logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-s3-${{ inputs.minio-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-samba.yml b/.github/workflows/test-samba.yml index 4a2e30d1..f7b07131 100644 --- a/.github/workflows/test-samba.yml +++ b/.github/workflows/test-samba.yml @@ -65,6 +65,20 @@ jobs: source ./env ./pytest_runner.sh -m samba + - name: Dump Samba logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: elswork/samba + dest: ./logs + + - name: Upload Samba logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-samba-${{ inputs.server-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown Samba if: always() run: | diff --git a/.github/workflows/test-sftp.yml b/.github/workflows/test-sftp.yml index eaa5e5a4..2ab8de1c 100644 --- a/.github/workflows/test-sftp.yml +++ b/.github/workflows/test-sftp.yml @@ -67,6 +67,20 @@ jobs: source ./env ./pytest_runner.sh -m sftp + - name: Dump SFTP logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: linuxserver/openssh-server + dest: ./logs + + - name: Upload SFTP logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-sftp-${{ inputs.openssh-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Upload coverage results uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/test-webdav.yml b/.github/workflows/test-webdav.yml index 34a94326..2ce0e4ef 100644 --- a/.github/workflows/test-webdav.yml +++ 
b/.github/workflows/test-webdav.yml @@ -67,6 +67,20 @@ jobs: source ./env ./pytest_runner.sh -m webdav + - name: Dump WebDAV logs on failure + if: failure() + uses: jwalton/gh-docker-logs@v2 + with: + images: chonjay21/webdav + dest: ./logs + + - name: Upload WebDAV logs + uses: actions/upload-artifact@v4 + if: failure() + with: + name: container-logs-webdav-${{ inputs.webdav-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: logs/* + - name: Shutdown WebDAV if: always() run: | diff --git a/docker-compose.yml b/docker-compose.yml index 41d8af80..f5859bb5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -91,9 +91,6 @@ services: image: ${MSSQL_IMAGE:-mcr.microsoft.com/mssql/server:latest} restart: unless-stopped env_file: .env.dependencies - environment: - # fix for https://github.com/microsoft/mssql-docker/issues/892 - PATH: /opt/mssql-tools18/bin:/opt/mssql-tools/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ports: - 1433:1433 volumes: @@ -104,7 +101,8 @@ services: platform: linux/amd64 healthcheck: # Container is healthy only when database is created - test: ["CMD-SHELL", "sqlcmd -S localhost -d $$MSSQL_DATABASE -U $$MSSQL_USER -P $$MSSQL_PASSWORD -Q 'SELECT 1'"] + # TODO: replace with SELECT after fixing sqlcmd location: https://github.com/microsoft/mssql-docker/issues/892 + test: ["CMD-SHELL", "ls -lsah /var/opt/mssql/data/onetl.mdf"] interval: 10s timeout: 5s retries: 5 diff --git a/docker/mssql/configure-db.sh b/docker/mssql/configure-db.sh index 38e224e3..6c31f067 100755 --- a/docker/mssql/configure-db.sh +++ b/docker/mssql/configure-db.sh @@ -12,6 +12,13 @@ TIMEOUT=60 START=$(date +%s) echo "Configure DB script started at $(date)" +# fix for https://github.com/microsoft/mssql-docker/issues/892 +if [[ -d "/opt/mssql-tools18/bin" ]]; then + SQLCMD="/opt/mssql-tools18/bin/sqlcmd -No" +else + SQLCMD=/opt/mssql-tools/bin/sqlcmd +fi + while true; do DELTA=$(($(date +%s) - START)) if [[ $DELTA -gt $TIMEOUT ]]; then @@ -19,7 +26,7 @@ while true; do exit 1 fi - DBSTATUS=$(sqlcmd -h -1 -t 1 -U sa -P ${MSSQL_SA_PASSWORD} -Q "SET NOCOUNT ON; Select SUM(state) from sys.databases" 2>/dev/null | sed -e 's/^[[:space:]]*//') + DBSTATUS=$($SQLCMD -h -1 -t 1 -U sa -P ${MSSQL_SA_PASSWORD} -Q "SET NOCOUNT ON; Select SUM(state) from sys.databases" 2>/dev/null | sed -e 's/^[[:space:]]*//') ERRCODE=$? if [[ "$DBSTATUS" -eq "0" && "$ERRCODE" -eq "0" ]]; then echo "INFO: Database ready." 
@@ -32,5 +39,5 @@ done # Run the setup script to create the DB and the schema in the DB echo "Running setup.sql"; -sqlcmd -S localhost -U sa -P $MSSQL_SA_PASSWORD -d master -i /usr/config/setup.sql; +$SQLCMD -S localhost -U sa -P $MSSQL_SA_PASSWORD -d master -i /usr/config/setup.sql; echo "Success"; From 8d81a0e83a9ad853c1d9c02997d97174a3b0b449 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 12:19:11 +0000 Subject: [PATCH 29/64] Update test matrix to use latest DB versions --- .github/workflows/data/clickhouse/matrix.yml | 4 +-- .github/workflows/data/kafka/matrix.yml | 2 +- .github/workflows/data/mongodb/matrix.yml | 21 ++++------- .github/workflows/data/mssql/matrix.yml | 19 ++++------ .github/workflows/data/mysql/matrix.yml | 23 +++++------- .github/workflows/data/oracle/matrix.yml | 35 +++++++------------ .github/workflows/data/postgres/matrix.yml | 21 ++++------- .github/workflows/data/s3/matrix.yml | 13 +++---- .github/workflows/data/samba/matrix.yml | 21 ++++------- .github/workflows/data/sftp/matrix.yml | 21 ++++------- .github/workflows/data/webdav/matrix.yml | 21 ++++------- .../db_connection/mysql/prerequisites.rst | 2 +- .../db_connection/postgres/prerequisites.rst | 2 +- 13 files changed, 70 insertions(+), 135 deletions(-) diff --git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index 1469100a..928d315e 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -22,7 +22,7 @@ latest: &latest matrix: small: - clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: 24.3.2.23-alpine + clickhouse-version: 24.6.3.70-alpine <<: *max full: # Clickhouse version with proper DateTime > DateTime64 comparison @@ -30,7 +30,7 @@ matrix: clickhouse-version: '21.1' <<: *min - clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: 24.3.2.23-alpine + clickhouse-version: 24.6.3.70-alpine <<: *max nightly: - clickhouse-image: yandex/clickhouse-server diff --git a/.github/workflows/data/kafka/matrix.yml b/.github/workflows/data/kafka/matrix.yml index 8050948b..c5242cb5 100644 --- a/.github/workflows/data/kafka/matrix.yml +++ b/.github/workflows/data/kafka/matrix.yml @@ -10,7 +10,7 @@ min: &min os: ubuntu-latest max: &max - kafka-version: 3.7.0 + kafka-version: 3.7.1 pydantic-version: 2 spark-version: 3.5.1 python-version: '3.12' diff --git a/.github/workflows/data/mongodb/matrix.yml b/.github/workflows/data/mongodb/matrix.yml index a07bdd3b..98e1fe97 100644 --- a/.github/workflows/data/mongodb/matrix.yml +++ b/.github/workflows/data/mongodb/matrix.yml @@ -1,5 +1,6 @@ min: &min - # MongoDB connector does not support Spark 2 + mongodb-version: 4.0.0 + # MongoDB connector does not support Spark 2.x spark-version: 3.2.4 pydantic-version: 1 python-version: '3.7' @@ -7,6 +8,7 @@ min: &min os: ubuntu-latest max: &max + mongodb-version: 7.0.12 spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -14,6 +16,7 @@ max: &max os: ubuntu-latest latest: &latest + mongodb-version: latest spark-version: latest pydantic-version: latest python-version: '3.12' @@ -21,16 +24,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - mongodb-version: 7.0.9 - <<: *max - full: - - mongodb-version: 4.0.0 - <<: *min - - mongodb-version: 7.0.9 - <<: *max - nightly: - - mongodb-version: 4.0.0 - <<: 
*min - - mongodb-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/mssql/matrix.yml b/.github/workflows/data/mssql/matrix.yml index a074ed7a..fad2e738 100644 --- a/.github/workflows/data/mssql/matrix.yml +++ b/.github/workflows/data/mssql/matrix.yml @@ -1,4 +1,5 @@ min: &min + mssql-version: 2017-GA-ubuntu spark-version: 2.3.1 pydantic-version: 1 python-version: '3.7' @@ -6,6 +7,7 @@ min: &min os: ubuntu-latest max: &max + mssql-version: 2022-CU14-ubuntu-22.04 spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -13,6 +15,7 @@ max: &max os: ubuntu-latest latest: &latest + mssql-version: latest spark-version: latest pydantic-version: latest python-version: '3.12' @@ -20,16 +23,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - mssql-version: 2022-CU14-ubuntu-22.04 - <<: *max - full: - - mssql-version: 2017-GA-ubuntu - <<: *min - - mssql-version: 2022-CU14-ubuntu-22.04 - <<: *max - nightly: - - mssql-version: 2017-GA-ubuntu - <<: *min - - mssql-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/mysql/matrix.yml b/.github/workflows/data/mysql/matrix.yml index 39061bde..d2e70314 100644 --- a/.github/workflows/data/mysql/matrix.yml +++ b/.github/workflows/data/mysql/matrix.yml @@ -1,4 +1,7 @@ min: &min + # Tags 5.7.6-5.6.12 cannot be downloaded since Docker v26: + # "Docker Image Format v1 and Docker Image manifest version 2, schema 1 support is disabled by default" + mysql-version: 5.7.13 spark-version: 2.3.1 pydantic-version: 1 python-version: '3.7' @@ -6,6 +9,7 @@ min: &min os: ubuntu-latest max: &max + mysql-version: 9.0.1 spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -13,6 +17,7 @@ max: &max os: ubuntu-latest latest: &latest + mysql-version: latest spark-version: latest pydantic-version: latest python-version: '3.12' @@ -20,18 +25,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - mysql-version: 8.4.0 - <<: *max - full: - # Tags 5.7.6-5.6.12 cannot be downloaded since Docker v26: - # "Docker Image Format v1 and Docker Image manifest version 2, schema 1 support is disabled by default" - - mysql-version: 5.7.13 - <<: *min - - mysql-version: 8.4.0 - <<: *max - nightly: - - mysql-version: 5.7.13 - <<: *min - - mysql-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/oracle/matrix.yml b/.github/workflows/data/oracle/matrix.yml index c0a50fc2..7a79c68a 100644 --- a/.github/workflows/data/oracle/matrix.yml +++ b/.github/workflows/data/oracle/matrix.yml @@ -1,4 +1,7 @@ min: &min + oracle-image: gvenzl/oracle-xe + oracle-version: 11.2.0.2-slim-faststart + db-name: XE spark-version: 2.3.1 pydantic-version: 1 python-version: '3.7' @@ -6,6 +9,9 @@ min: &min os: ubuntu-latest max: &max + oracle-image: gvenzl/oracle-free + oracle-version: 23.4-slim-faststart + db-name: FREEPDB1 spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -13,6 +19,9 @@ max: &max os: ubuntu-latest latest: &latest + oracle-image: gvenzl/oracle-free + oracle-version: slim-faststart + db-name: FREEPDB1 spark-version: latest pydantic-version: latest python-version: '3.12' @@ -20,26 +29,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - oracle-image: gvenzl/oracle-free - oracle-version: 23.3-slim-faststart - db-name: FREEPDB1 - <<: *max - full: - - oracle-image: gvenzl/oracle-xe - oracle-version: 11.2.0.2-slim-faststart - 
db-name: XE - <<: *min - - oracle-image: gvenzl/oracle-free - oracle-version: 23.3-slim-faststart - db-name: FREEPDB1 - <<: *max - nightly: - - oracle-image: gvenzl/oracle-xe - oracle-version: 11.2.0.2-slim-faststart - db-name: XE - <<: *min - - oracle-image: gvenzl/oracle-free - oracle-version: slim-faststart - db-name: FREEPDB1 - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/postgres/matrix.yml b/.github/workflows/data/postgres/matrix.yml index 7b8e296e..4c5b5f4e 100644 --- a/.github/workflows/data/postgres/matrix.yml +++ b/.github/workflows/data/postgres/matrix.yml @@ -1,4 +1,6 @@ min: &min + # Min supported version by JDBC driver is 8.4, but it is too ancient to be used by anyone in real life + postgres-version: 9.4.26-alpine spark-version: 2.3.1 pydantic-version: 1 python-version: '3.7' @@ -6,6 +8,7 @@ min: &min os: ubuntu-latest max: &max + postgres-version: 16.3-alpine spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -13,6 +16,7 @@ max: &max os: ubuntu-latest latest: &latest + postgres-version: alpine spark-version: latest pydantic-version: latest python-version: '3.12' @@ -20,17 +24,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - postgres-version: 16.2-alpine - <<: *max - full: - # Min supported version by JDBC driver is 8.4, but it is too ancient to be used by anyone in real life - - postgres-version: 9.4.26-alpine - <<: *min - - postgres-version: 16.2-alpine - <<: *max - nightly: - - postgres-version: 9.4.26-alpine - <<: *min - - postgres-version: alpine - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/s3/matrix.yml b/.github/workflows/data/s3/matrix.yml index d9b9338f..06d4f748 100644 --- a/.github/workflows/data/s3/matrix.yml +++ b/.github/workflows/data/s3/matrix.yml @@ -9,7 +9,7 @@ min: &min os: ubuntu-latest max: &max - minio-version: 2024.4.18 + minio-version: 2024.7.26 spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -25,11 +25,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - *max - full: - - *min - - *max - nightly: - - *min - - *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/samba/matrix.yml b/.github/workflows/data/samba/matrix.yml index b1e6b56d..045a093b 100644 --- a/.github/workflows/data/samba/matrix.yml +++ b/.github/workflows/data/samba/matrix.yml @@ -1,30 +1,23 @@ min: &min + # elswork/samba image versions does not correlate with smbd version, it is always 4.x + server-version: latest pydantic-version: 1 python-version: '3.7' os: ubuntu-latest max: &max + server-version: latest pydantic-version: 2 python-version: '3.12' os: ubuntu-latest latest: &latest + server-version: latest pydantic-version: latest python-version: '3.12' os: ubuntu-latest matrix: - small: - # elswork/samba image versions does not correlate with smbd version, it is always 4.x - - server-version: latest - <<: *max - full: - - server-version: latest - <<: *min - - server-version: latest - <<: *max - nightly: - - server-version: latest - <<: *min - - server-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/sftp/matrix.yml b/.github/workflows/data/sftp/matrix.yml index a32f6f82..5a5a757c 100644 --- a/.github/workflows/data/sftp/matrix.yml +++ b/.github/workflows/data/sftp/matrix.yml @@ -1,30 +1,23 @@ min: &min + # prior image versions does not accept incoming connections, 
seems like a bug + openssh-version: 8.1_p1-r0-ls5 pydantic-version: 1 python-version: '3.7' os: ubuntu-latest max: &max + openssh-version: 9.6_p1-r0-ls154 pydantic-version: 2 python-version: '3.12' os: ubuntu-latest latest: &latest + openssh-version: latest pydantic-version: latest python-version: '3.12' os: ubuntu-latest matrix: - small: - - openssh-version: 9.6_p1-r0-ls154 - <<: *max - full: - # prior image versions does not accept incoming connections, seems like a bug - - openssh-version: 8.1_p1-r0-ls5 - <<: *min - - openssh-version: 9.6_p1-r0-ls154 - <<: *max - nightly: - - openssh-version: 8.1_p1-r0-ls5 - <<: *min - - openssh-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/.github/workflows/data/webdav/matrix.yml b/.github/workflows/data/webdav/matrix.yml index fb76e328..39c09fcd 100644 --- a/.github/workflows/data/webdav/matrix.yml +++ b/.github/workflows/data/webdav/matrix.yml @@ -1,30 +1,23 @@ min: &min + # chonjay21/webdav image has only latest tag + webdav-version: latest pydantic-version: 1 python-version: '3.7' os: ubuntu-latest max: &max + webdav-version: latest pydantic-version: 2 python-version: '3.12' os: ubuntu-latest latest: &latest + webdav-version: latest pydantic-version: latest python-version: '3.12' os: ubuntu-latest matrix: - small: - # chonjay21/webdav image has only latest tag - - webdav-version: latest - <<: *max - full: - - webdav-version: latest - <<: *min - - webdav-version: latest - <<: *max - nightly: - - webdav-version: latest - <<: *min - - webdav-version: latest - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *latest] diff --git a/docs/connection/db_connection/mysql/prerequisites.rst b/docs/connection/db_connection/mysql/prerequisites.rst index 225e630b..b92f3320 100644 --- a/docs/connection/db_connection/mysql/prerequisites.rst +++ b/docs/connection/db_connection/mysql/prerequisites.rst @@ -6,7 +6,7 @@ Prerequisites Version Compatibility --------------------- -* MySQL server versions: 5.7 - 8.4 +* MySQL server versions: 5.7 - 9.0 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/postgres/prerequisites.rst b/docs/connection/db_connection/postgres/prerequisites.rst index 509b54bc..ef83144f 100644 --- a/docs/connection/db_connection/postgres/prerequisites.rst +++ b/docs/connection/db_connection/postgres/prerequisites.rst @@ -6,7 +6,7 @@ Prerequisites Version Compatibility --------------------- -* PostgreSQL server versions: 8.2 or higher +* PostgreSQL server versions: 8.2 - 16 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 From 8f7f62fe8af5d95c961886ca64209377aeb0660b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 13:03:26 +0000 Subject: [PATCH 30/64] Update test matrix --- .github/workflows/data/clickhouse/matrix.yml | 29 +++++++------------- .github/workflows/data/greenplum/matrix.yml | 27 ++++++------------ .github/workflows/data/hdfs/matrix.yml | 11 ++------ .github/workflows/data/kafka/matrix.yml | 11 ++------ 4 files changed, 25 insertions(+), 53 deletions(-) diff --git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index 928d315e..6f1d7261 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -1,4 +1,7 @@ min: &min + # 
Clickhouse version with proper DateTime > DateTime64 comparison + clickhouse-image: yandex/clickhouse-server + clickhouse-version: '21.1' spark-version: 2.3.1 pydantic-version: 1 python-version: '3.7' @@ -6,6 +9,8 @@ min: &min os: ubuntu-latest max: &max + clickhouse-image: clickhouse/clickhouse-server + clickhouse-version: 24.6.3.70-alpine spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' @@ -13,6 +18,8 @@ max: &max os: ubuntu-latest latest: &latest + clickhouse-image: clickhouse/clickhouse-server + clickhouse-version: latest-alpine spark-version: latest pydantic-version: latest python-version: '3.12' @@ -20,22 +27,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: 24.6.3.70-alpine - <<: *max - full: - # Clickhouse version with proper DateTime > DateTime64 comparison - - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '21.1' - <<: *min - - clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: 24.6.3.70-alpine - <<: *max - nightly: - - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '21.1' - <<: *min - - clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: latest-alpine - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *max, *latest] diff --git a/.github/workflows/data/greenplum/matrix.yml b/.github/workflows/data/greenplum/matrix.yml index 28ec20e7..0935a821 100644 --- a/.github/workflows/data/greenplum/matrix.yml +++ b/.github/workflows/data/greenplum/matrix.yml @@ -1,4 +1,6 @@ min: &min + greenplum-version: 6.23.1 + package-version: 2.2.0 # Spark 2.3.0 does not support passing ivysettings.xml spark-version: 2.3.1 pydantic-version: 1 @@ -7,6 +9,8 @@ min: &min os: ubuntu-latest max: &max + greenplum-version: 7.0.0 + package-version: 2.3.1 # Greenplum connector does not support Spark 3.3+ spark-version: 3.2.4 pydantic-version: 2 @@ -15,6 +19,8 @@ max: &max os: ubuntu-latest latest: &latest + greenplum-version: 7.0.0 + package-version: 2.3.1 # Greenplum connector does not support Spark 3.3+ spark-version: 3.2.4 pydantic-version: latest @@ -23,21 +29,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - greenplum-version: 7.0.0 - package-version: 2.3.1 - <<: *max - full: - - greenplum-version: 6.23.1 - package-version: 2.2.0 - <<: *min - - greenplum-version: 7.0.0 - package-version: 2.3.1 - <<: *max - nightly: - - greenplum-version: 6.23.1 - package-version: 2.2.0 - <<: *min - - greenplum-version: 7.0.0 - package-version: 2.3.1 - <<: *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *max, *latest] diff --git a/.github/workflows/data/hdfs/matrix.yml b/.github/workflows/data/hdfs/matrix.yml index 6d8156c5..af4553f1 100644 --- a/.github/workflows/data/hdfs/matrix.yml +++ b/.github/workflows/data/hdfs/matrix.yml @@ -23,11 +23,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - *max - full: - - *min - - *max - nightly: - - *min - - *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *max, *latest] diff --git a/.github/workflows/data/kafka/matrix.yml b/.github/workflows/data/kafka/matrix.yml index c5242cb5..1b9b2336 100644 --- a/.github/workflows/data/kafka/matrix.yml +++ b/.github/workflows/data/kafka/matrix.yml @@ -26,11 +26,6 @@ latest: &latest os: ubuntu-latest matrix: - small: - - *max - full: - - *min - - *max - nightly: - - *min - - *latest + small: [*max] + full: [*min, *max] + nightly: [*min, *max, *latest] From 9e955f7a083a3d3d8ef1dd828c93e64250a07aa1 Mon Sep 17 00:00:00 2001 
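The matrix refactoring above relies on plain YAML anchors and aliases: min, max and latest are anchored mappings, and the new flow-style lists such as "small: [*max]" or "nightly: [*min, *max, *latest]" resolve to the same fully populated entries as the previous block-style "<<: *max" merges, because the image/version keys were moved into the anchored mappings themselves. As a rough illustration only (not part of any patch in this series), the expansion can be checked with PyYAML; the file path below assumes the Clickhouse matrix shown above:

    # Sketch, assuming PyYAML is installed. This is effectively what
    # `yq -o=json '.matrix'` in get-matrix.yml sees: aliases already resolved.
    import json

    import yaml

    with open(".github/workflows/data/clickhouse/matrix.yml") as f:
        data = yaml.safe_load(f)

    # "small: [*max]" resolves to a one-element list containing the full
    # max mapping (clickhouse-image, clickhouse-version, spark-version, ...)
    print(json.dumps(data["matrix"]["small"], indent=2))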
From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 29 Jul 2024 14:23:31 +0000 Subject: [PATCH 31/64] Fix mypy warnings --- onetl/hooks/hook.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onetl/hooks/hook.py b/onetl/hooks/hook.py index 45200384..d49297f1 100644 --- a/onetl/hooks/hook.py +++ b/onetl/hooks/hook.py @@ -340,7 +340,7 @@ def __exit__(self, exc_type, value, traceback): # noqa: WPS231 raise raise RuntimeError("generator didn't stop after throw()") - def process_result(self, result: T) -> T | None: + def process_result(self, result): """ Handle original method call result, and return new value. From 31ee03fadd1153c22676433a3d567c9ec2dd994d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 29 Jul 2024 21:16:51 +0000 Subject: [PATCH 32/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/asottile/pyupgrade: v3.16.0 → v3.17.0](https://github.com/asottile/pyupgrade/compare/v3.16.0...v3.17.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0c94a33..d4f50d79 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -90,7 +90,7 @@ repos: - id: text-unicode-replacement-char - repo: https://github.com/asottile/pyupgrade - rev: v3.16.0 + rev: v3.17.0 hooks: - id: pyupgrade args: [--py37-plus, --keep-runtime-typing] From 4eac4668a1164d3daa8282b50a4a76b94f11fa2c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 30 Jul 2024 07:42:09 +0000 Subject: [PATCH 33/64] Fix documentation build --- requirements/docs.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 154dbd31..a840f3da 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,4 +1,4 @@ -autodoc-pydantic<2 +autodoc-pydantic furo importlib-resources<6 numpydoc From 8d64cfbe111b2f8c528cdc6580b04564d5f841ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 30 Jul 2024 07:45:05 +0000 Subject: [PATCH 34/64] Fix documentation build --- requirements/docs.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index a840f3da..5ba51314 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,9 +1,9 @@ -autodoc-pydantic +autodoc-pydantic<2 furo importlib-resources<6 numpydoc pygments-csv-lexer -sphinx +sphinx<8 sphinx-copybutton sphinx-design sphinx-favicon From 48e6a64f333b85cf735f4e32d47e79df89d89877 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 30 Jul 2024 07:50:10 +0000 Subject: [PATCH 35/64] Fix documentation build --- .readthedocs.yml | 3 +++ requirements/docs.txt | 2 ++ 2 files changed, 5 insertions(+) diff --git 
a/.readthedocs.yml b/.readthedocs.yml index a0fc204d..aa073dab 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -11,6 +11,9 @@ build: - python -m pip install --exists-action=w --no-cache-dir --no-deps sphinx-plantuml # remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 - python -m pip install --exists-action=w --no-cache-dir -r requirements/docs.txt - python -m pip install --exists-action=w --upgrade --upgrade-strategy only-if-needed --no-cache-dir .[ftp,ftps,hdfs,samba,s3,sftp,webdav,spark] + post_install: + # TODO: remove after upgrading autodoc-pydantic to v2 + - python -m pip install --exists-action=w --no-cache-dir "sphinx<8" # TODO: uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4 #python: diff --git a/requirements/docs.txt b/requirements/docs.txt index 5ba51314..03da763e 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,8 +1,10 @@ +# TODO: remove version limit after upgrading all Pydantic models to v2 autodoc-pydantic<2 furo importlib-resources<6 numpydoc pygments-csv-lexer +# TODO: remove version limit after upgrading autodoc-pydantic to v2 sphinx<8 sphinx-copybutton sphinx-design From e0f1555db01452bd13546140b007bf4a604c7027 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 06:15:05 +0000 Subject: [PATCH 36/64] Bump mikefarah/yq from 4.44.2 to 4.44.3 in the github-actions group Bumps the github-actions group with 1 update: [mikefarah/yq](https://github.com/mikefarah/yq). Updates `mikefarah/yq` from 4.44.2 to 4.44.3 - [Release notes](https://github.com/mikefarah/yq/releases) - [Changelog](https://github.com/mikefarah/yq/blob/master/release_notes.txt) - [Commits](https://github.com/mikefarah/yq/compare/v4.44.2...v4.44.3) --- updated-dependencies: - dependency-name: mikefarah/yq dependency-type: direct:production update-type: version-update:semver-patch dependency-group: github-actions ... 
Signed-off-by: dependabot[bot] --- .github/workflows/get-matrix.yml | 38 ++++++++++++++++---------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/get-matrix.yml b/.github/workflows/get-matrix.yml index 5963ff9c..eba22eaf 100644 --- a/.github/workflows/get-matrix.yml +++ b/.github/workflows/get-matrix.yml @@ -154,7 +154,7 @@ jobs: - name: Get Core matrix id: matrix-core - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/core/matrix.yml @@ -184,7 +184,7 @@ jobs: - name: Get Clickhouse matrix id: matrix-clickhouse - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/clickhouse/matrix.yml @@ -214,7 +214,7 @@ jobs: - name: Get Greenplum matrix id: matrix-greenplum - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/greenplum/matrix.yml @@ -244,7 +244,7 @@ jobs: - name: Get Hive matrix id: matrix-hive - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/hive/matrix.yml @@ -274,7 +274,7 @@ jobs: - name: Get Kafka matrix id: matrix-kafka - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/kafka/matrix.yml @@ -304,7 +304,7 @@ jobs: - name: Get LocalFS matrix id: matrix-local-fs - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/local-fs/matrix.yml @@ -334,7 +334,7 @@ jobs: - name: Get MongoDB matrix id: matrix-mongodb - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/mongodb/matrix.yml @@ -364,7 +364,7 @@ jobs: - name: Get MSSQL matrix id: matrix-mssql - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/mssql/matrix.yml @@ -394,7 +394,7 @@ jobs: - name: Get MySQL matrix id: matrix-mysql - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/mysql/matrix.yml @@ -424,7 +424,7 @@ jobs: - name: Get Oracle matrix id: matrix-oracle - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/oracle/matrix.yml @@ -454,7 +454,7 @@ jobs: - name: Get Postgres matrix id: matrix-postgres - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/postgres/matrix.yml @@ -484,7 +484,7 @@ jobs: - name: Get Teradata matrix id: matrix-teradata - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/teradata/matrix.yml @@ -514,7 +514,7 @@ jobs: - name: Get FTP matrix id: matrix-ftp - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/ftp/matrix.yml @@ -544,7 +544,7 @@ jobs: - name: Get FTPS matrix id: matrix-ftps - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/ftps/matrix.yml @@ -574,7 +574,7 @@ jobs: - name: Get HDFS matrix id: matrix-hdfs - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/hdfs/matrix.yml @@ -604,7 +604,7 @@ jobs: - name: Get S3 matrix id: matrix-s3 - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/s3/matrix.yml @@ 
-634,7 +634,7 @@ jobs: - name: Get SFTP matrix id: matrix-sftp - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/sftp/matrix.yml @@ -664,7 +664,7 @@ jobs: - name: Get Samba matrix id: matrix-samba - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/samba/matrix.yml @@ -694,6 +694,6 @@ jobs: - name: Get WebDAV matrix id: matrix-webdav - uses: mikefarah/yq@v4.44.2 + uses: mikefarah/yq@v4.44.3 with: cmd: yq -o=json '.matrix' .github/workflows/data/webdav/matrix.yml From 411f58cff049e6f22dfb3a87a4ad7d64b3d5898e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 5 Aug 2024 14:41:25 +0000 Subject: [PATCH 37/64] Fix documentation build --- requirements/docs.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/docs.txt b/requirements/docs.txt index 03da763e..be2cd127 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -16,4 +16,5 @@ sphinx-tabs sphinx-toolbox sphinx_substitution_extensions sphinxcontrib-towncrier -towncrier +# TODO: remove upper limit after https://github.com/sphinx-contrib/sphinxcontrib-towncrier/issues/92 +towncrier<24.7 From c4a9cb895e02f2e50be988093c2fa915449e60df Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 5 Aug 2024 21:53:19 +0000 Subject: [PATCH 38/64] [pre-commit.ci] pre-commit autoupdate MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 24.4.2 → 24.8.0](https://github.com/psf/black/compare/24.4.2...24.8.0) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d4f50d79..8d9215d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -101,7 +101,7 @@ repos: - id: add-trailing-comma - repo: https://github.com/psf/black - rev: 24.4.2 + rev: 24.8.0 hooks: - id: black language_version: python3 From abff632d9ed44626dcdd8275b9ca8429e1b71af4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 7 Aug 2024 20:36:58 +0000 Subject: [PATCH 39/64] [DOP-18570] Implement SparkMetricsRecorder --- .github/workflows/data/file-df/tracked.txt | 6 +- onetl/_metrics/__init__.py | 17 ++ onetl/_metrics/command.py | 57 +++++ onetl/_metrics/driver.py | 39 ++++ onetl/_metrics/executor.py | 54 +++++ onetl/_metrics/extract.py | 113 ++++++++++ onetl/_metrics/input.py | 55 +++++ onetl/_metrics/listener/__init__.py | 29 +++ onetl/_metrics/listener/base.py | 178 +++++++++++++++ onetl/_metrics/listener/execution.py | 109 ++++++++++ onetl/_metrics/listener/job.py | 87 ++++++++ onetl/_metrics/listener/listener.py | 133 ++++++++++++ onetl/_metrics/listener/stage.py | 66 ++++++ onetl/_metrics/listener/task.py | 94 ++++++++ onetl/_metrics/output.py | 50 +++++ onetl/_metrics/recorder.py | 30 +++ onetl/_util/java.py | 34 +++ onetl/_util/scala.py | 7 + onetl/strategy/hwm_store/__init__.py | 2 +- onetl/version.py | 2 +- setup.cfg | 7 +- tests/.coveragerc | 1 + tests/fixtures/global_hwm_store.py | 2 +- tests/fixtures/processing/fixtures.py | 10 +- 
.../test_spark_metrics_recorder_file_df.py | 171 +++++++++++++++ .../test_spark_metrics_recorder_hive.py | 159 ++++++++++++++ .../test_spark_metrics_recorder_postgres.py | 205 ++++++++++++++++++ .../test_spark_command_metrics.py | 70 ++++++ .../test_metrics/test_spark_driver_metrics.py | 22 ++ .../test_spark_executor_metrics.py | 58 +++++ .../test_metrics/test_spark_input_metrics.py | 50 +++++ .../test_metrics/test_spark_output_metrics.py | 46 ++++ 32 files changed, 1952 insertions(+), 11 deletions(-) create mode 100644 onetl/_metrics/__init__.py create mode 100644 onetl/_metrics/command.py create mode 100644 onetl/_metrics/driver.py create mode 100644 onetl/_metrics/executor.py create mode 100644 onetl/_metrics/extract.py create mode 100644 onetl/_metrics/input.py create mode 100644 onetl/_metrics/listener/__init__.py create mode 100644 onetl/_metrics/listener/base.py create mode 100644 onetl/_metrics/listener/execution.py create mode 100644 onetl/_metrics/listener/job.py create mode 100644 onetl/_metrics/listener/listener.py create mode 100644 onetl/_metrics/listener/stage.py create mode 100644 onetl/_metrics/listener/task.py create mode 100644 onetl/_metrics/output.py create mode 100644 onetl/_metrics/recorder.py create mode 100644 tests/tests_integration/test_metrics/test_spark_metrics_recorder_file_df.py create mode 100644 tests/tests_integration/test_metrics/test_spark_metrics_recorder_hive.py create mode 100644 tests/tests_integration/test_metrics/test_spark_metrics_recorder_postgres.py create mode 100644 tests/tests_unit/test_metrics/test_spark_command_metrics.py create mode 100644 tests/tests_unit/test_metrics/test_spark_driver_metrics.py create mode 100644 tests/tests_unit/test_metrics/test_spark_executor_metrics.py create mode 100644 tests/tests_unit/test_metrics/test_spark_input_metrics.py create mode 100644 tests/tests_unit/test_metrics/test_spark_output_metrics.py diff --git a/.github/workflows/data/file-df/tracked.txt b/.github/workflows/data/file-df/tracked.txt index 880912b1..c1230737 100644 --- a/.github/workflows/data/file-df/tracked.txt +++ b/.github/workflows/data/file-df/tracked.txt @@ -1,6 +1,4 @@ .github/workflows/data/file-df/** -onetl/file_df_connection/spark_file_df_connection.py -onetl/file/file_df_reader/** -onetl/file/file_df_writer/** onetl/file/__init__.py -tests/resources/file_df_connection/** +**/*file_df* +**/*file_df*/** diff --git a/onetl/_metrics/__init__.py b/onetl/_metrics/__init__.py new file mode 100644 index 00000000..5d7482b6 --- /dev/null +++ b/onetl/_metrics/__init__.py @@ -0,0 +1,17 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.driver import SparkDriverMetrics +from onetl._metrics.executor import SparkExecutorMetrics +from onetl._metrics.input import SparkInputMetrics +from onetl._metrics.output import SparkOutputMetrics +from onetl._metrics.recorder import SparkMetricsRecorder + +__all__ = [ + "SparkCommandMetrics", + "SparkDriverMetrics", + "SparkMetricsRecorder", + "SparkExecutorMetrics", + "SparkInputMetrics", + "SparkOutputMetrics", +] diff --git a/onetl/_metrics/command.py b/onetl/_metrics/command.py new file mode 100644 index 00000000..2a8a53c6 --- /dev/null +++ b/onetl/_metrics/command.py @@ -0,0 +1,57 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import os +import textwrap + +try: + from pydantic.v1 import 
Field +except (ImportError, AttributeError): + from pydantic import Field # type: ignore[no-redef, assignment] + +from onetl._metrics.driver import SparkDriverMetrics +from onetl._metrics.executor import SparkExecutorMetrics +from onetl._metrics.input import SparkInputMetrics +from onetl._metrics.output import SparkOutputMetrics +from onetl.impl import BaseModel + +INDENT = " " * 4 + + +class SparkCommandMetrics(BaseModel): + input: SparkInputMetrics = Field(default_factory=SparkInputMetrics) + output: SparkOutputMetrics = Field(default_factory=SparkOutputMetrics) + driver: SparkDriverMetrics = Field(default_factory=SparkDriverMetrics) + executor: SparkExecutorMetrics = Field(default_factory=SparkExecutorMetrics) + + @property + def is_empty(self) -> bool: + return all([self.input.is_empty, self.output.is_empty]) + + def update(self, other: SparkCommandMetrics) -> SparkCommandMetrics: + self.input.update(other.input) + self.output.update(other.output) + self.driver.update(other.driver) + self.executor.update(other.executor) + return self + + @property + def details(self) -> str: + if self.is_empty: + return "No data" + + result = [] + if not self.input.is_empty: + result.append(f"Input:{os.linesep}{textwrap.indent(self.input.details, INDENT)}") + if not self.output.is_empty: + result.append(f"Output:{os.linesep}{textwrap.indent(self.output.details, INDENT)}") + if not self.driver.is_empty: + result.append(f"Driver:{os.linesep}{textwrap.indent(self.driver.details, INDENT)}") + if not self.executor.is_empty: + result.append(f"Executor:{os.linesep}{textwrap.indent(self.executor.details, INDENT)}") + + return os.linesep.join(result) + + def __str__(self): + return self.details diff --git a/onetl/_metrics/driver.py b/onetl/_metrics/driver.py new file mode 100644 index 00000000..4e685719 --- /dev/null +++ b/onetl/_metrics/driver.py @@ -0,0 +1,39 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import os + +from humanize import naturalsize + +from onetl.impl import BaseModel + +# Metrics themselves are considered a part of driver result, +# ignore if result is smaller than 1MB +MIN_DRIVER_BYTES = 1_000_000 + + +class SparkDriverMetrics(BaseModel): + in_memory_bytes: int = 0 + + @property + def is_empty(self) -> bool: + return self.in_memory_bytes < MIN_DRIVER_BYTES + + def update(self, other: SparkDriverMetrics) -> SparkDriverMetrics: + self.in_memory_bytes += other.in_memory_bytes + return self + + @property + def details(self) -> str: + if self.is_empty: + return "No data" + + result = [] + if self.in_memory_bytes >= MIN_DRIVER_BYTES: + result.append(f"In-memory data (approximate): {naturalsize(self.in_memory_bytes)}") + + return os.linesep.join(result) + + def __str__(self): + return self.details diff --git a/onetl/_metrics/executor.py b/onetl/_metrics/executor.py new file mode 100644 index 00000000..3fd6f3fc --- /dev/null +++ b/onetl/_metrics/executor.py @@ -0,0 +1,54 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import os +from datetime import timedelta + +from humanize import naturalsize, precisedelta + +from onetl.impl import BaseModel + + +class SparkExecutorMetrics(BaseModel): + total_run_time: timedelta = timedelta() + total_cpu_time: timedelta = timedelta() + peak_memory_bytes: int = 0 + memory_spilled_bytes: int = 0 + disk_spilled_bytes: int = 0 + + @property + def is_empty(self) -> bool: + return 
not self.total_run_time + + def update(self, other: SparkExecutorMetrics) -> SparkExecutorMetrics: + self.total_run_time += other.total_run_time + self.total_cpu_time += other.total_cpu_time + self.peak_memory_bytes += other.peak_memory_bytes + self.memory_spilled_bytes += other.memory_spilled_bytes + self.disk_spilled_bytes += other.disk_spilled_bytes + return self + + @property + def details(self) -> str: + if self.is_empty: + return "No data" + + result = [ + f"Total run time: {precisedelta(self.total_run_time)}", + f"Total CPU time: {precisedelta(self.total_cpu_time)}", + ] + + if self.peak_memory_bytes: + result.append(f"Peak memory: {naturalsize(self.peak_memory_bytes)}") + + if self.memory_spilled_bytes: + result.append(f"Memory spilled: {naturalsize(self.memory_spilled_bytes)}") + + if self.disk_spilled_bytes: + result.append(f"Disk spilled: {naturalsize(self.disk_spilled_bytes)}") + + return os.linesep.join(result) + + def __str__(self): + return self.details diff --git a/onetl/_metrics/extract.py b/onetl/_metrics/extract.py new file mode 100644 index 00000000..4789d8fd --- /dev/null +++ b/onetl/_metrics/extract.py @@ -0,0 +1,113 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import re +from datetime import timedelta +from typing import Any + +try: + from pydantic.v1 import ByteSize +except (ImportError, AttributeError): + from pydantic import ByteSize # type: ignore[no-redef, assignment] + +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.driver import SparkDriverMetrics +from onetl._metrics.executor import SparkExecutorMetrics +from onetl._metrics.input import SparkInputMetrics +from onetl._metrics.listener.execution import ( + SparkListenerExecution, + SparkSQLMetricNames, +) +from onetl._metrics.output import SparkOutputMetrics + +# in some cases byte metrics have format "7.6 MiB", but sometimes it is: +# total (min, med, max (stageId: taskId))\n7.6 MiB (0.0 B, 7.6 MiB, 7.6 MiB (driver)) +NON_BYTE_SIZE = re.compile(r"^[^\d.]+|\(.*\)", flags=re.DOTALL) + + +def _get_int(data: dict[SparkSQLMetricNames, list[str]], key: Any) -> int | None: + try: + return int(data[key][0]) + except Exception: + return None + + +def _get_bytes(data: dict[SparkSQLMetricNames, list[str]], key: Any) -> int | None: + try: + raw_value = data[key][0] + normalized_value = NON_BYTE_SIZE.sub("", raw_value) + return int(ByteSize.validate(normalized_value)) + except Exception: + return None + + +def extract_metrics_from_execution(execution: SparkListenerExecution) -> SparkCommandMetrics: + input_read_bytes: int = 0 + input_read_rows: int = 0 + output_bytes: int = 0 + output_rows: int = 0 + + run_time_milliseconds: int = 0 + cpu_time_nanoseconds: int = 0 + peak_memory_bytes: int = 0 + memory_spilled_bytes: int = 0 + disk_spilled_bytes: int = 0 + result_size_bytes: int = 0 + + # some metrics are per-stage, and have to be summed, others are per-execution + for job in execution.jobs: + for stage in job.stages: + input_read_bytes += stage.metrics.input_metrics.bytes_read + input_read_rows += stage.metrics.input_metrics.records_read + output_bytes += stage.metrics.output_metrics.bytes_written + output_rows += stage.metrics.output_metrics.records_written + + run_time_milliseconds += stage.metrics.executor_run_time_milliseconds + cpu_time_nanoseconds += stage.metrics.executor_cpu_time_nanoseconds + peak_memory_bytes += stage.metrics.peak_execution_memory_bytes + memory_spilled_bytes += 
stage.metrics.memory_spilled_bytes + disk_spilled_bytes += stage.metrics.disk_spilled_bytes + result_size_bytes += stage.metrics.result_size_bytes + + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L467-L473 + input_file_count = ( + _get_int(execution.metrics, SparkSQLMetricNames.NUMBER_OF_FILES_READ) + or _get_int(execution.metrics, SparkSQLMetricNames.STATIC_NUMBER_OF_FILES_READ) + or 0 + ) + input_raw_file_bytes = ( + _get_bytes(execution.metrics, SparkSQLMetricNames.SIZE_OF_FILES_READ) + or _get_bytes(execution.metrics, SparkSQLMetricNames.STATIC_SIZE_OF_FILES_READ) + or 0 + ) + input_read_partitions = _get_int(execution.metrics, SparkSQLMetricNames.NUMBER_OF_PARTITIONS_READ) or 0 + + output_files = _get_int(execution.metrics, SparkSQLMetricNames.NUMBER_OF_WRITTEN_FILES) or 0 + output_dynamic_partitions = _get_int(execution.metrics, SparkSQLMetricNames.NUMBER_OF_DYNAMIC_PART) or 0 + + return SparkCommandMetrics( + input=SparkInputMetrics( + read_rows=input_read_rows, + read_files=input_file_count, + read_bytes=input_read_bytes, + raw_file_bytes=input_raw_file_bytes, + read_partitions=input_read_partitions, + ), + output=SparkOutputMetrics( + written_rows=output_rows, + written_bytes=output_bytes, + created_files=output_files, + created_partitions=output_dynamic_partitions, + ), + driver=SparkDriverMetrics( + in_memory_bytes=result_size_bytes, + ), + executor=SparkExecutorMetrics( + total_run_time=timedelta(milliseconds=run_time_milliseconds), + total_cpu_time=timedelta(microseconds=cpu_time_nanoseconds / 1000), + peak_memory_bytes=peak_memory_bytes, + memory_spilled_bytes=memory_spilled_bytes, + disk_spilled_bytes=disk_spilled_bytes, + ), + ) diff --git a/onetl/_metrics/input.py b/onetl/_metrics/input.py new file mode 100644 index 00000000..39061311 --- /dev/null +++ b/onetl/_metrics/input.py @@ -0,0 +1,55 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import os +from pprint import pformat + +from humanize import naturalsize + +from onetl.impl import BaseModel + + +class SparkInputMetrics(BaseModel): + read_rows: int = 0 + read_files: int = 0 + read_partitions: int = 0 + read_bytes: int = 0 + raw_file_bytes: int = 0 + + @property + def is_empty(self) -> bool: + return not any([self.read_bytes, self.read_files, self.read_rows]) + + def update(self, other: SparkInputMetrics) -> SparkInputMetrics: + self.read_rows += other.read_rows + self.read_files += other.read_files + self.read_partitions += other.read_partitions + self.read_bytes += other.read_bytes + self.raw_file_bytes += other.raw_file_bytes + return self + + @property + def details(self) -> str: + if self.is_empty: + return "No data" + + result = [] + result.append(f"Read rows: {pformat(self.read_rows)}") + + if self.read_partitions: + result.append(f"Read partitions: {pformat(self.read_partitions)}") + + if self.read_files: + result.append(f"Read files: {pformat(self.read_files)}") + + if self.read_bytes: + result.append(f"Read size: {naturalsize(self.read_bytes)}") + + if self.raw_file_bytes and self.read_bytes != self.raw_file_bytes: + result.append(f"Raw files size: {naturalsize(self.raw_file_bytes)}") + + return os.linesep.join(result) + + def __str__(self): + return self.details diff --git a/onetl/_metrics/listener/__init__.py b/onetl/_metrics/listener/__init__.py new file mode 100644 index 00000000..112e4fba --- /dev/null +++ 
b/onetl/_metrics/listener/__init__.py @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from onetl._metrics.listener.execution import ( + SparkListenerExecution, + SparkListenerExecutionStatus, + SparkSQLMetricNames, +) +from onetl._metrics.listener.job import SparkListenerJob, SparkListenerJobStatus +from onetl._metrics.listener.listener import SparkMetricsListener +from onetl._metrics.listener.stage import SparkListenerStage, SparkListenerStageStatus +from onetl._metrics.listener.task import ( + SparkListenerTask, + SparkListenerTaskMetrics, + SparkListenerTaskStatus, +) + +__all__ = [ + "SparkListenerTask", + "SparkListenerTaskStatus", + "SparkListenerTaskMetrics", + "SparkListenerStage", + "SparkListenerStageStatus", + "SparkListenerJob", + "SparkListenerJobStatus", + "SparkListenerExecution", + "SparkListenerExecutionStatus", + "SparkSQLMetricNames", + "SparkMetricsListener", +] diff --git a/onetl/_metrics/listener/base.py b/onetl/_metrics/listener/base.py new file mode 100644 index 00000000..90432c7c --- /dev/null +++ b/onetl/_metrics/listener/base.py @@ -0,0 +1,178 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from contextlib import suppress +from dataclasses import dataclass +from typing import TYPE_CHECKING + +from onetl._util.java import get_java_gateway, start_callback_server + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + + +@dataclass +class BaseSparkListener: + """Base no-op SparkListener implementation. + + See `SparkListener `_ interface. + """ + + spark: SparkSession + + def activate(self): + start_callback_server(self.spark) + + # passing python listener object directly to addSparkListener or removeSparkListener leads to creating new java object each time. + # But removeSparkListener call has effect only on the same Java object passed to removeSparkListener. + # So we need to explicitly create Java object, and then pass it both calls. + gateway = get_java_gateway(self.spark) + java_list = gateway.jvm.java.util.ArrayList() + java_list.append(self) + self._java_listener = java_list[0] + + spark_context = self.spark.sparkContext._jsc.sc() # noqa: WPS437 + spark_context.addSparkListener(self._java_listener) + + def deactivate(self): + with suppress(Exception): + spark_context = self.spark.sparkContext._jsc.sc() # noqa: WPS437 + spark_context.removeSparkListener(self._java_listener) + + with suppress(Exception): + del self._java_listener + + def __enter__(self): + self.activate() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.deactivate() + + def __del__(self): # noqa: WPS603 + # If current object is collected by GC, deactivate listener + # and free bind Java object + self.deactivate() + + def equals(self, other): + # Java does not provide proper way to get object id for comparison, + # so we compare string representation which should contain some form of id + return other.toString() == self._java_listener.toString() + + def toString(self): + return type(self).__qualname__ + "@" + hex(id(self)) + + def hashCode(self): + return hash(self) + + # no cover: start + # method names are important for Java interface compatibility! 
+ def onApplicationEnd(self, application): + pass + + def onApplicationStart(self, application): + pass + + def onBlockManagerAdded(self, block_manager): + pass + + def onBlockManagerRemoved(self, block_manager): + pass + + def onBlockUpdated(self, block): + pass + + def onEnvironmentUpdate(self, environment): + pass + + def onExecutorAdded(self, executor): + pass + + def onExecutorMetricsUpdate(self, executor): + pass + + def onExecutorRemoved(self, executor): + pass + + def onExecutorBlacklisted(self, event): + pass + + def onExecutorBlacklistedForStage(self, event): + pass + + def onExecutorExcluded(self, event): + pass + + def onExecutorExcludedForStage(self, event): + pass + + def onExecutorUnblacklisted(self, event): + pass + + def onExecutorUnexcluded(self, event): + pass + + def onJobStart(self, event): + pass + + def onJobEnd(self, event): + pass + + def onNodeBlacklisted(self, node): + pass + + def onNodeBlacklistedForStage(self, stage): + pass + + def onNodeExcluded(self, node): + pass + + def onNodeExcludedForStage(self, node): + pass + + def onNodeUnblacklisted(self, node): + pass + + def onNodeUnexcluded(self, node): + pass + + def onOtherEvent(self, event): + pass + + def onResourceProfileAdded(self, resource_profile): + pass + + def onSpeculativeTaskSubmitted(self, task): + pass + + def onStageCompleted(self, event): + pass + + def onStageExecutorMetrics(self, metrics): + pass + + def onStageSubmitted(self, event): + pass + + def onTaskEnd(self, event): + pass + + def onTaskGettingResult(self, task): + pass + + def onTaskStart(self, event): + pass + + def onUnpersistRDD(self, rdd): + pass + + def onUnschedulableTaskSetAdded(self, task_set): + pass + + def onUnschedulableTaskSetRemoved(self, task_set): + pass + + # no cover: stop + class Java: + implements = ["org.apache.spark.scheduler.SparkListenerInterface"] diff --git a/onetl/_metrics/listener/execution.py b/onetl/_metrics/listener/execution.py new file mode 100644 index 00000000..728c4c2c --- /dev/null +++ b/onetl/_metrics/listener/execution.py @@ -0,0 +1,109 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from collections import defaultdict +from dataclasses import dataclass, field +from enum import Enum + +from onetl._metrics.listener.job import SparkListenerJob, SparkListenerJobStatus + + +class SparkListenerExecutionStatus(str, Enum): + STARTED = "STARTED" + COMPLETE = "COMPLETE" + FAILED = "FAILED" + + def __str__(self): + return self.value + + +class SparkSQLMetricNames(str, Enum): # noqa: WPS338 + # Metric names passed to SQLMetrics.createMetric(...) + # But only those we're interested in. 
+ + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L233C55-L233C87 + NUMBER_OF_PARTITIONS_READ = "number of partitions read" + + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L225-L227 + NUMBER_OF_FILES_READ = "number of files read" + SIZE_OF_FILES_READ = "size of files read" + + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L455-L456 + STATIC_NUMBER_OF_FILES_READ = "static number of files read" + STATIC_SIZE_OF_FILES_READ = "static size of files read" + + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala#L241-L246 + NUMBER_OF_DYNAMIC_PART = "number of dynamic part" + NUMBER_OF_WRITTEN_FILES = "number of written files" + + def __str__(self): + return self.value + + +@dataclass +class SparkListenerExecution: + id: int + description: str | None = None + external_id: str | None = None + status: SparkListenerExecutionStatus = SparkListenerExecutionStatus.STARTED + + # These metrics are emitted by any command performed within this execution, so we can have multiple values. + # Some metrics can be summarized, but some not, so we store a list. + metrics: dict[SparkSQLMetricNames, list[str]] = field(default_factory=lambda: defaultdict(list), repr=False) + + _jobs: dict[int, SparkListenerJob] = field(default_factory=dict, repr=False, init=False) + + @property + def jobs(self) -> list[SparkListenerJob]: + result = [] + for job_id in sorted(self._jobs.keys()): + result.append(self._jobs[job_id]) + return result + + def on_execution_start(self, event): + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L44-L58 + self.status = SparkListenerExecutionStatus.STARTED + + def on_execution_end(self, event): + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L61-L83 + for job in self._jobs.values(): + if job.status == SparkListenerJobStatus.FAILED: + self.status = SparkListenerExecutionStatus.FAILED + break + else: + self.status = SparkListenerExecutionStatus.COMPLETE + + def on_job_start(self, event): + job_id = event.jobId() + job = SparkListenerJob.create(event) + self._jobs[job_id] = job + job.on_job_start(event) + + def on_job_end(self, event): + job_id = event.jobId() + job = self._jobs.get(job_id) + + if job: + job.on_job_end(event) + + # in some cases Execution consists of just one job with same id + if job_id == self.id: + self.on_execution_end(event) + + # push down events + def on_stage_start(self, event): + for job in self._jobs.values(): + job.on_stage_start(event) + + def on_stage_end(self, event): + for job in self._jobs.values(): + job.on_stage_end(event) + + def on_task_start(self, event): + for job in self._jobs.values(): + job.on_task_start(event) + + def on_task_end(self, event): + for job in self._jobs.values(): + job.on_task_end(event) diff --git a/onetl/_metrics/listener/job.py b/onetl/_metrics/listener/job.py new file mode 100644 index 00000000..b3abbd06 --- /dev/null +++ b/onetl/_metrics/listener/job.py @@ -0,0 +1,87 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import 
Enum + +from onetl._metrics.listener.stage import SparkListenerStage, SparkListenerStageStatus +from onetl._util.scala import scala_seq_to_python_list + + +class SparkListenerJobStatus(str, Enum): + RUNNING = "RUNNING" + SUCCEEDED = "SUCCEEDED" + FAILED = "FAILED" + UNKNOWN = "UNKNOWN" + + def __str__(self): + return self.value + + +@dataclass +class SparkListenerJob: + id: int + description: str | None = None + group_id: str | None = None + call_site: str | None = None + status: SparkListenerJobStatus = SparkListenerJobStatus.UNKNOWN + + _stages: dict[int, SparkListenerStage] = field(default_factory=dict, repr=False, init=False) + + @property + def stages(self) -> list[SparkListenerStage]: + result = [] + for stage_id in sorted(self._stages.keys()): + result.append(self._stages[stage_id]) + return result + + @classmethod + def create(cls, event): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerJobSubmitted.html + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerJobCompleted.html + result = cls( + id=event.jobId(), + description=event.properties().get("spark.job.description"), + group_id=event.properties().get("spark.jobGroup.id"), + call_site=event.properties().get("callSite.short"), + ) + + stage_ids = scala_seq_to_python_list(event.stageIds()) + stage_infos = scala_seq_to_python_list(event.stageInfos()) + for stage_id, stage_info in zip(stage_ids, stage_infos): + result._stages[stage_id] = SparkListenerStage.create(stage_info) # noqa: WPS437 + + return result + + def on_job_start(self, event): + self.status = SparkListenerJobStatus.RUNNING + + def on_job_end(self, event): + for stage in self._stages.values(): + if stage.status == SparkListenerStageStatus.FAILED: + self.status = SparkListenerJobStatus.FAILED + break + else: + self.status = SparkListenerJobStatus.SUCCEEDED + + def on_stage_start(self, event): + stage_id = event.stageInfo().stageId() + stage = self._stages.get(stage_id) + if stage: + stage.on_stage_start(event) + + def on_stage_end(self, event): + stage_id = event.stageInfo().stageId() + stage = self._stages.get(stage_id) + if stage: + stage.on_stage_end(event) + + # push down events + def on_task_start(self, event): + for stage in self._stages.values(): + stage.on_task_start(event) + + def on_task_end(self, event): + for stage in self._stages.values(): + stage.on_task_end(event) diff --git a/onetl/_metrics/listener/listener.py b/onetl/_metrics/listener/listener.py new file mode 100644 index 00000000..3421e5ae --- /dev/null +++ b/onetl/_metrics/listener/listener.py @@ -0,0 +1,133 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from dataclasses import dataclass, field +from threading import current_thread +from typing import ClassVar + +from onetl._metrics.listener.base import BaseSparkListener +from onetl._metrics.listener.execution import ( + SparkListenerExecution, + SparkSQLMetricNames, +) + + +@dataclass +class SparkMetricsListener(BaseSparkListener): + THREAD_ID_KEY = "python.thread.id" + SQL_START_CLASS_NAME: ClassVar[str] = "org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart" + SQL_STOP_CLASS_NAME: ClassVar[str] = "org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionEnd" + + _thread_id: str = field(default_factory=lambda: str(current_thread().ident), repr=False, init=False) + _recorded_executions: dict[int, SparkListenerExecution] = field(default_factory=dict, repr=False, 
init=False) + + def activate(self): + # we cannot override execution_id property as it set by Spark + # we also cannot use job tags, as they were implemented only in Spark 3.5+ + self.spark.sparkContext.setLocalProperty(self.THREAD_ID_KEY, self._thread_id) + return super().activate() + + def reset(self): + self._recorded_executions.clear() + return self + + @property + def executions(self): + return [ + execution for execution in self._recorded_executions.values() if execution.external_id == self._thread_id + ] + + def __enter__(self): + """Record only executions performed by current Spark thread. + + It is important to use this method only in combination with + :obj:`pyspark.util.InheritableThread` to preserve thread-local variables + between Python thread and Java thread. + """ + self.reset() + return super().__enter__() + + def onOtherEvent(self, event): + class_name = event.getClass().getName() + if class_name == self.SQL_START_CLASS_NAME: + self.onExecutionStart(event) + elif class_name == self.SQL_STOP_CLASS_NAME: + self.onExecutionEnd(event) + + def onExecutionStart(self, event): + execution_id = event.executionId() + description = event.description() + execution = SparkListenerExecution( + id=execution_id, + description=description, + ) + self._recorded_executions[execution_id] = execution + execution.on_execution_start(event) + + def onExecutionEnd(self, event): + execution_id = event.executionId() + execution = self._recorded_executions.get(execution_id) + if execution: + execution.on_execution_end(event) + + # Get execution metrics from SQLAppStatusStore, + # as SparkListenerSQLExecutionEnd event does not provide them: + # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala + session_status_store = self.spark._jsparkSession.sharedState().statusStore() # noqa: WPS437 + raw_execution = session_status_store.execution(execution.id).get() + metrics = raw_execution.metrics() + metric_values = session_status_store.executionMetrics(execution.id) + for i in range(metrics.size()): + metric = metrics.apply(i) + metric_name = metric.name() + if metric_name not in SparkSQLMetricNames: + continue + metric_value = metric_values.get(metric.accumulatorId()) + if not metric_value.isDefined(): + continue + execution.metrics[SparkSQLMetricNames(metric_name)].append(metric_value.get()) + + def onJobStart(self, event): + execution_id = event.properties().get("spark.sql.execution.id") + execution_thread_id = event.properties().get(self.THREAD_ID_KEY) + if execution_id is None: + # single job execution + job_id = event.jobId() + execution = SparkListenerExecution( + id=job_id, + description=event.properties().get("spark.job.description"), + external_id=execution_thread_id, + ) + self._recorded_executions[job_id] = execution + else: + execution = self._recorded_executions.get(int(execution_id)) + if execution is None: + return + + if execution_thread_id: + # SparkListenerSQLExecutionStart does not have properties, but SparkListenerJobStart does, + # use it as a source of external_id + execution.external_id = execution_thread_id + + execution.on_job_start(event) + + def onJobEnd(self, event): + for execution in self._recorded_executions.values(): + execution.on_job_end(event) + + def onStageSubmitted(self, event): + for execution in self._recorded_executions.values(): + execution.on_stage_start(event) + + def onStageCompleted(self, event): + for execution in self._recorded_executions.values(): + execution.on_stage_end(event) + + 
def onTaskStart(self, event): + for execution in self._recorded_executions.values(): + execution.on_task_start(event) + + def onTaskEnd(self, event): + for execution in self._recorded_executions.values(): + execution.on_task_end(event) diff --git a/onetl/_metrics/listener/stage.py b/onetl/_metrics/listener/stage.py new file mode 100644 index 00000000..4bf4dffb --- /dev/null +++ b/onetl/_metrics/listener/stage.py @@ -0,0 +1,66 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + +from onetl._metrics.listener.task import SparkListenerTask, SparkListenerTaskMetrics + + +class SparkListenerStageStatus(str, Enum): + ACTIVE = "ACTIVE" + COMPLETE = "COMPLETE" + FAILED = "FAILED" + PENDING = "PENDING" + SKIPPED = "SKIPPED" + + def __str__(self): + return self.value + + +@dataclass +class SparkListenerStage: + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/StageInfo.html + id: int + status: SparkListenerStageStatus = SparkListenerStageStatus.PENDING + metrics: SparkListenerTaskMetrics = field(default_factory=SparkListenerTaskMetrics, repr=False, init=False) + _tasks: dict[int, SparkListenerTask] = field(default_factory=dict, repr=False, init=False) + + @property + def tasks(self) -> list[SparkListenerTask]: + result = [] + for task_id in sorted(self._tasks.keys()): + result.append(self._tasks[task_id]) + return result + + @classmethod + def create(cls, stage_info): + return cls(id=stage_info.stageId()) + + def on_stage_start(self, event): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerStageSubmitted.html + self.status = SparkListenerStageStatus.ACTIVE + + def on_stage_end(self, event): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerStageCompleted.html + stage_info = event.stageInfo() + if stage_info.failureReason().isDefined(): + self.status = SparkListenerStageStatus.FAILED + elif not self.tasks: + self.status = SparkListenerStageStatus.SKIPPED + else: + self.status = SparkListenerStageStatus.COMPLETE + + self.metrics = SparkListenerTaskMetrics.create(stage_info.taskMetrics()) + + def on_task_start(self, event): + task_info = event.taskInfo() + task_id = task_info.taskId() + self._tasks[task_id] = SparkListenerTask.create(task_info) + + def on_task_end(self, event): + task_id = event.taskInfo().taskId() + task = self._tasks.get(task_id) + if task: + task.on_task_end(event) diff --git a/onetl/_metrics/listener/task.py b/onetl/_metrics/listener/task.py new file mode 100644 index 00000000..4b27ffcf --- /dev/null +++ b/onetl/_metrics/listener/task.py @@ -0,0 +1,94 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from dataclasses import dataclass, field +from enum import Enum + + +class SparkListenerTaskStatus(str, Enum): + PENDING = "PENDING" + RUNNING = "RUNNING" + SUCCESS = "SUCCESS" + FAILED = "FAILED" + KILLED = "KILLED" + + def __str__(self): + return self.value + + +@dataclass +class SparkListenerTaskInputMetrics: + bytes_read: int = 0 + records_read: int = 0 + + @classmethod + def create(cls, task_input_metrics): + return cls( + bytes_read=task_input_metrics.bytesRead(), + records_read=task_input_metrics.recordsRead(), + ) + + +@dataclass +class SparkListenerTaskOutputMetrics: + bytes_written: int = 0 + records_written: int = 0 + + 
@classmethod + def create(cls, task_output_metrics): + return cls( + bytes_written=task_output_metrics.bytesWritten(), + records_written=task_output_metrics.recordsWritten(), + ) + + +@dataclass +class SparkListenerTaskMetrics: + """Python representation of Spark TaskMetrics object. + + See `documentation `_. + """ + + executor_run_time_milliseconds: int = 0 + executor_cpu_time_nanoseconds: int = 0 + peak_execution_memory_bytes: int = 0 + memory_spilled_bytes: int = 0 + disk_spilled_bytes: int = 0 + result_size_bytes: int = 0 + input_metrics: SparkListenerTaskInputMetrics = field(default_factory=SparkListenerTaskInputMetrics) + output_metrics: SparkListenerTaskOutputMetrics = field(default_factory=SparkListenerTaskOutputMetrics) + + @classmethod + def create(cls, task_metrics): + return cls( + executor_run_time_milliseconds=task_metrics.executorRunTime(), + executor_cpu_time_nanoseconds=task_metrics.executorCpuTime(), + peak_execution_memory_bytes=task_metrics.peakExecutionMemory(), + memory_spilled_bytes=task_metrics.memoryBytesSpilled(), + disk_spilled_bytes=task_metrics.diskBytesSpilled(), + result_size_bytes=task_metrics.resultSize(), + input_metrics=SparkListenerTaskInputMetrics.create(task_metrics.inputMetrics()), + output_metrics=SparkListenerTaskOutputMetrics.create(task_metrics.outputMetrics()), + ) + + +@dataclass +class SparkListenerTask: + id: int + status: SparkListenerTaskStatus = SparkListenerTaskStatus.PENDING + metrics: SparkListenerTaskMetrics | None = field(default=None, repr=False, init=False) + + @classmethod + def create(cls, task_info): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/TaskInfo.html + return cls(id=task_info.taskId()) + + def on_task_start(self, event): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerTaskStart.html + self.status = SparkListenerTaskStatus(event.taskInfo().status()) + + def on_task_end(self, event): + # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerTaskEnd.html + self.status = SparkListenerTaskStatus(event.taskInfo().status()) + self.metrics = SparkListenerTaskMetrics.create(event.taskMetrics()) diff --git a/onetl/_metrics/output.py b/onetl/_metrics/output.py new file mode 100644 index 00000000..8600bb68 --- /dev/null +++ b/onetl/_metrics/output.py @@ -0,0 +1,50 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import os +from pprint import pformat + +from humanize import naturalsize + +from onetl.impl import BaseModel + + +class SparkOutputMetrics(BaseModel): + written_bytes: int = 0 + written_rows: int = 0 + created_files: int = 0 + created_partitions: int = 0 + + @property + def is_empty(self) -> bool: + return not any([self.written_bytes, self.written_rows, self.created_files]) + + def update(self, other: SparkOutputMetrics) -> SparkOutputMetrics: + self.written_bytes += other.written_bytes + self.written_rows += other.written_rows + self.created_files += other.created_files + self.created_partitions = max([self.created_partitions, other.created_partitions]) + return self + + @property + def details(self) -> str: + if self.is_empty: + return "No data" + + result = [] + result.append(f"Written rows: {pformat(self.written_rows)}") + + if self.written_bytes: + result.append(f"Written size: {naturalsize(self.written_bytes)}") + + if self.created_files: + result.append(f"Created files: {pformat(self.created_files)}") + + if 
self.created_partitions: + result.append(f"Created partitions: {pformat(self.created_partitions)}") + + return os.linesep.join(result) + + def __str__(self): + return self.details diff --git a/onetl/_metrics/recorder.py b/onetl/_metrics/recorder.py new file mode 100644 index 00000000..4cc5745b --- /dev/null +++ b/onetl/_metrics/recorder.py @@ -0,0 +1,30 @@ +# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +from typing import TYPE_CHECKING + +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.extract import extract_metrics_from_execution +from onetl._metrics.listener import SparkMetricsListener + +if TYPE_CHECKING: + from pyspark.sql import SparkSession + + +class SparkMetricsRecorder: + def __init__(self, spark: SparkSession): + self._listener = SparkMetricsListener(spark=spark) + + def __enter__(self): + self._listener.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self._listener.__exit__(exc_type, exc_val, exc_tb) + + def metrics(self) -> SparkCommandMetrics: + result = SparkCommandMetrics() + for execution in self._listener.executions: + result = result.update(extract_metrics_from_execution(execution)) + return result diff --git a/onetl/_util/java.py b/onetl/_util/java.py index df88b1a5..45111432 100644 --- a/onetl/_util/java.py +++ b/onetl/_util/java.py @@ -4,6 +4,9 @@ from typing import TYPE_CHECKING +from onetl._util.spark import get_spark_version +from onetl._util.version import Version + if TYPE_CHECKING: from py4j.java_gateway import JavaGateway from pyspark.sql import SparkSession @@ -24,3 +27,34 @@ def try_import_java_class(spark_session: SparkSession, name: str): klass = getattr(gateway.jvm, name) gateway.help(klass, display=False) return klass + + +def start_callback_server(spark_session: SparkSession): + """ + Start Py4J callback server. Important to receive Java events on Python side, + e.g. in Spark Listener implementations. 
+ """ + gateway = get_java_gateway(spark_session) + if get_spark_version(spark_session) >= Version("2.4"): + from pyspark.java_gateway import ensure_callback_server_started + + ensure_callback_server_started(gateway) + return + + # PySpark 2.3 + if "_callback_server" not in gateway.__dict__ or gateway._callback_server is None: + from py4j.java_gateway import JavaObject + + gateway.callback_server_parameters.eager_load = True + gateway.callback_server_parameters.daemonize = True + gateway.callback_server_parameters.daemonize_connections = True + gateway.callback_server_parameters.port = 0 + gateway.start_callback_server(gateway.callback_server_parameters) + cbport = gateway._callback_server.server_socket.getsockname()[1] + gateway._callback_server.port = cbport + # gateway with real port + gateway._python_proxy_port = gateway._callback_server.port + # get the GatewayServer object in JVM by ID + java_gateway = JavaObject("GATEWAY_SERVER", gateway._gateway_client) + # update the port of CallbackClient with real port + java_gateway.resetCallbackClient(java_gateway.getCallbackClient().getAddress(), gateway._python_proxy_port) diff --git a/onetl/_util/scala.py b/onetl/_util/scala.py index 397a9157..5e6c21bc 100644 --- a/onetl/_util/scala.py +++ b/onetl/_util/scala.py @@ -12,3 +12,10 @@ def get_default_scala_version(spark_version: Version) -> Version: if spark_version.major < 3: return Version("2.11") return Version("2.12") + + +def scala_seq_to_python_list(seq) -> list: + result = [] + for i in range(seq.size()): + result.append(seq.apply(i)) + return result diff --git a/onetl/strategy/hwm_store/__init__.py b/onetl/strategy/hwm_store/__init__.py index 0b931301..7a0338d3 100644 --- a/onetl/strategy/hwm_store/__init__.py +++ b/onetl/strategy/hwm_store/__init__.py @@ -23,7 +23,7 @@ register_spark_type_to_hwm_type_mapping, ) -__all__ = [ # noqa: WPS410 +__all__ = [ "BaseHWMStore", "SparkTypeToHWM", "register_spark_type_to_hwm_type_mapping", diff --git a/onetl/version.py b/onetl/version.py index dada22dd..1a3c6cec 100644 --- a/onetl/version.py +++ b/onetl/version.py @@ -8,4 +8,4 @@ VERSION_FILE = Path(__file__).parent / "VERSION" -__version__ = VERSION_FILE.read_text().strip() # noqa: WPS410 +__version__ = VERSION_FILE.read_text().strip() diff --git a/setup.cfg b/setup.cfg index d12261ed..7ddb67b0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -275,7 +275,9 @@ ignore = # WPS474 Found import object collision WPS474, # WPS318 Found extra indentation - WPS318 + WPS318, +# WPS410 Found wrong metadata variable: __all__ + WPS410 # http://flake8.pycqa.org/en/latest/user/options.html?highlight=per-file-ignores#cmdoption-flake8-per-file-ignores per-file-ignores = @@ -350,6 +352,9 @@ per-file-ignores = onetl/hooks/slot.py: # WPS210 Found too many local variables WPS210, + onetl/_metrics/listener/*: +# N802 function name 'onJobStart' should be lowercase + N802, tests/*: # Found too many empty lines in `def` WPS473, diff --git a/tests/.coveragerc b/tests/.coveragerc index 08633e6c..55af8c09 100644 --- a/tests/.coveragerc +++ b/tests/.coveragerc @@ -7,6 +7,7 @@ data_file = reports/.coverage [report] exclude_lines = pragma: no cover + no cover: start(?s:.)*?no cover: stop def __repr__ if self.debug: if settings.DEBUG diff --git a/tests/fixtures/global_hwm_store.py b/tests/fixtures/global_hwm_store.py index f10a0089..2e006b92 100644 --- a/tests/fixtures/global_hwm_store.py +++ b/tests/fixtures/global_hwm_store.py @@ -5,7 +5,7 @@ @pytest.fixture(scope="function", autouse=True) def global_hwm_store(request): # noqa: 
WPS325 test_function = request.function - entities = test_function.__name__.split("_") if test_function else [] + entities = set(test_function.__name__.split("_")) if test_function else set() if "strategy" in entities: with MemoryHWMStore(): diff --git a/tests/fixtures/processing/fixtures.py b/tests/fixtures/processing/fixtures.py index 3f541f69..9bb62689 100644 --- a/tests/fixtures/processing/fixtures.py +++ b/tests/fixtures/processing/fixtures.py @@ -21,10 +21,14 @@ def processing(request, spark): "kafka": ("tests.fixtures.processing.kafka", "KafkaProcessing"), } - db_storage_name = request.function.__name__.split("_")[1] - if db_storage_name not in processing_classes: - raise ValueError(f"Wrong name. Please use one of: {list(processing_classes.keys())}") + test_name_parts = set(request.function.__name__.split("_")) + matches = set(processing_classes.keys()) & test_name_parts + if not matches or len(matches) > 1: + raise ValueError( + f"Test name {request.function.__name__} should have one of these components: {list(processing_classes.keys())}", + ) + db_storage_name = matches.pop() module_name, class_name = processing_classes[db_storage_name] module = import_module(module_name) db_processing = getattr(module, class_name) diff --git a/tests/tests_integration/test_metrics/test_spark_metrics_recorder_file_df.py b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_file_df.py new file mode 100644 index 00000000..f59acf89 --- /dev/null +++ b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_file_df.py @@ -0,0 +1,171 @@ +import time +from contextlib import suppress +from pathlib import Path + +import pytest + +from onetl._metrics.recorder import SparkMetricsRecorder +from onetl._util.spark import get_spark_version +from onetl.file import FileDFReader, FileDFWriter +from onetl.file.format import CSV, JSON + +pytestmark = [ + pytest.mark.local_fs, + pytest.mark.file_df_connection, + pytest.mark.connection, + pytest.mark.csv, + # SparkListener does not give guarantees of delivering execution metrics in time + pytest.mark.flaky(reruns=5), +] + + +def test_spark_metrics_recorder_file_df_reader( + spark, + local_fs_file_df_connection_with_path_and_files, +): + local_fs, source_path, _ = local_fs_file_df_connection_with_path_and_files + files_path: Path = source_path / "csv/with_header" + + reader = FileDFReader( + connection=local_fs, + format=CSV(header=True), + source_path=files_path, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows + assert metrics.input.read_bytes + # file related metrics are too flaky to assert + + +def test_spark_metrics_recorder_file_df_reader_no_files( + spark, + local_fs_file_df_connection_with_path, + file_df_schema, +): + local_fs, source_path = local_fs_file_df_connection_with_path + + reader = FileDFReader( + connection=local_fs, + format=CSV(), + source_path=source_path, + df_schema=file_df_schema, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + assert not metrics.input.read_files + + +def test_spark_metrics_recorder_file_df_reader_no_data_after_filter( + spark, + local_fs_file_df_connection_with_path_and_files, + file_df_schema, +): + local_fs, source_path, _ = 
local_fs_file_df_connection_with_path_and_files + files_path = source_path / "csv/with_header" + + reader = FileDFReader( + connection=local_fs, + format=CSV(header=True), + source_path=files_path, + df_schema=file_df_schema, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run().where("str_value = 'unknown'") + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + + spark_version = get_spark_version(spark) + if spark_version.major >= 3: + # Spark 3.0 does not include skipped rows to metrics + assert not metrics.input.read_rows + else: + # Spark 2.0 does + assert metrics.input.read_rows + + +def test_spark_metrics_recorder_file_df_reader_error( + spark, + local_fs_file_df_connection_with_path_and_files, +): + local_fs, source_path, _ = local_fs_file_df_connection_with_path_and_files + files_path: Path = source_path / "csv/with_header" + + reader = FileDFReader( + connection=local_fs, + format=JSON(), + source_path=files_path, + ) + + with SparkMetricsRecorder(spark) as recorder: + with suppress(Exception): + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + # some files metadata may be scanned, but file content was not read + assert not metrics.input.raw_file_bytes + + +def test_spark_metrics_recorder_file_df_writer( + spark, + local_fs_file_df_connection_with_path, + file_df_dataframe, +): + local_fs, target_path = local_fs_file_df_connection_with_path + + writer = FileDFWriter( + connection=local_fs, + format=CSV(), + target_path=target_path, + options=FileDFWriter.Options(if_exists="append"), + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(file_df_dataframe) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.output.written_rows == file_df_dataframe.count() + assert metrics.output.written_bytes + # file related metrics are too flaky to assert + + +def test_spark_metrics_recorder_file_df_writer_empty_input( + spark, + local_fs_file_df_connection_with_path, + file_df_dataframe, +): + local_fs, target_path = local_fs_file_df_connection_with_path + + df = file_df_dataframe.limit(0) + + writer = FileDFWriter( + connection=local_fs, + format=CSV(), + target_path=target_path, + options=FileDFWriter.Options(if_exists="append"), + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(df) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.output.written_rows + assert not metrics.output.written_bytes diff --git a/tests/tests_integration/test_metrics/test_spark_metrics_recorder_hive.py b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_hive.py new file mode 100644 index 00000000..7e8dc218 --- /dev/null +++ b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_hive.py @@ -0,0 +1,159 @@ +import time + +import pytest + +from onetl._metrics.recorder import SparkMetricsRecorder +from onetl.connection import Hive +from onetl.db import DBReader, DBWriter +from tests.util.rand import rand_str + +pytestmark = [ + pytest.mark.hive, + pytest.mark.db_connection, + pytest.mark.connection, + # SparkListener does not give guarantees of delivering execution metrics in time + pytest.mark.flaky(reruns=5), +] + + +def test_spark_metrics_recorder_hive_read_count(spark, load_table_data): + hive = Hive(cluster="rnd-dwh", spark=spark) + reader = DBReader( + 
connection=hive, + source=load_table_data.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + rows = df.count() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows == rows + assert metrics.input.read_bytes + # in some cases files are read, in some cases only metastore statistics is used + + +def test_spark_metrics_recorder_hive_read_collect(spark, load_table_data): + hive = Hive(cluster="rnd-dwh", spark=spark) + reader = DBReader( + connection=hive, + source=load_table_data.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + rows = len(df.collect()) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows == rows + assert metrics.input.read_bytes + # file related metrics are too flaky to assert + + +def test_spark_metrics_recorder_hive_read_empty_source(spark, prepare_schema_table): + hive = Hive(cluster="rnd-dwh", spark=spark) + reader = DBReader( + connection=hive, + source=prepare_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + assert not metrics.input.read_bytes + + +def test_spark_metrics_recorder_hive_read_no_data_after_filter(spark, load_table_data): + hive = Hive(cluster="rnd-dwh", spark=spark) + reader = DBReader( + connection=hive, + source=load_table_data.full_name, + where="1=0", + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + assert not metrics.input.read_bytes + + +def test_spark_metrics_recorder_hive_sql(spark, load_table_data): + hive = Hive(cluster="rnd-dwh", spark=spark) + + with SparkMetricsRecorder(spark) as recorder: + df = hive.sql(f"SELECT * FROM {load_table_data.full_name}") + rows = len(df.collect()) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows == rows + assert metrics.input.read_bytes + # file related metrics are too flaky to assert + + +def test_spark_metrics_recorder_hive_write(spark, processing, get_schema_table): + df = processing.create_spark_df(spark) + + hive = Hive(cluster="rnd-dwh", spark=spark) + writer = DBWriter( + connection=hive, + target=get_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(df) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.output.written_rows == df.count() + assert metrics.output.written_bytes + # file related metrics are too flaky to assert + + +def test_spark_metrics_recorder_hive_write_empty(spark, processing, get_schema_table): + df = processing.create_spark_df(spark).limit(0) + + hive = Hive(cluster="rnd-dwh", spark=spark) + writer = DBWriter( + connection=hive, + target=get_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(df) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.output.written_rows + + +def test_spark_metrics_recorder_hive_execute(request, spark, processing, get_schema_table): + df = 
processing.create_spark_df(spark) + view_name = rand_str() + df.createOrReplaceTempView(view_name) + + def finalizer(): + spark.sql(f"DROP VIEW IF EXISTS {view_name}") + + request.addfinalizer(finalizer) + + hive = Hive(cluster="rnd-dwh", spark=spark) + + with SparkMetricsRecorder(spark) as recorder: + hive.execute(f"CREATE TABLE {get_schema_table.full_name} AS SELECT * FROM {view_name}") + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.output.written_rows == df.count() + assert metrics.output.written_bytes + # file related metrics are too flaky to assert diff --git a/tests/tests_integration/test_metrics/test_spark_metrics_recorder_postgres.py b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_postgres.py new file mode 100644 index 00000000..67e31591 --- /dev/null +++ b/tests/tests_integration/test_metrics/test_spark_metrics_recorder_postgres.py @@ -0,0 +1,205 @@ +import time + +import pytest + +from onetl._metrics.recorder import SparkMetricsRecorder +from onetl._util.spark import get_spark_version +from onetl.connection import Postgres +from onetl.db import DBReader, DBWriter + +pytestmark = [ + pytest.mark.postgres, + pytest.mark.db_connection, + pytest.mark.connection, + # SparkListener does not give guarantees of delivering execution metrics in time + pytest.mark.flaky(reruns=5), +] + + +def test_spark_metrics_recorder_postgres_read(spark, processing, load_table_data): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + reader = DBReader( + connection=postgres, + source=load_table_data.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + rows = len(df.collect()) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows == rows + # JDBC does not provide information about data size + assert not metrics.input.read_bytes + + +def test_spark_metrics_recorder_postgres_read_empty_source(spark, processing, prepare_schema_table): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + reader = DBReader( + connection=postgres, + source=prepare_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + + +def test_spark_metrics_recorder_postgres_read_no_data_after_filter(spark, processing, load_table_data): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + reader = DBReader( + connection=postgres, + source=load_table_data.full_name, + where="1=0", + ) + + with SparkMetricsRecorder(spark) as recorder: + df = reader.run() + df.collect() + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + + +def test_spark_metrics_recorder_postgres_sql(spark, processing, load_table_data): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + 
with SparkMetricsRecorder(spark) as recorder: + df = postgres.sql(f"SELECT * FROM {load_table_data.full_name}") + rows = len(df.collect()) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert metrics.input.read_rows == rows + + +def test_spark_metrics_recorder_postgres_write(spark, processing, get_schema_table): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + df = processing.create_spark_df(spark) + + writer = DBWriter( + connection=postgres, + target=get_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(df) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + spark_version = get_spark_version(spark) + if spark_version.major >= 3: + # Spark started collecting JDBC write bytes only since Spark 3.0: + # https://issues.apache.org/jira/browse/SPARK-29461 + assert metrics.output.written_rows == df.count() + else: + assert not metrics.output.written_rows + # JDBC does not provide information about data size + assert not metrics.output.written_bytes + + +def test_spark_metrics_recorder_postgres_write_empty(spark, processing, get_schema_table): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + df = processing.create_spark_df(spark).limit(0) + + writer = DBWriter( + connection=postgres, + target=get_schema_table.full_name, + ) + + with SparkMetricsRecorder(spark) as recorder: + writer.run(df) + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.output.written_rows + + +def test_spark_metrics_recorder_postgres_fetch(spark, processing, load_table_data): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + with SparkMetricsRecorder(spark) as recorder: + postgres.fetch(f"SELECT * FROM {load_table_data.full_name}") + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows + + +def test_spark_metrics_recorder_postgres_execute(spark, processing, load_table_data): + postgres = Postgres( + host=processing.host, + port=processing.port, + user=processing.user, + password=processing.password, + database=processing.database, + spark=spark, + ) + + new_table = load_table_data.full_name + "_new" + + with SparkMetricsRecorder(spark) as recorder: + postgres.execute(f"CREATE TABLE {new_table} AS SELECT * FROM {load_table_data.full_name}") + + time.sleep(0.1) # sleep to fetch late metrics from SparkListener + metrics = recorder.metrics() + assert not metrics.input.read_rows diff --git a/tests/tests_unit/test_metrics/test_spark_command_metrics.py b/tests/tests_unit/test_metrics/test_spark_command_metrics.py new file mode 100644 index 00000000..f4da3070 --- /dev/null +++ b/tests/tests_unit/test_metrics/test_spark_command_metrics.py @@ -0,0 +1,70 @@ +import textwrap +from datetime import timedelta + +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.driver import SparkDriverMetrics +from onetl._metrics.executor import SparkExecutorMetrics +from onetl._metrics.input import SparkInputMetrics +from onetl._metrics.output 
import SparkOutputMetrics + + +def test_spark_metrics_command_is_empty(): + empty_metrics = SparkCommandMetrics() + assert empty_metrics.is_empty + + no_input_output = SparkCommandMetrics( + driver=SparkDriverMetrics(in_memory_bytes=1_000_000), + executor=SparkExecutorMetrics(total_run_time=timedelta(microseconds=1)), + ) + assert no_input_output.is_empty + + with_input = SparkCommandMetrics( + input=SparkInputMetrics(read_rows=1), + ) + assert not with_input.is_empty + + with_output = SparkCommandMetrics( + output=SparkOutputMetrics(written_rows=1), + ) + assert not with_output.is_empty + + +def test_spark_metrics_command_details(): + empty_metrics = SparkCommandMetrics() + assert empty_metrics.details == "No data" + assert str(empty_metrics) == empty_metrics.details + + jdbc_fetch_metrics = SparkCommandMetrics( + input=SparkInputMetrics(read_rows=1_000), + driver=SparkDriverMetrics(in_memory_bytes=1_000_000), + ) + + expected = textwrap.dedent( + """ + Input: + Read rows: 1000 + Driver: + In-memory data (approximate): 1.0 MB + """, + ) + assert jdbc_fetch_metrics.details == expected.strip() + assert str(jdbc_fetch_metrics) == jdbc_fetch_metrics.details + + jdbc_write_metrics = SparkCommandMetrics( + output=SparkOutputMetrics(written_rows=1_000), + executor=SparkExecutorMetrics( + total_run_time=timedelta(seconds=2), + total_cpu_time=timedelta(seconds=1), + ), + ) + expected = textwrap.dedent( + """ + Output: + Written rows: 1000 + Executor: + Total run time: 2 seconds + Total CPU time: 1 second + """, + ) + assert jdbc_write_metrics.details == expected.strip() + assert str(jdbc_write_metrics) == jdbc_write_metrics.details diff --git a/tests/tests_unit/test_metrics/test_spark_driver_metrics.py b/tests/tests_unit/test_metrics/test_spark_driver_metrics.py new file mode 100644 index 00000000..cd4c5dc9 --- /dev/null +++ b/tests/tests_unit/test_metrics/test_spark_driver_metrics.py @@ -0,0 +1,22 @@ +from onetl._metrics.driver import SparkDriverMetrics + + +def test_spark_metrics_driver_is_empty(): + empty_metrics = SparkDriverMetrics() + assert empty_metrics.is_empty + + metrics1 = SparkDriverMetrics(in_memory_bytes=1_000) + assert metrics1.is_empty + + metrics2 = SparkDriverMetrics(in_memory_bytes=1_000_000) + assert not metrics2.is_empty + + +def test_spark_metrics_driver_details(): + empty_metrics = SparkDriverMetrics() + assert empty_metrics.details == "No data" + assert str(empty_metrics) == empty_metrics.details + + jdbc_metrics = SparkDriverMetrics(in_memory_bytes=1_000_000) + assert jdbc_metrics.details == "In-memory data (approximate): 1.0 MB" + assert str(jdbc_metrics) == jdbc_metrics.details diff --git a/tests/tests_unit/test_metrics/test_spark_executor_metrics.py b/tests/tests_unit/test_metrics/test_spark_executor_metrics.py new file mode 100644 index 00000000..3acd7190 --- /dev/null +++ b/tests/tests_unit/test_metrics/test_spark_executor_metrics.py @@ -0,0 +1,58 @@ +import textwrap +from datetime import timedelta + +from onetl._metrics.executor import SparkExecutorMetrics + + +def test_spark_metrics_executor_is_empty(): + empty_metrics = SparkExecutorMetrics() + assert empty_metrics.is_empty + + run_metrics = SparkExecutorMetrics( + total_run_time=timedelta(microseconds=1), + ) + assert not run_metrics.is_empty + + +def test_spark_metrics_executor_details(): + empty_metrics = SparkExecutorMetrics() + assert empty_metrics.details == "No data" + assert str(empty_metrics) == empty_metrics.details + + full_metrics = SparkExecutorMetrics( + total_run_time=timedelta(hours=2), + 
total_cpu_time=timedelta(hours=1), + peak_memory_bytes=1_000_000_000, + memory_spilled_bytes=2_000_000_000, + disk_spilled_bytes=3_000_000_000, + ) + + assert ( + full_metrics.details + == textwrap.dedent( + """ + Total run time: 2 hours + Total CPU time: 1 hour + Peak memory: 1.0 GB + Memory spilled: 2.0 GB + Disk spilled: 3.0 GB + """, + ).strip() + ) + assert str(full_metrics) == full_metrics.details + + minimal_metrics = SparkExecutorMetrics( + total_run_time=timedelta(seconds=1), + total_cpu_time=timedelta(seconds=1), + ) + + assert ( + minimal_metrics.details + == textwrap.dedent( + """ + Total run time: 1 second + Total CPU time: 1 second + """, + ).strip() + ) + assert str(minimal_metrics) == minimal_metrics.details diff --git a/tests/tests_unit/test_metrics/test_spark_input_metrics.py b/tests/tests_unit/test_metrics/test_spark_input_metrics.py new file mode 100644 index 00000000..0de1a57a --- /dev/null +++ b/tests/tests_unit/test_metrics/test_spark_input_metrics.py @@ -0,0 +1,50 @@ +import textwrap + +from onetl._metrics.input import SparkInputMetrics + + +def test_spark_metrics_input_is_empty(): + empty_metrics = SparkInputMetrics() + assert empty_metrics.is_empty + + metrics1 = SparkInputMetrics(read_rows=1) + assert not metrics1.is_empty + + metrics2 = SparkInputMetrics(read_files=1) + assert not metrics2.is_empty + + metrics3 = SparkInputMetrics(read_bytes=1) + assert not metrics3.is_empty + + +def test_spark_metrics_input_details(): + empty_metrics = SparkInputMetrics() + assert empty_metrics.details == "No data" + assert str(empty_metrics) == empty_metrics.details + + file_df_metrics = SparkInputMetrics( + read_rows=1_000, + read_partitions=4, + read_files=4, + read_bytes=2_000_000, + raw_file_bytes=5_000_000, + ) + + expected = textwrap.dedent( + """ + Read rows: 1000 + Read partitions: 4 + Read files: 4 + Read size: 2.0 MB + Raw files size: 5.0 MB + """, + ) + assert file_df_metrics.details == expected.strip() + assert str(file_df_metrics) == file_df_metrics.details + + jdbc_metrics = SparkInputMetrics( + read_rows=1_000, + ) + + assert jdbc_metrics.details == "Read rows: 1000" + assert str(jdbc_metrics) == jdbc_metrics.details diff --git a/tests/tests_unit/test_metrics/test_spark_output_metrics.py b/tests/tests_unit/test_metrics/test_spark_output_metrics.py new file mode 100644 index 00000000..e8cb9ae7 --- /dev/null +++ b/tests/tests_unit/test_metrics/test_spark_output_metrics.py @@ -0,0 +1,46 @@ +import textwrap + +from onetl._metrics.output import SparkOutputMetrics + + +def test_spark_metrics_output_is_empty(): + empty_metrics = SparkOutputMetrics() + assert empty_metrics.is_empty + + metric1 = SparkOutputMetrics(written_rows=1) + assert not metric1.is_empty + + metric2 = SparkOutputMetrics(written_bytes=1) + assert not metric2.is_empty + + metric3 = SparkOutputMetrics(created_files=1) + assert not metric3.is_empty + + +def test_spark_metrics_output_details(): + empty_metrics = SparkOutputMetrics() + assert empty_metrics.details == "No data" + assert str(empty_metrics) == empty_metrics.details + + hive_metrics = SparkOutputMetrics( + written_rows=1_000, + written_bytes=2_000_000, + created_files=4, + created_partitions=4, + ) + + expected = textwrap.dedent( + """ + Written rows: 1000 + Written size: 2.0 MB + Created files: 4 + Created partitions: 4 + """, + ) + assert hive_metrics.details == expected.strip() + assert str(hive_metrics) == hive_metrics.details + + jdbc_metrics = SparkOutputMetrics(written_rows=1_000) + + assert jdbc_metrics.details == "Written rows: 
1000" + assert str(jdbc_metrics) == jdbc_metrics.details From 3c254053e496cae8d30b70f2c2baa74785fa96b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 8 Aug 2024 09:45:59 +0000 Subject: [PATCH 40/64] [DOP-18571] Collect and log Spark metrics in various method calls --- docs/changelog/next_release/303.feature.1.rst | 1 + docs/changelog/next_release/303.feature.2.rst | 10 +++ onetl/_util/spark.py | 17 ++++- onetl/base/base_db_connection.py | 3 +- onetl/base/base_file_df_connection.py | 4 +- .../db_connection/hive/connection.py | 49 +++++++++++-- .../jdbc_connection/connection.py | 8 ++- .../db_connection/jdbc_mixin/connection.py | 69 +++++++++++-------- .../db_connection/oracle/connection.py | 39 +++-------- onetl/db/db_writer/db_writer.py | 47 ++++++++++--- onetl/file/file_df_writer/file_df_writer.py | 40 +++++++++-- tests/.coveragerc | 2 +- .../test_postgres_integration.py | 4 +- 13 files changed, 208 insertions(+), 85 deletions(-) create mode 100644 docs/changelog/next_release/303.feature.1.rst create mode 100644 docs/changelog/next_release/303.feature.2.rst diff --git a/docs/changelog/next_release/303.feature.1.rst b/docs/changelog/next_release/303.feature.1.rst new file mode 100644 index 00000000..8c0b1e19 --- /dev/null +++ b/docs/changelog/next_release/303.feature.1.rst @@ -0,0 +1 @@ +Log estimated size of in-memory dataframe created by ``JDBC.fetch`` and ``JDBC.execute`` methods. diff --git a/docs/changelog/next_release/303.feature.2.rst b/docs/changelog/next_release/303.feature.2.rst new file mode 100644 index 00000000..92bbe13c --- /dev/null +++ b/docs/changelog/next_release/303.feature.2.rst @@ -0,0 +1,10 @@ +Collect Spark execution metrics in following methods, and log then in DEBUG mode: +* ``DBWriter.run()`` +* ``FileDFWriter.run()`` +* ``Hive.sql()`` +* ``Hive.execute()`` + +This is implemented using custom ``SparkListener`` which wraps the entire method call, and +then report collected metrics. But these metrics sometimes may be missing due to Spark architecture, +so they are not reliable source of information. That's why logs are printed only in DEBUG mode, and +are not returned as method call result. diff --git a/onetl/_util/spark.py b/onetl/_util/spark.py index f172b1c9..2b2edbaf 100644 --- a/onetl/_util/spark.py +++ b/onetl/_util/spark.py @@ -16,7 +16,7 @@ from pydantic import SecretStr # type: ignore[no-redef, assignment] if TYPE_CHECKING: - from pyspark.sql import SparkSession + from pyspark.sql import DataFrame, SparkSession from pyspark.sql.conf import RuntimeConfig @@ -136,6 +136,21 @@ def get_spark_version(spark_session: SparkSession) -> Version: return Version(spark_session.version) +def estimate_dataframe_size(spark_session: SparkSession, df: DataFrame) -> int: + """ + Estimate in-memory DataFrame size in bytes. If cannot be estimated, return 0. + + Using Spark's `SizeEstimator `_. + """ + try: + size_estimator = spark_session._jvm.org.apache.spark.util.SizeEstimator # type: ignore[union-attr] + return size_estimator.estimate(df._jdf) + except Exception: + # SizeEstimator uses Java reflection which may behave differently in different Java versions, + # and also may be prohibited. + return 0 + + def get_executor_total_cores(spark_session: SparkSession, include_driver: bool = False) -> tuple[int | float, dict]: """ Calculate maximum number of cores which can be used by Spark on all executors. 
diff --git a/onetl/base/base_db_connection.py b/onetl/base/base_db_connection.py index f9c7bcac..2c427deb 100644 --- a/onetl/base/base_db_connection.py +++ b/onetl/base/base_db_connection.py @@ -10,7 +10,7 @@ if TYPE_CHECKING: from etl_entities.hwm import HWM - from pyspark.sql import DataFrame + from pyspark.sql import DataFrame, SparkSession from pyspark.sql.types import StructField, StructType @@ -106,6 +106,7 @@ class BaseDBConnection(BaseConnection): Implements generic methods for reading and writing dataframe from/to database-like source """ + spark: SparkSession Dialect = BaseDBDialect @property diff --git a/onetl/base/base_file_df_connection.py b/onetl/base/base_file_df_connection.py index c54390ce..28c57f3c 100644 --- a/onetl/base/base_file_df_connection.py +++ b/onetl/base/base_file_df_connection.py @@ -11,7 +11,7 @@ from onetl.base.pure_path_protocol import PurePathProtocol if TYPE_CHECKING: - from pyspark.sql import DataFrame, DataFrameReader, DataFrameWriter + from pyspark.sql import DataFrame, DataFrameReader, DataFrameWriter, SparkSession from pyspark.sql.types import StructType @@ -72,6 +72,8 @@ class BaseFileDFConnection(BaseConnection): .. versionadded:: 0.9.0 """ + spark: SparkSession + @abstractmethod def check_if_format_supported( self, diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 81c50e87..61032987 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -13,6 +13,7 @@ except (ImportError, AttributeError): from pydantic import validator # type: ignore[no-redef, assignment] +from onetl._metrics.recorder import SparkMetricsRecorder from onetl._util.spark import inject_spark_param from onetl._util.sql import clear_statement from onetl.connection.db_connection.db_connection import DBConnection @@ -210,8 +211,29 @@ def sql( log.info("|%s| Executing SQL query:", self.__class__.__name__) log_lines(log, query) - df = self._execute_sql(query) - log.info("|Spark| DataFrame successfully created from SQL statement") + with SparkMetricsRecorder(self.spark) as recorder: + try: + df = self._execute_sql(query) + except Exception: + log.error("|%s| Query failed", self.__class__.__name__) + + metrics = recorder.metrics() + if log.isEnabledFor(logging.DEBUG) and not metrics.is_empty: + # as SparkListener results are not guaranteed to be received in time, + # some metrics may be missing. To avoid confusion, log only in debug, and with a notice + log.info("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) + raise + + log.info("|Spark| DataFrame successfully created from SQL statement") + + metrics = recorder.metrics() + if log.isEnabledFor(logging.DEBUG) and not metrics.is_empty: + # as SparkListener results are not guaranteed to be received in time, + # some metrics may be missing. 
To avoid confusion, log only in debug, and with a notice + log.info("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) + return df @slot @@ -236,8 +258,27 @@ def execute( log.info("|%s| Executing statement:", self.__class__.__name__) log_lines(log, statement) - self._execute_sql(statement).collect() - log.info("|%s| Call succeeded", self.__class__.__name__) + with SparkMetricsRecorder(self.spark) as recorder: + try: + self._execute_sql(statement).collect() + except Exception: + log.error("|%s| Execution failed", self.__class__.__name__) + metrics = recorder.metrics() + if log.isEnabledFor(logging.DEBUG) and not metrics.is_empty: + # as SparkListener results are not guaranteed to be received in time, + # some metrics may be missing. To avoid confusion, log only in debug, and with a notice + log.info("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) + raise + + log.info("|%s| Execution succeeded", self.__class__.__name__) + + metrics = recorder.metrics() + if log.isEnabledFor(logging.DEBUG) and not metrics.is_empty: + # as SparkListener results are not guaranteed to be received in time, + # some metrics may be missing. To avoid confusion, log only in debug, and with a notice + log.info("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) @slot def write_df_to_target( diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 5b0aebeb..2fc2f7cf 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -92,9 +92,13 @@ def sql( log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) log_lines(log, query) - df = self._query_on_executor(query, self.SQLOptions.parse(options)) + try: + df = self._query_on_executor(query, self.SQLOptions.parse(options)) + except Exception: + log.error("|%s| Query failed!", self.__class__.__name__) + raise - log.info("|Spark| DataFrame successfully created from SQL statement ") + log.info("|Spark| DataFrame successfully created from SQL statement") return df @slot diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index e8c19e38..84276147 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -9,15 +9,14 @@ from enum import Enum, auto from typing import TYPE_CHECKING, Callable, ClassVar, Optional, TypeVar -from onetl.impl.generic_options import GenericOptions - try: from pydantic.v1 import Field, PrivateAttr, SecretStr, validator except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, SecretStr, validator # type: ignore[no-redef, assignment] +from onetl._metrics.command import SparkCommandMetrics from onetl._util.java import get_java_gateway, try_import_java_class -from onetl._util.spark import get_spark_version, stringify +from onetl._util.spark import estimate_dataframe_size, get_spark_version, stringify from onetl._util.sql import clear_statement from onetl._util.version import Version from onetl.connection.db_connection.jdbc_mixin.options import ( @@ -29,7 +28,7 @@ ) from onetl.exception import MISSING_JVM_CLASS_MSG from onetl.hooks import slot, support_hooks 
-from onetl.impl import FrozenModel +from onetl.impl import FrozenModel, GenericOptions from onetl.log import log_lines if TYPE_CHECKING: @@ -204,20 +203,27 @@ def fetch( log.info("|%s| Executing SQL query (on driver):", self.__class__.__name__) log_lines(log, query) - df = self._query_on_driver( - query, - ( - self.FetchOptions.parse(options.dict()) # type: ignore - if isinstance(options, JDBCMixinOptions) - else self.FetchOptions.parse(options) - ), + call_options = ( + self.FetchOptions.parse(options.dict()) # type: ignore + if isinstance(options, JDBCMixinOptions) + else self.FetchOptions.parse(options) ) - log.info( - "|%s| Query succeeded, resulting in-memory dataframe contains %d rows", - self.__class__.__name__, - df.count(), - ) + try: + df = self._query_on_driver(query, call_options) + except Exception: + log.error("|%s| Query failed!", self.__class__.__name__) + raise + + log.info("|%s| Query succeeded, created in-memory dataframe.", self.__class__.__name__) + + # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. + # Just create metrics by hand, and fill them up using information based on dataframe content. + metrics = SparkCommandMetrics() + metrics.input.read_rows = df.count() + metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) + log.info("|%s| Recorded metrics:", self.__class__.__name__) + log_lines(log, str(metrics)) return df @slot @@ -273,17 +279,26 @@ def execute( if isinstance(options, JDBCMixinOptions) else self.ExecuteOptions.parse(options) ) - df = self._call_on_driver(statement, call_options) - - if df is not None: - rows_count = df.count() - log.info( - "|%s| Execution succeeded, resulting in-memory dataframe contains %d rows", - self.__class__.__name__, - rows_count, - ) - else: - log.info("|%s| Execution succeeded, nothing returned", self.__class__.__name__) + + try: + df = self._call_on_driver(statement, call_options) + except Exception: + log.error("|%s| Execution failed!", self.__class__.__name__) + raise + + if not df: + log.info("|%s| Execution succeeded, nothing returned.", self.__class__.__name__) + return None + + log.info("|%s| Execution succeeded, created in-memory dataframe.", self.__class__.__name__) + # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. + # Just create metrics by hand, and fill them up using information based on dataframe content. 
+ metrics = SparkCommandMetrics() + metrics.input.read_rows = df.count() + metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) + + log.info("|%s| Recorded metrics:", self.__class__.__name__) + log_lines(log, str(metrics)) return df @validator("spark") diff --git a/onetl/connection/db_connection/oracle/connection.py b/onetl/connection/db_connection/oracle/connection.py index 04398950..c7669361 100644 --- a/onetl/connection/db_connection/oracle/connection.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -20,14 +20,12 @@ from etl_entities.instance import Host from onetl._util.classproperty import classproperty -from onetl._util.sql import clear_statement from onetl._util.version import Version from onetl.connection.db_connection.jdbc_connection import JDBCConnection from onetl.connection.db_connection.jdbc_connection.options import JDBCReadOptions from onetl.connection.db_connection.jdbc_mixin.options import ( JDBCExecuteOptions, JDBCFetchOptions, - JDBCOptions, ) from onetl.connection.db_connection.oracle.dialect import OracleDialect from onetl.connection.db_connection.oracle.options import ( @@ -43,8 +41,6 @@ from onetl.log import BASE_LOG_INDENT, log_lines # do not import PySpark here, as we allow user to use `Oracle.get_packages()` for creating Spark session - - if TYPE_CHECKING: from pyspark.sql import DataFrame @@ -290,32 +286,6 @@ def get_min_max_values( max_value = int(max_value) return min_value, max_value - @slot - def execute( - self, - statement: str, - options: JDBCOptions | JDBCExecuteOptions | dict | None = None, # noqa: WPS437 - ) -> DataFrame | None: - statement = clear_statement(statement) - - log.info("|%s| Executing statement (on driver):", self.__class__.__name__) - log_lines(log, statement) - - call_options = self.ExecuteOptions.parse(options) - df = self._call_on_driver(statement, call_options) - self._handle_compile_errors(statement.strip(), call_options) - - if df is not None: - rows_count = df.count() - log.info( - "|%s| Execution succeeded, resulting in-memory dataframe contains %d rows", - self.__class__.__name__, - rows_count, - ) - else: - log.info("|%s| Execution succeeded, nothing returned", self.__class__.__name__) - return df - @root_validator def _only_one_of_sid_or_service_name(cls, values): sid = values.get("sid") @@ -329,6 +299,15 @@ def _only_one_of_sid_or_service_name(cls, values): return values + def _call_on_driver( + self, + query: str, + options: JDBCExecuteOptions, + ) -> DataFrame | None: + result = super()._call_on_driver(query, options) + self._handle_compile_errors(query.strip(), options) + return result + def _parse_create_statement(self, statement: str) -> tuple[str, str, str] | None: """ Parses ``CREATE ... 
type_name [schema.]object_name ...`` statement diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index 666fce87..06dbd44c 100644 --- a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations -from logging import getLogger +import logging from typing import TYPE_CHECKING, Optional try: @@ -10,12 +10,15 @@ except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, validator # type: ignore[no-redef, assignment] +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.recorder import SparkMetricsRecorder from onetl.base import BaseDBConnection from onetl.hooks import slot, support_hooks from onetl.impl import FrozenModel, GenericOptions from onetl.log import ( entity_boundary_log, log_dataframe_schema, + log_lines, log_options, log_with_indent, ) @@ -23,7 +26,7 @@ if TYPE_CHECKING: from pyspark.sql import DataFrame -log = getLogger(__name__) +log = logging.getLogger(__name__) @support_hooks @@ -172,7 +175,7 @@ def validate_options(cls, options, values): return None @slot - def run(self, df: DataFrame): + def run(self, df: DataFrame) -> None: """ Method for writing your df to specified target. |support_hooks| @@ -188,7 +191,7 @@ def run(self, df: DataFrame): Examples -------- - Write df to target: + Write dataframe to target: .. code:: python @@ -198,18 +201,37 @@ def run(self, df: DataFrame): raise ValueError(f"DataFrame is streaming. {self.__class__.__name__} supports only batch DataFrames.") entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() starts") - if not self._connection_checked: self._log_parameters() log_dataframe_schema(log, df) self.connection.check() self._connection_checked = True - self.connection.write_df_to_target( - df=df, - target=str(self.target), - **self._get_write_kwargs(), - ) + with SparkMetricsRecorder(self.connection.spark) as recorder: + try: + self.connection.write_df_to_target( + df=df, + target=str(self.target), + **self._get_write_kwargs(), + ) + except Exception: + metrics = recorder.metrics() + # SparkListener is not a reliable source of information, metrics may or may not be present. + # Because of this we also do not return these metrics as method result + if metrics.output.is_empty: + log.error( + "|%s| Error while writing dataframe.", + self.__class__.__name__, + ) + else: + log.error( + "|%s| Error while writing dataframe. 
Target MAY contain partially written data!", + self.__class__.__name__, + ) + self._log_metrics(metrics) + raise + finally: + self._log_metrics(recorder.metrics()) entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") @@ -225,3 +247,8 @@ def _get_write_kwargs(self) -> dict: return {"options": self.options} return {} + + def _log_metrics(self, metrics: SparkCommandMetrics) -> None: + if not metrics.is_empty: + log.debug("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) diff --git a/onetl/file/file_df_writer/file_df_writer.py b/onetl/file/file_df_writer/file_df_writer.py index a80f5480..6431219a 100644 --- a/onetl/file/file_df_writer/file_df_writer.py +++ b/onetl/file/file_df_writer/file_df_writer.py @@ -10,6 +10,8 @@ except (ImportError, AttributeError): from pydantic import PrivateAttr, validator # type: ignore[no-redef, assignment] +from onetl._metrics.command import SparkCommandMetrics +from onetl._metrics.recorder import SparkMetricsRecorder from onetl.base import BaseFileDFConnection, BaseWritableFileFormat, PurePathProtocol from onetl.file.file_df_writer.options import FileDFWriterOptions from onetl.hooks import slot, support_hooks @@ -17,6 +19,7 @@ from onetl.log import ( entity_boundary_log, log_dataframe_schema, + log_lines, log_options, log_with_indent, ) @@ -125,12 +128,32 @@ def run(self, df: DataFrame) -> None: self.connection.check() self._connection_checked = True - self.connection.write_df_as_files( - df=df, - path=self.target_path, - format=self.format, - options=self.options, - ) + with SparkMetricsRecorder(self.connection.spark) as recorder: + try: + self.connection.write_df_as_files( + df=df, + path=self.target_path, + format=self.format, + options=self.options, + ) + except Exception: + metrics = recorder.metrics() + if metrics.output.is_empty: + # SparkListener is not a reliable source of information, metrics may or may not be present. + # Because of this we also do not return these metrics as method result + log.error( + "|%s| Error while writing dataframe.", + self.__class__.__name__, + ) + else: + log.error( + "|%s| Error while writing dataframe. 
Target MAY contain partially written data!", + self.__class__.__name__, + ) + self._log_metrics(metrics) + raise + finally: + self._log_metrics(recorder.metrics()) entity_boundary_log(log, f"{self.__class__.__name__}.run() ends", char="-") @@ -143,6 +166,11 @@ def _log_parameters(self, df: DataFrame) -> None: log_options(log, options_dict) log_dataframe_schema(log, df) + def _log_metrics(self, metrics: SparkCommandMetrics) -> None: + if not metrics.is_empty: + log.debug("|%s| Recorded metrics (some values may be missing!):", self.__class__.__name__) + log_lines(log, str(metrics), level=logging.DEBUG) + @validator("target_path", pre=True) def _validate_target_path(cls, target_path, values): connection: BaseFileDFConnection = values["connection"] diff --git a/tests/.coveragerc b/tests/.coveragerc index 55af8c09..85c7bb2d 100644 --- a/tests/.coveragerc +++ b/tests/.coveragerc @@ -11,6 +11,7 @@ exclude_lines = def __repr__ if self.debug: if settings.DEBUG + if log.isEnabledFor\(logging.DEBUG\) raise AssertionError raise NotImplementedError if __name__ == .__main__.: @@ -20,6 +21,5 @@ exclude_lines = if pyspark_version if spark_version spark = SparkSession._instantiatedSession - if log.isEnabledFor(logging.DEBUG): if sys.version_info except .*ImportError diff --git a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py index b72f8ac1..6cea95cc 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py @@ -1007,7 +1007,7 @@ def test_postgres_connection_sql_options( processing.assert_equal_df(df=df, other_frame=table_df) -def test_postgres_fetch_with_legacy_jdbc_options(spark, processing): +def test_postgres_connection_fetch_with_legacy_jdbc_options(spark, processing): postgres = Postgres( host=processing.host, port=processing.port, @@ -1023,7 +1023,7 @@ def test_postgres_fetch_with_legacy_jdbc_options(spark, processing): assert df is not None -def test_postgres_execute_with_legacy_jdbc_options(spark, processing): +def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing): postgres = Postgres( host=processing.host, port=processing.port, From d2103227553f41d6b0905cce7058ccb3899aad9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 8 Aug 2024 16:21:10 +0000 Subject: [PATCH 41/64] [DOP-18743] Set default jobDescription --- docs/changelog/next_release/304.breaking.rst | 3 + docs/changelog/next_release/304.feature.rst | 6 ++ onetl/_util/hadoop.py | 4 +- onetl/_util/java.py | 2 +- onetl/_util/spark.py | 23 +++++ .../db_connection/clickhouse/connection.py | 3 + .../db_connection/greenplum/connection.py | 3 + .../db_connection/hive/connection.py | 14 ++- .../jdbc_connection/connection.py | 4 +- .../db_connection/jdbc_mixin/connection.py | 77 ++++++++------- .../db_connection/kafka/connection.py | 7 +- .../db_connection/mongodb/connection.py | 20 ++-- .../db_connection/mssql/connection.py | 9 ++ .../db_connection/mysql/connection.py | 3 + .../db_connection/oracle/connection.py | 6 ++ .../db_connection/postgres/connection.py | 3 + .../db_connection/teradata/connection.py | 3 + onetl/connection/file_connection/ftp.py | 5 +- onetl/connection/file_connection/ftps.py | 4 
- .../file_connection/hdfs/connection.py | 5 + onetl/connection/file_connection/s3.py | 5 +- onetl/connection/file_connection/samba.py | 5 +- onetl/connection/file_connection/sftp.py | 5 +- onetl/connection/file_connection/webdav.py | 5 +- .../spark_hdfs/connection.py | 3 + .../file_df_connection/spark_local_fs.py | 4 + .../file_df_connection/spark_s3/connection.py | 5 +- onetl/db/db_reader/db_reader.py | 92 ++++++++++-------- onetl/db/db_writer/db_writer.py | 29 ++++-- onetl/file/file_df_reader/file_df_reader.py | 27 +++--- onetl/file/file_df_writer/file_df_writer.py | 29 ++++-- tests/fixtures/spark.py | 3 +- .../test_clickhouse_unit.py | 4 +- .../test_greenplum_unit.py | 5 +- .../test_kafka_unit.py | 4 + .../test_mongodb_unit.py | 22 +++-- .../test_mssql_unit.py | 6 +- .../test_mysql_unit.py | 6 +- .../test_oracle_unit.py | 6 +- .../test_postgres_unit.py | 5 +- .../test_teradata_unit.py | 5 +- .../test_ftp_unit.py | 33 +++---- .../test_ftps_unit.py | 33 ++++--- .../test_hdfs_unit.py | 97 ++++++++++--------- .../test_s3_unit.py | 56 +++++------ .../test_samba_unit.py | 39 ++++---- .../test_sftp_unit.py | 46 +++++---- .../test_webdav_unit.py | 37 ++++--- .../test_spark_hdfs_unit.py | 39 ++++---- .../test_spark_local_fs_unit.py | 1 + .../test_spark_s3_unit.py | 47 ++++----- 51 files changed, 545 insertions(+), 362 deletions(-) create mode 100644 docs/changelog/next_release/304.breaking.rst create mode 100644 docs/changelog/next_release/304.feature.rst diff --git a/docs/changelog/next_release/304.breaking.rst b/docs/changelog/next_release/304.breaking.rst new file mode 100644 index 00000000..60598321 --- /dev/null +++ b/docs/changelog/next_release/304.breaking.rst @@ -0,0 +1,3 @@ +Change connection URL used for generating HWM names of S3 and Samba sources: +* ``smb://host:port`` -> ``smb://host:port/share`` +* ``s3://host:port`` -> ``s3://host:port/bucket`` diff --git a/docs/changelog/next_release/304.feature.rst b/docs/changelog/next_release/304.feature.rst new file mode 100644 index 00000000..97560354 --- /dev/null +++ b/docs/changelog/next_release/304.feature.rst @@ -0,0 +1,6 @@ +Generate default ``jobDescription`` based on currently executed method. Examples: +* ``DBWriter() -> Postgres[host:5432/database]`` +* ``MongoDB[localhost:27017/admin] -> DBReader.run()`` +* ``Hive[cluster].execute()`` + +If user already set custom ``jobDescription``, it will left intact. 
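For readers skimming the patch, here is a minimal sketch of the behaviour described in ``304.feature.rst``. The host, credentials, dataframe and Spark session below are hypothetical placeholders, and the exact generated description text may differ slightly from what is shown in the comments:

.. code-block:: python

    from onetl.connection import Postgres
    from onetl.db import DBWriter

    # assumes an existing SparkSession `spark` and a batch DataFrame `df`
    postgres = Postgres(
        host="some_host",  # placeholder host
        port=5432,
        database="database",
        user="user",
        password="secret",  # placeholder credentials
        spark=spark,
    )
    writer = DBWriter(connection=postgres, target="schema.table")

    # No jobDescription set by the user: the Spark UI shows a generated one,
    # e.g. "DBWriter.run() -> Postgres[some_host:5432/database]"
    writer.run(df)

    # A description set explicitly by the user takes precedence and is left intact
    spark.sparkContext.setJobDescription("nightly load of schema.table")
    writer.run(df)

The same pattern applies to the other methods wrapped with ``override_job_description()`` in this patch: the default description is set only for the duration of the call and is reset afterwards.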
diff --git a/onetl/_util/hadoop.py b/onetl/_util/hadoop.py index fdf275de..aed572e0 100644 --- a/onetl/_util/hadoop.py +++ b/onetl/_util/hadoop.py @@ -14,7 +14,7 @@ def get_hadoop_version(spark_session: SparkSession) -> Version: """ Get version of Hadoop libraries embedded to Spark """ - jvm = spark_session._jvm # noqa: WPS437 + jvm = spark_session._jvm # noqa: WPS437 # type: ignore[attr-defined] version_info = jvm.org.apache.hadoop.util.VersionInfo # type: ignore[union-attr] hadoop_version: str = version_info.getVersion() return Version(hadoop_version) @@ -24,4 +24,4 @@ def get_hadoop_config(spark_session: SparkSession): """ Get ``org.apache.hadoop.conf.Configuration`` object """ - return spark_session.sparkContext._jsc.hadoopConfiguration() + return spark_session.sparkContext._jsc.hadoopConfiguration() # type: ignore[attr-defined] diff --git a/onetl/_util/java.py b/onetl/_util/java.py index 45111432..1ec50a0d 100644 --- a/onetl/_util/java.py +++ b/onetl/_util/java.py @@ -16,7 +16,7 @@ def get_java_gateway(spark_session: SparkSession) -> JavaGateway: """ Get py4j Java gateway object """ - return spark_session._sc._gateway # noqa: WPS437 # type: ignore + return spark_session._sc._gateway # noqa: WPS437 # type: ignore[attr-defined] def try_import_java_class(spark_session: SparkSession, name: str): diff --git a/onetl/_util/spark.py b/onetl/_util/spark.py index 2b2edbaf..f7d018b3 100644 --- a/onetl/_util/spark.py +++ b/onetl/_util/spark.py @@ -19,6 +19,9 @@ from pyspark.sql import DataFrame, SparkSession from pyspark.sql.conf import RuntimeConfig +SPARK_JOB_DESCRIPTION_PROPERTY = "spark.job.description" +SPARK_JOB_GROUP_PROPERTY = "spark.jobGroup.id" + def stringify(value: Any, quote: bool = False) -> Any: # noqa: WPS212 """ @@ -200,3 +203,23 @@ def get_executor_total_cores(spark_session: SparkSession, include_driver: bool = expected_cores += 1 return expected_cores, config + + +@contextmanager +def override_job_description(spark_session: SparkSession, job_description: str): + """ + Override Spark job description. + + Unlike ``spark_session.sparkContext.setJobDescription``, this method resets job description + before exiting the context manager, instead of keeping it. + + If user set custom description, it will be left intact. 
+ """ + spark_context = spark_session.sparkContext + original_description = spark_context.getLocalProperty(SPARK_JOB_DESCRIPTION_PROPERTY) + + try: + spark_context.setLocalProperty(SPARK_JOB_DESCRIPTION_PROPERTY, original_description or job_description) + yield + finally: + spark_context.setLocalProperty(SPARK_JOB_DESCRIPTION_PROPERTY, original_description) # type: ignore[arg-type] diff --git a/onetl/connection/db_connection/clickhouse/connection.py b/onetl/connection/db_connection/clickhouse/connection.py index 0ca6d0ce..482cc941 100644 --- a/onetl/connection/db_connection/clickhouse/connection.py +++ b/onetl/connection/db_connection/clickhouse/connection.py @@ -196,6 +196,9 @@ def jdbc_params(self) -> dict: def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" + @staticmethod def _build_statement( statement: str, diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index 7ed60539..0f40436f 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -267,6 +267,9 @@ def package_spark_3_2(cls) -> str: def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}/{self.database}" + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.database}]" + @property def jdbc_url(self) -> str: return f"jdbc:postgresql://{self.host}:{self.port}/{self.database}" diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 61032987..855a0ead 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -14,7 +14,7 @@ from pydantic import validator # type: ignore[no-redef, assignment] from onetl._metrics.recorder import SparkMetricsRecorder -from onetl._util.spark import inject_spark_param +from onetl._util.spark import inject_spark_param, override_job_description from onetl._util.sql import clear_statement from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.hive.dialect import HiveDialect @@ -159,6 +159,9 @@ def get_current(cls, spark: SparkSession): def instance_url(self) -> str: return self.cluster + def __str__(self): + return f"{self.__class__.__name__}[{self.cluster}]" + @slot def check(self): log.debug("|%s| Detecting current cluster...", self.__class__.__name__) @@ -173,7 +176,8 @@ def check(self): log_lines(log, self._CHECK_QUERY, level=logging.DEBUG) try: - self._execute_sql(self._CHECK_QUERY).limit(1).collect() + with override_job_description(self.spark, f"{self}.check()"): + self._execute_sql(self._CHECK_QUERY).limit(1).collect() log.info("|%s| Connection is available.", self.__class__.__name__) except Exception as e: log.exception("|%s| Connection is unavailable", self.__class__.__name__) @@ -213,7 +217,8 @@ def sql( with SparkMetricsRecorder(self.spark) as recorder: try: - df = self._execute_sql(query) + with override_job_description(self.spark, f"{self}.sql()"): + df = self._execute_sql(query) except Exception: log.error("|%s| Query failed", self.__class__.__name__) @@ -260,7 +265,8 @@ def execute( with SparkMetricsRecorder(self.spark) as recorder: try: - self._execute_sql(statement).collect() + with override_job_description(self.spark, f"{self}.execute()"): + 
self._execute_sql(statement).collect() except Exception: log.error("|%s| Execution failed", self.__class__.__name__) metrics = recorder.metrics() diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 2fc2f7cf..9d41298e 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -7,6 +7,7 @@ import warnings from typing import TYPE_CHECKING, Any +from onetl._util.spark import override_job_description from onetl._util.sql import clear_statement from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect @@ -93,7 +94,8 @@ def sql( log_lines(log, query) try: - df = self._query_on_executor(query, self.SQLOptions.parse(options)) + with override_job_description(self.spark, f"{self}.sql()"): + df = self._query_on_executor(query, self.SQLOptions.parse(options)) except Exception: log.error("|%s| Query failed!", self.__class__.__name__) raise diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index 84276147..8ec77d13 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -16,7 +16,12 @@ from onetl._metrics.command import SparkCommandMetrics from onetl._util.java import get_java_gateway, try_import_java_class -from onetl._util.spark import estimate_dataframe_size, get_spark_version, stringify +from onetl._util.spark import ( + estimate_dataframe_size, + get_spark_version, + override_job_description, + stringify, +) from onetl._util.sql import clear_statement from onetl._util.version import Version from onetl.connection.db_connection.jdbc_mixin.options import ( @@ -209,21 +214,22 @@ def fetch( else self.FetchOptions.parse(options) ) - try: - df = self._query_on_driver(query, call_options) - except Exception: - log.error("|%s| Query failed!", self.__class__.__name__) - raise - - log.info("|%s| Query succeeded, created in-memory dataframe.", self.__class__.__name__) - - # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. - # Just create metrics by hand, and fill them up using information based on dataframe content. - metrics = SparkCommandMetrics() - metrics.input.read_rows = df.count() - metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) - log.info("|%s| Recorded metrics:", self.__class__.__name__) - log_lines(log, str(metrics)) + with override_job_description(self.spark, f"{self}.fetch()"): + try: + df = self._query_on_driver(query, call_options) + except Exception: + log.error("|%s| Query failed!", self.__class__.__name__) + raise + + log.info("|%s| Query succeeded, created in-memory dataframe.", self.__class__.__name__) + + # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. + # Just create metrics by hand, and fill them up using information based on dataframe content. 
+ metrics = SparkCommandMetrics() + metrics.input.read_rows = df.count() + metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) + log.info("|%s| Recorded metrics:", self.__class__.__name__) + log_lines(log, str(metrics)) return df @slot @@ -280,25 +286,26 @@ def execute( else self.ExecuteOptions.parse(options) ) - try: - df = self._call_on_driver(statement, call_options) - except Exception: - log.error("|%s| Execution failed!", self.__class__.__name__) - raise - - if not df: - log.info("|%s| Execution succeeded, nothing returned.", self.__class__.__name__) - return None - - log.info("|%s| Execution succeeded, created in-memory dataframe.", self.__class__.__name__) - # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. - # Just create metrics by hand, and fill them up using information based on dataframe content. - metrics = SparkCommandMetrics() - metrics.input.read_rows = df.count() - metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) - - log.info("|%s| Recorded metrics:", self.__class__.__name__) - log_lines(log, str(metrics)) + with override_job_description(self.spark, f"{self}.execute()"): + try: + df = self._call_on_driver(statement, call_options) + except Exception: + log.error("|%s| Execution failed!", self.__class__.__name__) + raise + + if not df: + log.info("|%s| Execution succeeded, nothing returned.", self.__class__.__name__) + return None + + log.info("|%s| Execution succeeded, created in-memory dataframe.", self.__class__.__name__) + # as we don't actually use Spark for this method, SparkMetricsRecorder is useless. + # Just create metrics by hand, and fill them up using information based on dataframe content. + metrics = SparkCommandMetrics() + metrics.input.read_rows = df.count() + metrics.driver.in_memory_bytes = estimate_dataframe_size(self.spark, df) + + log.info("|%s| Recorded metrics:", self.__class__.__name__) + log_lines(log, str(metrics)) return df @validator("spark") diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index ce3829e4..b404eafb 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -497,7 +497,7 @@ def get_min_max_values( # https://kafka.apache.org/22/javadoc/org/apache/kafka/clients/consumer/KafkaConsumer.html#partitionsFor-java.lang.String- partition_infos = consumer.partitionsFor(source) - jvm = self.spark._jvm + jvm = self.spark._jvm # type: ignore[attr-defined] topic_partitions = [ jvm.org.apache.kafka.common.TopicPartition(source, p.partition()) # type: ignore[union-attr] for p in partition_infos @@ -542,6 +542,9 @@ def get_min_max_values( def instance_url(self): return "kafka://" + self.cluster + def __str__(self): + return f"{self.__class__.__name__}[{self.cluster}]" + @root_validator(pre=True) def _get_addresses_by_cluster(cls, values): cluster = values.get("cluster") @@ -639,7 +642,7 @@ def _get_java_consumer(self): return consumer_class(connection_properties) def _get_topics(self, timeout: int = 10) -> set[str]: - jvm = self.spark._jvm + jvm = self.spark._jvm # type: ignore[attr-defined] # Maybe we should not pass explicit timeout at all, # and instead use default.api.timeout.ms which is configurable via self.extra. 
# Think about this next time if someone see issues in real use diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 568cd953..f81a3bf8 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -18,7 +18,7 @@ from onetl._util.classproperty import classproperty from onetl._util.java import try_import_java_class from onetl._util.scala import get_default_scala_version -from onetl._util.spark import get_spark_version +from onetl._util.spark import get_spark_version, override_job_description from onetl._util.version import Version from onetl.connection.db_connection.db_connection import DBConnection from onetl.connection.db_connection.mongodb.dialect import MongoDBDialect @@ -347,17 +347,25 @@ def pipeline( if pipeline: read_options["aggregation.pipeline"] = json.dumps(pipeline) read_options["connection.uri"] = self.connection_url - spark_reader = self.spark.read.format("mongodb").options(**read_options) - if df_schema: - spark_reader = spark_reader.schema(df_schema) + with override_job_description( + self.spark, + f"{self}.pipeline()", + ): + spark_reader = self.spark.read.format("mongodb").options(**read_options) - return spark_reader.load() + if df_schema: + spark_reader = spark_reader.schema(df_schema) + + return spark_reader.load() @property def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}/{self.database}" + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.database}]" + @slot def check(self): log.info("|%s| Checking connection availability...", self.__class__.__name__) @@ -532,7 +540,7 @@ def _check_java_class_imported(cls, spark): return spark def _collection_exists(self, source: str) -> bool: - jvm = self.spark._jvm + jvm = self.spark._jvm # type: ignore[attr-defined] client = jvm.com.mongodb.client.MongoClients.create(self.connection_url) # type: ignore collections = set(client.getDatabase(self.database).listCollectionNames().iterator()) if source in collections: diff --git a/onetl/connection/db_connection/mssql/connection.py b/onetl/connection/db_connection/mssql/connection.py index 556cb4cb..f2a29b44 100644 --- a/onetl/connection/db_connection/mssql/connection.py +++ b/onetl/connection/db_connection/mssql/connection.py @@ -268,3 +268,12 @@ def instance_url(self) -> str: # for backward compatibility keep port number in legacy HWM instance url port = self.port or 1433 return f"{self.__class__.__name__.lower()}://{self.host}:{port}/{self.database}" + + def __str__(self): + extra_dict = self.extra.dict(by_alias=True) + instance_name = extra_dict.get("instanceName") + if instance_name: + return rf"{self.__class__.__name__}[{self.host}\{instance_name}/{self.database}]" + + port = self.port or 1433 + return f"{self.__class__.__name__}[{self.host}:{port}/{self.database}]" diff --git a/onetl/connection/db_connection/mysql/connection.py b/onetl/connection/db_connection/mysql/connection.py index 72090d58..e3c91196 100644 --- a/onetl/connection/db_connection/mysql/connection.py +++ b/onetl/connection/db_connection/mysql/connection.py @@ -175,3 +175,6 @@ def jdbc_params(self) -> dict: @property def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" diff --git a/onetl/connection/db_connection/oracle/connection.py 
b/onetl/connection/db_connection/oracle/connection.py index c7669361..40164fe1 100644 --- a/onetl/connection/db_connection/oracle/connection.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -262,6 +262,12 @@ def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}/{self.service_name}" + def __str__(self): + if self.sid: + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.sid}]" + + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.service_name}]" + @slot def get_min_max_values( self, diff --git a/onetl/connection/db_connection/postgres/connection.py b/onetl/connection/db_connection/postgres/connection.py index 132d9727..1c11d9e3 100644 --- a/onetl/connection/db_connection/postgres/connection.py +++ b/onetl/connection/db_connection/postgres/connection.py @@ -182,6 +182,9 @@ def jdbc_params(self) -> dict[str, str]: def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}/{self.database}" + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.database}]" + def _options_to_connection_properties( self, options: JDBCFetchOptions | JDBCExecuteOptions, diff --git a/onetl/connection/db_connection/teradata/connection.py b/onetl/connection/db_connection/teradata/connection.py index 6ef2637b..9c8f073c 100644 --- a/onetl/connection/db_connection/teradata/connection.py +++ b/onetl/connection/db_connection/teradata/connection.py @@ -208,3 +208,6 @@ def jdbc_url(self) -> str: @property def instance_url(self) -> str: return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" diff --git a/onetl/connection/file_connection/ftp.py b/onetl/connection/file_connection/ftp.py index b457b966..d5ff5216 100644 --- a/onetl/connection/file_connection/ftp.py +++ b/onetl/connection/file_connection/ftp.py @@ -105,7 +105,10 @@ class FTP(FileConnection, RenameDirMixin): @property def instance_url(self) -> str: - return f"ftp://{self.host}:{self.port}" + return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" @slot def path_exists(self, path: os.PathLike | str) -> bool: diff --git a/onetl/connection/file_connection/ftps.py b/onetl/connection/file_connection/ftps.py index 8cf9aa8f..0180edf4 100644 --- a/onetl/connection/file_connection/ftps.py +++ b/onetl/connection/file_connection/ftps.py @@ -95,10 +95,6 @@ class FTPS(FTP): ) """ - @property - def instance_url(self) -> str: - return f"ftps://{self.host}:{self.port}" - def _get_client(self) -> FTPHost: """ Returns a FTPS connection object diff --git a/onetl/connection/file_connection/hdfs/connection.py b/onetl/connection/file_connection/hdfs/connection.py index 056622fb..89c0ec96 100644 --- a/onetl/connection/file_connection/hdfs/connection.py +++ b/onetl/connection/file_connection/hdfs/connection.py @@ -264,6 +264,11 @@ def instance_url(self) -> str: return self.cluster return f"hdfs://{self.host}:{self.webhdfs_port}" + def __str__(self): + if self.cluster: + return f"{self.__class__.__name__}[{self.cluster}]" + return f"{self.__class__.__name__}[{self.host}:{self.webhdfs_port}]" + @slot def path_exists(self, path: os.PathLike | str) -> bool: return self.client.status(os.fspath(path), strict=False) diff --git a/onetl/connection/file_connection/s3.py b/onetl/connection/file_connection/s3.py index f8f584dc..0f411c85 100644 
--- a/onetl/connection/file_connection/s3.py +++ b/onetl/connection/file_connection/s3.py @@ -131,7 +131,10 @@ def validate_port(cls, values): @property def instance_url(self) -> str: - return f"s3://{self.host}:{self.port}" + return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}/{self.bucket}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.bucket}]" @slot def create_dir(self, path: os.PathLike | str) -> RemoteDirectory: diff --git a/onetl/connection/file_connection/samba.py b/onetl/connection/file_connection/samba.py index 9fc0857f..430e15a7 100644 --- a/onetl/connection/file_connection/samba.py +++ b/onetl/connection/file_connection/samba.py @@ -125,7 +125,10 @@ class Samba(FileConnection): @property def instance_url(self) -> str: - return f"smb://{self.host}:{self.port}" + return f"smb://{self.host}:{self.port}/{self.share}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}/{self.share}]" @slot def check(self): diff --git a/onetl/connection/file_connection/sftp.py b/onetl/connection/file_connection/sftp.py index 8cd2ac1e..92db2adc 100644 --- a/onetl/connection/file_connection/sftp.py +++ b/onetl/connection/file_connection/sftp.py @@ -120,7 +120,10 @@ class SFTP(FileConnection, RenameDirMixin): @property def instance_url(self) -> str: - return f"sftp://{self.host}:{self.port}" + return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" @slot def path_exists(self, path: os.PathLike | str) -> bool: diff --git a/onetl/connection/file_connection/webdav.py b/onetl/connection/file_connection/webdav.py index aa540567..44ac766a 100644 --- a/onetl/connection/file_connection/webdav.py +++ b/onetl/connection/file_connection/webdav.py @@ -130,7 +130,10 @@ def check_port(cls, values): @property def instance_url(self) -> str: - return f"webdav://{self.host}:{self.port}" + return f"{self.__class__.__name__.lower()}://{self.host}:{self.port}" + + def __str__(self): + return f"{self.__class__.__name__}[{self.host}:{self.port}]" @slot def path_exists(self, path: os.PathLike | str) -> bool: diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 26c1416e..10ff1005 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -164,6 +164,9 @@ def path_from_string(self, path: os.PathLike | str) -> Path: def instance_url(self): return self.cluster + def __str__(self): + return f"HDFS[{self.cluster}]" + def __enter__(self): return self diff --git a/onetl/connection/file_df_connection/spark_local_fs.py b/onetl/connection/file_df_connection/spark_local_fs.py index 839cbdae..71c70414 100644 --- a/onetl/connection/file_df_connection/spark_local_fs.py +++ b/onetl/connection/file_df_connection/spark_local_fs.py @@ -74,6 +74,10 @@ def instance_url(self): fqdn = socket.getfqdn() return f"file://{fqdn}" + def __str__(self): + # str should not make network requests + return "LocalFS" + @validator("spark") def _validate_spark(cls, spark): master = spark.conf.get("spark.master") diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index 1efe39d4..eb74d698 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py 
@@ -256,7 +256,10 @@ def path_from_string(self, path: os.PathLike | str) -> RemotePath: @property def instance_url(self): - return f"s3://{self.host}:{self.port}" + return f"s3://{self.host}:{self.port}/{self.bucket}" + + def __str__(self): + return f"S3[{self.host}:{self.port}/{self.bucket}]" def __enter__(self): return self diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index 91b3f21b..f560104d 100644 --- a/onetl/db/db_reader/db_reader.py +++ b/onetl/db/db_reader/db_reader.py @@ -17,7 +17,7 @@ except (ImportError, AttributeError): from pydantic import Field, PrivateAttr, root_validator, validator # type: ignore[no-redef, assignment] -from onetl._util.spark import try_import_pyspark +from onetl._util.spark import override_job_description, try_import_pyspark from onetl.base import ( BaseDBConnection, ContainsGetDFSchemaMethod, @@ -542,26 +542,30 @@ def has_data(self) -> bool: """ self._check_strategy() - if not self._connection_checked: - self._log_parameters() - self.connection.check() - - window, limit = self._calculate_window_and_limit() - if limit == 0: - return False - - df = self.connection.read_source_as_df( - source=str(self.source), - columns=self.columns, - hint=self.hint, - where=self.where, - df_schema=self.df_schema, - window=window, - limit=1, - **self._get_read_kwargs(), - ) + with override_job_description( + self.connection.spark, + f"{self.connection} -> {self.__class__.__name__}.has_data()", + ): + if not self._connection_checked: + self._log_parameters() + self.connection.check() + + window, limit = self._calculate_window_and_limit() + if limit == 0: + return False + + df = self.connection.read_source_as_df( + source=str(self.source), + columns=self.columns, + hint=self.hint, + where=self.where, + df_schema=self.df_schema, + window=window, + limit=1, + **self._get_read_kwargs(), + ) - return bool(df.take(1)) + return bool(df.take(1)) @slot def raise_if_no_data(self) -> None: @@ -633,28 +637,32 @@ def run(self) -> DataFrame: self._check_strategy() - if not self._connection_checked: - self._log_parameters() - self.connection.check() - self._connection_checked = True - - window, limit = self._calculate_window_and_limit() - - # update the HWM with the stop value - if self.hwm and window: - strategy: HWMStrategy = StrategyManager.get_current() # type: ignore[assignment] - strategy.update_hwm(window.stop_at.value) - - df = self.connection.read_source_as_df( - source=str(self.source), - columns=self.columns, - hint=self.hint, - where=self.where, - df_schema=self.df_schema, - window=window, - limit=limit, - **self._get_read_kwargs(), - ) + with override_job_description( + self.connection.spark, + f"{self.connection} -> {self.__class__.__name__}.run()", + ): + if not self._connection_checked: + self._log_parameters() + self.connection.check() + self._connection_checked = True + + window, limit = self._calculate_window_and_limit() + + # update the HWM with the stop value + if self.hwm and window: + strategy: HWMStrategy = StrategyManager.get_current() # type: ignore[assignment] + strategy.update_hwm(window.stop_at.value) + + df = self.connection.read_source_as_df( + source=str(self.source), + columns=self.columns, + hint=self.hint, + where=self.where, + df_schema=self.df_schema, + window=window, + limit=limit, + **self._get_read_kwargs(), + ) entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index 06dbd44c..0b07ec4e 100644 --- 
a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -12,6 +12,7 @@ from onetl._metrics.command import SparkCommandMetrics from onetl._metrics.recorder import SparkMetricsRecorder +from onetl._util.spark import override_job_description from onetl.base import BaseDBConnection from onetl.hooks import slot, support_hooks from onetl.impl import FrozenModel, GenericOptions @@ -201,19 +202,27 @@ def run(self, df: DataFrame) -> None: raise ValueError(f"DataFrame is streaming. {self.__class__.__name__} supports only batch DataFrames.") entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() starts") - if not self._connection_checked: - self._log_parameters() - log_dataframe_schema(log, df) - self.connection.check() - self._connection_checked = True + with override_job_description( + self.connection.spark, + f"{self.__class__.__name__}.run() -> {self.connection}", + ): + if not self._connection_checked: + self._log_parameters() + log_dataframe_schema(log, df) + self.connection.check() + self._connection_checked = True with SparkMetricsRecorder(self.connection.spark) as recorder: try: - self.connection.write_df_to_target( - df=df, - target=str(self.target), - **self._get_write_kwargs(), - ) + with override_job_description( + self.connection.spark, + f"{self.__class__.__name__}.run() -> {self.connection}", + ): + self.connection.write_df_to_target( + df=df, + target=str(self.target), + **self._get_write_kwargs(), + ) except Exception: metrics = recorder.metrics() # SparkListener is not a reliable source of information, metrics may or may not be present. diff --git a/onetl/file/file_df_reader/file_df_reader.py b/onetl/file/file_df_reader/file_df_reader.py index b18fc179..f1e2f01e 100644 --- a/onetl/file/file_df_reader/file_df_reader.py +++ b/onetl/file/file_df_reader/file_df_reader.py @@ -13,7 +13,7 @@ except (ImportError, AttributeError): from pydantic import PrivateAttr, validator # type: ignore[no-redef, assignment] -from onetl._util.spark import try_import_pyspark +from onetl._util.spark import override_job_description, try_import_pyspark from onetl.base import BaseFileDFConnection, BaseReadableFileFormat, PurePathProtocol from onetl.file.file_df_reader.options import FileDFReaderOptions from onetl.file.file_set import FileSet @@ -211,18 +211,23 @@ def run(self, files: Iterable[str | os.PathLike] | None = None) -> DataFrame: if not self._connection_checked: self._log_parameters(files) - paths: FileSet[PurePathProtocol] = FileSet() - if files is not None: - paths = FileSet(self._validate_files(files)) - elif self.source_path: - paths = FileSet([self.source_path]) + with override_job_description( + self.connection.spark, + f"{self.connection} -> {self.__class__.__name__}.run()", + ): + paths: FileSet[PurePathProtocol] = FileSet() + if files is not None: + paths = FileSet(self._validate_files(files)) + elif self.source_path: + paths = FileSet([self.source_path]) - if not self._connection_checked: - self.connection.check() - log_with_indent(log, "") - self._connection_checked = True + if not self._connection_checked: + self.connection.check() + log_with_indent(log, "") + self._connection_checked = True + + df = self._read_files(paths) - df = self._read_files(paths) entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df diff --git a/onetl/file/file_df_writer/file_df_writer.py b/onetl/file/file_df_writer/file_df_writer.py index 6431219a..35baaf15 100644 --- a/onetl/file/file_df_writer/file_df_writer.py +++ 
b/onetl/file/file_df_writer/file_df_writer.py @@ -12,6 +12,7 @@ from onetl._metrics.command import SparkCommandMetrics from onetl._metrics.recorder import SparkMetricsRecorder +from onetl._util.spark import override_job_description from onetl.base import BaseFileDFConnection, BaseWritableFileFormat, PurePathProtocol from onetl.file.file_df_writer.options import FileDFWriterOptions from onetl.hooks import slot, support_hooks @@ -123,19 +124,27 @@ def run(self, df: DataFrame) -> None: if df.isStreaming: raise ValueError(f"DataFrame is streaming. {self.__class__.__name__} supports only batch DataFrames.") - if not self._connection_checked: - self._log_parameters(df) - self.connection.check() - self._connection_checked = True + with override_job_description( + self.connection.spark, + f"{self.__class__.__name__}.run() -> {self.connection}", + ): + if not self._connection_checked: + self._log_parameters(df) + self.connection.check() + self._connection_checked = True with SparkMetricsRecorder(self.connection.spark) as recorder: try: - self.connection.write_df_as_files( - df=df, - path=self.target_path, - format=self.format, - options=self.options, - ) + with override_job_description( + self.connection.spark, + f"{self.__class__.__name__}.run() -> {self.connection}", + ): + self.connection.write_df_as_files( + df=df, + path=self.target_path, + format=self.format, + options=self.options, + ) except Exception: metrics = recorder.metrics() if metrics.output.is_empty: diff --git a/tests/fixtures/spark.py b/tests/fixtures/spark.py index e7248e84..7a9b812a 100644 --- a/tests/fixtures/spark.py +++ b/tests/fixtures/spark.py @@ -123,12 +123,11 @@ def excluded_packages(): @pytest.fixture( scope="session", - name="spark", params=[ pytest.param("real-spark", marks=[pytest.mark.db_connection, pytest.mark.connection]), ], ) -def get_spark_session(warehouse_dir, spark_metastore_dir, ivysettings_path, maven_packages, excluded_packages): +def spark(warehouse_dir, spark_metastore_dir, ivysettings_path, maven_packages, excluded_packages): from pyspark.sql import SparkSession spark = ( diff --git a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py index ff36e0a6..287061d2 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py @@ -128,10 +128,10 @@ def test_clickhouse(spark_mock): "url": "jdbc:clickhouse://some_host:8123/database", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "clickhouse://some_host:8123" + assert str(conn) == "Clickhouse[some_host:8123]" def test_clickhouse_with_port(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py index 0d382d44..47821642 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py @@ -129,10 +129,10 @@ def test_greenplum(spark_mock): "tcpKeepAlive": "true", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "greenplum://some_host:5432/database" + assert str(conn) == "Greenplum[some_host:5432/database]" def test_greenplum_with_port(spark_mock): @@ -156,6 +156,7 @@ def 
test_greenplum_with_port(spark_mock): } assert conn.instance_url == "greenplum://some_host:5000/database" + assert str(conn) == "Greenplum[some_host:5000/database]" def test_greenplum_without_database_error(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py b/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py index 2e0ccd1a..74101388 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_kafka_unit.py @@ -181,6 +181,7 @@ def test_kafka_basic_auth_get_jaas_conf(spark_mock): assert conn.addresses == ["192.168.1.1"] assert conn.instance_url == "kafka://some_cluster" + assert str(conn) == "Kafka[some_cluster]" def test_kafka_anon_auth(spark_mock): @@ -194,6 +195,7 @@ def test_kafka_anon_auth(spark_mock): assert conn.addresses == ["192.168.1.1"] assert conn.instance_url == "kafka://some_cluster" + assert str(conn) == "Kafka[some_cluster]" @pytest.mark.parametrize("digest", ["SHA-256", "SHA-512"]) @@ -217,6 +219,7 @@ def test_kafka_scram_auth(spark_mock, digest): assert conn.addresses == ["192.168.1.1"] assert conn.instance_url == "kafka://some_cluster" + assert str(conn) == "Kafka[some_cluster]" def test_kafka_auth_keytab(spark_mock, create_keytab): @@ -235,6 +238,7 @@ def test_kafka_auth_keytab(spark_mock, create_keytab): assert conn.addresses == ["192.168.1.1"] assert conn.instance_url == "kafka://some_cluster" + assert str(conn) == "Kafka[some_cluster]" def test_kafka_empty_addresses(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py index f494e3de..9142848e 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py @@ -126,9 +126,10 @@ def test_mongodb(spark_mock): assert conn.database == "database" assert conn.connection_url == "mongodb://user:password@host:27017/database" + assert conn.instance_url == "mongodb://host:27017/database" + assert str(conn) == "MongoDB[host:27017/database]" - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) @pytest.mark.parametrize( @@ -150,7 +151,7 @@ def test_mongodb_options_hint(): def test_mongodb_with_port(spark_mock): - mongo = MongoDB( + conn = MongoDB( host="host", user="user", password="password", @@ -159,14 +160,15 @@ def test_mongodb_with_port(spark_mock): spark=spark_mock, ) - assert mongo.host == "host" - assert mongo.port == 12345 - assert mongo.user == "user" - assert mongo.password != "password" - assert mongo.password.get_secret_value() == "password" - assert mongo.database == "database" + assert conn.host == "host" + assert conn.port == 12345 + assert conn.user == "user" + assert conn.password != "password" + assert conn.password.get_secret_value() == "password" + assert conn.database == "database" - assert mongo.connection_url == "mongodb://user:password@host:12345/database" + assert conn.connection_url == "mongodb://user:password@host:12345/database" + assert conn.instance_url == "mongodb://host:12345/database" def test_mongodb_without_mandatory_args(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py index e1a18aa9..d9f3cfda 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py @@ 
-101,10 +101,10 @@ def test_mssql(spark_mock): "databaseName": "database", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "mssql://some_host:1433/database" + assert str(conn) == "MSSQL[some_host:1433/database]" def test_mssql_with_custom_port(spark_mock): @@ -127,6 +127,7 @@ def test_mssql_with_custom_port(spark_mock): } assert conn.instance_url == "mssql://some_host:5000/database" + assert str(conn) == "MSSQL[some_host:5000/database]" def test_mssql_with_instance_name(spark_mock): @@ -157,6 +158,7 @@ def test_mssql_with_instance_name(spark_mock): } assert conn.instance_url == "mssql://some_host\\myinstance/database" + assert str(conn) == "MSSQL[some_host\\myinstance/database]" def test_mssql_without_database_error(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py index f2c68d93..0d57da48 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py @@ -89,10 +89,10 @@ def test_mysql(spark_mock): "useUnicode": "yes", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "mysql://some_host:3306" + assert str(conn) == "MySQL[some_host:3306]" def test_mysql_with_port(spark_mock): @@ -116,6 +116,7 @@ def test_mysql_with_port(spark_mock): } assert conn.instance_url == "mysql://some_host:5000" + assert str(conn) == "MySQL[some_host:5000]" def test_mysql_without_database(spark_mock): @@ -139,6 +140,7 @@ def test_mysql_without_database(spark_mock): } assert conn.instance_url == "mysql://some_host:3306" + assert str(conn) == "MySQL[some_host:3306]" def test_mysql_with_extra(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py b/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py index ae7bf87c..dd02b5c9 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py @@ -110,10 +110,10 @@ def test_oracle(spark_mock): "url": "jdbc:oracle:thin:@some_host:1521:sid", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "oracle://some_host:1521/sid" + assert str(conn) == "Oracle[some_host:1521/sid]" def test_oracle_with_port(spark_mock): @@ -135,6 +135,7 @@ def test_oracle_with_port(spark_mock): } assert conn.instance_url == "oracle://some_host:5000/sid" + assert str(conn) == "Oracle[some_host:5000/sid]" def test_oracle_uri_with_service_name(spark_mock): @@ -149,6 +150,7 @@ def test_oracle_uri_with_service_name(spark_mock): } assert conn.instance_url == "oracle://some_host:1521/service" + assert str(conn) == "Oracle[some_host:1521/service]" def test_oracle_without_sid_and_service_name(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py b/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py index 6e37417a..2b0080bf 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py @@ -90,10 +90,10 @@ def test_postgres(spark_mock): "stringtype": "unspecified", } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert 
"passwd" not in repr(conn) assert conn.instance_url == "postgres://some_host:5432/database" + assert str(conn) == "Postgres[some_host:5432/database]" def test_postgres_with_port(spark_mock): @@ -118,6 +118,7 @@ def test_postgres_with_port(spark_mock): } assert conn.instance_url == "postgres://some_host:5000/database" + assert str(conn) == "Postgres[some_host:5000/database]" def test_postgres_without_database_error(spark_mock): diff --git a/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py b/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py index bef65a55..39c90cce 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_teradata_unit.py @@ -89,10 +89,10 @@ def test_teradata(spark_mock): "url": conn.jdbc_url, } - assert "password='passwd'" not in str(conn) - assert "password='passwd'" not in repr(conn) + assert "passwd" not in repr(conn) assert conn.instance_url == "teradata://some_host:1025" + assert str(conn) == "Teradata[some_host:1025]" def test_teradata_with_port(spark_mock): @@ -117,6 +117,7 @@ def test_teradata_with_port(spark_mock): } assert conn.instance_url == "teradata://some_host:5000" + assert str(conn) == "Teradata[some_host:5000]" def test_teradata_without_database(spark_mock): diff --git a/tests/tests_unit/tests_file_connection_unit/test_ftp_unit.py b/tests/tests_unit/tests_file_connection_unit/test_ftp_unit.py index ab47c248..33f6b29f 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_ftp_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_ftp_unit.py @@ -8,35 +8,34 @@ def test_ftp_connection(): from onetl.connection import FTP - ftp = FTP(host="some_host", user="some_user", password="pwd") - assert isinstance(ftp, FileConnection) - assert ftp.host == "some_host" - assert ftp.user == "some_user" - assert ftp.password != "pwd" - assert ftp.password.get_secret_value() == "pwd" - assert ftp.port == 21 + conn = FTP(host="some_host", user="some_user", password="pwd") + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" + assert conn.port == 21 - assert "password='pwd'" not in str(ftp) - assert "password='pwd'" not in repr(ftp) + assert str(conn) == "FTP[some_host:21]" + assert "pwd" not in repr(conn) def test_ftp_connection_anonymous(): from onetl.connection import FTP - ftp = FTP(host="some_host") - - assert isinstance(ftp, FileConnection) - assert ftp.host == "some_host" - assert ftp.user is None - assert ftp.password is None + conn = FTP(host="some_host") + assert conn.host == "some_host" + assert conn.user is None + assert conn.password is None def test_ftp_connection_with_port(): from onetl.connection import FTP - ftp = FTP(host="some_host", user="some_user", password="pwd", port=500) + conn = FTP(host="some_host", user="some_user", password="pwd", port=500) - assert ftp.port == 500 + assert conn.port == 500 + assert str(conn) == "FTP[some_host:500]" def test_ftp_connection_without_mandatory_args(): diff --git a/tests/tests_unit/tests_file_connection_unit/test_ftps_unit.py b/tests/tests_unit/tests_file_connection_unit/test_ftps_unit.py index aa63de1e..c0b201e6 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_ftps_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_ftps_unit.py @@ -8,35 +8,36 @@ def test_ftps_connection(): from onetl.connection import FTPS - ftps = FTPS(host="some_host", 
user="some_user", password="pwd") - assert isinstance(ftps, FileConnection) - assert ftps.host == "some_host" - assert ftps.user == "some_user" - assert ftps.password != "pwd" - assert ftps.password.get_secret_value() == "pwd" - assert ftps.port == 21 + conn = FTPS(host="some_host", user="some_user", password="pwd") + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" + assert conn.port == 21 - assert "password='pwd'" not in str(ftps) - assert "password='pwd'" not in repr(ftps) + assert str(conn) == "FTPS[some_host:21]" + assert "pwd" not in repr(conn) def test_ftps_connection_anonymous(): from onetl.connection import FTPS - ftps = FTPS(host="some_host") + conn = FTPS(host="some_host") - assert isinstance(ftps, FileConnection) - assert ftps.host == "some_host" - assert ftps.user is None - assert ftps.password is None + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.user is None + assert conn.password is None def test_ftps_connection_with_port(): from onetl.connection import FTPS - ftps = FTPS(host="some_host", user="some_user", password="pwd", port=500) + conn = FTPS(host="some_host", user="some_user", password="pwd", port=500) - assert ftps.port == 500 + assert conn.port == 500 + assert str(conn) == "FTPS[some_host:500]" def test_ftps_connection_without_mandatory_args(): diff --git a/tests/tests_unit/tests_file_connection_unit/test_hdfs_unit.py b/tests/tests_unit/tests_file_connection_unit/test_hdfs_unit.py index 2249e237..26b4cf7a 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_hdfs_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_hdfs_unit.py @@ -15,73 +15,74 @@ def test_hdfs_connection_with_host(): from onetl.connection import HDFS - hdfs = HDFS(host="some-host.domain.com") - assert isinstance(hdfs, FileConnection) - assert hdfs.host == "some-host.domain.com" - assert hdfs.webhdfs_port == 50070 - assert not hdfs.user - assert not hdfs.password - assert not hdfs.keytab - assert hdfs.instance_url == "hdfs://some-host.domain.com:50070" + conn = HDFS(host="some-host.domain.com") + assert isinstance(conn, FileConnection) + assert conn.host == "some-host.domain.com" + assert conn.webhdfs_port == 50070 + assert not conn.user + assert not conn.password + assert not conn.keytab + assert conn.instance_url == "hdfs://some-host.domain.com:50070" + assert str(conn) == "HDFS[some-host.domain.com:50070]" def test_hdfs_connection_with_cluster(): from onetl.connection import HDFS - hdfs = HDFS(cluster="rnd-dwh") - assert isinstance(hdfs, FileConnection) - assert hdfs.cluster == "rnd-dwh" - assert hdfs.webhdfs_port == 50070 - assert not hdfs.user - assert not hdfs.password - assert not hdfs.keytab - assert hdfs.instance_url == "rnd-dwh" + conn = HDFS(cluster="rnd-dwh") + assert conn.cluster == "rnd-dwh" + assert conn.webhdfs_port == 50070 + assert not conn.user + assert not conn.password + assert not conn.keytab + assert conn.instance_url == "rnd-dwh" + assert str(conn) == "HDFS[rnd-dwh]" def test_hdfs_connection_with_cluster_and_host(): from onetl.connection import HDFS - hdfs = HDFS(cluster="rnd-dwh", host="some-host.domain.com") - assert isinstance(hdfs, FileConnection) - assert hdfs.cluster == "rnd-dwh" - assert hdfs.host == "some-host.domain.com" - assert hdfs.instance_url == "rnd-dwh" + conn = HDFS(cluster="rnd-dwh", host="some-host.domain.com") + assert conn.cluster == "rnd-dwh" + assert 
conn.host == "some-host.domain.com" + assert conn.instance_url == "rnd-dwh" + assert str(conn) == "HDFS[rnd-dwh]" -def test_hdfs_connection_with_port(): +def test_hdfs_connection_with_host_and_port(): from onetl.connection import HDFS - hdfs = HDFS(host="some-host.domain.com", port=9080) - assert isinstance(hdfs, FileConnection) - assert hdfs.host == "some-host.domain.com" - assert hdfs.webhdfs_port == 9080 - assert hdfs.instance_url == "hdfs://some-host.domain.com:9080" + conn = HDFS(host="some-host.domain.com", port=9080) + assert conn.host == "some-host.domain.com" + assert conn.webhdfs_port == 9080 + assert conn.instance_url == "hdfs://some-host.domain.com:9080" + assert str(conn) == "HDFS[some-host.domain.com:9080]" def test_hdfs_connection_with_user(): from onetl.connection import HDFS - hdfs = HDFS(host="some-host.domain.com", user="some_user") - assert hdfs.host == "some-host.domain.com" - assert hdfs.webhdfs_port == 50070 - assert hdfs.user == "some_user" - assert not hdfs.password - assert not hdfs.keytab + conn = HDFS(host="some-host.domain.com", user="some_user") + assert conn.host == "some-host.domain.com" + assert conn.webhdfs_port == 50070 + assert conn.user == "some_user" + assert not conn.password + assert not conn.keytab def test_hdfs_connection_with_password(): from onetl.connection import HDFS - hdfs = HDFS(host="some-host.domain.com", user="some_user", password="pwd") - assert hdfs.host == "some-host.domain.com" - assert hdfs.webhdfs_port == 50070 - assert hdfs.user == "some_user" - assert hdfs.password != "pwd" - assert hdfs.password.get_secret_value() == "pwd" - assert not hdfs.keytab + conn = HDFS(host="some-host.domain.com", user="some_user", password="pwd") + assert conn.host == "some-host.domain.com" + assert conn.webhdfs_port == 50070 + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" + assert not conn.keytab + assert str(conn) == "HDFS[some-host.domain.com:50070]" - assert "password='pwd'" not in str(hdfs) - assert "password='pwd'" not in repr(hdfs) + assert "pwd" not in repr(conn) def test_hdfs_connection_with_keytab(request, tmp_path_factory): @@ -91,15 +92,15 @@ def test_hdfs_connection_with_keytab(request, tmp_path_factory): folder.mkdir(exist_ok=True, parents=True) keytab = folder / "user.keytab" keytab.touch() - hdfs = HDFS(host="some-host.domain.com", user="some_user", keytab=keytab) + conn = HDFS(host="some-host.domain.com", user="some_user", keytab=keytab) def finalizer(): shutil.rmtree(folder) request.addfinalizer(finalizer) - assert hdfs.user == "some_user" - assert not hdfs.password + assert conn.user == "some_user" + assert not conn.password def test_hdfs_connection_keytab_does_not_exist(): @@ -242,7 +243,7 @@ def get_webhdfs_port(cluster: str) -> int | None: assert HDFS(host="some-node.domain.com", cluster="rnd-dwh").webhdfs_port == 9080 -def test_hdfs_known_get_current(request, mocker): +def test_hdfs_known_get_current(request): from onetl.connection import HDFS # no hooks bound to HDFS.Slots.get_current_cluster @@ -259,5 +260,5 @@ def get_current_cluster() -> str: request.addfinalizer(get_current_cluster.disable) - hdfs = HDFS.get_current() - assert hdfs.cluster == "rnd-dwh" + conn = HDFS.get_current() + assert conn.cluster == "rnd-dwh" diff --git a/tests/tests_unit/tests_file_connection_unit/test_s3_unit.py b/tests/tests_unit/tests_file_connection_unit/test_s3_unit.py index e652c24e..524c24f6 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_s3_unit.py +++ 
b/tests/tests_unit/tests_file_connection_unit/test_s3_unit.py @@ -6,29 +6,29 @@ def test_s3_connection(): from onetl.connection import S3 - s3 = S3( + conn = S3( host="some_host", access_key="access key", secret_key="some key", bucket="bucket", ) - assert s3.host == "some_host" - assert s3.access_key == "access key" - assert s3.secret_key != "some key" - assert s3.secret_key.get_secret_value() == "some key" - assert s3.protocol == "https" - assert s3.port == 443 - assert s3.instance_url == "s3://some_host:443" + assert conn.host == "some_host" + assert conn.access_key == "access key" + assert conn.secret_key != "some key" + assert conn.secret_key.get_secret_value() == "some key" + assert conn.protocol == "https" + assert conn.port == 443 + assert conn.instance_url == "s3://some_host:443/bucket" + assert str(conn) == "S3[some_host:443/bucket]" - assert "some key" not in str(s3) - assert "some key" not in repr(s3) + assert "some key" not in repr(conn) def test_s3_connection_with_session_token(): from onetl.connection import S3 - s3 = S3( + conn = S3( host="some_host", access_key="access_key", secret_key="some key", @@ -36,17 +36,16 @@ def test_s3_connection_with_session_token(): bucket="bucket", ) - assert s3.session_token != "some token" - assert s3.session_token.get_secret_value() == "some token" + assert conn.session_token != "some token" + assert conn.session_token.get_secret_value() == "some token" - assert "some token" not in str(s3) - assert "some token" not in repr(s3) + assert "some token" not in repr(conn) def test_s3_connection_https(): from onetl.connection import S3 - s3 = S3( + conn = S3( host="some_host", access_key="access_key", secret_key="secret_key", @@ -54,15 +53,16 @@ def test_s3_connection_https(): protocol="https", ) - assert s3.protocol == "https" - assert s3.port == 443 - assert s3.instance_url == "s3://some_host:443" + assert conn.protocol == "https" + assert conn.port == 443 + assert conn.instance_url == "s3://some_host:443/bucket" + assert str(conn) == "S3[some_host:443/bucket]" def test_s3_connection_http(): from onetl.connection import S3 - s3 = S3( + conn = S3( host="some_host", access_key="access_key", secret_key="secret_key", @@ -70,16 +70,17 @@ def test_s3_connection_http(): protocol="http", ) - assert s3.protocol == "http" - assert s3.port == 80 - assert s3.instance_url == "s3://some_host:80" + assert conn.protocol == "http" + assert conn.port == 80 + assert conn.instance_url == "s3://some_host:80/bucket" + assert str(conn) == "S3[some_host:80/bucket]" @pytest.mark.parametrize("protocol", ["http", "https"]) def test_s3_connection_with_port(protocol): from onetl.connection import S3 - s3 = S3( + conn = S3( host="some_host", port=9000, access_key="access_key", @@ -88,6 +89,7 @@ def test_s3_connection_with_port(protocol): protocol=protocol, ) - assert s3.protocol == protocol - assert s3.port == 9000 - assert s3.instance_url == "s3://some_host:9000" + assert conn.protocol == protocol + assert conn.port == 9000 + assert conn.instance_url == "s3://some_host:9000/bucket" + assert str(conn) == "S3[some_host:9000/bucket]" diff --git a/tests/tests_unit/tests_file_connection_unit/test_samba_unit.py b/tests/tests_unit/tests_file_connection_unit/test_samba_unit.py index 42f95b36..2dfd06e6 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_samba_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_samba_unit.py @@ -8,36 +8,39 @@ def test_samba_connection(): from onetl.connection import Samba - samba = Samba(host="some_host", share="share_name", 
user="some_user", password="pwd") - assert isinstance(samba, FileConnection) - assert samba.host == "some_host" - assert samba.protocol == "SMB" - assert samba.domain == "" - assert samba.auth_type == "NTLMv2" - assert samba.port == 445 - assert samba.user == "some_user" - assert samba.password != "pwd" - assert samba.password.get_secret_value() == "pwd" + conn = Samba(host="some_host", share="share_name", user="some_user", password="pwd") + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.port == 445 + assert conn.share == "share_name" + assert conn.protocol == "SMB" + assert conn.domain == "" + assert conn.auth_type == "NTLMv2" + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" - assert "password='pwd'" not in str(samba) - assert "password='pwd'" not in repr(samba) + assert conn.instance_url == "smb://some_host:445/share_name" + assert str(conn) == "Samba[some_host:445/share_name]" + + assert "pwd" not in repr(conn) def test_samba_connection_with_net_bios(): from onetl.connection import Samba - samba = Samba(host="some_host", share="share_name", user="some_user", password="pwd", protocol="NetBIOS") - assert samba.protocol == "NetBIOS" - assert samba.port == 139 + conn = Samba(host="some_host", share="share_name", user="some_user", password="pwd", protocol="NetBIOS") + assert conn.protocol == "NetBIOS" + assert conn.port == 139 @pytest.mark.parametrize("protocol", ["SMB", "NetBIOS"]) def test_samba_connection_with_custom_port(protocol): from onetl.connection import Samba - samba = Samba(host="some_host", share="share_name", user="some_user", password="pwd", protocol=protocol, port=444) - assert samba.protocol == protocol - assert samba.port == 444 + conn = Samba(host="some_host", share="share_name", user="some_user", password="pwd", protocol=protocol, port=444) + assert conn.protocol == protocol + assert conn.port == 444 def test_samba_connection_without_mandatory_args(): diff --git a/tests/tests_unit/tests_file_connection_unit/test_sftp_unit.py b/tests/tests_unit/tests_file_connection_unit/test_sftp_unit.py index 11f6cfbd..d2e02b75 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_sftp_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_sftp_unit.py @@ -7,35 +7,41 @@ def test_sftp_connection_anonymous(): - from onetl.connection import SFTP + from onetl.connection import SFTP, FileConnection - sftp = SFTP(host="some_host") - assert sftp.host == "some_host" - assert sftp.port == 22 - assert not sftp.user - assert not sftp.password - assert not sftp.key_file + conn = SFTP(host="some_host") + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.port == 22 + assert not conn.user + assert not conn.password + assert not conn.key_file + assert conn.instance_url == "sftp://some_host:22" + assert str(conn) == "SFTP[some_host:22]" def test_sftp_connection_with_port(): from onetl.connection import SFTP - sftp = SFTP(host="some_host", port=500) + conn = SFTP(host="some_host", port=500) - assert sftp.port == 500 + assert conn.port == 500 + assert conn.instance_url == "sftp://some_host:500" + assert str(conn) == "SFTP[some_host:500]" def test_sftp_connection_with_password(): from onetl.connection import SFTP - sftp = SFTP(host="some_host", user="some_user", password="pwd") - assert sftp.user == "some_user" - assert sftp.password != "pwd" - assert sftp.password.get_secret_value() == "pwd" - assert not sftp.key_file + conn = 
SFTP(host="some_host", user="some_user", password="pwd") + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" + assert not conn.key_file + assert conn.instance_url == "sftp://some_host:22" + assert str(conn) == "SFTP[some_host:22]" - assert "password='pwd'" not in str(sftp) - assert "password='pwd'" not in repr(sftp) + assert "pwd" not in repr(conn) def test_sftp_connection_with_key_file(request, tmp_path_factory): @@ -51,10 +57,10 @@ def finalizer(): request.addfinalizer(finalizer) - sftp = SFTP(host="some_host", user="some_user", key_file=key_file) - assert sftp.user == "some_user" - assert not sftp.password - assert sftp.key_file == key_file + conn = SFTP(host="some_host", user="some_user", key_file=key_file) + assert conn.user == "some_user" + assert not conn.password + assert conn.key_file == key_file def test_sftp_connection_key_file_does_not_exist(): diff --git a/tests/tests_unit/tests_file_connection_unit/test_webdav_unit.py b/tests/tests_unit/tests_file_connection_unit/test_webdav_unit.py index 7d92d494..7f458678 100644 --- a/tests/tests_unit/tests_file_connection_unit/test_webdav_unit.py +++ b/tests/tests_unit/tests_file_connection_unit/test_webdav_unit.py @@ -8,34 +8,39 @@ def test_webdav_connection(): from onetl.connection import WebDAV - webdav = WebDAV(host="some_host", user="some_user", password="pwd") - assert isinstance(webdav, FileConnection) - assert webdav.host == "some_host" - assert webdav.protocol == "https" - assert webdav.port == 443 - assert webdav.user == "some_user" - assert webdav.password != "pwd" - assert webdav.password.get_secret_value() == "pwd" + conn = WebDAV(host="some_host", user="some_user", password="pwd") + assert isinstance(conn, FileConnection) + assert conn.host == "some_host" + assert conn.protocol == "https" + assert conn.port == 443 + assert conn.user == "some_user" + assert conn.password != "pwd" + assert conn.password.get_secret_value() == "pwd" + assert conn.instance_url == "webdav://some_host:443" + assert str(conn) == "WebDAV[some_host:443]" - assert "password='pwd'" not in str(webdav) - assert "password='pwd'" not in repr(webdav) + assert "pwd" not in repr(conn) def test_webdav_connection_with_http(): from onetl.connection import WebDAV - webdav = WebDAV(host="some_host", user="some_user", password="pwd", protocol="http") - assert webdav.protocol == "http" - assert webdav.port == 80 + conn = WebDAV(host="some_host", user="some_user", password="pwd", protocol="http") + assert conn.protocol == "http" + assert conn.port == 80 + assert conn.instance_url == "webdav://some_host:80" + assert str(conn) == "WebDAV[some_host:80]" @pytest.mark.parametrize("protocol", ["http", "https"]) def test_webdav_connection_with_custom_port(protocol): from onetl.connection import WebDAV - webdav = WebDAV(host="some_host", user="some_user", password="pwd", port=500, protocol=protocol) - assert webdav.protocol == protocol - assert webdav.port == 500 + conn = WebDAV(host="some_host", user="some_user", password="pwd", port=500, protocol=protocol) + assert conn.protocol == protocol + assert conn.port == 500 + assert conn.instance_url == "webdav://some_host:500" + assert str(conn) == "WebDAV[some_host:500]" def test_webdav_connection_without_mandatory_args(): diff --git a/tests/tests_unit/tests_file_df_connection_unit/test_spark_hdfs_unit.py b/tests/tests_unit/tests_file_df_connection_unit/test_spark_hdfs_unit.py index 08ca6c1f..0d392c8d 100644 --- 
a/tests/tests_unit/tests_file_df_connection_unit/test_spark_hdfs_unit.py +++ b/tests/tests_unit/tests_file_df_connection_unit/test_spark_hdfs_unit.py @@ -12,28 +12,31 @@ def test_spark_hdfs_with_cluster(spark_mock): - hdfs = SparkHDFS(cluster="rnd-dwh", spark=spark_mock) - assert isinstance(hdfs, BaseFileDFConnection) - assert hdfs.cluster == "rnd-dwh" - assert hdfs.host is None - assert hdfs.ipc_port == 8020 - assert hdfs.instance_url == "rnd-dwh" + conn = SparkHDFS(cluster="rnd-dwh", spark=spark_mock) + assert isinstance(conn, BaseFileDFConnection) + assert conn.cluster == "rnd-dwh" + assert conn.host is None + assert conn.ipc_port == 8020 + assert conn.instance_url == "rnd-dwh" + assert str(conn) == "HDFS[rnd-dwh]" def test_spark_hdfs_with_cluster_and_host(spark_mock): - hdfs = SparkHDFS(cluster="rnd-dwh", host="some-host.domain.com", spark=spark_mock) - assert isinstance(hdfs, BaseFileDFConnection) - assert hdfs.cluster == "rnd-dwh" - assert hdfs.host == "some-host.domain.com" - assert hdfs.instance_url == "rnd-dwh" + conn = SparkHDFS(cluster="rnd-dwh", host="some-host.domain.com", spark=spark_mock) + assert isinstance(conn, BaseFileDFConnection) + assert conn.cluster == "rnd-dwh" + assert conn.host == "some-host.domain.com" + assert conn.instance_url == "rnd-dwh" + assert str(conn) == "HDFS[rnd-dwh]" def test_spark_hdfs_with_port(spark_mock): - hdfs = SparkHDFS(cluster="rnd-dwh", port=9020, spark=spark_mock) - assert isinstance(hdfs, BaseFileDFConnection) - assert hdfs.cluster == "rnd-dwh" - assert hdfs.ipc_port == 9020 - assert hdfs.instance_url == "rnd-dwh" + conn = SparkHDFS(cluster="rnd-dwh", port=9020, spark=spark_mock) + assert isinstance(conn, BaseFileDFConnection) + assert conn.cluster == "rnd-dwh" + assert conn.ipc_port == 9020 + assert conn.instance_url == "rnd-dwh" + assert str(conn) == "HDFS[rnd-dwh]" def test_spark_hdfs_without_cluster(spark_mock): @@ -143,5 +146,5 @@ def get_current_cluster() -> str: request.addfinalizer(get_current_cluster.disable) - hdfs = SparkHDFS.get_current(spark=spark_mock) - assert hdfs.cluster == "rnd-dwh" + conn = SparkHDFS.get_current(spark=spark_mock) + assert conn.cluster == "rnd-dwh" diff --git a/tests/tests_unit/tests_file_df_connection_unit/test_spark_local_fs_unit.py b/tests/tests_unit/tests_file_df_connection_unit/test_spark_local_fs_unit.py index e98c986c..ac41f7f8 100644 --- a/tests/tests_unit/tests_file_df_connection_unit/test_spark_local_fs_unit.py +++ b/tests/tests_unit/tests_file_df_connection_unit/test_spark_local_fs_unit.py @@ -13,6 +13,7 @@ def test_spark_local_fs_spark_local(spark_mock): conn = SparkLocalFS(spark=spark_mock) assert conn.spark == spark_mock assert conn.instance_url == f"file://{socket.getfqdn()}" + assert str(conn) == "LocalFS" @pytest.mark.parametrize("master", ["k8s", "yarn"]) diff --git a/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py b/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py index 99a20633..34ac4387 100644 --- a/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py +++ b/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py @@ -84,7 +84,7 @@ def spark_mock_hadoop_3(spark_mock): def test_spark_s3(spark_mock_hadoop_3): - s3 = SparkS3( + conn = SparkS3( host="some_host", access_key="access key", secret_key="some key", @@ -92,20 +92,20 @@ def test_spark_s3(spark_mock_hadoop_3): spark=spark_mock_hadoop_3, ) - assert s3.host == "some_host" - assert s3.access_key == "access key" - assert s3.secret_key != "some key" - assert 
s3.secret_key.get_secret_value() == "some key" - assert s3.protocol == "https" - assert s3.port == 443 - assert s3.instance_url == "s3://some_host:443" + assert conn.host == "some_host" + assert conn.access_key == "access key" + assert conn.secret_key != "some key" + assert conn.secret_key.get_secret_value() == "some key" + assert conn.protocol == "https" + assert conn.port == 443 + assert conn.instance_url == "s3://some_host:443/bucket" + assert str(conn) == "S3[some_host:443/bucket]" - assert "some key" not in str(s3) - assert "some key" not in repr(s3) + assert "some key" not in repr(conn) def test_spark_s3_with_protocol_https(spark_mock_hadoop_3): - s3 = SparkS3( + conn = SparkS3( host="some_host", access_key="access_key", secret_key="secret_key", @@ -114,13 +114,14 @@ def test_spark_s3_with_protocol_https(spark_mock_hadoop_3): spark=spark_mock_hadoop_3, ) - assert s3.protocol == "https" - assert s3.port == 443 - assert s3.instance_url == "s3://some_host:443" + assert conn.protocol == "https" + assert conn.port == 443 + assert conn.instance_url == "s3://some_host:443/bucket" + assert str(conn) == "S3[some_host:443/bucket]" def test_spark_s3_with_protocol_http(spark_mock_hadoop_3): - s3 = SparkS3( + conn = SparkS3( host="some_host", access_key="access_key", secret_key="secret_key", @@ -129,14 +130,15 @@ def test_spark_s3_with_protocol_http(spark_mock_hadoop_3): spark=spark_mock_hadoop_3, ) - assert s3.protocol == "http" - assert s3.port == 80 - assert s3.instance_url == "s3://some_host:80" + assert conn.protocol == "http" + assert conn.port == 80 + assert conn.instance_url == "s3://some_host:80/bucket" + assert str(conn) == "S3[some_host:80/bucket]" @pytest.mark.parametrize("protocol", ["http", "https"]) def test_spark_s3_with_port(spark_mock_hadoop_3, protocol): - s3 = SparkS3( + conn = SparkS3( host="some_host", port=9000, access_key="access_key", @@ -146,9 +148,10 @@ def test_spark_s3_with_port(spark_mock_hadoop_3, protocol): spark=spark_mock_hadoop_3, ) - assert s3.protocol == protocol - assert s3.port == 9000 - assert s3.instance_url == "s3://some_host:9000" + assert conn.protocol == protocol + assert conn.port == 9000 + assert conn.instance_url == "s3://some_host:9000/bucket" + assert str(conn) == "S3[some_host:9000/bucket]" @pytest.mark.parametrize( From 1382600ec8552eb22bc53e242f1afea1ac38ab01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 20 Aug 2024 11:14:06 +0000 Subject: [PATCH 42/64] Fix documentation build --- docs/conf.py | 3 --- requirements/docs.txt | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index f781dddd..867d4daf 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -120,9 +120,6 @@ {"rel": "icon", "href": "icon.svg", "type": "image/svg+xml"}, ] -# TODO: remove after https://github.com/mgeier/sphinx-last-updated-by-git/pull/77 -git_exclude_patterns = ["docs/_static/logo_wide.svg"] - # The master toctree document. 
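Note on the connection unit tests above: the reworked assertions pin down two related representations for every connection object, a URL-like ``instance_url`` (now including the S3 bucket) and a compact ``str(conn)`` of the form ``ClassName[instance details]``, with secrets kept out of ``repr()``. The toy class below only illustrates the convention the tests assert; it is not the actual onetl implementation.

.. code-block:: python

    class ToyS3:
        """Illustration of the asserted naming convention, not onetl code."""

        def __init__(self, host: str, port: int, bucket: str):
            self.host = host
            self.port = port
            self.bucket = bucket

        @property
        def instance_url(self) -> str:
            # URL-like identifier, now bucket-aware
            return f"s3://{self.host}:{self.port}/{self.bucket}"

        def __str__(self) -> str:
            # compact human-readable form: ClassName[host:port/bucket]
            return f"S3[{self.host}:{self.port}/{self.bucket}]"


    conn = ToyS3("some_host", 443, "bucket")
    assert conn.instance_url == "s3://some_host:443/bucket"
    assert str(conn) == "S3[some_host:443/bucket]"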
master_doc = "index" diff --git a/requirements/docs.txt b/requirements/docs.txt index be2cd127..87768350 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -9,7 +9,8 @@ sphinx<8 sphinx-copybutton sphinx-design sphinx-favicon -sphinx-last-updated-by-git +# https://github.com/mgeier/sphinx-last-updated-by-git/pull/77 +sphinx-last-updated-by-git>=0.3.8 # TODO: uncomment after https://github.com/zqmillet/sphinx-plantuml/pull/4 # sphinx-plantuml sphinx-tabs From f4d1f3dbcf76acc1cdb4e587705b84f351241c70 Mon Sep 17 00:00:00 2001 From: Maxim Liksakov <67663774+maxim-lixakov@users.noreply.github.com> Date: Tue, 20 Aug 2024 17:54:27 +0300 Subject: [PATCH 43/64] [DOP-16999] - Add jdbc_dialect logging (#305) --- docs/changelog/next_release/305.feature.rst | 1 + .../jdbc_connection/connection.py | 2 ++ .../db_connection/jdbc_mixin/connection.py | 17 +++++++++++--- .../test_clickhouse_integration.py | 22 ++++++++++++++----- .../test_postgres_integration.py | 21 +++++++++++++----- .../test_strategy_incremental_batch.py | 6 ++++- 6 files changed, 53 insertions(+), 16 deletions(-) create mode 100644 docs/changelog/next_release/305.feature.rst diff --git a/docs/changelog/next_release/305.feature.rst b/docs/changelog/next_release/305.feature.rst new file mode 100644 index 00000000..c4c44dc6 --- /dev/null +++ b/docs/changelog/next_release/305.feature.rst @@ -0,0 +1 @@ +Add log.info about JDBC dialect usage: ``Detected dialect: 'org.apache.spark.sql.jdbc.MySQLDialect'`` diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 9d41298e..0f3ac024 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -90,6 +90,7 @@ def sql( query = clear_statement(query) + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) log_lines(log, query) @@ -195,6 +196,7 @@ def get_df_schema( columns: list[str] | None = None, options: JDBCReadOptions | None = None, ) -> StructType: + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Fetching schema of table %r ...", self.__class__.__name__, source) query = self.dialect.get_sql_query(source, columns=columns, limit=0, compact=True) diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index 8ec77d13..2f25b5a9 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -205,6 +205,7 @@ def fetch( query = clear_statement(query) + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Executing SQL query (on driver):", self.__class__.__name__) log_lines(log, query) @@ -277,6 +278,7 @@ def execute( statement = clear_statement(statement) + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Executing statement (on driver):", self.__class__.__name__) log_lines(log, statement) @@ -417,6 +419,17 @@ def _get_jdbc_connection(self, options: JDBCFetchOptions | JDBCExecuteOptions): self._last_connection_and_options.data = (new_connection, options) return new_connection + def _get_spark_dialect_name(self) -> str: + """ + Returns the name of the JDBC dialect associated with the 
connection URL. + """ + dialect = self._get_spark_dialect().toString() + return dialect.split("$")[0] if "$" in dialect else dialect + + def _get_spark_dialect(self): + jdbc_dialects_package = self.spark._jvm.org.apache.spark.sql.jdbc + return jdbc_dialects_package.JdbcDialects.get(self.jdbc_url) + def _close_connections(self): with suppress(Exception): # connection maybe not opened yet @@ -559,9 +572,7 @@ def _resultset_to_dataframe(self, result_set) -> DataFrame: from pyspark.sql import DataFrame # noqa: WPS442 - jdbc_dialects_package = self.spark._jvm.org.apache.spark.sql.jdbc # type: ignore - jdbc_dialect = jdbc_dialects_package.JdbcDialects.get(self.jdbc_url) - + jdbc_dialect = self._get_spark_dialect() jdbc_utils_package = self.spark._jvm.org.apache.spark.sql.execution.datasources.jdbc # type: ignore jdbc_utils = jdbc_utils_package.JdbcUtils diff --git a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py index 78656d83..aa9205b8 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_clickhouse_integration.py @@ -62,7 +62,7 @@ def test_clickhouse_connection_check_extra_is_handled_by_driver(spark, processin @pytest.mark.parametrize("suffix", ["", ";"]) -def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix): +def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix, caplog): clickhouse = Clickhouse( host=processing.host, port=processing.port, @@ -73,7 +73,11 @@ def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix): ) table = load_table_data.full_name - df = clickhouse.sql(f"SELECT * FROM {table}{suffix}") + + with caplog.at_level(logging.INFO): + df = clickhouse.sql(f"SELECT * FROM {table}{suffix}") + assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text + table_df = processing.get_expected_dataframe( schema=load_table_data.schema, table=load_table_data.table, @@ -91,7 +95,7 @@ def test_clickhouse_connection_sql(spark, processing, load_table_data, suffix): @pytest.mark.parametrize("suffix", ["", ";"]) -def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix): +def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix, caplog): clickhouse = Clickhouse( host=processing.host, port=processing.port, @@ -103,7 +107,10 @@ def test_clickhouse_connection_fetch(spark, processing, load_table_data, suffix) schema = load_table_data.schema table = load_table_data.full_name - df = clickhouse.fetch(f"SELECT * FROM {table}{suffix}") + + with caplog.at_level(logging.INFO): + df = clickhouse.fetch(f"SELECT * FROM {table}{suffix}") + assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text table_df = processing.get_expected_dataframe( schema=load_table_data.schema, @@ -192,7 +199,7 @@ def test_clickhouse_connection_execute_ddl(spark, processing, get_schema_table, @pytest.mark.flaky @pytest.mark.parametrize("suffix", ["", ";"]) -def test_clickhouse_connection_execute_dml(request, spark, processing, load_table_data, suffix): +def test_clickhouse_connection_execute_dml(request, spark, processing, load_table_data, suffix, caplog): clickhouse = Clickhouse( host=processing.host, port=processing.port, @@ -242,7 +249,9 @@ def table_finalizer(): updated_df = pandas.concat([updated_rows, unchanged_rows]) 
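For context on the ``Detected dialect`` log lines asserted in these tests: Spark keeps a registry of JDBC dialects keyed by URL prefix, and ``_get_spark_dialect_name()`` asks that registry and strips the trailing ``$`` that Scala singleton objects append to their class name. A minimal sketch of the same lookup, assuming a JVM-backed Spark session and an illustrative Postgres URL:

.. code-block:: python

    from pyspark.sql import SparkSession

    spark = SparkSession.builder.master("local[1]").getOrCreate()

    # same registry the connector uses: the dialect is chosen by JDBC URL prefix
    jdbc_dialects = spark._jvm.org.apache.spark.sql.jdbc.JdbcDialects
    dialect = jdbc_dialects.get("jdbc:postgresql://example.org:5432/db").toString()

    # Scala objects stringify as '...PostgresDialect$<suffix>', so cut at the first '$'
    dialect_name = dialect.split("$")[0] if "$" in dialect else dialect
    print(dialect_name)  # org.apache.spark.sql.jdbc.PostgresDialect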
processing.assert_equal_df(df=df, other_frame=updated_df, order_by="id_int") - clickhouse.execute(f"UPDATE {temp_table} SET hwm_int = 1 WHERE id_int < 50{suffix}") + with caplog.at_level(logging.INFO): + clickhouse.execute(f"UPDATE {temp_table} SET hwm_int = 1 WHERE id_int < 50{suffix}") + assert "Detected dialect: 'org.apache.spark.sql.jdbc.NoopDialect'" in caplog.text clickhouse.execute(f"ALTER TABLE {temp_table} DELETE WHERE id_int < 70{suffix}") df = clickhouse.fetch(f"SELECT * FROM {temp_table}{suffix}") @@ -273,6 +282,7 @@ def test_clickhouse_connection_execute_function( processing, load_table_data, suffix, + caplog, ): clickhouse = Clickhouse( host=processing.host, diff --git a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py index 6cea95cc..ead0275e 100644 --- a/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py +++ b/tests/tests_integration/tests_db_connection_integration/test_postgres_integration.py @@ -48,7 +48,7 @@ def test_postgres_connection_check_fail(spark): @pytest.mark.parametrize("suffix", ["", ";"]) -def test_postgres_connection_sql(spark, processing, load_table_data, suffix): +def test_postgres_connection_sql(spark, processing, load_table_data, suffix, caplog): postgres = Postgres( host=processing.host, port=processing.port, @@ -60,7 +60,10 @@ def test_postgres_connection_sql(spark, processing, load_table_data, suffix): table = load_table_data.full_name - df = postgres.sql(f"SELECT * FROM {table}{suffix}") + with caplog.at_level(logging.INFO): + df = postgres.sql(f"SELECT * FROM {table}{suffix}") + assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text + table_df = processing.get_expected_dataframe( schema=load_table_data.schema, table=load_table_data.table, @@ -79,7 +82,7 @@ def test_postgres_connection_sql(spark, processing, load_table_data, suffix): @pytest.mark.parametrize("suffix", ["", ";"]) -def test_postgres_connection_fetch(spark, processing, load_table_data, suffix): +def test_postgres_connection_fetch(spark, processing, load_table_data, suffix, caplog): postgres = Postgres( host=processing.host, port=processing.port, @@ -91,7 +94,10 @@ def test_postgres_connection_fetch(spark, processing, load_table_data, suffix): table = load_table_data.full_name - df = postgres.fetch(f"SELECT * FROM {table}{suffix}", Postgres.FetchOptions(fetchsize=2)) + with caplog.at_level(logging.INFO): + df = postgres.fetch(f"SELECT * FROM {table}{suffix}", Postgres.FetchOptions(fetchsize=2)) + assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text + table_df = processing.get_expected_dataframe( schema=load_table_data.schema, table=load_table_data.table, @@ -1023,7 +1029,7 @@ def test_postgres_connection_fetch_with_legacy_jdbc_options(spark, processing): assert df is not None -def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing): +def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing, caplog): postgres = Postgres( host=processing.host, port=processing.port, @@ -1034,4 +1040,7 @@ def test_postgres_connection_execute_with_legacy_jdbc_options(spark, processing) ) options = Postgres.JDBCOptions(query_timeout=30) - postgres.execute("DROP TABLE IF EXISTS temp_table;", options=options) + + with caplog.at_level(logging.INFO): + postgres.execute("DROP TABLE IF EXISTS temp_table;", options=options) + assert "Detected dialect: 
'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text diff --git a/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py b/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py index 66c7ad31..e72b91e8 100644 --- a/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py +++ b/tests/tests_integration/tests_strategy_integration/test_strategy_incremental_batch.py @@ -1,3 +1,4 @@ +import logging import re import secrets from datetime import date, datetime, timedelta @@ -182,6 +183,7 @@ def test_postgres_strategy_incremental_batch_different_hwm_type_in_store( hwm_column, new_type, step, + caplog, ): postgres = Postgres( host=processing.host, @@ -200,7 +202,9 @@ def test_postgres_strategy_incremental_batch_different_hwm_type_in_store( with IncrementalBatchStrategy(step=step) as batches: for _ in batches: - reader.run() + with caplog.at_level(logging.INFO): + reader.run() + assert "Detected dialect: 'org.apache.spark.sql.jdbc.PostgresDialect'" in caplog.text # change table schema new_fields = {column_name: processing.get_column_type(column_name) for column_name in processing.column_names} From e3d83594685cbfcded56f9dc4978c67080b8a23a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 21 Aug 2024 10:07:00 +0000 Subject: [PATCH 44/64] Test Spark 3.5.2 --- .github/workflows/data/clickhouse/matrix.yml | 2 +- .github/workflows/data/core/matrix.yml | 2 +- .github/workflows/data/hdfs/matrix.yml | 2 +- .github/workflows/data/hive/matrix.yml | 2 +- .github/workflows/data/kafka/matrix.yml | 2 +- .github/workflows/data/local-fs/matrix.yml | 4 ++-- .github/workflows/data/mongodb/matrix.yml | 2 +- .github/workflows/data/mssql/matrix.yml | 2 +- .github/workflows/data/mysql/matrix.yml | 2 +- .github/workflows/data/oracle/matrix.yml | 2 +- .github/workflows/data/postgres/matrix.yml | 2 +- .github/workflows/data/s3/matrix.yml | 2 +- .github/workflows/data/teradata/matrix.yml | 2 +- CONTRIBUTING.rst | 2 +- README.rst | 6 +++--- docker-compose.yml | 2 +- docker/Dockerfile | 2 +- docs/changelog/next_release/306.feature.rst | 1 + .../db_connection/clickhouse/types.rst | 4 ++-- docs/connection/db_connection/mssql/types.rst | 4 ++-- docs/connection/db_connection/mysql/types.rst | 4 ++-- docs/connection/db_connection/oracle/types.rst | 4 ++-- docs/connection/db_connection/postgres/types.rst | 4 ++-- onetl/_metrics/extract.py | 2 +- onetl/_metrics/listener/base.py | 2 +- onetl/_metrics/listener/execution.py | 12 ++++++------ onetl/_metrics/listener/job.py | 4 ++-- onetl/_metrics/listener/listener.py | 2 +- onetl/_metrics/listener/stage.py | 6 +++--- onetl/_metrics/listener/task.py | 6 +++--- onetl/_util/spark.py | 2 +- .../connection/db_connection/kafka/connection.py | 4 ++-- .../file_df_connection/spark_s3/connection.py | 6 +++--- onetl/file/format/avro.py | 4 ++-- onetl/file/format/excel.py | 16 ++++++++-------- onetl/file/format/xml.py | 8 ++++---- .../tests/{spark-3.5.0.txt => spark-3.5.2.txt} | 2 +- .../test_file/test_format_unit/test_avro_unit.py | 8 ++++---- .../test_format_unit/test_excel_unit.py | 16 ++++++++-------- .../test_spark_s3_unit.py | 6 +++--- 40 files changed, 84 insertions(+), 83 deletions(-) create mode 100644 docs/changelog/next_release/306.feature.rst rename requirements/tests/{spark-3.5.0.txt => spark-3.5.2.txt} (76%) diff 
--git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index 6f1d7261..d18856df 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -11,7 +11,7 @@ min: &min max: &max clickhouse-image: clickhouse/clickhouse-server clickhouse-version: 24.6.3.70-alpine - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/core/matrix.yml b/.github/workflows/data/core/matrix.yml index d20f074a..504f1d4d 100644 --- a/.github/workflows/data/core/matrix.yml +++ b/.github/workflows/data/core/matrix.yml @@ -6,7 +6,7 @@ min: &min os: ubuntu-latest max: &max - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/hdfs/matrix.yml b/.github/workflows/data/hdfs/matrix.yml index af4553f1..f8bae7d5 100644 --- a/.github/workflows/data/hdfs/matrix.yml +++ b/.github/workflows/data/hdfs/matrix.yml @@ -8,7 +8,7 @@ min: &min max: &max hadoop-version: hadoop3-hdfs - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/hive/matrix.yml b/.github/workflows/data/hive/matrix.yml index 6ce0d7a8..31b2120f 100644 --- a/.github/workflows/data/hive/matrix.yml +++ b/.github/workflows/data/hive/matrix.yml @@ -6,7 +6,7 @@ min: &min os: ubuntu-latest max: &max - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/kafka/matrix.yml b/.github/workflows/data/kafka/matrix.yml index 1b9b2336..4ff5fe64 100644 --- a/.github/workflows/data/kafka/matrix.yml +++ b/.github/workflows/data/kafka/matrix.yml @@ -12,7 +12,7 @@ min: &min max: &max kafka-version: 3.7.1 pydantic-version: 2 - spark-version: 3.5.1 + spark-version: 3.5.2 python-version: '3.12' java-version: 20 os: ubuntu-latest diff --git a/.github/workflows/data/local-fs/matrix.yml b/.github/workflows/data/local-fs/matrix.yml index d1337291..c4466f3c 100644 --- a/.github/workflows/data/local-fs/matrix.yml +++ b/.github/workflows/data/local-fs/matrix.yml @@ -20,8 +20,8 @@ min_excel: &min_excel os: ubuntu-latest max: &max - # Excel package currently has no release for 3.5.1 - spark-version: 3.5.0 + # Excel package currently has no release for 3.5.2 + spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/mongodb/matrix.yml b/.github/workflows/data/mongodb/matrix.yml index 98e1fe97..4c3d9d86 100644 --- a/.github/workflows/data/mongodb/matrix.yml +++ b/.github/workflows/data/mongodb/matrix.yml @@ -9,7 +9,7 @@ min: &min max: &max mongodb-version: 7.0.12 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/mssql/matrix.yml b/.github/workflows/data/mssql/matrix.yml index fad2e738..3748a0a7 100644 --- a/.github/workflows/data/mssql/matrix.yml +++ b/.github/workflows/data/mssql/matrix.yml @@ -8,7 +8,7 @@ min: &min max: &max mssql-version: 2022-CU14-ubuntu-22.04 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/mysql/matrix.yml b/.github/workflows/data/mysql/matrix.yml index d2e70314..17dacdb2 100644 --- a/.github/workflows/data/mysql/matrix.yml +++ b/.github/workflows/data/mysql/matrix.yml @@ -10,7 +10,7 @@ min: &min max: 
&max mysql-version: 9.0.1 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/oracle/matrix.yml b/.github/workflows/data/oracle/matrix.yml index 7a79c68a..ccafa20f 100644 --- a/.github/workflows/data/oracle/matrix.yml +++ b/.github/workflows/data/oracle/matrix.yml @@ -12,7 +12,7 @@ max: &max oracle-image: gvenzl/oracle-free oracle-version: 23.4-slim-faststart db-name: FREEPDB1 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/postgres/matrix.yml b/.github/workflows/data/postgres/matrix.yml index 4c5b5f4e..d37c3a83 100644 --- a/.github/workflows/data/postgres/matrix.yml +++ b/.github/workflows/data/postgres/matrix.yml @@ -9,7 +9,7 @@ min: &min max: &max postgres-version: 16.3-alpine - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/s3/matrix.yml b/.github/workflows/data/s3/matrix.yml index 06d4f748..405b8b68 100644 --- a/.github/workflows/data/s3/matrix.yml +++ b/.github/workflows/data/s3/matrix.yml @@ -10,7 +10,7 @@ min: &min max: &max minio-version: 2024.7.26 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/teradata/matrix.yml b/.github/workflows/data/teradata/matrix.yml index 6c2a5545..d9792be6 100644 --- a/.github/workflows/data/teradata/matrix.yml +++ b/.github/workflows/data/teradata/matrix.yml @@ -1,5 +1,5 @@ max: &max - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 7a70dbac..aa1a3c03 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -71,7 +71,7 @@ Create virtualenv and install dependencies: -r requirements/tests/postgres.txt \ -r requirements/tests/oracle.txt \ -r requirements/tests/pydantic-2.txt \ - -r requirements/tests/spark-3.5.1.txt + -r requirements/tests/spark-3.5.2.txt # TODO: remove after https://github.com/zqmillet/sphinx-plantuml/pull/4 pip install sphinx-plantuml --no-deps diff --git a/README.rst b/README.rst index 0a4cbc97..9def167f 100644 --- a/README.rst +++ b/README.rst @@ -184,7 +184,7 @@ Compatibility matrix +--------------------------------------------------------------+-------------+-------------+-------+ | `3.4.x `_ | 3.7 - 3.12 | 8u362 - 20 | 2.12 | +--------------------------------------------------------------+-------------+-------------+-------+ -| `3.5.x `_ | 3.8 - 3.12 | 8u371 - 20 | 2.12 | +| `3.5.x `_ | 3.8 - 3.12 | 8u371 - 20 | 2.12 | +--------------------------------------------------------------+-------------+-------------+-------+ .. _pyspark-install: @@ -199,7 +199,7 @@ or install PySpark explicitly: .. code:: bash - pip install onetl pyspark==3.5.1 # install a specific PySpark version + pip install onetl pyspark==3.5.2 # install a specific PySpark version or inject PySpark to ``sys.path`` in some other way BEFORE creating a class instance. 
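As a quick runnable restatement of the requirement above (PySpark must be importable before any onetl class is instantiated), assuming ``pyspark==3.5.2`` from the updated test matrix is already installed:

.. code-block:: python

    # pip install onetl pyspark==3.5.2
    from pyspark.sql import SparkSession

    # PySpark has to be importable before any onetl connection object is created
    spark = SparkSession.builder.master("local[1]").appName("onetl-spark-check").getOrCreate()
    assert spark.version.startswith("3.5"), spark.version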
**Otherwise connection object cannot be created.** @@ -540,7 +540,7 @@ Read files directly from S3 path, convert them to dataframe, transform it and th setup_logging() # Initialize new SparkSession with Hadoop AWS libraries and Postgres driver loaded - maven_packages = SparkS3.get_packages(spark_version="3.5.1") + Postgres.get_packages() + maven_packages = SparkS3.get_packages(spark_version="3.5.2") + Postgres.get_packages() spark = ( SparkSession.builder.appName("spark_app_onetl_demo") .config("spark.jars.packages", ",".join(maven_packages)) diff --git a/docker-compose.yml b/docker-compose.yml index f5859bb5..73e8a21e 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -9,7 +9,7 @@ services: context: . target: base args: - SPARK_VERSION: 3.5.1 + SPARK_VERSION: 3.5.2 env_file: .env.docker volumes: - ./:/app/ diff --git a/docker/Dockerfile b/docker/Dockerfile index d3d34ef2..68f40a52 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -44,7 +44,7 @@ ENV PATH=${ONETL_USER_HOME}/.local/bin:${PATH} COPY --chown=onetl:onetl ./run_tests.sh ./pytest_runner.sh ./combine_coverage.sh /app/ RUN chmod +x /app/run_tests.sh /app/pytest_runner.sh /app/combine_coverage.sh -ARG SPARK_VERSION=3.5.1 +ARG SPARK_VERSION=3.5.2 # Spark is heavy, and version change is quite rare COPY --chown=onetl:onetl ./requirements/tests/spark-${SPARK_VERSION}.txt /app/requirements/tests/ RUN pip install -r /app/requirements/tests/spark-${SPARK_VERSION}.txt diff --git a/docs/changelog/next_release/306.feature.rst b/docs/changelog/next_release/306.feature.rst new file mode 100644 index 00000000..1c2b95f7 --- /dev/null +++ b/docs/changelog/next_release/306.feature.rst @@ -0,0 +1 @@ +Update ``Excel`` package from ``0.20.3`` to ``0.20.4``, to include Spark 3.5.1 support. diff --git a/docs/connection/db_connection/clickhouse/types.rst b/docs/connection/db_connection/clickhouse/types.rst index 21ddf0ba..0d8c5675 100644 --- a/docs/connection/db_connection/clickhouse/types.rst +++ b/docs/connection/db_connection/clickhouse/types.rst @@ -106,8 +106,8 @@ References Here you can find source code with type conversions: * `Clickhouse -> JDBC `_ -* `JDBC -> Spark `_ -* `Spark -> JDBC `_ +* `JDBC -> Spark `_ +* `Spark -> JDBC `_ * `JDBC -> Clickhouse `_ Supported types diff --git a/docs/connection/db_connection/mssql/types.rst b/docs/connection/db_connection/mssql/types.rst index 807d62d9..852289ad 100644 --- a/docs/connection/db_connection/mssql/types.rst +++ b/docs/connection/db_connection/mssql/types.rst @@ -101,8 +101,8 @@ References Here you can find source code with type conversions: * `MSSQL -> JDBC `_ -* `JDBC -> Spark `_ -* `Spark -> JDBC `_ +* `JDBC -> Spark `_ +* `Spark -> JDBC `_ * `JDBC -> MSSQL `_ Supported types diff --git a/docs/connection/db_connection/mysql/types.rst b/docs/connection/db_connection/mysql/types.rst index 1ad6815c..001a221f 100644 --- a/docs/connection/db_connection/mysql/types.rst +++ b/docs/connection/db_connection/mysql/types.rst @@ -97,8 +97,8 @@ References Here you can find source code with type conversions: * `MySQL -> JDBC `_ -* `JDBC -> Spark `_ -* `Spark -> JDBC `_ +* `JDBC -> Spark `_ +* `Spark -> JDBC `_ * `JDBC -> MySQL `_ Supported types diff --git a/docs/connection/db_connection/oracle/types.rst b/docs/connection/db_connection/oracle/types.rst index 81b7da10..2433b0f7 100644 --- a/docs/connection/db_connection/oracle/types.rst +++ b/docs/connection/db_connection/oracle/types.rst @@ -101,8 +101,8 @@ See `List of Oracle types Spark `_ -* `Spark -> JDBC `_ +* `JDBC -> Spark `_ +* 
`Spark -> JDBC `_ Numeric types ~~~~~~~~~~~~~ diff --git a/docs/connection/db_connection/postgres/types.rst b/docs/connection/db_connection/postgres/types.rst index b4d9d202..f0fe8821 100644 --- a/docs/connection/db_connection/postgres/types.rst +++ b/docs/connection/db_connection/postgres/types.rst @@ -109,8 +109,8 @@ See `List of Postgres types JDBC `_ -* `JDBC -> Spark `_ -* `Spark -> JDBC `_ +* `JDBC -> Spark `_ +* `Spark -> JDBC `_ Numeric types ~~~~~~~~~~~~~ diff --git a/onetl/_metrics/extract.py b/onetl/_metrics/extract.py index 4789d8fd..8b623bb8 100644 --- a/onetl/_metrics/extract.py +++ b/onetl/_metrics/extract.py @@ -70,7 +70,7 @@ def extract_metrics_from_execution(execution: SparkListenerExecution) -> SparkCo disk_spilled_bytes += stage.metrics.disk_spilled_bytes result_size_bytes += stage.metrics.result_size_bytes - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L467-L473 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L467-L473 input_file_count = ( _get_int(execution.metrics, SparkSQLMetricNames.NUMBER_OF_FILES_READ) or _get_int(execution.metrics, SparkSQLMetricNames.STATIC_NUMBER_OF_FILES_READ) diff --git a/onetl/_metrics/listener/base.py b/onetl/_metrics/listener/base.py index 90432c7c..a8d5b855 100644 --- a/onetl/_metrics/listener/base.py +++ b/onetl/_metrics/listener/base.py @@ -16,7 +16,7 @@ class BaseSparkListener: """Base no-op SparkListener implementation. - See `SparkListener `_ interface. + See `SparkListener `_ interface. """ spark: SparkSession diff --git a/onetl/_metrics/listener/execution.py b/onetl/_metrics/listener/execution.py index 728c4c2c..f5749e16 100644 --- a/onetl/_metrics/listener/execution.py +++ b/onetl/_metrics/listener/execution.py @@ -22,18 +22,18 @@ class SparkSQLMetricNames(str, Enum): # noqa: WPS338 # Metric names passed to SQLMetrics.createMetric(...) # But only those we're interested in. 
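A short aside on the ``SparkSQLMetricNames(str, Enum)`` pattern touched in this hunk: mixing ``str`` into the enum lets members compare equal to the raw metric display names Spark reports, so no ``.value`` conversion is needed when matching listener output. A self-contained illustration, with names mirroring two entries from the enum above:

.. code-block:: python

    from enum import Enum


    class MetricNames(str, Enum):
        # display names assigned by Spark via SQLMetrics.createMetric(...)
        NUMBER_OF_FILES_READ = "number of files read"
        SIZE_OF_FILES_READ = "size of files read"


    # str mix-in: members compare equal to plain strings coming from Spark
    assert MetricNames.NUMBER_OF_FILES_READ == "number of files read"
    assert "size of files read" == MetricNames.SIZE_OF_FILES_READ
    assert MetricNames.SIZE_OF_FILES_READ.value == "size of files read"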
- # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L233C55-L233C87 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L231 NUMBER_OF_PARTITIONS_READ = "number of partitions read" - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L225-L227 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L225-L227 NUMBER_OF_FILES_READ = "number of files read" SIZE_OF_FILES_READ = "size of files read" - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L455-L456 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/DataSourceScanExec.scala#L225-L227 STATIC_NUMBER_OF_FILES_READ = "static number of files read" STATIC_SIZE_OF_FILES_READ = "static size of files read" - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala#L241-L246 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala#L241-L246 NUMBER_OF_DYNAMIC_PART = "number of dynamic part" NUMBER_OF_WRITTEN_FILES = "number of written files" @@ -62,11 +62,11 @@ def jobs(self) -> list[SparkListenerJob]: return result def on_execution_start(self, event): - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L44-L58 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L44-L58 self.status = SparkListenerExecutionStatus.STARTED def on_execution_end(self, event): - # https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L61-L83 + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L61-L83 for job in self._jobs.values(): if job.status == SparkListenerJobStatus.FAILED: self.status = SparkListenerExecutionStatus.FAILED diff --git a/onetl/_metrics/listener/job.py b/onetl/_metrics/listener/job.py index b3abbd06..915f1f3d 100644 --- a/onetl/_metrics/listener/job.py +++ b/onetl/_metrics/listener/job.py @@ -38,8 +38,8 @@ def stages(self) -> list[SparkListenerStage]: @classmethod def create(cls, event): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerJobSubmitted.html - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerJobCompleted.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerJobSubmitted.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerJobCompleted.html result = cls( id=event.jobId(), description=event.properties().get("spark.job.description"), diff --git a/onetl/_metrics/listener/listener.py b/onetl/_metrics/listener/listener.py index 3421e5ae..997f22a7 100644 --- a/onetl/_metrics/listener/listener.py +++ b/onetl/_metrics/listener/listener.py @@ -73,7 +73,7 @@ def onExecutionEnd(self, event): # Get execution metrics from SQLAppStatusStore, # as SparkListenerSQLExecutionEnd event does not provide them: - # 
https://github.com/apache/spark/blob/v3.5.1/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala + # https://github.com/apache/spark/blob/v3.5.2/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusStore.scala session_status_store = self.spark._jsparkSession.sharedState().statusStore() # noqa: WPS437 raw_execution = session_status_store.execution(execution.id).get() metrics = raw_execution.metrics() diff --git a/onetl/_metrics/listener/stage.py b/onetl/_metrics/listener/stage.py index 4bf4dffb..89d6a6ae 100644 --- a/onetl/_metrics/listener/stage.py +++ b/onetl/_metrics/listener/stage.py @@ -21,7 +21,7 @@ def __str__(self): @dataclass class SparkListenerStage: - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/StageInfo.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/StageInfo.html id: int status: SparkListenerStageStatus = SparkListenerStageStatus.PENDING metrics: SparkListenerTaskMetrics = field(default_factory=SparkListenerTaskMetrics, repr=False, init=False) @@ -39,11 +39,11 @@ def create(cls, stage_info): return cls(id=stage_info.stageId()) def on_stage_start(self, event): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerStageSubmitted.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerStageSubmitted.html self.status = SparkListenerStageStatus.ACTIVE def on_stage_end(self, event): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerStageCompleted.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerStageCompleted.html stage_info = event.stageInfo() if stage_info.failureReason().isDefined(): self.status = SparkListenerStageStatus.FAILED diff --git a/onetl/_metrics/listener/task.py b/onetl/_metrics/listener/task.py index 4b27ffcf..ced938a8 100644 --- a/onetl/_metrics/listener/task.py +++ b/onetl/_metrics/listener/task.py @@ -81,14 +81,14 @@ class SparkListenerTask: @classmethod def create(cls, task_info): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/TaskInfo.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/TaskInfo.html return cls(id=task_info.taskId()) def on_task_start(self, event): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerTaskStart.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerTaskStart.html self.status = SparkListenerTaskStatus(event.taskInfo().status()) def on_task_end(self, event): - # https://spark.apache.org/docs/3.5.1/api/java/org/apache/spark/scheduler/SparkListenerTaskEnd.html + # https://spark.apache.org/docs/3.5.2/api/java/org/apache/spark/scheduler/SparkListenerTaskEnd.html self.status = SparkListenerTaskStatus(event.taskInfo().status()) self.metrics = SparkListenerTaskMetrics.create(event.taskMetrics()) diff --git a/onetl/_util/spark.py b/onetl/_util/spark.py index f7d018b3..547095af 100644 --- a/onetl/_util/spark.py +++ b/onetl/_util/spark.py @@ -143,7 +143,7 @@ def estimate_dataframe_size(spark_session: SparkSession, df: DataFrame) -> int: """ Estimate in-memory DataFrame size in bytes. If cannot be estimated, return 0. - Using Spark's `SizeEstimator `_. + Using Spark's `SizeEstimator `_. 
""" try: size_estimator = spark_session._jvm.org.apache.spark.util.SizeEstimator # type: ignore[union-attr] diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index b404eafb..9b8bf2cd 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -332,7 +332,7 @@ def write_df_to_target( write_options.update(options.dict(by_alias=True, exclude_none=True, exclude={"if_exists"})) write_options["topic"] = target - # As of Apache Spark version 3.5.0, the mode 'error' is not functioning as expected. + # As of Apache Spark version 3.5.2, the mode 'error' is not functioning as expected. # This issue has been reported and can be tracked at: # https://issues.apache.org/jira/browse/SPARK-44774 mode = options.if_exists @@ -418,7 +418,7 @@ def get_packages( from onetl.connection import Kafka Kafka.get_packages(spark_version="3.2.4") - Kafka.get_packages(spark_version="3.2.4", scala_version="2.13") + Kafka.get_packages(spark_version="3.2.4", scala_version="2.12") """ diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index eb74d698..8fe07d10 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py @@ -133,7 +133,7 @@ class SparkS3(SparkFileDFConnection): from pyspark.sql import SparkSession # Create Spark session with Hadoop AWS libraries loaded - maven_packages = SparkS3.get_packages(spark_version="3.5.0") + maven_packages = SparkS3.get_packages(spark_version="3.5.2") # Some dependencies are not used, but downloading takes a lot of time. Skipping them. excluded_packages = [ "com.google.cloud.bigdataoss:gcs-connector", @@ -236,8 +236,8 @@ def get_packages( from onetl.connection import SparkS3 - SparkS3.get_packages(spark_version="3.5.0") - SparkS3.get_packages(spark_version="3.5.0", scala_version="2.12") + SparkS3.get_packages(spark_version="3.5.2") + SparkS3.get_packages(spark_version="3.5.2", scala_version="2.12") """ diff --git a/onetl/file/format/avro.py b/onetl/file/format/avro.py index 3699620b..418e4064 100644 --- a/onetl/file/format/avro.py +++ b/onetl/file/format/avro.py @@ -88,7 +88,7 @@ class Avro(ReadWriteFileFormat): from pyspark.sql import SparkSession # Create Spark session with Avro package loaded - maven_packages = Avro.get_packages(spark_version="3.5.0") + maven_packages = Avro.get_packages(spark_version="3.5.2") spark = ( SparkSession.builder.appName("spark-app-name") .config("spark.jars.packages", ",".join(maven_packages)) @@ -151,7 +151,7 @@ def get_packages( from onetl.file.format import Avro Avro.get_packages(spark_version="3.2.4") - Avro.get_packages(spark_version="3.2.4", scala_version="2.13") + Avro.get_packages(spark_version="3.2.4", scala_version="2.12") """ diff --git a/onetl/file/format/excel.py b/onetl/file/format/excel.py index 2ec12758..3f26522f 100644 --- a/onetl/file/format/excel.py +++ b/onetl/file/format/excel.py @@ -87,7 +87,7 @@ class Excel(ReadWriteFileFormat): from pyspark.sql import SparkSession # Create Spark session with Excel package loaded - maven_packages = Excel.get_packages(spark_version="3.5.0") + maven_packages = Excel.get_packages(spark_version="3.5.1") spark = ( SparkSession.builder.appName("spark-app-name") .config("spark.jars.packages", ",".join(maven_packages)) @@ -139,7 +139,7 @@ def get_packages( If ``None``, ``spark_version`` is used to determine Scala version. 
package_version : str, optional - Package version in format ``major.minor.patch``. Default is ``0.20.3``. + Package version in format ``major.minor.patch``. Default is ``0.20.4``. .. warning:: @@ -157,12 +157,12 @@ def get_packages( from onetl.file.format import Excel - Excel.get_packages(spark_version="3.5.0") - Excel.get_packages(spark_version="3.5.0", scala_version="2.13") + Excel.get_packages(spark_version="3.5.1") + Excel.get_packages(spark_version="3.5.1", scala_version="2.12") Excel.get_packages( - spark_version="3.5.0", - scala_version="2.13", - package_version="0.20.3", + spark_version="3.5.1", + scala_version="2.12", + package_version="0.20.4", ) """ @@ -176,7 +176,7 @@ def get_packages( raise ValueError(f"Package version should be at least 0.15, got {package_version}") log.warning("Passed custom package version %r, it is not guaranteed to be supported", package_version) else: - version = Version("0.20.3") + version = Version("0.20.4") spark_ver = Version(spark_version).min_digits(3) if spark_ver < Version("3.2"): diff --git a/onetl/file/format/xml.py b/onetl/file/format/xml.py index cc7cd477..11425809 100644 --- a/onetl/file/format/xml.py +++ b/onetl/file/format/xml.py @@ -119,7 +119,7 @@ class XML(ReadWriteFileFormat): from pyspark.sql import SparkSession # Create Spark session with XML package loaded - maven_packages = XML.get_packages(spark_version="3.5.0") + maven_packages = XML.get_packages(spark_version="3.5.2") spark = ( SparkSession.builder.appName("spark-app-name") .config("spark.jars.packages", ",".join(maven_packages)) @@ -184,10 +184,10 @@ def get_packages( # noqa: WPS231 from onetl.file.format import XML - XML.get_packages(spark_version="3.5.0") - XML.get_packages(spark_version="3.5.0", scala_version="2.12") + XML.get_packages(spark_version="3.5.2") + XML.get_packages(spark_version="3.5.2", scala_version="2.12") XML.get_packages( - spark_version="3.5.0", + spark_version="3.5.2", scala_version="2.12", package_version="0.18.0", ) diff --git a/requirements/tests/spark-3.5.0.txt b/requirements/tests/spark-3.5.2.txt similarity index 76% rename from requirements/tests/spark-3.5.0.txt rename to requirements/tests/spark-3.5.2.txt index 2e49168a..214f0d63 100644 --- a/requirements/tests/spark-3.5.0.txt +++ b/requirements/tests/spark-3.5.2.txt @@ -1,5 +1,5 @@ numpy>=1.16 pandas>=1.0 pyarrow>=1.0 -pyspark==3.5.0 +pyspark==3.5.2 sqlalchemy diff --git a/tests/tests_unit/test_file/test_format_unit/test_avro_unit.py b/tests/tests_unit/test_file/test_format_unit/test_avro_unit.py index 3c2ef160..53c7a67a 100644 --- a/tests/tests_unit/test_file/test_format_unit/test_avro_unit.py +++ b/tests/tests_unit/test_file/test_format_unit/test_avro_unit.py @@ -29,14 +29,14 @@ def test_avro_get_packages_scala_version_not_supported(): [ # Detect Scala version by Spark version ("2.4.0", None, "org.apache.spark:spark-avro_2.11:2.4.0"), - ("3.5.0", None, "org.apache.spark:spark-avro_2.12:3.5.0"), + ("3.5.2", None, "org.apache.spark:spark-avro_2.12:3.5.2"), # Override Scala version ("2.4.0", "2.11", "org.apache.spark:spark-avro_2.11:2.4.0"), ("2.4.0", "2.12", "org.apache.spark:spark-avro_2.12:2.4.0"), - ("3.5.0", "2.12", "org.apache.spark:spark-avro_2.12:3.5.0"), - ("3.5.0", "2.13", "org.apache.spark:spark-avro_2.13:3.5.0"), + ("3.5.2", "2.12", "org.apache.spark:spark-avro_2.12:3.5.2"), + ("3.5.2", "2.13", "org.apache.spark:spark-avro_2.13:3.5.2"), # Scala version contain three digits when only two needed - ("3.5.0", "2.12.1", "org.apache.spark:spark-avro_2.12:3.5.0"), + ("3.5.2", "2.12.1", 
"org.apache.spark:spark-avro_2.12:3.5.2"), ], ) def test_avro_get_packages(spark_version, scala_version, package): diff --git a/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py b/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py index 95dae3da..ecacb2ca 100644 --- a/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py +++ b/tests/tests_unit/test_file/test_format_unit/test_excel_unit.py @@ -34,18 +34,18 @@ def test_excel_get_packages_package_version_not_supported(): "spark_version, scala_version, package_version, packages", [ # Detect Scala version by Spark version - ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.3"]), - ("3.5.0", None, None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.3"]), + ("3.2.4", None, None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.4"]), + ("3.5.2", None, None, ["com.crealytics:spark-excel_2.12:3.5.2_0.20.4"]), # Override Scala version - ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.3"]), - ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.20.3"]), - ("3.5.0", "2.12", None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.3"]), - ("3.5.0", "2.13", None, ["com.crealytics:spark-excel_2.13:3.5.0_0.20.3"]), + ("3.2.4", "2.12", None, ["com.crealytics:spark-excel_2.12:3.2.4_0.20.4"]), + ("3.2.4", "2.13", None, ["com.crealytics:spark-excel_2.13:3.2.4_0.20.4"]), + ("3.5.2", "2.12", None, ["com.crealytics:spark-excel_2.12:3.5.2_0.20.4"]), + ("3.5.2", "2.13", None, ["com.crealytics:spark-excel_2.13:3.5.2_0.20.4"]), # Override package version ("3.2.0", None, "0.16.0", ["com.crealytics:spark-excel_2.12:3.2.0_0.16.0"]), - ("3.5.0", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.5.0_0.18.0"]), + ("3.5.2", None, "0.18.0", ["com.crealytics:spark-excel_2.12:3.5.2_0.18.0"]), # Scala version contain three digits when only two needed - ("3.5.0", "2.12.1", None, ["com.crealytics:spark-excel_2.12:3.5.0_0.20.3"]), + ("3.5.2", "2.12.1", None, ["com.crealytics:spark-excel_2.12:3.5.2_0.20.4"]), ], ) def test_excel_get_packages(caplog, spark_version, scala_version, package_version, packages): diff --git a/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py b/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py index 34ac4387..9a5e6fac 100644 --- a/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py +++ b/tests/tests_unit/tests_file_df_connection_unit/test_spark_s3_unit.py @@ -10,9 +10,9 @@ @pytest.mark.parametrize( "spark_version, scala_version, package", [ - ("3.5.0", None, "org.apache.spark:spark-hadoop-cloud_2.12:3.5.0"), - ("3.5.0", "2.12", "org.apache.spark:spark-hadoop-cloud_2.12:3.5.0"), - ("3.5.0", "2.13", "org.apache.spark:spark-hadoop-cloud_2.13:3.5.0"), + ("3.5.2", None, "org.apache.spark:spark-hadoop-cloud_2.12:3.5.2"), + ("3.5.2", "2.12", "org.apache.spark:spark-hadoop-cloud_2.12:3.5.2"), + ("3.5.2", "2.13", "org.apache.spark:spark-hadoop-cloud_2.13:3.5.2"), ], ) def test_spark_s3_get_packages(spark_version, scala_version, package): From 75f74f7a96cb6cc2d18dc5989241ccfe061a6004 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 06:52:34 +0000 Subject: [PATCH 45/64] Bump tj-actions/changed-files from 44 to 45 in the github-actions group Bumps the github-actions group with 1 update: [tj-actions/changed-files](https://github.com/tj-actions/changed-files). 
Updates `tj-actions/changed-files` from 44 to 45 - [Release notes](https://github.com/tj-actions/changed-files/releases) - [Changelog](https://github.com/tj-actions/changed-files/blob/main/HISTORY.md) - [Commits](https://github.com/tj-actions/changed-files/compare/v44...v45) --- updated-dependencies: - dependency-name: tj-actions/changed-files dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions ... Signed-off-by: dependabot[bot] --- .github/workflows/get-matrix.yml | 46 ++++++++++++++++---------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/.github/workflows/get-matrix.yml b/.github/workflows/get-matrix.yml index eba22eaf..c169bc2d 100644 --- a/.github/workflows/get-matrix.yml +++ b/.github/workflows/get-matrix.yml @@ -86,7 +86,7 @@ jobs: - name: Check if base files are changed id: changed-base - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/base/tracked.txt files_ignore_from_source_file: .github/workflows/data/base/ignored.txt @@ -97,7 +97,7 @@ jobs: - name: Check if db-related files are changed id: changed-db - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/db/tracked.txt files_ignore_from_source_file: .github/workflows/data/db/ignored.txt @@ -108,7 +108,7 @@ jobs: - name: Check if file-related files are changed id: changed-file - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/file/tracked.txt files_ignore_from_source_file: .github/workflows/data/file/ignored.txt @@ -119,7 +119,7 @@ jobs: - name: Check if file-df-related files are changed id: changed-file-df - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/file-df/tracked.txt files_ignore_from_source_file: .github/workflows/data/file-df/ignored.txt @@ -130,7 +130,7 @@ jobs: - name: Check if core files are changed id: changed-core - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/core/tracked.txt files_ignore_from_source_file: .github/workflows/data/core/ignored.txt @@ -160,7 +160,7 @@ jobs: - name: Check if Clickhouse files are changed id: changed-clickhouse - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/clickhouse/tracked.txt files_ignore_from_source_file: .github/workflows/data/clickhouse/ignored.txt @@ -190,7 +190,7 @@ jobs: - name: Check if Greenplum files are changed id: changed-greenplum - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/greenplum/tracked.txt files_ignore_from_source_file: .github/workflows/data/greenplum/ignored.txt @@ -220,7 +220,7 @@ jobs: - name: Check if Hive files are changed id: changed-hive - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/hive/tracked.txt files_ignore_from_source_file: .github/workflows/data/hive/ignored.txt @@ -250,7 +250,7 @@ jobs: - name: Check if Kafka files are changed id: changed-kafka - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/kafka/tracked.txt files_ignore_from_source_file: 
.github/workflows/data/kafka/ignored.txt @@ -280,7 +280,7 @@ jobs: - name: Check if LocalFS files are changed id: changed-local-fs - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/local-fs/tracked.txt files_ignore_from_source_file: .github/workflows/data/local-fs/ignored.txt @@ -310,7 +310,7 @@ jobs: - name: Check if MongoDB files are changed id: changed-mongodb - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/mongodb/tracked.txt files_ignore_from_source_file: .github/workflows/data/mongodb/ignored.txt @@ -340,7 +340,7 @@ jobs: - name: Check if MSSQL files are changed id: changed-mssql - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/mssql/tracked.txt files_ignore_from_source_file: .github/workflows/data/mssql/ignored.txt @@ -370,7 +370,7 @@ jobs: - name: Check if MySQL files are changed id: changed-mysql - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/mysql/tracked.txt files_ignore_from_source_file: .github/workflows/data/mysql/ignored.txt @@ -400,7 +400,7 @@ jobs: - name: Check if Oracle files are changed id: changed-oracle - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/oracle/tracked.txt files_ignore_from_source_file: .github/workflows/data/oracle/ignored.txt @@ -430,7 +430,7 @@ jobs: - name: Check if Postgres files are changed id: changed-postgres - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/postgres/tracked.txt files_ignore_from_source_file: .github/workflows/data/postgres/ignored.txt @@ -460,7 +460,7 @@ jobs: - name: Check if Teradata files are changed id: changed-teradata - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/teradata/tracked.txt files_ignore_from_source_file: .github/workflows/data/teradata/ignored.txt @@ -490,7 +490,7 @@ jobs: - name: Check if FTP files are changed id: changed-ftp - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/ftp/tracked.txt files_ignore_from_source_file: .github/workflows/data/ftp/ignored.txt @@ -520,7 +520,7 @@ jobs: - name: Check if FTPS files are changed id: changed-ftps - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/ftps/tracked.txt files_ignore_from_source_file: .github/workflows/data/ftps/ignored.txt @@ -550,7 +550,7 @@ jobs: - name: Check if HDFS files are changed id: changed-hdfs - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/hdfs/tracked.txt files_ignore_from_source_file: .github/workflows/data/hdfs/ignored.txt @@ -580,7 +580,7 @@ jobs: - name: Check if S3 files are changed id: changed-s3 - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/s3/tracked.txt files_ignore_from_source_file: .github/workflows/data/s3/ignored.txt @@ -610,7 +610,7 @@ jobs: - name: Check if SFTP files are changed id: changed-sftp - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: 
files_from_source_file: .github/workflows/data/sftp/tracked.txt files_ignore_from_source_file: .github/workflows/data/sftp/ignored.txt @@ -640,7 +640,7 @@ jobs: - name: Check if Samba files are changed id: changed-samba - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/samba/tracked.txt files_ignore_from_source_file: .github/workflows/data/samba/ignored.txt @@ -670,7 +670,7 @@ jobs: - name: Check if WebDAV files are changed id: changed-webdav - uses: tj-actions/changed-files@v44 + uses: tj-actions/changed-files@v45 with: files_from_source_file: .github/workflows/data/webdav/tracked.txt files_ignore_from_source_file: .github/workflows/data/webdav/ignored.txt From c6b09be3f019a08889f5a388dd6ceb3bd8ab714e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 26 Aug 2024 16:39:08 +0000 Subject: [PATCH 46/64] [DOP-18574] Relax check for number of yields in hooks --- docs/changelog/next_release/+yield.feature.rst | 1 + onetl/hooks/hook.py | 2 +- tests/tests_unit/test_hooks/test_hooks_callback.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 docs/changelog/next_release/+yield.feature.rst diff --git a/docs/changelog/next_release/+yield.feature.rst b/docs/changelog/next_release/+yield.feature.rst new file mode 100644 index 00000000..efc58606 --- /dev/null +++ b/docs/changelog/next_release/+yield.feature.rst @@ -0,0 +1 @@ +Do not raise exception if yield-based hook does not yield anything (no yield means no override). diff --git a/onetl/hooks/hook.py b/onetl/hooks/hook.py index d49297f1..619cff7d 100644 --- a/onetl/hooks/hook.py +++ b/onetl/hooks/hook.py @@ -285,7 +285,7 @@ def __enter__(self): try: self.first_yield_result = self.gen.send(None) except StopIteration: - raise RuntimeError("generator didn't yield") from None + pass return self diff --git a/tests/tests_unit/test_hooks/test_hooks_callback.py b/tests/tests_unit/test_hooks/test_hooks_callback.py index d1b69cf9..7fbc00ed 100644 --- a/tests/tests_unit/test_hooks/test_hooks_callback.py +++ b/tests/tests_unit/test_hooks/test_hooks_callback.py @@ -291,8 +291,8 @@ def plus(self, arg: int) -> int: def modify_callback(self, arg: int): yield from (i for i in ()) # noqa: WPS335 - with pytest.raises(RuntimeError, match="generator didn't yield"): - Calculator(1).plus(2) + # no yield = no override + assert Calculator(1).plus(2) == 3 def test_hooks_execute_callback_too_many_yields(caplog): From 3c4a1e02ebbb3658642c3c96aeabc9539f51abc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Wed, 28 Aug 2024 10:25:35 +0000 Subject: [PATCH 47/64] [DOP-18570] Fix SparkMetricsListener.onExecutionEnd on Python 3.9 and below --- onetl/_metrics/listener/execution.py | 4 ++++ onetl/_metrics/listener/listener.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/onetl/_metrics/listener/execution.py b/onetl/_metrics/listener/execution.py index f5749e16..1fbc3925 100644 --- a/onetl/_metrics/listener/execution.py +++ b/onetl/_metrics/listener/execution.py @@ -40,6 +40,10 @@ class SparkSQLMetricNames(str, Enum): # noqa: WPS338 def __str__(self): return self.value + @classmethod + def values(cls): + return
set(cls.__members__.values()) + + @dataclass class SparkListenerExecution: diff --git a/onetl/_metrics/listener/listener.py b/onetl/_metrics/listener/listener.py index 997f22a7..e78dca2a 100644 --- a/onetl/_metrics/listener/listener.py +++ b/onetl/_metrics/listener/listener.py @@ -12,6 +12,8 @@ SparkSQLMetricNames, ) +KNOWN_METRICS = SparkSQLMetricNames.values() + @dataclass class SparkMetricsListener(BaseSparkListener): @@ -81,7 +83,7 @@ def onExecutionEnd(self, event): for i in range(metrics.size()): metric = metrics.apply(i) metric_name = metric.name() - if metric_name not in SparkSQLMetricNames: + if metric_name not in KNOWN_METRICS: continue metric_value = metric_values.get(metric.accumulatorId()) if not metric_value.isDefined(): From 98a57842336c3cb6b7db05e5845c472307665781 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 27 Aug 2024 12:40:00 +0000 Subject: [PATCH 48/64] [DOP-19024] Fix passing custom JDBC options to Greenplum.extra --- docs/changelog/next_release/308.bugfix.rst | 1 + .../db_connection/greenplum/connection.py | 42 ++++++++++++------- .../test_greenplum_unit.py | 28 +++++++++++++ 3 files changed, 57 insertions(+), 14 deletions(-) create mode 100644 docs/changelog/next_release/308.bugfix.rst diff --git a/docs/changelog/next_release/308.bugfix.rst b/docs/changelog/next_release/308.bugfix.rst new file mode 100644 index 00000000..3ffcdcc5 --- /dev/null +++ b/docs/changelog/next_release/308.bugfix.rst @@ -0,0 +1 @@ +Fix passing ``Greenplum(extra={"options": ...})`` during read/write operations. diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index 0f40436f..cc3191af 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -7,6 +7,7 @@ import textwrap import warnings from typing import TYPE_CHECKING, Any, ClassVar +from urllib.parse import quote, urlencode, urlparse, urlunparse from etl_entities.instance import Host @@ -274,17 +275,20 @@ def __str__(self): def jdbc_url(self) -> str: return f"jdbc:postgresql://{self.host}:{self.port}/{self.database}" + @property + def jdbc_custom_params(self) -> dict: + result = { + key: value + for key, value in self.extra.dict(by_alias=True).items() + if not (key.startswith("server.") or key.startswith("pool.")) + } + result["ApplicationName"] = result.get("ApplicationName", self.spark.sparkContext.appName) + return result + @property def jdbc_params(self) -> dict: result = super().jdbc_params - result.update( - { - key: value - for key, value in self.extra.dict(by_alias=True).items() - if not (key.startswith("server.") or key.startswith("pool.")) - }, - ) - result["ApplicationName"] = result.get("ApplicationName", self.spark.sparkContext.appName) + result.update(self.jdbc_custom_params) return result @slot @@ -305,7 +309,7 @@ def read_source_as_df( fake_query_for_log = self.dialect.get_sql_query(table=source, columns=columns, where=where, limit=limit) log_lines(log, fake_query_for_log) - df = self.spark.read.format("greenplum").options(**self._connector_params(source), **read_options).load() + df = self.spark.read.format("greenplum").options(**self._get_connector_params(source), **read_options).load() self._check_expected_jobs_number(df, action="read") if where: @@ -340,7 +344,7 @@ def write_df_to_target( else
write_options.if_exists.value ) df.write.format("greenplum").options( - **self._connector_params(target), + **self._get_connector_params(target), **options_dict, ).mode(mode).save() @@ -425,21 +429,31 @@ def _check_java_class_imported(cls, spark): raise ValueError(msg) from e return spark - def _connector_params( + def _get_connector_params( self, table: str, ) -> dict: schema, table_name = table.split(".") # noqa: WPS414 extra = self.extra.dict(by_alias=True, exclude_none=True) - extra = {key: value for key, value in extra.items() if key.startswith("server.") or key.startswith("pool.")} + greenplum_connector_options = { + key: value for key, value in extra.items() if key.startswith("server.") or key.startswith("pool.") + } + + # Greenplum connector requires all JDBC params to be passed via JDBC URL: + # https://docs.vmware.com/en/VMware-Greenplum-Connector-for-Apache-Spark/2.3/greenplum-connector-spark/using_the_connector.html#specifying-session-parameters + parsed_jdbc_url = urlparse(self.jdbc_url) + sorted_jdbc_params = [(k, v) for k, v in sorted(self.jdbc_custom_params.items(), key=lambda x: x[0].lower())] + jdbc_url_query = urlencode(sorted_jdbc_params, quote_via=quote) + jdbc_url = urlunparse(parsed_jdbc_url._replace(query=jdbc_url_query)) + return { "driver": self.DRIVER, - "url": self.jdbc_url, + "url": jdbc_url, "user": self.user, "password": self.password.get_secret_value(), "dbschema": schema, "dbtable": table_name, - **extra, + **greenplum_connector_options, } def _options_to_connection_properties(self, options: JDBCFetchOptions | JDBCExecuteOptions): diff --git a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py index 47821642..b6ea9544 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_greenplum_unit.py @@ -128,6 +128,14 @@ def test_greenplum(spark_mock): "ApplicationName": "abc", "tcpKeepAlive": "true", } + assert conn._get_connector_params("some.table") == { + "user": "user", + "password": "passwd", + "driver": "org.postgresql.Driver", + "url": "jdbc:postgresql://some_host:5432/database?ApplicationName=abc&tcpKeepAlive=true", + "dbschema": "some", + "dbtable": "table", + } assert "passwd" not in repr(conn) @@ -154,6 +162,14 @@ def test_greenplum_with_port(spark_mock): "ApplicationName": "abc", "tcpKeepAlive": "true", } + assert conn._get_connector_params("some.table") == { + "user": "user", + "password": "passwd", + "driver": "org.postgresql.Driver", + "url": "jdbc:postgresql://some_host:5000/database?ApplicationName=abc&tcpKeepAlive=true", + "dbschema": "some", + "dbtable": "table", + } assert conn.instance_url == "greenplum://some_host:5000/database" assert str(conn) == "Greenplum[some_host:5000/database]" @@ -174,6 +190,7 @@ def test_greenplum_with_extra(spark_mock): "autosave": "always", "tcpKeepAlive": "false", "ApplicationName": "override", + "options": "-c search_path=public", "server.port": 8000, "pool.maxSize": 40, }, @@ -191,6 +208,17 @@ def test_greenplum_with_extra(spark_mock): "ApplicationName": "override", "tcpKeepAlive": "false", "autosave": "always", + "options": "-c search_path=public", + } + assert conn._get_connector_params("some.table") == { + "user": "user", + "password": "passwd", + "driver": "org.postgresql.Driver", + "url": "jdbc:postgresql://some_host:5432/database?ApplicationName=override&autosave=always&options=-c%20search_path%3Dpublic&tcpKeepAlive=false", + "dbschema": "some", + 
"dbtable": "table", + "pool.maxSize": 40, + "server.port": 8000, } From a7d4f40497974e34d2f9f6015d2d93b664a71e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 29 Aug 2024 06:59:00 +0000 Subject: [PATCH 49/64] Update LICENSE --- .spdx-license-header.txt | 2 +- LICENSE.txt | 2 +- docs/conf.py | 2 +- onetl/__init__.py | 2 +- onetl/_metrics/__init__.py | 2 +- onetl/_metrics/command.py | 2 +- onetl/_metrics/driver.py | 2 +- onetl/_metrics/executor.py | 2 +- onetl/_metrics/extract.py | 2 +- onetl/_metrics/input.py | 2 +- onetl/_metrics/listener/__init__.py | 2 +- onetl/_metrics/listener/base.py | 2 +- onetl/_metrics/listener/execution.py | 2 +- onetl/_metrics/listener/job.py | 2 +- onetl/_metrics/listener/listener.py | 2 +- onetl/_metrics/listener/stage.py | 2 +- onetl/_metrics/listener/task.py | 2 +- onetl/_metrics/output.py | 2 +- onetl/_metrics/recorder.py | 2 +- onetl/_util/__init__.py | 2 +- onetl/_util/classproperty.py | 2 +- onetl/_util/file.py | 2 +- onetl/_util/hadoop.py | 2 +- onetl/_util/java.py | 2 +- onetl/_util/scala.py | 2 +- onetl/_util/spark.py | 2 +- onetl/_util/sql.py | 2 +- onetl/_util/version.py | 2 +- onetl/base/__init__.py | 2 +- onetl/base/base_connection.py | 2 +- onetl/base/base_db_connection.py | 2 +- onetl/base/base_file_connection.py | 2 +- onetl/base/base_file_df_connection.py | 2 +- onetl/base/base_file_filter.py | 2 +- onetl/base/base_file_format.py | 2 +- onetl/base/base_file_limit.py | 2 +- onetl/base/contains_exception.py | 2 +- onetl/base/contains_get_df_schema.py | 2 +- onetl/base/contains_get_min_max_values.py | 2 +- onetl/base/path_protocol.py | 2 +- onetl/base/path_stat_protocol.py | 2 +- onetl/base/pure_path_protocol.py | 2 +- onetl/base/supports_rename_dir.py | 2 +- onetl/connection/__init__.py | 2 +- onetl/connection/db_connection/__init__.py | 2 +- onetl/connection/db_connection/clickhouse/__init__.py | 2 +- onetl/connection/db_connection/clickhouse/connection.py | 2 +- onetl/connection/db_connection/clickhouse/dialect.py | 2 +- onetl/connection/db_connection/clickhouse/options.py | 2 +- onetl/connection/db_connection/db_connection/__init__.py | 2 +- onetl/connection/db_connection/db_connection/connection.py | 2 +- onetl/connection/db_connection/db_connection/dialect.py | 2 +- onetl/connection/db_connection/dialect_mixins/__init__.py | 2 +- .../db_connection/dialect_mixins/not_support_columns.py | 2 +- .../db_connection/dialect_mixins/not_support_df_schema.py | 2 +- .../connection/db_connection/dialect_mixins/not_support_hint.py | 2 +- .../db_connection/dialect_mixins/not_support_where.py | 2 +- .../db_connection/dialect_mixins/requires_df_schema.py | 2 +- .../db_connection/dialect_mixins/support_columns_list.py | 2 +- .../connection/db_connection/dialect_mixins/support_hint_str.py | 2 +- .../db_connection/dialect_mixins/support_hwm_expression_str.py | 2 +- .../connection/db_connection/dialect_mixins/support_name_any.py | 2 +- .../dialect_mixins/support_name_with_schema_only.py | 2 +- .../db_connection/dialect_mixins/support_where_str.py | 2 +- onetl/connection/db_connection/greenplum/__init__.py | 2 +- onetl/connection/db_connection/greenplum/connection.py | 2 +- onetl/connection/db_connection/greenplum/connection_limit.py | 2 +- onetl/connection/db_connection/greenplum/dialect.py | 2 +- onetl/connection/db_connection/greenplum/options.py | 2 +- 
onetl/connection/db_connection/hive/__init__.py | 2 +- onetl/connection/db_connection/hive/connection.py | 2 +- onetl/connection/db_connection/hive/dialect.py | 2 +- onetl/connection/db_connection/hive/options.py | 2 +- onetl/connection/db_connection/hive/slots.py | 2 +- onetl/connection/db_connection/jdbc_connection/__init__.py | 2 +- onetl/connection/db_connection/jdbc_connection/connection.py | 2 +- onetl/connection/db_connection/jdbc_connection/dialect.py | 2 +- onetl/connection/db_connection/jdbc_connection/options.py | 2 +- onetl/connection/db_connection/jdbc_mixin/__init__.py | 2 +- onetl/connection/db_connection/jdbc_mixin/connection.py | 2 +- onetl/connection/db_connection/jdbc_mixin/options.py | 2 +- onetl/connection/db_connection/kafka/__init__.py | 2 +- onetl/connection/db_connection/kafka/connection.py | 2 +- onetl/connection/db_connection/kafka/dialect.py | 2 +- onetl/connection/db_connection/kafka/extra.py | 2 +- onetl/connection/db_connection/kafka/kafka_auth.py | 2 +- onetl/connection/db_connection/kafka/kafka_basic_auth.py | 2 +- onetl/connection/db_connection/kafka/kafka_kerberos_auth.py | 2 +- .../connection/db_connection/kafka/kafka_plaintext_protocol.py | 2 +- onetl/connection/db_connection/kafka/kafka_protocol.py | 2 +- onetl/connection/db_connection/kafka/kafka_scram_auth.py | 2 +- onetl/connection/db_connection/kafka/kafka_ssl_protocol.py | 2 +- onetl/connection/db_connection/kafka/options.py | 2 +- onetl/connection/db_connection/kafka/slots.py | 2 +- onetl/connection/db_connection/mongodb/__init__.py | 2 +- onetl/connection/db_connection/mongodb/connection.py | 2 +- onetl/connection/db_connection/mongodb/dialect.py | 2 +- onetl/connection/db_connection/mongodb/options.py | 2 +- onetl/connection/db_connection/mssql/__init__.py | 2 +- onetl/connection/db_connection/mssql/connection.py | 2 +- onetl/connection/db_connection/mssql/dialect.py | 2 +- onetl/connection/db_connection/mssql/options.py | 2 +- onetl/connection/db_connection/mysql/__init__.py | 2 +- onetl/connection/db_connection/mysql/connection.py | 2 +- onetl/connection/db_connection/mysql/dialect.py | 2 +- onetl/connection/db_connection/mysql/options.py | 2 +- onetl/connection/db_connection/oracle/__init__.py | 2 +- onetl/connection/db_connection/oracle/connection.py | 2 +- onetl/connection/db_connection/oracle/dialect.py | 2 +- onetl/connection/db_connection/oracle/options.py | 2 +- onetl/connection/db_connection/postgres/__init__.py | 2 +- onetl/connection/db_connection/postgres/connection.py | 2 +- onetl/connection/db_connection/postgres/dialect.py | 2 +- onetl/connection/db_connection/postgres/options.py | 2 +- onetl/connection/db_connection/teradata/__init__.py | 2 +- onetl/connection/db_connection/teradata/connection.py | 2 +- onetl/connection/db_connection/teradata/dialect.py | 2 +- onetl/connection/db_connection/teradata/options.py | 2 +- onetl/connection/file_connection/__init__.py | 2 +- onetl/connection/file_connection/file_connection.py | 2 +- onetl/connection/file_connection/ftp.py | 2 +- onetl/connection/file_connection/ftps.py | 2 +- onetl/connection/file_connection/hdfs/__init__.py | 2 +- onetl/connection/file_connection/hdfs/connection.py | 2 +- onetl/connection/file_connection/hdfs/slots.py | 2 +- onetl/connection/file_connection/mixins/__init__.py | 2 +- onetl/connection/file_connection/mixins/rename_dir_mixin.py | 2 +- onetl/connection/file_connection/s3.py | 2 +- onetl/connection/file_connection/samba.py | 2 +- onetl/connection/file_connection/sftp.py | 2 +- 
onetl/connection/file_connection/webdav.py | 2 +- onetl/connection/file_df_connection/__init__.py | 2 +- onetl/connection/file_df_connection/spark_file_df_connection.py | 2 +- onetl/connection/file_df_connection/spark_hdfs/__init__.py | 2 +- onetl/connection/file_df_connection/spark_hdfs/connection.py | 2 +- onetl/connection/file_df_connection/spark_hdfs/slots.py | 2 +- onetl/connection/file_df_connection/spark_local_fs.py | 2 +- onetl/connection/file_df_connection/spark_s3/__init__.py | 2 +- onetl/connection/file_df_connection/spark_s3/connection.py | 2 +- onetl/connection/file_df_connection/spark_s3/extra.py | 2 +- onetl/connection/kerberos_helpers.py | 2 +- onetl/core/__init__.py | 2 +- onetl/core/file_filter/__init__.py | 2 +- onetl/core/file_filter/file_filter.py | 2 +- onetl/core/file_limit/__init__.py | 2 +- onetl/core/file_limit/file_limit.py | 2 +- onetl/db/__init__.py | 2 +- onetl/db/db_reader/__init__.py | 2 +- onetl/db/db_reader/db_reader.py | 2 +- onetl/db/db_writer/__init__.py | 2 +- onetl/db/db_writer/db_writer.py | 2 +- onetl/exception.py | 2 +- onetl/file/__init__.py | 2 +- onetl/file/file_df_reader/__init__.py | 2 +- onetl/file/file_df_reader/file_df_reader.py | 2 +- onetl/file/file_df_reader/options.py | 2 +- onetl/file/file_df_writer/__init__.py | 2 +- onetl/file/file_df_writer/file_df_writer.py | 2 +- onetl/file/file_df_writer/options.py | 2 +- onetl/file/file_downloader/__init__.py | 2 +- onetl/file/file_downloader/file_downloader.py | 2 +- onetl/file/file_downloader/options.py | 2 +- onetl/file/file_downloader/result.py | 2 +- onetl/file/file_mover/__init__.py | 2 +- onetl/file/file_mover/file_mover.py | 2 +- onetl/file/file_mover/options.py | 2 +- onetl/file/file_mover/result.py | 2 +- onetl/file/file_result.py | 2 +- onetl/file/file_set.py | 2 +- onetl/file/file_uploader/__init__.py | 2 +- onetl/file/file_uploader/file_uploader.py | 2 +- onetl/file/file_uploader/options.py | 2 +- onetl/file/file_uploader/result.py | 2 +- onetl/file/filter/__init__.py | 2 +- onetl/file/filter/exclude_dir.py | 2 +- onetl/file/filter/file_hwm.py | 2 +- onetl/file/filter/glob.py | 2 +- onetl/file/filter/match_all_filters.py | 2 +- onetl/file/filter/regexp.py | 2 +- onetl/file/format/__init__.py | 2 +- onetl/file/format/avro.py | 2 +- onetl/file/format/csv.py | 2 +- onetl/file/format/excel.py | 2 +- onetl/file/format/file_format.py | 2 +- onetl/file/format/json.py | 2 +- onetl/file/format/jsonline.py | 2 +- onetl/file/format/orc.py | 2 +- onetl/file/format/parquet.py | 2 +- onetl/file/format/xml.py | 2 +- onetl/file/limit/__init__.py | 2 +- onetl/file/limit/limits_reached.py | 2 +- onetl/file/limit/limits_stop_at.py | 2 +- onetl/file/limit/max_files_count.py | 2 +- onetl/file/limit/reset_limits.py | 2 +- onetl/hooks/__init__.py | 2 +- onetl/hooks/hook.py | 2 +- onetl/hooks/hook_collection.py | 2 +- onetl/hooks/hooks_state.py | 2 +- onetl/hooks/method_inheritance_stack.py | 2 +- onetl/hooks/slot.py | 2 +- onetl/hooks/support_hooks.py | 2 +- onetl/hwm/__init__.py | 2 +- onetl/hwm/auto_hwm.py | 2 +- onetl/hwm/store/__init__.py | 2 +- onetl/hwm/store/hwm_class_registry.py | 2 +- onetl/hwm/store/yaml_hwm_store.py | 2 +- onetl/hwm/window.py | 2 +- onetl/impl/__init__.py | 2 +- onetl/impl/base_model.py | 2 +- onetl/impl/failed_local_file.py | 2 +- onetl/impl/file_exist_behavior.py | 2 +- onetl/impl/frozen_model.py | 2 +- onetl/impl/generic_options.py | 2 +- onetl/impl/local_path.py | 2 +- onetl/impl/path_container.py | 2 +- onetl/impl/path_repr.py | 2 +- onetl/impl/remote_directory.py | 2 +- 
onetl/impl/remote_file.py | 2 +- onetl/impl/remote_path.py | 2 +- onetl/impl/remote_path_stat.py | 2 +- onetl/log.py | 2 +- onetl/plugins/__init__.py | 2 +- onetl/plugins/import_plugins.py | 2 +- onetl/strategy/__init__.py | 2 +- onetl/strategy/base_strategy.py | 2 +- onetl/strategy/batch_hwm_strategy.py | 2 +- onetl/strategy/hwm_store/__init__.py | 2 +- onetl/strategy/hwm_strategy.py | 2 +- onetl/strategy/incremental_strategy.py | 2 +- onetl/strategy/snapshot_strategy.py | 2 +- onetl/strategy/strategy_manager.py | 2 +- onetl/version.py | 2 +- 232 files changed, 232 insertions(+), 232 deletions(-) diff --git a/.spdx-license-header.txt b/.spdx-license-header.txt index 19a8b2e4..44939ae1 100644 --- a/.spdx-license-header.txt +++ b/.spdx-license-header.txt @@ -1,2 +1,2 @@ -SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +SPDX-FileCopyrightText: 2021-2024 MTS PJSC SPDX-License-Identifier: Apache-2.0 diff --git a/LICENSE.txt b/LICENSE.txt index a22e190a..6b68d87e 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright 2021-2024 MTS (Mobile Telesystems). All rights reserved. +Copyright 2021-2024 MTS PJSC. All rights reserved. Apache License Version 2.0, January 2004 diff --git a/docs/conf.py b/docs/conf.py index 867d4daf..e1cc58f0 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -25,7 +25,7 @@ # -- Project information ----------------------------------------------------- project = "onETL" -copyright = "2021-2024 MTS (Mobile Telesystems)" +copyright = "2021-2024 MTS PJSC" author = "DataOps.ETL" # The version info for the project you're documenting, acts as replacement for diff --git a/onetl/__init__.py b/onetl/__init__.py index 04793543..02dbd6aa 100644 --- a/onetl/__init__.py +++ b/onetl/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import os diff --git a/onetl/_metrics/__init__.py b/onetl/_metrics/__init__.py index 5d7482b6..94099688 100644 --- a/onetl/_metrics/__init__.py +++ b/onetl/_metrics/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl._metrics.command import SparkCommandMetrics from onetl._metrics.driver import SparkDriverMetrics diff --git a/onetl/_metrics/command.py b/onetl/_metrics/command.py index 2a8a53c6..c823e4c4 100644 --- a/onetl/_metrics/command.py +++ b/onetl/_metrics/command.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/driver.py b/onetl/_metrics/driver.py index 4e685719..d3b49d96 100644 --- a/onetl/_metrics/driver.py +++ b/onetl/_metrics/driver.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/executor.py b/onetl/_metrics/executor.py index 3fd6f3fc..bbb6d732 100644 --- a/onetl/_metrics/executor.py +++ b/onetl/_metrics/executor.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/extract.py b/onetl/_metrics/extract.py index 8b623bb8..4b058092 100644 --- 
a/onetl/_metrics/extract.py +++ b/onetl/_metrics/extract.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/input.py b/onetl/_metrics/input.py index 39061311..71451634 100644 --- a/onetl/_metrics/input.py +++ b/onetl/_metrics/input.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/__init__.py b/onetl/_metrics/listener/__init__.py index 112e4fba..720c3da6 100644 --- a/onetl/_metrics/listener/__init__.py +++ b/onetl/_metrics/listener/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl._metrics.listener.execution import ( SparkListenerExecution, diff --git a/onetl/_metrics/listener/base.py b/onetl/_metrics/listener/base.py index a8d5b855..bbc6431c 100644 --- a/onetl/_metrics/listener/base.py +++ b/onetl/_metrics/listener/base.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/execution.py b/onetl/_metrics/listener/execution.py index 1fbc3925..a0d2a522 100644 --- a/onetl/_metrics/listener/execution.py +++ b/onetl/_metrics/listener/execution.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/job.py b/onetl/_metrics/listener/job.py index 915f1f3d..5581d76e 100644 --- a/onetl/_metrics/listener/job.py +++ b/onetl/_metrics/listener/job.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/listener.py b/onetl/_metrics/listener/listener.py index e78dca2a..04fe53c2 100644 --- a/onetl/_metrics/listener/listener.py +++ b/onetl/_metrics/listener/listener.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/stage.py b/onetl/_metrics/listener/stage.py index 89d6a6ae..b858e151 100644 --- a/onetl/_metrics/listener/stage.py +++ b/onetl/_metrics/listener/stage.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/listener/task.py b/onetl/_metrics/listener/task.py index ced938a8..5a17ffc5 100644 --- a/onetl/_metrics/listener/task.py +++ b/onetl/_metrics/listener/task.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/output.py b/onetl/_metrics/output.py index 8600bb68..8f27a346 100644 --- a/onetl/_metrics/output.py +++ b/onetl/_metrics/output.py @@ -1,4 
+1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_metrics/recorder.py b/onetl/_metrics/recorder.py index 4cc5745b..4c65fe8d 100644 --- a/onetl/_metrics/recorder.py +++ b/onetl/_metrics/recorder.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/__init__.py b/onetl/_util/__init__.py index 07325b1d..54237d1f 100644 --- a/onetl/_util/__init__.py +++ b/onetl/_util/__init__.py @@ -1,2 +1,2 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/_util/classproperty.py b/onetl/_util/classproperty.py index e971638a..4e0ed39e 100644 --- a/onetl/_util/classproperty.py +++ b/onetl/_util/classproperty.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/file.py b/onetl/_util/file.py index ee27c57f..2dbb9915 100644 --- a/onetl/_util/file.py +++ b/onetl/_util/file.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/hadoop.py b/onetl/_util/hadoop.py index aed572e0..12376749 100644 --- a/onetl/_util/hadoop.py +++ b/onetl/_util/hadoop.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/java.py b/onetl/_util/java.py index 1ec50a0d..c0dcbd0d 100644 --- a/onetl/_util/java.py +++ b/onetl/_util/java.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/scala.py b/onetl/_util/scala.py index 5e6c21bc..5d472f2f 100644 --- a/onetl/_util/scala.py +++ b/onetl/_util/scala.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/spark.py b/onetl/_util/spark.py index 547095af..ab2090b0 100644 --- a/onetl/_util/spark.py +++ b/onetl/_util/spark.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/_util/sql.py b/onetl/_util/sql.py index 37aa09a7..80118555 100644 --- a/onetl/_util/sql.py +++ b/onetl/_util/sql.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 def clear_statement(statement: str) -> str: """ diff --git a/onetl/_util/version.py b/onetl/_util/version.py index 85bde1c7..075928c6 100644 --- a/onetl/_util/version.py +++ b/onetl/_util/version.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS 
PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/__init__.py b/onetl/base/__init__.py index 4178e7c9..7c30c412 100644 --- a/onetl/base/__init__.py +++ b/onetl/base/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.base.base_connection import BaseConnection from onetl.base.base_db_connection import BaseDBConnection, BaseDBDialect diff --git a/onetl/base/base_connection.py b/onetl/base/base_connection.py index dc2cbd4f..264fc367 100644 --- a/onetl/base/base_connection.py +++ b/onetl/base/base_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from abc import ABC, abstractmethod from typing import TypeVar diff --git a/onetl/base/base_db_connection.py b/onetl/base/base_db_connection.py index 2c427deb..6e2e7e08 100644 --- a/onetl/base/base_db_connection.py +++ b/onetl/base/base_db_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/base_file_connection.py b/onetl/base/base_file_connection.py index 81d57bfb..28949499 100644 --- a/onetl/base/base_file_connection.py +++ b/onetl/base/base_file_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/base_file_df_connection.py b/onetl/base/base_file_df_connection.py index 28c57f3c..85e22da4 100644 --- a/onetl/base/base_file_df_connection.py +++ b/onetl/base/base_file_df_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/base_file_filter.py b/onetl/base/base_file_filter.py index 01a9893f..d2fb0a64 100644 --- a/onetl/base/base_file_filter.py +++ b/onetl/base/base_file_filter.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/base_file_format.py b/onetl/base/base_file_format.py index a4c72e3e..17fb3612 100644 --- a/onetl/base/base_file_format.py +++ b/onetl/base/base_file_format.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/base_file_limit.py b/onetl/base/base_file_limit.py index d930690d..b8cdd0f3 100644 --- a/onetl/base/base_file_limit.py +++ b/onetl/base/base_file_limit.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/contains_exception.py b/onetl/base/contains_exception.py index cf9ae5aa..12b7a09c 100644 --- a/onetl/base/contains_exception.py +++ b/onetl/base/contains_exception.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 
2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from typing_extensions import Protocol, runtime_checkable diff --git a/onetl/base/contains_get_df_schema.py b/onetl/base/contains_get_df_schema.py index ccb7d34a..f607cf60 100644 --- a/onetl/base/contains_get_df_schema.py +++ b/onetl/base/contains_get_df_schema.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/contains_get_min_max_values.py b/onetl/base/contains_get_min_max_values.py index d23029d5..e2362269 100644 --- a/onetl/base/contains_get_min_max_values.py +++ b/onetl/base/contains_get_min_max_values.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/path_protocol.py b/onetl/base/path_protocol.py index 68594a10..eed285a2 100644 --- a/onetl/base/path_protocol.py +++ b/onetl/base/path_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/path_stat_protocol.py b/onetl/base/path_stat_protocol.py index a42f0569..961ad288 100644 --- a/onetl/base/path_stat_protocol.py +++ b/onetl/base/path_stat_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/pure_path_protocol.py b/onetl/base/pure_path_protocol.py index d5757c3b..ccf69c40 100644 --- a/onetl/base/pure_path_protocol.py +++ b/onetl/base/pure_path_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/base/supports_rename_dir.py b/onetl/base/supports_rename_dir.py index 8f0d3971..f44b42d1 100644 --- a/onetl/base/supports_rename_dir.py +++ b/onetl/base/supports_rename_dir.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/__init__.py b/onetl/connection/__init__.py index 4a25e210..608beb41 100644 --- a/onetl/connection/__init__.py +++ b/onetl/connection/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/__init__.py b/onetl/connection/db_connection/__init__.py index 07325b1d..54237d1f 100644 --- a/onetl/connection/db_connection/__init__.py +++ b/onetl/connection/db_connection/__init__.py @@ -1,2 +1,2 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/db_connection/clickhouse/__init__.py b/onetl/connection/db_connection/clickhouse/__init__.py index b830a78d..d2a57f44 100644 --- a/onetl/connection/db_connection/clickhouse/__init__.py +++ b/onetl/connection/db_connection/clickhouse/__init__.py @@ -1,4 +1,4 @@ -# 
SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.clickhouse.connection import ( Clickhouse, diff --git a/onetl/connection/db_connection/clickhouse/connection.py b/onetl/connection/db_connection/clickhouse/connection.py index 482cc941..21b282b3 100644 --- a/onetl/connection/db_connection/clickhouse/connection.py +++ b/onetl/connection/db_connection/clickhouse/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/clickhouse/dialect.py b/onetl/connection/db_connection/clickhouse/dialect.py index 2c03620d..394843b8 100644 --- a/onetl/connection/db_connection/clickhouse/dialect.py +++ b/onetl/connection/db_connection/clickhouse/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/clickhouse/options.py b/onetl/connection/db_connection/clickhouse/options.py index 5e35c969..a6907793 100644 --- a/onetl/connection/db_connection/clickhouse/options.py +++ b/onetl/connection/db_connection/clickhouse/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/db_connection/db_connection/__init__.py b/onetl/connection/db_connection/db_connection/__init__.py index 71439ddc..acdab0d3 100644 --- a/onetl/connection/db_connection/db_connection/__init__.py +++ b/onetl/connection/db_connection/db_connection/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.db_connection.connection import DBConnection from onetl.connection.db_connection.db_connection.dialect import DBDialect diff --git a/onetl/connection/db_connection/db_connection/connection.py b/onetl/connection/db_connection/db_connection/connection.py index 1372cd69..1158942c 100644 --- a/onetl/connection/db_connection/db_connection/connection.py +++ b/onetl/connection/db_connection/db_connection/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/db_connection/dialect.py b/onetl/connection/db_connection/db_connection/dialect.py index 73efba33..7080e324 100644 --- a/onetl/connection/db_connection/db_connection/dialect.py +++ b/onetl/connection/db_connection/db_connection/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/__init__.py b/onetl/connection/db_connection/dialect_mixins/__init__.py index da36f089..b40538f5 100644 --- a/onetl/connection/db_connection/dialect_mixins/__init__.py +++ b/onetl/connection/db_connection/dialect_mixins/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# 
SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.dialect_mixins.not_support_columns import ( NotSupportColumns, diff --git a/onetl/connection/db_connection/dialect_mixins/not_support_columns.py b/onetl/connection/db_connection/dialect_mixins/not_support_columns.py index 2d98ac74..3ba8ae48 100644 --- a/onetl/connection/db_connection/dialect_mixins/not_support_columns.py +++ b/onetl/connection/db_connection/dialect_mixins/not_support_columns.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/not_support_df_schema.py b/onetl/connection/db_connection/dialect_mixins/not_support_df_schema.py index b99f3873..1973799b 100644 --- a/onetl/connection/db_connection/dialect_mixins/not_support_df_schema.py +++ b/onetl/connection/db_connection/dialect_mixins/not_support_df_schema.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/not_support_hint.py b/onetl/connection/db_connection/dialect_mixins/not_support_hint.py index 7680c4aa..47039532 100644 --- a/onetl/connection/db_connection/dialect_mixins/not_support_hint.py +++ b/onetl/connection/db_connection/dialect_mixins/not_support_hint.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/not_support_where.py b/onetl/connection/db_connection/dialect_mixins/not_support_where.py index 122de982..7bb3956e 100644 --- a/onetl/connection/db_connection/dialect_mixins/not_support_where.py +++ b/onetl/connection/db_connection/dialect_mixins/not_support_where.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/requires_df_schema.py b/onetl/connection/db_connection/dialect_mixins/requires_df_schema.py index 9b026572..cb9261bc 100644 --- a/onetl/connection/db_connection/dialect_mixins/requires_df_schema.py +++ b/onetl/connection/db_connection/dialect_mixins/requires_df_schema.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_columns_list.py b/onetl/connection/db_connection/dialect_mixins/support_columns_list.py index e443cf8f..5d204f5c 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_columns_list.py +++ b/onetl/connection/db_connection/dialect_mixins/support_columns_list.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_hint_str.py b/onetl/connection/db_connection/dialect_mixins/support_hint_str.py index b90d9309..cb081808 100644 --- 
a/onetl/connection/db_connection/dialect_mixins/support_hint_str.py +++ b/onetl/connection/db_connection/dialect_mixins/support_hint_str.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_hwm_expression_str.py b/onetl/connection/db_connection/dialect_mixins/support_hwm_expression_str.py index 157ffecd..e3a9ccc2 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_hwm_expression_str.py +++ b/onetl/connection/db_connection/dialect_mixins/support_hwm_expression_str.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_name_any.py b/onetl/connection/db_connection/dialect_mixins/support_name_any.py index dbe23024..1c7e55de 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_name_any.py +++ b/onetl/connection/db_connection/dialect_mixins/support_name_any.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py b/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py index d13cfc74..0e66b980 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py +++ b/onetl/connection/db_connection/dialect_mixins/support_name_with_schema_only.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/dialect_mixins/support_where_str.py b/onetl/connection/db_connection/dialect_mixins/support_where_str.py index c171ec4d..3949a6b1 100644 --- a/onetl/connection/db_connection/dialect_mixins/support_where_str.py +++ b/onetl/connection/db_connection/dialect_mixins/support_where_str.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/greenplum/__init__.py b/onetl/connection/db_connection/greenplum/__init__.py index 8a401d0b..71fdd32d 100644 --- a/onetl/connection/db_connection/greenplum/__init__.py +++ b/onetl/connection/db_connection/greenplum/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.greenplum.connection import Greenplum from onetl.connection.db_connection.greenplum.dialect import GreenplumDialect diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index cc3191af..ff5c5a76 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ 
import annotations diff --git a/onetl/connection/db_connection/greenplum/connection_limit.py b/onetl/connection/db_connection/greenplum/connection_limit.py index 32cb99d8..81ce6120 100644 --- a/onetl/connection/db_connection/greenplum/connection_limit.py +++ b/onetl/connection/db_connection/greenplum/connection_limit.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/greenplum/dialect.py b/onetl/connection/db_connection/greenplum/dialect.py index f4bafa68..8a602215 100644 --- a/onetl/connection/db_connection/greenplum/dialect.py +++ b/onetl/connection/db_connection/greenplum/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/greenplum/options.py b/onetl/connection/db_connection/greenplum/options.py index e1cd1902..4d19ae7a 100644 --- a/onetl/connection/db_connection/greenplum/options.py +++ b/onetl/connection/db_connection/greenplum/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/hive/__init__.py b/onetl/connection/db_connection/hive/__init__.py index bc1a73e3..092d6c5a 100644 --- a/onetl/connection/db_connection/hive/__init__.py +++ b/onetl/connection/db_connection/hive/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.hive.connection import Hive from onetl.connection.db_connection.hive.dialect import HiveDialect diff --git a/onetl/connection/db_connection/hive/connection.py b/onetl/connection/db_connection/hive/connection.py index 855a0ead..6e21f9c3 100644 --- a/onetl/connection/db_connection/hive/connection.py +++ b/onetl/connection/db_connection/hive/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/hive/dialect.py b/onetl/connection/db_connection/hive/dialect.py index 38b737fe..15520bd4 100644 --- a/onetl/connection/db_connection/hive/dialect.py +++ b/onetl/connection/db_connection/hive/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/hive/options.py b/onetl/connection/db_connection/hive/options.py index 16d21a0e..40097619 100644 --- a/onetl/connection/db_connection/hive/options.py +++ b/onetl/connection/db_connection/hive/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/hive/slots.py b/onetl/connection/db_connection/hive/slots.py index 3044950f..de813e5d 100644 --- a/onetl/connection/db_connection/hive/slots.py +++ 
b/onetl/connection/db_connection/hive/slots.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/jdbc_connection/__init__.py b/onetl/connection/db_connection/jdbc_connection/__init__.py index 476e7ea1..3b6eb5b2 100644 --- a/onetl/connection/db_connection/jdbc_connection/__init__.py +++ b/onetl/connection/db_connection/jdbc_connection/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.jdbc_connection.connection import JDBCConnection from onetl.connection.db_connection.jdbc_connection.dialect import JDBCDialect diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 0f3ac024..2752dc25 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/jdbc_connection/dialect.py b/onetl/connection/db_connection/jdbc_connection/dialect.py index cbf0ceb6..1fce839a 100644 --- a/onetl/connection/db_connection/jdbc_connection/dialect.py +++ b/onetl/connection/db_connection/jdbc_connection/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/jdbc_connection/options.py b/onetl/connection/db_connection/jdbc_connection/options.py index a2aa39ad..c04e8850 100644 --- a/onetl/connection/db_connection/jdbc_connection/options.py +++ b/onetl/connection/db_connection/jdbc_connection/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/jdbc_mixin/__init__.py b/onetl/connection/db_connection/jdbc_mixin/__init__.py index 0f368a12..a4b3975b 100644 --- a/onetl/connection/db_connection/jdbc_mixin/__init__.py +++ b/onetl/connection/db_connection/jdbc_mixin/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.jdbc_mixin.connection import ( JDBCMixin, diff --git a/onetl/connection/db_connection/jdbc_mixin/connection.py b/onetl/connection/db_connection/jdbc_mixin/connection.py index 2f25b5a9..a6830ae4 100644 --- a/onetl/connection/db_connection/jdbc_mixin/connection.py +++ b/onetl/connection/db_connection/jdbc_mixin/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/jdbc_mixin/options.py b/onetl/connection/db_connection/jdbc_mixin/options.py index 2504c364..ce9710a1 100644 --- a/onetl/connection/db_connection/jdbc_mixin/options.py +++ 
b/onetl/connection/db_connection/jdbc_mixin/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/__init__.py b/onetl/connection/db_connection/kafka/__init__.py index 71d01ebc..1eadb815 100644 --- a/onetl/connection/db_connection/kafka/__init__.py +++ b/onetl/connection/db_connection/kafka/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.kafka.connection import Kafka diff --git a/onetl/connection/db_connection/kafka/connection.py b/onetl/connection/db_connection/kafka/connection.py index 9b8bf2cd..93f9d821 100644 --- a/onetl/connection/db_connection/kafka/connection.py +++ b/onetl/connection/db_connection/kafka/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/dialect.py b/onetl/connection/db_connection/kafka/dialect.py index 16c4d605..b09c19cf 100644 --- a/onetl/connection/db_connection/kafka/dialect.py +++ b/onetl/connection/db_connection/kafka/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/extra.py b/onetl/connection/db_connection/kafka/extra.py index 6dd95e2c..ff1b87cc 100644 --- a/onetl/connection/db_connection/kafka/extra.py +++ b/onetl/connection/db_connection/kafka/extra.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.impl import GenericOptions diff --git a/onetl/connection/db_connection/kafka/kafka_auth.py b/onetl/connection/db_connection/kafka/kafka_auth.py index f1bbdf10..2c451d5e 100644 --- a/onetl/connection/db_connection/kafka/kafka_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_auth.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/kafka_basic_auth.py b/onetl/connection/db_connection/kafka/kafka_basic_auth.py index 4038dd02..de5cc57f 100644 --- a/onetl/connection/db_connection/kafka/kafka_basic_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_basic_auth.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py b/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py index 40e9aa55..083c3787 100644 --- a/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_kerberos_auth.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git 
a/onetl/connection/db_connection/kafka/kafka_plaintext_protocol.py b/onetl/connection/db_connection/kafka/kafka_plaintext_protocol.py index 2dd3a6a9..011713a0 100644 --- a/onetl/connection/db_connection/kafka/kafka_plaintext_protocol.py +++ b/onetl/connection/db_connection/kafka/kafka_plaintext_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/kafka_protocol.py b/onetl/connection/db_connection/kafka/kafka_protocol.py index 5d2a328c..8c884fb7 100644 --- a/onetl/connection/db_connection/kafka/kafka_protocol.py +++ b/onetl/connection/db_connection/kafka/kafka_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/kafka_scram_auth.py b/onetl/connection/db_connection/kafka/kafka_scram_auth.py index 823d0f82..af2c6b3d 100644 --- a/onetl/connection/db_connection/kafka/kafka_scram_auth.py +++ b/onetl/connection/db_connection/kafka/kafka_scram_auth.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py index 24dd52f6..ea464b36 100644 --- a/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py +++ b/onetl/connection/db_connection/kafka/kafka_ssl_protocol.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/options.py b/onetl/connection/db_connection/kafka/options.py index e2a4a8d3..68f89711 100644 --- a/onetl/connection/db_connection/kafka/options.py +++ b/onetl/connection/db_connection/kafka/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/kafka/slots.py b/onetl/connection/db_connection/kafka/slots.py index 2abf00ce..15e0f48f 100644 --- a/onetl/connection/db_connection/kafka/slots.py +++ b/onetl/connection/db_connection/kafka/slots.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mongodb/__init__.py b/onetl/connection/db_connection/mongodb/__init__.py index b452ca53..839d31c0 100644 --- a/onetl/connection/db_connection/mongodb/__init__.py +++ b/onetl/connection/db_connection/mongodb/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.mongodb.connection import MongoDB, MongoDBExtra from onetl.connection.db_connection.mongodb.dialect import MongoDBDialect diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 
f81a3bf8..4cc7e3ed 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mongodb/dialect.py b/onetl/connection/db_connection/mongodb/dialect.py index 247c58aa..8faba8b3 100644 --- a/onetl/connection/db_connection/mongodb/dialect.py +++ b/onetl/connection/db_connection/mongodb/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mongodb/options.py b/onetl/connection/db_connection/mongodb/options.py index 223e05ec..323c1f93 100644 --- a/onetl/connection/db_connection/mongodb/options.py +++ b/onetl/connection/db_connection/mongodb/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mssql/__init__.py b/onetl/connection/db_connection/mssql/__init__.py index 5b07949f..747ff244 100644 --- a/onetl/connection/db_connection/mssql/__init__.py +++ b/onetl/connection/db_connection/mssql/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.mssql.connection import MSSQL, MSSQLExtra from onetl.connection.db_connection.mssql.dialect import MSSQLDialect diff --git a/onetl/connection/db_connection/mssql/connection.py b/onetl/connection/db_connection/mssql/connection.py index f2a29b44..18a08e32 100644 --- a/onetl/connection/db_connection/mssql/connection.py +++ b/onetl/connection/db_connection/mssql/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mssql/dialect.py b/onetl/connection/db_connection/mssql/dialect.py index 7dcfdd61..6be43c80 100644 --- a/onetl/connection/db_connection/mssql/dialect.py +++ b/onetl/connection/db_connection/mssql/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mssql/options.py b/onetl/connection/db_connection/mssql/options.py index c14e38b6..856536ab 100644 --- a/onetl/connection/db_connection/mssql/options.py +++ b/onetl/connection/db_connection/mssql/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.jdbc_connection.options import ( diff --git a/onetl/connection/db_connection/mysql/__init__.py b/onetl/connection/db_connection/mysql/__init__.py index 490f356e..df5dcb7b 100644 --- a/onetl/connection/db_connection/mysql/__init__.py +++ b/onetl/connection/db_connection/mysql/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# 
SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.mysql.connection import MySQL, MySQLExtra from onetl.connection.db_connection.mysql.dialect import MySQLDialect diff --git a/onetl/connection/db_connection/mysql/connection.py b/onetl/connection/db_connection/mysql/connection.py index e3c91196..15ab2a62 100644 --- a/onetl/connection/db_connection/mysql/connection.py +++ b/onetl/connection/db_connection/mysql/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mysql/dialect.py b/onetl/connection/db_connection/mysql/dialect.py index b9c186e2..5b59bc38 100644 --- a/onetl/connection/db_connection/mysql/dialect.py +++ b/onetl/connection/db_connection/mysql/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/mysql/options.py b/onetl/connection/db_connection/mysql/options.py index 06abd6d2..60018fc3 100644 --- a/onetl/connection/db_connection/mysql/options.py +++ b/onetl/connection/db_connection/mysql/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/db_connection/oracle/__init__.py b/onetl/connection/db_connection/oracle/__init__.py index 3bcca706..4c880840 100644 --- a/onetl/connection/db_connection/oracle/__init__.py +++ b/onetl/connection/db_connection/oracle/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.oracle.connection import Oracle, OracleExtra from onetl.connection.db_connection.oracle.dialect import OracleDialect diff --git a/onetl/connection/db_connection/oracle/connection.py b/onetl/connection/db_connection/oracle/connection.py index 40164fe1..96ba9bd7 100644 --- a/onetl/connection/db_connection/oracle/connection.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/oracle/dialect.py b/onetl/connection/db_connection/oracle/dialect.py index 70e0eff3..2f121871 100644 --- a/onetl/connection/db_connection/oracle/dialect.py +++ b/onetl/connection/db_connection/oracle/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/oracle/options.py b/onetl/connection/db_connection/oracle/options.py index 61b82e1b..a9cc7ae2 100644 --- a/onetl/connection/db_connection/oracle/options.py +++ b/onetl/connection/db_connection/oracle/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/db_connection/postgres/__init__.py 
b/onetl/connection/db_connection/postgres/__init__.py index 3eef06fa..43ac2584 100644 --- a/onetl/connection/db_connection/postgres/__init__.py +++ b/onetl/connection/db_connection/postgres/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.postgres.connection import Postgres, PostgresExtra from onetl.connection.db_connection.postgres.dialect import PostgresDialect diff --git a/onetl/connection/db_connection/postgres/connection.py b/onetl/connection/db_connection/postgres/connection.py index 1c11d9e3..ac5e50d1 100644 --- a/onetl/connection/db_connection/postgres/connection.py +++ b/onetl/connection/db_connection/postgres/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/postgres/dialect.py b/onetl/connection/db_connection/postgres/dialect.py index 0e4f67ab..1dca8ec9 100644 --- a/onetl/connection/db_connection/postgres/dialect.py +++ b/onetl/connection/db_connection/postgres/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/postgres/options.py b/onetl/connection/db_connection/postgres/options.py index 3a4dd806..8e6741c6 100644 --- a/onetl/connection/db_connection/postgres/options.py +++ b/onetl/connection/db_connection/postgres/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.jdbc_connection.options import ( diff --git a/onetl/connection/db_connection/teradata/__init__.py b/onetl/connection/db_connection/teradata/__init__.py index 8356d51e..b29dbf26 100644 --- a/onetl/connection/db_connection/teradata/__init__.py +++ b/onetl/connection/db_connection/teradata/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.teradata.connection import Teradata, TeradataExtra from onetl.connection.db_connection.teradata.dialect import TeradataDialect diff --git a/onetl/connection/db_connection/teradata/connection.py b/onetl/connection/db_connection/teradata/connection.py index 9c8f073c..71f4aeed 100644 --- a/onetl/connection/db_connection/teradata/connection.py +++ b/onetl/connection/db_connection/teradata/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/teradata/dialect.py b/onetl/connection/db_connection/teradata/dialect.py index ac225ce4..b7fc9c47 100644 --- a/onetl/connection/db_connection/teradata/dialect.py +++ b/onetl/connection/db_connection/teradata/dialect.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/db_connection/teradata/options.py 
b/onetl/connection/db_connection/teradata/options.py index eb77f8c8..4f879e70 100644 --- a/onetl/connection/db_connection/teradata/options.py +++ b/onetl/connection/db_connection/teradata/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.db_connection.jdbc_connection.options import ( diff --git a/onetl/connection/file_connection/__init__.py b/onetl/connection/file_connection/__init__.py index 07325b1d..54237d1f 100644 --- a/onetl/connection/file_connection/__init__.py +++ b/onetl/connection/file_connection/__init__.py @@ -1,2 +1,2 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/file_connection/file_connection.py b/onetl/connection/file_connection/file_connection.py index 0a158409..de5916c4 100644 --- a/onetl/connection/file_connection/file_connection.py +++ b/onetl/connection/file_connection/file_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/ftp.py b/onetl/connection/file_connection/ftp.py index d5ff5216..135f6bbb 100644 --- a/onetl/connection/file_connection/ftp.py +++ b/onetl/connection/file_connection/ftp.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/ftps.py b/onetl/connection/file_connection/ftps.py index 0180edf4..ef69ae44 100644 --- a/onetl/connection/file_connection/ftps.py +++ b/onetl/connection/file_connection/ftps.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import ftplib # noqa: S402 # nosec import textwrap diff --git a/onetl/connection/file_connection/hdfs/__init__.py b/onetl/connection/file_connection/hdfs/__init__.py index 0eedb25b..ac1bb02f 100644 --- a/onetl/connection/file_connection/hdfs/__init__.py +++ b/onetl/connection/file_connection/hdfs/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.file_connection.hdfs.connection import HDFS from onetl.connection.file_connection.hdfs.slots import HDFSSlots diff --git a/onetl/connection/file_connection/hdfs/connection.py b/onetl/connection/file_connection/hdfs/connection.py index 89c0ec96..8cb6d1b5 100644 --- a/onetl/connection/file_connection/hdfs/connection.py +++ b/onetl/connection/file_connection/hdfs/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/hdfs/slots.py b/onetl/connection/file_connection/hdfs/slots.py index 2f75fefa..5e6dac2e 100644 --- a/onetl/connection/file_connection/hdfs/slots.py +++ b/onetl/connection/file_connection/hdfs/slots.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # 
SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/mixins/__init__.py b/onetl/connection/file_connection/mixins/__init__.py index 7b11e58a..422a1a03 100644 --- a/onetl/connection/file_connection/mixins/__init__.py +++ b/onetl/connection/file_connection/mixins/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.file_connection.mixins.rename_dir_mixin import RenameDirMixin diff --git a/onetl/connection/file_connection/mixins/rename_dir_mixin.py b/onetl/connection/file_connection/mixins/rename_dir_mixin.py index c110745c..858e29ed 100644 --- a/onetl/connection/file_connection/mixins/rename_dir_mixin.py +++ b/onetl/connection/file_connection/mixins/rename_dir_mixin.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/s3.py b/onetl/connection/file_connection/s3.py index 0f411c85..2b941483 100644 --- a/onetl/connection/file_connection/s3.py +++ b/onetl/connection/file_connection/s3.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/samba.py b/onetl/connection/file_connection/samba.py index 430e15a7..c5fe74a5 100644 --- a/onetl/connection/file_connection/samba.py +++ b/onetl/connection/file_connection/samba.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/sftp.py b/onetl/connection/file_connection/sftp.py index 92db2adc..37a74a40 100644 --- a/onetl/connection/file_connection/sftp.py +++ b/onetl/connection/file_connection/sftp.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_connection/webdav.py b/onetl/connection/file_connection/webdav.py index 44ac766a..2b9e50eb 100644 --- a/onetl/connection/file_connection/webdav.py +++ b/onetl/connection/file_connection/webdav.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/__init__.py b/onetl/connection/file_df_connection/__init__.py index 07325b1d..54237d1f 100644 --- a/onetl/connection/file_df_connection/__init__.py +++ b/onetl/connection/file_df_connection/__init__.py @@ -1,2 +1,2 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 diff --git a/onetl/connection/file_df_connection/spark_file_df_connection.py b/onetl/connection/file_df_connection/spark_file_df_connection.py index 06121139..c75b060c 100644 --- a/onetl/connection/file_df_connection/spark_file_df_connection.py +++ b/onetl/connection/file_df_connection/spark_file_df_connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS 
(Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/spark_hdfs/__init__.py b/onetl/connection/file_df_connection/spark_hdfs/__init__.py index 6977eb4a..338638e9 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/__init__.py +++ b/onetl/connection/file_df_connection/spark_hdfs/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.file_df_connection.spark_hdfs.connection import SparkHDFS from onetl.connection.file_df_connection.spark_hdfs.slots import SparkHDFSSlots diff --git a/onetl/connection/file_df_connection/spark_hdfs/connection.py b/onetl/connection/file_df_connection/spark_hdfs/connection.py index 10ff1005..36d20d4b 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/connection.py +++ b/onetl/connection/file_df_connection/spark_hdfs/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/spark_hdfs/slots.py b/onetl/connection/file_df_connection/spark_hdfs/slots.py index 4dab6b54..c8e0123d 100644 --- a/onetl/connection/file_df_connection/spark_hdfs/slots.py +++ b/onetl/connection/file_df_connection/spark_hdfs/slots.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/spark_local_fs.py b/onetl/connection/file_df_connection/spark_local_fs.py index 71c70414..678b1dd8 100644 --- a/onetl/connection/file_df_connection/spark_local_fs.py +++ b/onetl/connection/file_df_connection/spark_local_fs.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/spark_s3/__init__.py b/onetl/connection/file_df_connection/spark_s3/__init__.py index 303a7496..2a72ab82 100644 --- a/onetl/connection/file_df_connection/spark_s3/__init__.py +++ b/onetl/connection/file_df_connection/spark_s3/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.connection.file_df_connection.spark_s3.connection import SparkS3 from onetl.connection.file_df_connection.spark_s3.extra import SparkS3Extra diff --git a/onetl/connection/file_df_connection/spark_s3/connection.py b/onetl/connection/file_df_connection/spark_s3/connection.py index 8fe07d10..182955cd 100644 --- a/onetl/connection/file_df_connection/spark_s3/connection.py +++ b/onetl/connection/file_df_connection/spark_s3/connection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/connection/file_df_connection/spark_s3/extra.py b/onetl/connection/file_df_connection/spark_s3/extra.py index 440eabed..62af3a5b 100644 --- a/onetl/connection/file_df_connection/spark_s3/extra.py +++ 
b/onetl/connection/file_df_connection/spark_s3/extra.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import re diff --git a/onetl/connection/kerberos_helpers.py b/onetl/connection/kerberos_helpers.py index 5e2bd65b..b1dd5019 100644 --- a/onetl/connection/kerberos_helpers.py +++ b/onetl/connection/kerberos_helpers.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/core/__init__.py b/onetl/core/__init__.py index 1768b603..9796b173 100644 --- a/onetl/core/__init__.py +++ b/onetl/core/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import textwrap import warnings diff --git a/onetl/core/file_filter/__init__.py b/onetl/core/file_filter/__init__.py index 9f2c9a5a..8e875f72 100644 --- a/onetl/core/file_filter/__init__.py +++ b/onetl/core/file_filter/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.core.file_filter.file_filter import FileFilter diff --git a/onetl/core/file_filter/file_filter.py b/onetl/core/file_filter/file_filter.py index a8fd7b69..5a885170 100644 --- a/onetl/core/file_filter/file_filter.py +++ b/onetl/core/file_filter/file_filter.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/core/file_limit/__init__.py b/onetl/core/file_limit/__init__.py index 58759c16..9913f1f4 100644 --- a/onetl/core/file_limit/__init__.py +++ b/onetl/core/file_limit/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.core.file_limit.file_limit import FileLimit diff --git a/onetl/core/file_limit/file_limit.py b/onetl/core/file_limit/file_limit.py index d3b98718..de82dafe 100644 --- a/onetl/core/file_limit/file_limit.py +++ b/onetl/core/file_limit/file_limit.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/db/__init__.py b/onetl/db/__init__.py index 2cd60981..115d2a6a 100644 --- a/onetl/db/__init__.py +++ b/onetl/db/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.db.db_reader import DBReader from onetl.db.db_writer import DBWriter diff --git a/onetl/db/db_reader/__init__.py b/onetl/db/db_reader/__init__.py index a71bb526..66f207cd 100644 --- a/onetl/db/db_reader/__init__.py +++ b/onetl/db/db_reader/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.db.db_reader.db_reader import DBReader diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index f560104d..a4f45ab0 100644 --- a/onetl/db/db_reader/db_reader.py +++ 
b/onetl/db/db_reader/db_reader.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/db/db_writer/__init__.py b/onetl/db/db_writer/__init__.py index b181c7f0..f5408a18 100644 --- a/onetl/db/db_writer/__init__.py +++ b/onetl/db/db_writer/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.db.db_writer.db_writer import DBWriter diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index 0b07ec4e..c261ad23 100644 --- a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/exception.py b/onetl/exception.py index 03650e9a..e3c965f9 100644 --- a/onetl/exception.py +++ b/onetl/exception.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import textwrap diff --git a/onetl/file/__init__.py b/onetl/file/__init__.py index a747f037..6f6fdd4f 100644 --- a/onetl/file/__init__.py +++ b/onetl/file/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_df_reader import FileDFReader from onetl.file.file_df_writer import FileDFWriter diff --git a/onetl/file/file_df_reader/__init__.py b/onetl/file/file_df_reader/__init__.py index b7b1ff18..a273107a 100644 --- a/onetl/file/file_df_reader/__init__.py +++ b/onetl/file/file_df_reader/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_df_reader.file_df_reader import FileDFReader diff --git a/onetl/file/file_df_reader/file_df_reader.py b/onetl/file/file_df_reader/file_df_reader.py index f1e2f01e..517e23a9 100644 --- a/onetl/file/file_df_reader/file_df_reader.py +++ b/onetl/file/file_df_reader/file_df_reader.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_df_reader/options.py b/onetl/file/file_df_reader/options.py index 714cf1a9..d6559bfb 100644 --- a/onetl/file/file_df_reader/options.py +++ b/onetl/file/file_df_reader/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_df_writer/__init__.py b/onetl/file/file_df_writer/__init__.py index 0cbb35bb..37407c47 100644 --- a/onetl/file/file_df_writer/__init__.py +++ b/onetl/file/file_df_writer/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_df_writer.file_df_writer import FileDFWriter diff --git a/onetl/file/file_df_writer/file_df_writer.py b/onetl/file/file_df_writer/file_df_writer.py 
index 35baaf15..0daea008 100644 --- a/onetl/file/file_df_writer/file_df_writer.py +++ b/onetl/file/file_df_writer/file_df_writer.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_df_writer/options.py b/onetl/file/file_df_writer/options.py index 01bd9ee4..81971919 100644 --- a/onetl/file/file_df_writer/options.py +++ b/onetl/file/file_df_writer/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_downloader/__init__.py b/onetl/file/file_downloader/__init__.py index e19f5052..dd6660d0 100644 --- a/onetl/file/file_downloader/__init__.py +++ b/onetl/file/file_downloader/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_downloader.file_downloader import FileDownloader from onetl.file.file_downloader.options import FileDownloaderOptions diff --git a/onetl/file/file_downloader/file_downloader.py b/onetl/file/file_downloader/file_downloader.py index 069f8c69..ffd4925c 100644 --- a/onetl/file/file_downloader/file_downloader.py +++ b/onetl/file/file_downloader/file_downloader.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_downloader/options.py b/onetl/file/file_downloader/options.py index 91dd44d9..54b74b32 100644 --- a/onetl/file/file_downloader/options.py +++ b/onetl/file/file_downloader/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_downloader/result.py b/onetl/file/file_downloader/result.py index 96d184e9..36d3bff1 100644 --- a/onetl/file/file_downloader/result.py +++ b/onetl/file/file_downloader/result.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_mover/__init__.py b/onetl/file/file_mover/__init__.py index a1baa0db..f1260d07 100644 --- a/onetl/file/file_mover/__init__.py +++ b/onetl/file/file_mover/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_mover.file_mover import FileMover from onetl.file.file_mover.options import FileMoverOptions diff --git a/onetl/file/file_mover/file_mover.py b/onetl/file/file_mover/file_mover.py index 0bb2e666..3fd7e5c6 100644 --- a/onetl/file/file_mover/file_mover.py +++ b/onetl/file/file_mover/file_mover.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_mover/options.py b/onetl/file/file_mover/options.py index ce9c12b2..9d52139f 100644 --- a/onetl/file/file_mover/options.py +++ 
b/onetl/file/file_mover/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_mover/result.py b/onetl/file/file_mover/result.py index 99313d0f..e7afcdab 100644 --- a/onetl/file/file_mover/result.py +++ b/onetl/file/file_mover/result.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_result.py b/onetl/file/file_result.py index d2b9aec6..01844d7f 100644 --- a/onetl/file/file_result.py +++ b/onetl/file/file_result.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_set.py b/onetl/file/file_set.py index 2447ad04..c4fb2159 100644 --- a/onetl/file/file_set.py +++ b/onetl/file/file_set.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import os import textwrap diff --git a/onetl/file/file_uploader/__init__.py b/onetl/file/file_uploader/__init__.py index 85fc5fcd..9b1a2974 100644 --- a/onetl/file/file_uploader/__init__.py +++ b/onetl/file/file_uploader/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.file_uploader.file_uploader import FileUploader from onetl.file.file_uploader.options import FileUploaderOptions diff --git a/onetl/file/file_uploader/file_uploader.py b/onetl/file/file_uploader/file_uploader.py index fc6709ce..ebcae6f1 100644 --- a/onetl/file/file_uploader/file_uploader.py +++ b/onetl/file/file_uploader/file_uploader.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_uploader/options.py b/onetl/file/file_uploader/options.py index b046ec3a..98db43eb 100644 --- a/onetl/file/file_uploader/options.py +++ b/onetl/file/file_uploader/options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/file_uploader/result.py b/onetl/file/file_uploader/result.py index 34638bae..cf45a0c4 100644 --- a/onetl/file/file_uploader/result.py +++ b/onetl/file/file_uploader/result.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/filter/__init__.py b/onetl/file/filter/__init__.py index 1ebee030..88e2f835 100644 --- a/onetl/file/filter/__init__.py +++ b/onetl/file/filter/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.filter.exclude_dir import ExcludeDir from onetl.file.filter.file_hwm import FileHWMFilter diff --git a/onetl/file/filter/exclude_dir.py 
b/onetl/file/filter/exclude_dir.py index f5b096d2..d0bc1f7e 100644 --- a/onetl/file/filter/exclude_dir.py +++ b/onetl/file/filter/exclude_dir.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/filter/file_hwm.py b/onetl/file/filter/file_hwm.py index 232bf451..398cab21 100644 --- a/onetl/file/filter/file_hwm.py +++ b/onetl/file/filter/file_hwm.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/filter/glob.py b/onetl/file/filter/glob.py index db622cfd..529feae2 100644 --- a/onetl/file/filter/glob.py +++ b/onetl/file/filter/glob.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/filter/match_all_filters.py b/onetl/file/filter/match_all_filters.py index 484bee93..cfc59d17 100644 --- a/onetl/file/filter/match_all_filters.py +++ b/onetl/file/filter/match_all_filters.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import logging from typing import Iterable diff --git a/onetl/file/filter/regexp.py b/onetl/file/filter/regexp.py index 48e321ad..e698bc15 100644 --- a/onetl/file/filter/regexp.py +++ b/onetl/file/filter/regexp.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/__init__.py b/onetl/file/format/__init__.py index f149f5c2..a7e17229 100644 --- a/onetl/file/format/__init__.py +++ b/onetl/file/format/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.format.avro import Avro from onetl.file.format.csv import CSV diff --git a/onetl/file/format/avro.py b/onetl/file/format/avro.py index 418e4064..1f6e2e0e 100644 --- a/onetl/file/format/avro.py +++ b/onetl/file/format/avro.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/csv.py b/onetl/file/format/csv.py index 1c4442fd..c958bb69 100644 --- a/onetl/file/format/csv.py +++ b/onetl/file/format/csv.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/excel.py b/onetl/file/format/excel.py index 3f26522f..a62d2941 100644 --- a/onetl/file/format/excel.py +++ b/onetl/file/format/excel.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/file_format.py b/onetl/file/format/file_format.py index 4a7de4f3..e7998223 100644 --- a/onetl/file/format/file_format.py +++ 
b/onetl/file/format/file_format.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/json.py b/onetl/file/format/json.py index 085d125e..bfbda83d 100644 --- a/onetl/file/format/json.py +++ b/onetl/file/format/json.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/jsonline.py b/onetl/file/format/jsonline.py index 1d1c910d..d573e26e 100644 --- a/onetl/file/format/jsonline.py +++ b/onetl/file/format/jsonline.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/orc.py b/onetl/file/format/orc.py index f108a150..b342de43 100644 --- a/onetl/file/format/orc.py +++ b/onetl/file/format/orc.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/parquet.py b/onetl/file/format/parquet.py index f96ad444..f3c9a75b 100644 --- a/onetl/file/format/parquet.py +++ b/onetl/file/format/parquet.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/format/xml.py b/onetl/file/format/xml.py index 11425809..7946f997 100644 --- a/onetl/file/format/xml.py +++ b/onetl/file/format/xml.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/limit/__init__.py b/onetl/file/limit/__init__.py index 1e00ffdd..2d353fc2 100644 --- a/onetl/file/limit/__init__.py +++ b/onetl/file/limit/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.file.limit.limits_reached import limits_reached from onetl.file.limit.limits_stop_at import limits_stop_at diff --git a/onetl/file/limit/limits_reached.py b/onetl/file/limit/limits_reached.py index 27d7fb50..8171df93 100644 --- a/onetl/file/limit/limits_reached.py +++ b/onetl/file/limit/limits_reached.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/limit/limits_stop_at.py b/onetl/file/limit/limits_stop_at.py index 035ac642..9e478a8a 100644 --- a/onetl/file/limit/limits_stop_at.py +++ b/onetl/file/limit/limits_stop_at.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/limit/max_files_count.py b/onetl/file/limit/max_files_count.py index ec604ff4..c62292fa 100644 --- a/onetl/file/limit/max_files_count.py +++ b/onetl/file/limit/max_files_count.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 
MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/file/limit/reset_limits.py b/onetl/file/limit/reset_limits.py index de9201da..8f95a911 100644 --- a/onetl/file/limit/reset_limits.py +++ b/onetl/file/limit/reset_limits.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/__init__.py b/onetl/hooks/__init__.py index 6002b402..8ba492dc 100644 --- a/onetl/hooks/__init__.py +++ b/onetl/hooks/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.hooks.hook import HookPriority, hook from onetl.hooks.hooks_state import resume_all_hooks, skip_all_hooks, stop_all_hooks diff --git a/onetl/hooks/hook.py b/onetl/hooks/hook.py index 619cff7d..abf010ef 100644 --- a/onetl/hooks/hook.py +++ b/onetl/hooks/hook.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/hook_collection.py b/onetl/hooks/hook_collection.py index d715086c..443aa9f2 100644 --- a/onetl/hooks/hook_collection.py +++ b/onetl/hooks/hook_collection.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/hooks_state.py b/onetl/hooks/hooks_state.py index 53a2c0c3..4d489f0f 100644 --- a/onetl/hooks/hooks_state.py +++ b/onetl/hooks/hooks_state.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/method_inheritance_stack.py b/onetl/hooks/method_inheritance_stack.py index 99fef076..4999fc6f 100644 --- a/onetl/hooks/method_inheritance_stack.py +++ b/onetl/hooks/method_inheritance_stack.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/slot.py b/onetl/hooks/slot.py index 6d4b0b87..ee066606 100644 --- a/onetl/hooks/slot.py +++ b/onetl/hooks/slot.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hooks/support_hooks.py b/onetl/hooks/support_hooks.py index 33d440b5..d6323107 100644 --- a/onetl/hooks/support_hooks.py +++ b/onetl/hooks/support_hooks.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hwm/__init__.py b/onetl/hwm/__init__.py index e516ebea..88984b6b 100644 --- a/onetl/hwm/__init__.py +++ b/onetl/hwm/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.hwm.auto_hwm import AutoDetectHWM from 
onetl.hwm.window import Edge, Window diff --git a/onetl/hwm/auto_hwm.py b/onetl/hwm/auto_hwm.py index 5d346433..bb996509 100644 --- a/onetl/hwm/auto_hwm.py +++ b/onetl/hwm/auto_hwm.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hwm/store/__init__.py b/onetl/hwm/store/__init__.py index 0e34caa0..4fb2d991 100644 --- a/onetl/hwm/store/__init__.py +++ b/onetl/hwm/store/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import textwrap import warnings diff --git a/onetl/hwm/store/hwm_class_registry.py b/onetl/hwm/store/hwm_class_registry.py index 82b0eef2..b15b77af 100644 --- a/onetl/hwm/store/hwm_class_registry.py +++ b/onetl/hwm/store/hwm_class_registry.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hwm/store/yaml_hwm_store.py b/onetl/hwm/store/yaml_hwm_store.py index 2ffd24f6..4a7ad690 100644 --- a/onetl/hwm/store/yaml_hwm_store.py +++ b/onetl/hwm/store/yaml_hwm_store.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/hwm/window.py b/onetl/hwm/window.py index 7a902000..35251dad 100644 --- a/onetl/hwm/window.py +++ b/onetl/hwm/window.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/__init__.py b/onetl/impl/__init__.py index 76d32f48..f8396131 100644 --- a/onetl/impl/__init__.py +++ b/onetl/impl/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.impl.base_model import BaseModel from onetl.impl.failed_local_file import FailedLocalFile diff --git a/onetl/impl/base_model.py b/onetl/impl/base_model.py index 3208619e..7478a823 100644 --- a/onetl/impl/base_model.py +++ b/onetl/impl/base_model.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 # isort: skip_file diff --git a/onetl/impl/failed_local_file.py b/onetl/impl/failed_local_file.py index 34a12bb9..a53c2fbe 100644 --- a/onetl/impl/failed_local_file.py +++ b/onetl/impl/failed_local_file.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/file_exist_behavior.py b/onetl/impl/file_exist_behavior.py index 1081aa0e..0933bf44 100644 --- a/onetl/impl/file_exist_behavior.py +++ b/onetl/impl/file_exist_behavior.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import logging import warnings diff --git a/onetl/impl/frozen_model.py b/onetl/impl/frozen_model.py index 10cb06a6..e9965b53 100644 --- a/onetl/impl/frozen_model.py +++ 
b/onetl/impl/frozen_model.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/generic_options.py b/onetl/impl/generic_options.py index 8d3e629b..df4fea7a 100644 --- a/onetl/impl/generic_options.py +++ b/onetl/impl/generic_options.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/local_path.py b/onetl/impl/local_path.py index 0dc70986..0f52681e 100644 --- a/onetl/impl/local_path.py +++ b/onetl/impl/local_path.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 import os import sys diff --git a/onetl/impl/path_container.py b/onetl/impl/path_container.py index 85ee7e82..b974aeb6 100644 --- a/onetl/impl/path_container.py +++ b/onetl/impl/path_container.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/path_repr.py b/onetl/impl/path_repr.py index 5a61b740..ad007280 100644 --- a/onetl/impl/path_repr.py +++ b/onetl/impl/path_repr.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/remote_directory.py b/onetl/impl/remote_directory.py index 2b52346e..6e0188e0 100644 --- a/onetl/impl/remote_directory.py +++ b/onetl/impl/remote_directory.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/remote_file.py b/onetl/impl/remote_file.py index 86193d44..1f0408ce 100644 --- a/onetl/impl/remote_file.py +++ b/onetl/impl/remote_file.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/impl/remote_path.py b/onetl/impl/remote_path.py index 78fce394..672cee3e 100644 --- a/onetl/impl/remote_path.py +++ b/onetl/impl/remote_path.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from pathlib import PurePosixPath diff --git a/onetl/impl/remote_path_stat.py b/onetl/impl/remote_path_stat.py index 85974803..7b500962 100644 --- a/onetl/impl/remote_path_stat.py +++ b/onetl/impl/remote_path_stat.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/log.py b/onetl/log.py index aeed6403..d7fdd8e8 100644 --- a/onetl/log.py +++ b/onetl/log.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/plugins/__init__.py b/onetl/plugins/__init__.py index 8127ff0d..e73221fb 100644 
--- a/onetl/plugins/__init__.py +++ b/onetl/plugins/__init__.py @@ -1,3 +1,3 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.plugins.import_plugins import import_plugins diff --git a/onetl/plugins/import_plugins.py b/onetl/plugins/import_plugins.py index f22bd280..17ea9280 100644 --- a/onetl/plugins/import_plugins.py +++ b/onetl/plugins/import_plugins.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/__init__.py b/onetl/strategy/__init__.py index 1a0ff464..6fb490c3 100644 --- a/onetl/strategy/__init__.py +++ b/onetl/strategy/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from onetl.strategy.base_strategy import BaseStrategy from onetl.strategy.incremental_strategy import ( diff --git a/onetl/strategy/base_strategy.py b/onetl/strategy/base_strategy.py index 0daf309d..ccac5361 100644 --- a/onetl/strategy/base_strategy.py +++ b/onetl/strategy/base_strategy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/batch_hwm_strategy.py b/onetl/strategy/batch_hwm_strategy.py index 1e5ec5b5..deaaf80c 100644 --- a/onetl/strategy/batch_hwm_strategy.py +++ b/onetl/strategy/batch_hwm_strategy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/hwm_store/__init__.py b/onetl/strategy/hwm_store/__init__.py index 7a0338d3..de994c20 100644 --- a/onetl/strategy/hwm_store/__init__.py +++ b/onetl/strategy/hwm_store/__init__.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 # TODO: remove in 1.0.0 diff --git a/onetl/strategy/hwm_strategy.py b/onetl/strategy/hwm_strategy.py index 02249554..570b6c2a 100644 --- a/onetl/strategy/hwm_strategy.py +++ b/onetl/strategy/hwm_strategy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/incremental_strategy.py b/onetl/strategy/incremental_strategy.py index 0397514b..14ac8862 100644 --- a/onetl/strategy/incremental_strategy.py +++ b/onetl/strategy/incremental_strategy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/snapshot_strategy.py b/onetl/strategy/snapshot_strategy.py index 77ed4b35..0c0a54b0 100644 --- a/onetl/strategy/snapshot_strategy.py +++ b/onetl/strategy/snapshot_strategy.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/strategy/strategy_manager.py 
b/onetl/strategy/strategy_manager.py index bb380ae5..f66930c7 100644 --- a/onetl/strategy/strategy_manager.py +++ b/onetl/strategy/strategy_manager.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations diff --git a/onetl/version.py b/onetl/version.py index 1a3c6cec..af26d895 100644 --- a/onetl/version.py +++ b/onetl/version.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: 2021-2024 MTS (Mobile Telesystems) +# SPDX-FileCopyrightText: 2021-2024 MTS PJSC # SPDX-License-Identifier: Apache-2.0 """ __version__ parameter required to be able to output to the console From cf1dca4f0ec34d88f03257118a376bbc7c7ae59d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 29 Aug 2024 09:59:10 +0000 Subject: [PATCH 50/64] Bump version --- docs/changelog/0.12.0.rst | 53 +++++++++++++++++++ docs/changelog/index.rst | 1 + .../changelog/next_release/+yield.feature.rst | 1 - docs/changelog/next_release/292.feature.rst | 1 - docs/changelog/next_release/303.feature.1.rst | 1 - docs/changelog/next_release/303.feature.2.rst | 10 ---- docs/changelog/next_release/304.breaking.rst | 3 -- docs/changelog/next_release/304.feature.rst | 6 --- docs/changelog/next_release/305.feature.rst | 1 - docs/changelog/next_release/306.feature.rst | 1 - docs/changelog/next_release/308.bugfix.rst | 1 - onetl/VERSION | 2 +- 12 files changed, 55 insertions(+), 26 deletions(-) create mode 100644 docs/changelog/0.12.0.rst delete mode 100644 docs/changelog/next_release/+yield.feature.rst delete mode 100644 docs/changelog/next_release/292.feature.rst delete mode 100644 docs/changelog/next_release/303.feature.1.rst delete mode 100644 docs/changelog/next_release/303.feature.2.rst delete mode 100644 docs/changelog/next_release/304.breaking.rst delete mode 100644 docs/changelog/next_release/304.feature.rst delete mode 100644 docs/changelog/next_release/305.feature.rst delete mode 100644 docs/changelog/next_release/306.feature.rst delete mode 100644 docs/changelog/next_release/308.bugfix.rst diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst new file mode 100644 index 00000000..dc4f4b83 --- /dev/null +++ b/docs/changelog/0.12.0.rst @@ -0,0 +1,53 @@ +0.12.0 (2024-08-29) +=================== + +Breaking Changes +---------------- + +- Change connection URL used for generating HWM names of S3 and Samba sources: + * ``smb://host:port`` -> ``smb://host:port/share`` + * ``s3://host:port`` -> ``s3://host:port/bucket`` (:github:pull:`304`) + +- Update ``Excel`` package from ``0.20.3`` to ``0.20.4``, to include Spark 3.5.1 support. (:github:pull:`306`) + +Features +-------- + +- Add support for specifying file formats (``ORC``, ``Parquet``, ``CSV``, etc.) in ``HiveWriteOptions.format`` (:github:pull:`292`): + + .. code:: python + + Hive.WriteOptions(format=ORC(compression="snappy")) + +- Collect Spark execution metrics in the following methods, and log them in DEBUG mode: + * ``DBWriter.run()`` + * ``FileDFWriter.run()`` + * ``Hive.sql()`` + * ``Hive.execute()`` + + This is implemented using a custom ``SparkListener`` which wraps the entire method call, and + then reports collected metrics. But these metrics may sometimes be missing due to Spark architecture, + so they are not a reliable source of information.
That's why logs are printed only in DEBUG mode, and + are not returned as method call result. (:github:pull:`303`) + +- Generate default ``jobDescription`` based on currently executed method. Examples: + * ``DBWriter() -> Postgres[host:5432/database]`` + * ``MongoDB[localhost:27017/admin] -> DBReader.run()`` + * ``Hive[cluster].execute()`` + + If user already set custom ``jobDescription``, it will left intact. (:github:pull:`304`) + +- Add log.info about JDBC dialect usage (:github:pull:`305`): + + .. code:: text + + |MySQL| Detected dialect: 'org.apache.spark.sql.jdbc.MySQLDialect' + +- Log estimated size of in-memory dataframe created by ``JDBC.fetch`` and ``JDBC.execute`` methods. (:github:pull:`303`) + + +Bug Fixes +--------- + +- Fix passing ``Greenplum(extra={"options": ...)`` during read/write operations. (:github:pull:`308`) +- Do not raise exception if yield-based hook whas something past (and only one) ``yield``. diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index 4bdac946..7700528e 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -3,6 +3,7 @@ :caption: Changelog DRAFT + 0.12.0 0.11.1 0.11.0 0.10.2 diff --git a/docs/changelog/next_release/+yield.feature.rst b/docs/changelog/next_release/+yield.feature.rst deleted file mode 100644 index efc58606..00000000 --- a/docs/changelog/next_release/+yield.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Do not raise exception if yield-based hook whas something past (and only one) ``yield``. diff --git a/docs/changelog/next_release/292.feature.rst b/docs/changelog/next_release/292.feature.rst deleted file mode 100644 index e50a5fcd..00000000 --- a/docs/changelog/next_release/292.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add support for specifying file formats (``ORC``, ``Parquet``, ``CSV``, etc.) in ``HiveWriteOptions.format``: ``Hive.WriteOptions(format=ORC(compression="snappy"))``. diff --git a/docs/changelog/next_release/303.feature.1.rst b/docs/changelog/next_release/303.feature.1.rst deleted file mode 100644 index 8c0b1e19..00000000 --- a/docs/changelog/next_release/303.feature.1.rst +++ /dev/null @@ -1 +0,0 @@ -Log estimated size of in-memory dataframe created by ``JDBC.fetch`` and ``JDBC.execute`` methods. diff --git a/docs/changelog/next_release/303.feature.2.rst b/docs/changelog/next_release/303.feature.2.rst deleted file mode 100644 index 92bbe13c..00000000 --- a/docs/changelog/next_release/303.feature.2.rst +++ /dev/null @@ -1,10 +0,0 @@ -Collect Spark execution metrics in following methods, and log then in DEBUG mode: -* ``DBWriter.run()`` -* ``FileDFWriter.run()`` -* ``Hive.sql()`` -* ``Hive.execute()`` - -This is implemented using custom ``SparkListener`` which wraps the entire method call, and -then report collected metrics. But these metrics sometimes may be missing due to Spark architecture, -so they are not reliable source of information. That's why logs are printed only in DEBUG mode, and -are not returned as method call result. 
diff --git a/docs/changelog/next_release/304.breaking.rst b/docs/changelog/next_release/304.breaking.rst deleted file mode 100644 index 60598321..00000000 --- a/docs/changelog/next_release/304.breaking.rst +++ /dev/null @@ -1,3 +0,0 @@ -Change connection URL used for generating HWM names of S3 and Samba sources: -* ``smb://host:port`` -> ``smb://host:port/share`` -* ``s3://host:port`` -> ``s3://host:port/bucket`` diff --git a/docs/changelog/next_release/304.feature.rst b/docs/changelog/next_release/304.feature.rst deleted file mode 100644 index 97560354..00000000 --- a/docs/changelog/next_release/304.feature.rst +++ /dev/null @@ -1,6 +0,0 @@ -Generate default ``jobDescription`` based on currently executed method. Examples: -* ``DBWriter() -> Postgres[host:5432/database]`` -* ``MongoDB[localhost:27017/admin] -> DBReader.run()`` -* ``Hive[cluster].execute()`` - -If user already set custom ``jobDescription``, it will left intact. diff --git a/docs/changelog/next_release/305.feature.rst b/docs/changelog/next_release/305.feature.rst deleted file mode 100644 index c4c44dc6..00000000 --- a/docs/changelog/next_release/305.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Add log.info about JDBC dialect usage: ``Detected dialect: 'org.apache.spark.sql.jdbc.MySQLDialect'`` diff --git a/docs/changelog/next_release/306.feature.rst b/docs/changelog/next_release/306.feature.rst deleted file mode 100644 index 1c2b95f7..00000000 --- a/docs/changelog/next_release/306.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Update ``Excel`` package from ``0.20.3`` to ``0.20.4``, to include Spark 3.5.1 support. diff --git a/docs/changelog/next_release/308.bugfix.rst b/docs/changelog/next_release/308.bugfix.rst deleted file mode 100644 index 3ffcdcc5..00000000 --- a/docs/changelog/next_release/308.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fix passing ``Greenplum(extra={"options": ...)`` during read/write operations. 
diff --git a/onetl/VERSION b/onetl/VERSION index bc859cbd..ac454c6a 100644 --- a/onetl/VERSION +++ b/onetl/VERSION @@ -1 +1 @@ -0.11.2 +0.12.0 From 7e70db16b71251a0c68716a2811e529aeb8062e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 29 Aug 2024 09:59:50 +0000 Subject: [PATCH 51/64] [DOP-16999] Log detected JDBC dialect while using DBReader --- onetl/connection/db_connection/greenplum/connection.py | 1 + onetl/connection/db_connection/jdbc_connection/connection.py | 1 + 2 files changed, 2 insertions(+) diff --git a/onetl/connection/db_connection/greenplum/connection.py b/onetl/connection/db_connection/greenplum/connection.py index ff5c5a76..7f7e8961 100644 --- a/onetl/connection/db_connection/greenplum/connection.py +++ b/onetl/connection/db_connection/greenplum/connection.py @@ -357,6 +357,7 @@ def get_df_schema( columns: list[str] | None = None, options: JDBCReadOptions | None = None, ) -> StructType: + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Fetching schema of table %r ...", self.__class__.__name__, source) query = self.dialect.get_sql_query(source, columns=columns, limit=0, compact=True) diff --git a/onetl/connection/db_connection/jdbc_connection/connection.py b/onetl/connection/db_connection/jdbc_connection/connection.py index 2752dc25..0ea3078c 100644 --- a/onetl/connection/db_connection/jdbc_connection/connection.py +++ b/onetl/connection/db_connection/jdbc_connection/connection.py @@ -159,6 +159,7 @@ def read_source_as_df( limit=limit, ) + log.info("|%s| Detected dialect: '%s'", self.__class__.__name__, self._get_spark_dialect_name()) log.info("|%s| Executing SQL query (on executor):", self.__class__.__name__) log_lines(log, query) From 82685e966d968c518fce87f87aa18dafd0967f3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 29 Aug 2024 10:22:57 +0000 Subject: [PATCH 52/64] Update JDBC & MongoDB packages to latest versions --- docs/changelog/0.12.0.rst | 8 ++++++++ .../db_connection/mongodb/types.rst | 4 ++-- .../db_connection/clickhouse/connection.py | 8 ++++---- .../db_connection/mongodb/connection.py | 14 ++++++------- .../db_connection/mssql/connection.py | 10 +++++----- .../db_connection/mysql/connection.py | 8 ++++---- .../db_connection/oracle/connection.py | 10 +++++----- .../db_connection/postgres/connection.py | 8 ++++---- onetl/file/format/xml.py | 2 +- .../test_clickhouse_unit.py | 6 +++--- .../test_mongodb_unit.py | 18 ++++++++--------- .../test_mssql_unit.py | 20 +++++++++---------- .../test_mysql_unit.py | 6 +++--- .../test_oracle_unit.py | 16 +++++++-------- .../test_postgres_unit.py | 8 ++++---- 15 files changed, 77 insertions(+), 69 deletions(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index dc4f4b83..d0a87cc7 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -8,6 +8,14 @@ Breaking Changes * ``smb://host:port`` -> ``smb://host:port/share`` * ``s3://host:port`` -> ``s3://host:port/bucket`` (:github:pull:`304`) +- Update DB connectors/drivers to latest versions: + * Clickhouse ``0.6.0-patch5`` → ``0.6.4`` + * MongoDB ``10.3.0`` → ``10.4.0`` + * MSSQL ``12.6.2`` → ``12.8.1`` + * MySQL ``8.4.0`` → 
``9.0.0`` + * Oracle ``23.4.0.24.05`` → ``23.5.0.24.07`` + * Postgres ``42.7.3`` → ``42.7.4`` + - Update ``Excel`` package from ``0.20.3`` to ``0.20.4``, to include Spark 3.5.1 support. (:github:pull:`306`) Features diff --git a/docs/connection/db_connection/mongodb/types.rst b/docs/connection/db_connection/mongodb/types.rst index 4b22b7cb..f701ac93 100644 --- a/docs/connection/db_connection/mongodb/types.rst +++ b/docs/connection/db_connection/mongodb/types.rst @@ -73,8 +73,8 @@ References Here you can find source code with type conversions: -* `MongoDB -> Spark `_ -* `Spark -> MongoDB `_ +* `MongoDB -> Spark `_ +* `Spark -> MongoDB `_ Supported types --------------- diff --git a/onetl/connection/db_connection/clickhouse/connection.py b/onetl/connection/db_connection/clickhouse/connection.py index 21b282b3..fad82942 100644 --- a/onetl/connection/db_connection/clickhouse/connection.py +++ b/onetl/connection/db_connection/clickhouse/connection.py @@ -37,7 +37,7 @@ class Config: class Clickhouse(JDBCConnection): """Clickhouse JDBC connection. |support_hooks| - Based on Maven package `com.clickhouse:clickhouse-jdbc:0.6.0-patch5 `_ + Based on Maven package `com.clickhouse:clickhouse-jdbc:0.6.4 `_ (`official Clickhouse JDBC driver `_). .. seealso:: @@ -139,7 +139,7 @@ def get_packages( Parameters ---------- package_version : str, optional - ClickHouse JDBC version client packages. Defaults to ``0.6.0-patch5``. + ClickHouse JDBC version client packages. Defaults to ``0.6.4``. .. versionadded:: 0.11.0 @@ -158,7 +158,7 @@ def get_packages( Clickhouse.get_packages(package_version="0.6.0", apache_http_client_version="5.3.1") """ - default_jdbc_version = "0.6.0-patch5" + default_jdbc_version = "0.6.4" default_http_version = "5.3.1" jdbc_version = Version(package_version or default_jdbc_version).min_digits(3) @@ -177,7 +177,7 @@ def get_packages( @classproperty def package(self) -> str: """Get a single string of package names to be downloaded by Spark for establishing a Clickhouse connection.""" - return "com.clickhouse:clickhouse-jdbc:0.6.0-patch5,com.clickhouse:clickhouse-http-client:0.6.0-patch5,org.apache.httpcomponents.client5:httpclient5:5.3.1" + return "com.clickhouse:clickhouse-jdbc:0.6.4,com.clickhouse:clickhouse-http-client:0.6.4,org.apache.httpcomponents.client5:httpclient5:5.3.1" @property def jdbc_url(self) -> str: diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index 4cc7e3ed..f406a232 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -50,7 +50,7 @@ class Config: class MongoDB(DBConnection): """MongoDB connection. |support_hooks| - Based on package `org.mongodb.spark:mongo-spark-connector:10.3.0 `_ + Based on package `org.mongodb.spark:mongo-spark-connector:10.4.0 `_ (`MongoDB connector for Spark `_) .. seealso:: @@ -153,7 +153,7 @@ def get_packages( Spark version in format ``major.minor``. Used only if ``scala_version=None``. package_version : str, optional - Specifies the version of the MongoDB Spark connector to use. Defaults to ``10.3.0``. + Specifies the version of the MongoDB Spark connector to use. Defaults to ``10.4.0``. .. 
versionadded:: 0.11.0 @@ -166,10 +166,10 @@ def get_packages( MongoDB.get_packages(scala_version="2.12") # specify custom connector version - MongoDB.get_packages(scala_version="2.12", package_version="10.3.0") + MongoDB.get_packages(scala_version="2.12", package_version="10.4.0") """ - default_package_version = "10.3.0" + default_package_version = "10.4.0" if scala_version: scala_ver = Version(scala_version).min_digits(2) @@ -196,7 +196,7 @@ def package_spark_3_2(cls) -> str: "use `MongoDB.get_packages(spark_version='3.2')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" @classproperty def package_spark_3_3(cls) -> str: @@ -206,7 +206,7 @@ def package_spark_3_3(cls) -> str: "use `MongoDB.get_packages(spark_version='3.3')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" @classproperty def package_spark_3_4(cls) -> str: @@ -216,7 +216,7 @@ def package_spark_3_4(cls) -> str: "use `MongoDB.get_packages(spark_version='3.4')` instead" ) warnings.warn(msg, UserWarning, stacklevel=3) - return "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" + return "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" @slot def pipeline( diff --git a/onetl/connection/db_connection/mssql/connection.py b/onetl/connection/db_connection/mssql/connection.py index 18a08e32..50235332 100644 --- a/onetl/connection/db_connection/mssql/connection.py +++ b/onetl/connection/db_connection/mssql/connection.py @@ -34,7 +34,7 @@ class Config: class MSSQL(JDBCConnection): """MSSQL JDBC connection. |support_hooks| - Based on Maven package `com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8 `_ + Based on Maven package `com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8 `_ (`official MSSQL JDBC driver `_). @@ -205,7 +205,7 @@ def get_packages( java_version : str, optional Java major version, defaults to ``8``. Must be ``8`` or ``11``. package_version : str, optional - Specifies the version of the MSSQL JDBC driver to use. Defaults to ``12.6.2.``. + Specifies the version of the MSSQL JDBC driver to use. Defaults to ``12.8.1.``. Examples -------- @@ -216,10 +216,10 @@ def get_packages( MSSQL.get_packages() # specify Java and package versions - MSSQL.get_packages(java_version="8", package_version="12.6.2.jre11") + MSSQL.get_packages(java_version="8", package_version="12.8.1.jre11") """ default_java_version = "8" - default_package_version = "12.6.2" + default_package_version = "12.8.1" java_ver = Version(java_version or default_java_version) if java_ver.major < 8: @@ -241,7 +241,7 @@ def package(cls) -> str: """Get package name to be downloaded by Spark.""" msg = "`MSSQL.package` will be removed in 1.0.0, use `MSSQL.get_packages()` instead" warnings.warn(msg, UserWarning, stacklevel=3) - return "com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8" + return "com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8" @property def jdbc_url(self) -> str: diff --git a/onetl/connection/db_connection/mysql/connection.py b/onetl/connection/db_connection/mysql/connection.py index 15ab2a62..2588d79a 100644 --- a/onetl/connection/db_connection/mysql/connection.py +++ b/onetl/connection/db_connection/mysql/connection.py @@ -36,7 +36,7 @@ class Config: class MySQL(JDBCConnection): """MySQL JDBC connection. 
|support_hooks| - Based on Maven package `com.mysql:mysql-connector-j:8.4.0 `_ + Based on Maven package `com.mysql:mysql-connector-j:9.0.0 `_ (`official MySQL JDBC driver `_). .. seealso:: @@ -132,7 +132,7 @@ def get_packages(cls, package_version: str | None = None) -> list[str]: Parameters ---------- package_version : str, optional - Specifies the version of the MySQL JDBC driver to use. Defaults to ``8.4.0``. + Specifies the version of the MySQL JDBC driver to use. Defaults to ``9.0.0``. .. versionadded:: 0.11.0 @@ -147,7 +147,7 @@ def get_packages(cls, package_version: str | None = None) -> list[str]: # specify a custom package version MySQL.get_packages(package_version="8.2.0") """ - default_version = "8.4.0" + default_version = "9.0.0" version = Version(package_version or default_version).min_digits(3) return [f"com.mysql:mysql-connector-j:{version}"] @@ -157,7 +157,7 @@ def package(cls) -> str: """Get package name to be downloaded by Spark.""" msg = "`MySQL.package` will be removed in 1.0.0, use `MySQL.get_packages()` instead" warnings.warn(msg, UserWarning, stacklevel=3) - return "com.mysql:mysql-connector-j:8.4.0" + return "com.mysql:mysql-connector-j:9.0.0" @property def jdbc_url(self) -> str: diff --git a/onetl/connection/db_connection/oracle/connection.py b/onetl/connection/db_connection/oracle/connection.py index 96ba9bd7..0d163d22 100644 --- a/onetl/connection/db_connection/oracle/connection.py +++ b/onetl/connection/db_connection/oracle/connection.py @@ -79,7 +79,7 @@ class Config: class Oracle(JDBCConnection): """Oracle JDBC connection. |support_hooks| - Based on Maven package `com.oracle.database.jdbc:ojdbc8:23.4.0.24.05 `_ + Based on Maven package `com.oracle.database.jdbc:ojdbc8:23.5.0.24.07 `_ (`official Oracle JDBC driver `_). .. seealso:: @@ -208,7 +208,7 @@ def get_packages( java_version : str, optional Java major version, defaults to "8". Must be "8" or "11". package_version : str, optional - Specifies the version of the Oracle JDBC driver to use. Defaults to "23.4.0.24.05". + Specifies the version of the Oracle JDBC driver to use. Defaults to "23.5.0.24.07". Examples -------- @@ -220,11 +220,11 @@ def get_packages( Oracle.get_packages() # specify Java and package versions - Oracle.get_packages(java_version="8", package_version="23.4.0.24.05") + Oracle.get_packages(java_version="8", package_version="23.5.0.24.07") """ default_java_version = "8" - default_package_version = "23.4.0.24.05" + default_package_version = "23.5.0.24.07" java_ver = Version(java_version or default_java_version) if java_ver.major < 8: @@ -240,7 +240,7 @@ def package(cls) -> str: """Get package name to be downloaded by Spark.""" msg = "`Oracle.package` will be removed in 1.0.0, use `Oracle.get_packages()` instead" warnings.warn(msg, UserWarning, stacklevel=3) - return "com.oracle.database.jdbc:ojdbc8:23.4.0.24.05" + return "com.oracle.database.jdbc:ojdbc8:23.5.0.24.07" @property def jdbc_url(self) -> str: diff --git a/onetl/connection/db_connection/postgres/connection.py b/onetl/connection/db_connection/postgres/connection.py index ac5e50d1..c8a272ba 100644 --- a/onetl/connection/db_connection/postgres/connection.py +++ b/onetl/connection/db_connection/postgres/connection.py @@ -44,7 +44,7 @@ class Config: class Postgres(JDBCConnection): """PostgreSQL JDBC connection. |support_hooks| - Based on Maven package `org.postgresql:postgresql:42.7.3 `_ + Based on Maven package `org.postgresql:postgresql:42.7.4 `_ (`official Postgres JDBC driver `_). .. 
seealso:: @@ -140,7 +140,7 @@ def get_packages(cls, package_version: str | None = None) -> list[str]: Parameters ---------- package_version : str, optional - Specifies the version of the PostgreSQL JDBC driver to use. Defaults to ``42.7.3``. + Specifies the version of the PostgreSQL JDBC driver to use. Defaults to ``42.7.4``. Examples -------- @@ -155,7 +155,7 @@ def get_packages(cls, package_version: str | None = None) -> list[str]: Postgres.get_packages(package_version="42.6.0") """ - default_version = "42.7.3" + default_version = "42.7.4" version = Version(package_version or default_version).min_digits(3) return [f"org.postgresql:postgresql:{version}"] @@ -165,7 +165,7 @@ def package(cls) -> str: """Get package name to be downloaded by Spark.""" msg = "`Postgres.package` will be removed in 1.0.0, use `Postgres.get_packages()` instead" warnings.warn(msg, UserWarning, stacklevel=3) - return "org.postgresql:postgresql:42.7.3" + return "org.postgresql:postgresql:42.7.4" @property def jdbc_url(self) -> str: diff --git a/onetl/file/format/xml.py b/onetl/file/format/xml.py index 7946f997..2e1ad003 100644 --- a/onetl/file/format/xml.py +++ b/onetl/file/format/xml.py @@ -200,7 +200,7 @@ def get_packages( # noqa: WPS231 raise ValueError(f"Package version must be above 0.13, got {version}") log.warning("Passed custom package version %r, it is not guaranteed to be supported", package_version) else: - version = Version("0.18.0").min_digits(3) + version = Version("0.18.0") spark_ver = Version(spark_version) scala_ver = Version(scala_version).min_digits(2) if scala_version else get_default_scala_version(spark_ver) diff --git a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py index 287061d2..9bf7a068 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py @@ -11,7 +11,7 @@ def test_clickhouse_driver(): def test_clickhouse_package(): expected_packages = ( - "com.clickhouse:clickhouse-jdbc:0.6.0-patch5,com.clickhouse:clickhouse-http-client:0.6.0-patch5," + "com.clickhouse:clickhouse-jdbc:0.6.4,com.clickhouse:clickhouse-http-client:0.6.4," "org.apache.httpcomponents.client5:httpclient5:5.3.1" ) assert Clickhouse.package == expected_packages @@ -24,8 +24,8 @@ def test_clickhouse_package(): None, None, [ - "com.clickhouse:clickhouse-jdbc:0.6.0-patch5", - "com.clickhouse:clickhouse-http-client:0.6.0-patch5", + "com.clickhouse:clickhouse-jdbc:0.6.4", + "com.clickhouse:clickhouse-http-client:0.6.4", "org.apache.httpcomponents.client5:httpclient5:5.3.1", ], ), diff --git a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py index 9142848e..4e67f93a 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mongodb_unit.py @@ -12,9 +12,9 @@ def test_mongodb_package(): warning_msg = re.escape("will be removed in 1.0.0, use `MongoDB.get_packages(spark_version=") with pytest.warns(UserWarning, match=warning_msg): - assert MongoDB.package_spark_3_2 == "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" - assert MongoDB.package_spark_3_3 == "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" - assert MongoDB.package_spark_3_4 == "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0" + assert MongoDB.package_spark_3_2 == "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" + assert 
MongoDB.package_spark_3_3 == "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" + assert MongoDB.package_spark_3_4 == "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0" def test_mongodb_get_packages_no_input(): @@ -50,16 +50,16 @@ def test_mongodb_get_packages_scala_version_not_supported(scala_version): @pytest.mark.parametrize( "spark_version, scala_version, package_version, package", [ - (None, "2.12", "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0"), - (None, "2.13", "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.13:10.3.0"), - ("3.2", None, "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0"), - ("3.3", None, "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0"), - ("3.4", None, "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0"), + (None, "2.12", "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0"), + (None, "2.13", "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.13:10.4.0"), + ("3.2", None, "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0"), + ("3.3", None, "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0"), + ("3.4", None, "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0"), ("3.2", "2.12", "10.1.1", "org.mongodb.spark:mongo-spark-connector_2.12:10.1.1"), ("3.4", "2.13", "10.1.1", "org.mongodb.spark:mongo-spark-connector_2.13:10.1.1"), ("3.2", "2.12", "10.2.1", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.1"), ("3.2", "2.12", "10.2.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.2.0"), - ("3.2.4", "2.12.1", "10.3.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.3.0"), + ("3.2.4", "2.12.1", "10.4.0", "org.mongodb.spark:mongo-spark-connector_2.12:10.4.0"), ], ) def test_mongodb_get_packages(spark_version, scala_version, package_version, package): diff --git a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py index d9f3cfda..aedd1990 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mssql_unit.py @@ -14,23 +14,23 @@ def test_mssql_class_attributes(): def test_mssql_package(): warning_msg = re.escape("will be removed in 1.0.0, use `MSSQL.get_packages()` instead") with pytest.warns(UserWarning, match=warning_msg): - assert MSSQL.package == "com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8" + assert MSSQL.package == "com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8" @pytest.mark.parametrize( "java_version, package_version, expected_packages", [ - (None, None, ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8"]), - ("8", None, ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8"]), - ("9", None, ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8"]), - ("11", None, ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre11"]), - ("20", None, ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre11"]), - ("8", "12.6.2.jre8", ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8"]), - ("11", "12.6.2.jre11", ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre11"]), + (None, None, ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8"]), + ("8", None, ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8"]), + ("9", None, ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8"]), + ("11", None, ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11"]), + ("20", None, ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11"]), + ("8", "12.8.1.jre8", ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8"]), + ("11", "12.8.1.jre11", 
["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11"]), ("11", "12.7.0.jre11-preview", ["com.microsoft.sqlserver:mssql-jdbc:12.7.0.jre11-preview"]), ("8", "12.7.0.jre8-preview", ["com.microsoft.sqlserver:mssql-jdbc:12.7.0.jre8-preview"]), - ("8", "12.6.2", ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre8"]), - ("11", "12.6.2", ["com.microsoft.sqlserver:mssql-jdbc:12.6.2.jre11"]), + ("8", "12.8.1", ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre8"]), + ("11", "12.8.1", ["com.microsoft.sqlserver:mssql-jdbc:12.8.1.jre11"]), ], ) def test_mssql_get_packages(java_version, package_version, expected_packages): diff --git a/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py b/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py index 0d57da48..54913c07 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_mysql_unit.py @@ -14,14 +14,14 @@ def test_mysql_class_attributes(): def test_mysql_package(): warning_msg = re.escape("will be removed in 1.0.0, use `MySQL.get_packages()` instead") with pytest.warns(UserWarning, match=warning_msg): - assert MySQL.package == "com.mysql:mysql-connector-j:8.4.0" + assert MySQL.package == "com.mysql:mysql-connector-j:9.0.0" @pytest.mark.parametrize( "package_version, expected_packages", [ - (None, ["com.mysql:mysql-connector-j:8.4.0"]), - ("8.4.0", ["com.mysql:mysql-connector-j:8.4.0"]), + (None, ["com.mysql:mysql-connector-j:9.0.0"]), + ("9.0.0", ["com.mysql:mysql-connector-j:9.0.0"]), ("8.1.0", ["com.mysql:mysql-connector-j:8.1.0"]), ("8.0.33", ["com.mysql:mysql-connector-j:8.0.33"]), ], diff --git a/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py b/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py index dd02b5c9..d0618c89 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_oracle_unit.py @@ -14,11 +14,11 @@ def test_oracle_class_attributes(): def test_oracle_package(): warning_msg = re.escape("will be removed in 1.0.0, use `Oracle.get_packages()` instead") with pytest.warns(UserWarning, match=warning_msg): - assert Oracle.package == "com.oracle.database.jdbc:ojdbc8:23.4.0.24.05" + assert Oracle.package == "com.oracle.database.jdbc:ojdbc8:23.5.0.24.07" def test_oracle_get_packages_no_input(): - assert Oracle.get_packages() == ["com.oracle.database.jdbc:ojdbc8:23.4.0.24.05"] + assert Oracle.get_packages() == ["com.oracle.database.jdbc:ojdbc8:23.5.0.24.07"] @pytest.mark.parametrize("java_version", ["7", "6"]) @@ -30,16 +30,16 @@ def test_oracle_get_packages_java_version_not_supported(java_version): @pytest.mark.parametrize( "java_version, package_version, expected_packages", [ - (None, None, ["com.oracle.database.jdbc:ojdbc8:23.4.0.24.05"]), - ("8", None, ["com.oracle.database.jdbc:ojdbc8:23.4.0.24.05"]), - ("8", "23.4.0.24.05", ["com.oracle.database.jdbc:ojdbc8:23.4.0.24.05"]), + (None, None, ["com.oracle.database.jdbc:ojdbc8:23.5.0.24.07"]), + ("8", None, ["com.oracle.database.jdbc:ojdbc8:23.5.0.24.07"]), + ("8", "23.5.0.24.07", ["com.oracle.database.jdbc:ojdbc8:23.5.0.24.07"]), ("8", "21.13.0.0", ["com.oracle.database.jdbc:ojdbc8:21.13.0.0"]), - ("9", None, ["com.oracle.database.jdbc:ojdbc8:23.4.0.24.05"]), + ("9", None, ["com.oracle.database.jdbc:ojdbc8:23.5.0.24.07"]), ("9", "21.13.0.0", ["com.oracle.database.jdbc:ojdbc8:21.13.0.0"]), - ("11", None, ["com.oracle.database.jdbc:ojdbc11:23.4.0.24.05"]), + ("11", None, ["com.oracle.database.jdbc:ojdbc11:23.5.0.24.07"]), ("11", 
"21.13.0.0", ["com.oracle.database.jdbc:ojdbc11:21.13.0.0"]), ("17", "21.13.0.0", ["com.oracle.database.jdbc:ojdbc11:21.13.0.0"]), - ("20", "23.4.0.24.05", ["com.oracle.database.jdbc:ojdbc11:23.4.0.24.05"]), + ("20", "23.5.0.24.07", ["com.oracle.database.jdbc:ojdbc11:23.5.0.24.07"]), ], ) def test_oracle_get_packages(java_version, package_version, expected_packages): diff --git a/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py b/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py index 2b0080bf..eae69579 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_postgres_unit.py @@ -14,15 +14,15 @@ def test_postgres_class_attributes(): def test_postgres_package(): warning_msg = re.escape("will be removed in 1.0.0, use `Postgres.get_packages()` instead") with pytest.warns(UserWarning, match=warning_msg): - assert Postgres.package == "org.postgresql:postgresql:42.7.3" + assert Postgres.package == "org.postgresql:postgresql:42.7.4" @pytest.mark.parametrize( "package_version, expected_packages", [ - (None, ["org.postgresql:postgresql:42.7.3"]), - ("42.7.3", ["org.postgresql:postgresql:42.7.3"]), - ("42.7.3-patch", ["org.postgresql:postgresql:42.7.3-patch"]), + (None, ["org.postgresql:postgresql:42.7.4"]), + ("42.7.4", ["org.postgresql:postgresql:42.7.4"]), + ("42.7.4-patch", ["org.postgresql:postgresql:42.7.4-patch"]), ("42.6.0", ["org.postgresql:postgresql:42.6.0"]), ], ) From d50c123086a5176d19e5c088c2e926a9ae5964f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Thu, 29 Aug 2024 11:37:00 +0000 Subject: [PATCH 53/64] Update JDBC prerequisites --- docs/connection/db_connection/clickhouse/prerequisites.rst | 4 +++- docs/connection/db_connection/mongodb/prerequisites.rst | 2 +- docs/connection/db_connection/mssql/prerequisites.rst | 4 +++- docs/connection/db_connection/mysql/prerequisites.rst | 6 ++++-- docs/connection/db_connection/oracle/prerequisites.rst | 4 +++- docs/connection/db_connection/postgres/prerequisites.rst | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/docs/connection/db_connection/clickhouse/prerequisites.rst b/docs/connection/db_connection/clickhouse/prerequisites.rst index 03384b1a..b9a7577b 100644 --- a/docs/connection/db_connection/clickhouse/prerequisites.rst +++ b/docs/connection/db_connection/clickhouse/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* Clickhouse server versions: 21.1 or higher +* Clickhouse server versions: + * Officially declared: 22.8 or higher + * Actually supported: 21.1 or higher * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/mongodb/prerequisites.rst b/docs/connection/db_connection/mongodb/prerequisites.rst index 7df5f502..5cc8179a 100644 --- a/docs/connection/db_connection/mongodb/prerequisites.rst +++ b/docs/connection/db_connection/mongodb/prerequisites.rst @@ -10,7 +10,7 @@ Version Compatibility * Spark versions: 3.2.x - 3.5.x * Java versions: 8 - 20 -See `official documentation `_. +See `official documentation `_. 
Installing PySpark ------------------ diff --git a/docs/connection/db_connection/mssql/prerequisites.rst b/docs/connection/db_connection/mssql/prerequisites.rst index 8dde0f6c..c3c9059a 100644 --- a/docs/connection/db_connection/mssql/prerequisites.rst +++ b/docs/connection/db_connection/mssql/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* SQL Server versions: 2014 - 2022 +* SQL Server versions: + * Officially declared: 2016 - 2022 + * Actually supported: 2014 - 2022 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/mysql/prerequisites.rst b/docs/connection/db_connection/mysql/prerequisites.rst index b92f3320..b98186bc 100644 --- a/docs/connection/db_connection/mysql/prerequisites.rst +++ b/docs/connection/db_connection/mysql/prerequisites.rst @@ -6,11 +6,13 @@ Prerequisites Version Compatibility --------------------- -* MySQL server versions: 5.7 - 9.0 +* MySQL server versions: + * Officially declared: 8.0 - 9.0 + * Actually supported: 5.7 - 9.0 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 -See `official documentation `_. +See `official documentation `_. Installing PySpark ------------------ diff --git a/docs/connection/db_connection/oracle/prerequisites.rst b/docs/connection/db_connection/oracle/prerequisites.rst index b5b64e43..82fa55a4 100644 --- a/docs/connection/db_connection/oracle/prerequisites.rst +++ b/docs/connection/db_connection/oracle/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* Oracle Server versions: 23, 21, 19, 18, 12.2 and __probably__ 11.2 (tested, but it's not mentioned in official docs). +* Oracle Server versions: + * Officially declared: 19 - 23 + * Actually supported: 11.2 - 23 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/postgres/prerequisites.rst b/docs/connection/db_connection/postgres/prerequisites.rst index ef83144f..fd8aadc8 100644 --- a/docs/connection/db_connection/postgres/prerequisites.rst +++ b/docs/connection/db_connection/postgres/prerequisites.rst @@ -10,7 +10,7 @@ Version Compatibility * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 -See `official documentation `_. +See `official documentation `_. 
Installing PySpark ------------------ From 4010f9beff61e629da932ab9ca295a78ce56f9e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 30 Aug 2024 08:07:46 +0000 Subject: [PATCH 54/64] Update DB prerequisites --- .github/workflows/data/clickhouse/matrix.yml | 2 +- .github/workflows/data/local-fs/matrix.yml | 3 +-- .github/workflows/data/mongodb/matrix.yml | 2 +- .github/workflows/data/postgres/matrix.yml | 4 ++-- .github/workflows/data/s3/matrix.yml | 2 +- .github/workflows/data/sftp/matrix.yml | 2 +- docker-compose.yml | 2 +- docs/connection/db_connection/clickhouse/prerequisites.rst | 2 +- docs/connection/db_connection/greenplum/prerequisites.rst | 4 +++- docs/connection/db_connection/hive/prerequisites.rst | 4 +++- docs/connection/db_connection/kafka/prerequisites.rst | 4 +++- docs/connection/db_connection/mongodb/prerequisites.rst | 4 +++- docs/connection/db_connection/mssql/prerequisites.rst | 2 +- docs/connection/db_connection/mysql/prerequisites.rst | 2 +- docs/connection/db_connection/oracle/prerequisites.rst | 2 +- docs/connection/db_connection/postgres/prerequisites.rst | 4 +++- docs/connection/db_connection/teradata/prerequisites.rst | 4 +++- 17 files changed, 30 insertions(+), 19 deletions(-) diff --git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index d18856df..1373d5e8 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -10,7 +10,7 @@ min: &min max: &max clickhouse-image: clickhouse/clickhouse-server - clickhouse-version: 24.6.3.70-alpine + clickhouse-version: 24.8.2.3-alpine spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' diff --git a/.github/workflows/data/local-fs/matrix.yml b/.github/workflows/data/local-fs/matrix.yml index c4466f3c..081d9eb8 100644 --- a/.github/workflows/data/local-fs/matrix.yml +++ b/.github/workflows/data/local-fs/matrix.yml @@ -20,8 +20,7 @@ min_excel: &min_excel os: ubuntu-latest max: &max - # Excel package currently has no release for 3.5.2 - spark-version: 3.5.1 + spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/mongodb/matrix.yml b/.github/workflows/data/mongodb/matrix.yml index 4c3d9d86..11892d65 100644 --- a/.github/workflows/data/mongodb/matrix.yml +++ b/.github/workflows/data/mongodb/matrix.yml @@ -8,7 +8,7 @@ min: &min os: ubuntu-latest max: &max - mongodb-version: 7.0.12 + mongodb-version: 7.0.14 spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' diff --git a/.github/workflows/data/postgres/matrix.yml b/.github/workflows/data/postgres/matrix.yml index d37c3a83..cd63ae03 100644 --- a/.github/workflows/data/postgres/matrix.yml +++ b/.github/workflows/data/postgres/matrix.yml @@ -1,5 +1,5 @@ min: &min - # Min supported version by JDBC driver is 8.4, but it is too ancient to be used by anyone in real life + # Min supported version by JDBC driver is 8.2, but it is too ancient to be used by anyone in real life postgres-version: 9.4.26-alpine spark-version: 2.3.1 pydantic-version: 1 @@ -8,7 +8,7 @@ min: &min os: ubuntu-latest max: &max - postgres-version: 16.3-alpine + postgres-version: 16.4-alpine spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' diff --git a/.github/workflows/data/s3/matrix.yml b/.github/workflows/data/s3/matrix.yml index 405b8b68..ffb9aff9 
100644 --- a/.github/workflows/data/s3/matrix.yml +++ b/.github/workflows/data/s3/matrix.yml @@ -9,7 +9,7 @@ min: &min os: ubuntu-latest max: &max - minio-version: 2024.7.26 + minio-version: RELEASE.2024-08-29T01-40-52Z spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' diff --git a/.github/workflows/data/sftp/matrix.yml b/.github/workflows/data/sftp/matrix.yml index 5a5a757c..e54f796c 100644 --- a/.github/workflows/data/sftp/matrix.yml +++ b/.github/workflows/data/sftp/matrix.yml @@ -6,7 +6,7 @@ min: &min os: ubuntu-latest max: &max - openssh-version: 9.6_p1-r0-ls154 + openssh-version: 9.7_p1-r4-ls166 pydantic-version: 2 python-version: '3.12' os: ubuntu-latest diff --git a/docker-compose.yml b/docker-compose.yml index 73e8a21e..d32f682a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -118,7 +118,7 @@ services: platform: linux/amd64 postgres: - image: ${POSTGRES_IMAGE:-postgres:15.2-alpine} + image: ${POSTGRES_IMAGE:-postgres:alpine} restart: unless-stopped env_file: .env.dependencies ports: diff --git a/docs/connection/db_connection/clickhouse/prerequisites.rst b/docs/connection/db_connection/clickhouse/prerequisites.rst index b9a7577b..c66be635 100644 --- a/docs/connection/db_connection/clickhouse/prerequisites.rst +++ b/docs/connection/db_connection/clickhouse/prerequisites.rst @@ -8,7 +8,7 @@ Version Compatibility * Clickhouse server versions: * Officially declared: 22.8 or higher - * Actually supported: 21.1 or higher + * Actually tested: 21.1, 24.8 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/greenplum/prerequisites.rst b/docs/connection/db_connection/greenplum/prerequisites.rst index 04595766..b3cf52d7 100644 --- a/docs/connection/db_connection/greenplum/prerequisites.rst +++ b/docs/connection/db_connection/greenplum/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* Greenplum server versions: 5.x, 6.x, 7.x (requires ``Greenplum.get_packages(package_version="2.3.0")`` or higher) +* Greenplum server versions: + * Officially declared: 5.x, 6.x, and 7.x (which requires ``Greenplum.get_packages(package_version="2.3.0")`` or higher) + * Actually tested: 6.23, 7.0 * Spark versions: 2.3.x - 3.2.x (Spark 3.3+ is not supported yet) * Java versions: 8 - 11 diff --git a/docs/connection/db_connection/hive/prerequisites.rst b/docs/connection/db_connection/hive/prerequisites.rst index d690f918..0f56e7ba 100644 --- a/docs/connection/db_connection/hive/prerequisites.rst +++ b/docs/connection/db_connection/hive/prerequisites.rst @@ -14,7 +14,9 @@ Prerequisites Version Compatibility --------------------- -* Hive Metastore version: 0.12 - 3.1.3 (may require to add proper .jar file explicitly) +* Hive Metastore version: + * Officially declared: 0.12 - 3.1.3 (may require to add proper .jar file explicitly) + * Actually tested: 1.2.100, 2.3.10, 3.1.3 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/kafka/prerequisites.rst b/docs/connection/db_connection/kafka/prerequisites.rst index 29f5885b..db2598d1 100644 --- a/docs/connection/db_connection/kafka/prerequisites.rst +++ b/docs/connection/db_connection/kafka/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* Kafka server versions: 0.10 or higher +* Kafka server versions: + * Officially declared: 0.10 or higher + * Actually tested: 3.2.3, 3.7.1 (only 3.x supports message headers) * Spark versions: 2.4.x - 3.5.x * Java versions: 8 - 17 diff 
--git a/docs/connection/db_connection/mongodb/prerequisites.rst b/docs/connection/db_connection/mongodb/prerequisites.rst index 5cc8179a..8a01d675 100644 --- a/docs/connection/db_connection/mongodb/prerequisites.rst +++ b/docs/connection/db_connection/mongodb/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* MongoDB server versions: 4.0 or higher +* MongoDB server versions: + * Officially declared: 4.0 or higher + * Actually tested: 4.0, 7.0 * Spark versions: 3.2.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/mssql/prerequisites.rst b/docs/connection/db_connection/mssql/prerequisites.rst index c3c9059a..4e9fd263 100644 --- a/docs/connection/db_connection/mssql/prerequisites.rst +++ b/docs/connection/db_connection/mssql/prerequisites.rst @@ -8,7 +8,7 @@ Version Compatibility * SQL Server versions: * Officially declared: 2016 - 2022 - * Actually supported: 2014 - 2022 + * Actually tested: 2014, 2022 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/mysql/prerequisites.rst b/docs/connection/db_connection/mysql/prerequisites.rst index b98186bc..15b7c574 100644 --- a/docs/connection/db_connection/mysql/prerequisites.rst +++ b/docs/connection/db_connection/mysql/prerequisites.rst @@ -8,7 +8,7 @@ Version Compatibility * MySQL server versions: * Officially declared: 8.0 - 9.0 - * Actually supported: 5.7 - 9.0 + * Actually tested: 5.7, 9.0 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/oracle/prerequisites.rst b/docs/connection/db_connection/oracle/prerequisites.rst index 82fa55a4..35dd7569 100644 --- a/docs/connection/db_connection/oracle/prerequisites.rst +++ b/docs/connection/db_connection/oracle/prerequisites.rst @@ -8,7 +8,7 @@ Version Compatibility * Oracle Server versions: * Officially declared: 19 - 23 - * Actually supported: 11.2 - 23 + * Actually tested: 11.2, 23 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/postgres/prerequisites.rst b/docs/connection/db_connection/postgres/prerequisites.rst index fd8aadc8..b1961b0d 100644 --- a/docs/connection/db_connection/postgres/prerequisites.rst +++ b/docs/connection/db_connection/postgres/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* PostgreSQL server versions: 8.2 - 16 +* PostgreSQL server versions: + * Officially declared: 8.2 - 16 + * Actually tested: 9.4, 16 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/docs/connection/db_connection/teradata/prerequisites.rst b/docs/connection/db_connection/teradata/prerequisites.rst index 294f9d53..69f83c49 100644 --- a/docs/connection/db_connection/teradata/prerequisites.rst +++ b/docs/connection/db_connection/teradata/prerequisites.rst @@ -6,7 +6,9 @@ Prerequisites Version Compatibility --------------------- -* Teradata server versions: 16.10 - 20.0 +* Teradata server versions: + * Officially declared: 16.10 - 20.0 + * Actually tested: 16.10 * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 From cb6187402201a8bf71740cbfad33711e16bc7084 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Fri, 30 Aug 2024 09:19:31 +0000 Subject: [PATCH 55/64] Fix CI --- .github/workflows/data/local-fs/matrix.yml | 3 ++- .github/workflows/data/s3/matrix.yml | 2 
+- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/data/local-fs/matrix.yml b/.github/workflows/data/local-fs/matrix.yml index 081d9eb8..365d1d3c 100644 --- a/.github/workflows/data/local-fs/matrix.yml +++ b/.github/workflows/data/local-fs/matrix.yml @@ -20,7 +20,8 @@ min_excel: &min_excel os: ubuntu-latest max: &max - spark-version: 3.5.2 + # Excel package currently supports Spark 3.5.1 max + spark-version: 3.5.1 pydantic-version: 2 python-version: '3.12' java-version: 20 diff --git a/.github/workflows/data/s3/matrix.yml b/.github/workflows/data/s3/matrix.yml index ffb9aff9..3990f312 100644 --- a/.github/workflows/data/s3/matrix.yml +++ b/.github/workflows/data/s3/matrix.yml @@ -9,7 +9,7 @@ min: &min os: ubuntu-latest max: &max - minio-version: RELEASE.2024-08-29T01-40-52Z + minio-version: 2024.8.29 spark-version: 3.5.2 pydantic-version: 2 python-version: '3.12' From baad512aba92f40c28edfb63265386e0d60d93ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 07:50:47 +0000 Subject: [PATCH 56/64] [DOP-18948] Add 0.11.2 to CHANGELOG --- docs/changelog/0.11.2.rst | 7 +++++++ docs/changelog/0.12.0.rst | 2 +- docs/changelog/index.rst | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 docs/changelog/0.11.2.rst diff --git a/docs/changelog/0.11.2.rst b/docs/changelog/0.11.2.rst new file mode 100644 index 00000000..5c954cbf --- /dev/null +++ b/docs/changelog/0.11.2.rst @@ -0,0 +1,7 @@ +0.11.2 (2024-09-02) +=================== + +Bug Fixes +--------- + +- Fix passing ``Greenplum(extra={"options": ...})`` during read/write operations. (:github:pull:`308`) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index d0a87cc7..eb8af50d 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -57,5 +57,5 @@ Features Bug Fixes --------- -- Fix passing ``Greenplum(extra={"options": ...)`` during read/write operations. (:github:pull:`308`) +- Fix passing ``Greenplum(extra={"options": ...})`` during read/write operations. (:github:pull:`308`) - Do not raise exception if yield-based hook has something past (and only one) ``yield``. diff --git a/docs/changelog/index.rst b/docs/changelog/index.rst index 7700528e..756f0cb1 100644 --- a/docs/changelog/index.rst +++ b/docs/changelog/index.rst @@ -4,6 +4,7 @@ DRAFT 0.12.0 + 0.11.2 0.11.1 0.11.0 0.10.2 From 2e713e345f76b9e06322848f208cad5faba32a57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 08:02:46 +0000 Subject: [PATCH 57/64] [DOP-18743] Update jobDescription format --- docs/changelog/0.12.0.rst | 4 ++-- .../db_connection/mongodb/connection.py | 5 +---- onetl/db/db_reader/db_reader.py | 15 +++++++-------- onetl/db/db_writer/db_writer.py | 15 +++++++-------- onetl/file/file_df_reader/file_df_reader.py | 11 +++++++---- onetl/file/file_df_writer/file_df_writer.py | 15 +++++++-------- 6 files changed, 31 insertions(+), 34 deletions(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index eb8af50d..4a09682a 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -39,8 +39,8 @@ Features are not returned as method call result.
(:github:pull:`303`) - Generate default ``jobDescription`` based on currently executed method. Examples: - * ``DBWriter() -> Postgres[host:5432/database]`` - * ``MongoDB[localhost:27017/admin] -> DBReader.run()`` + * ``DBWriter[schema.table].run() -> Postgres[host:5432/database]`` + * ``MongoDB[localhost:27017/admin] -> DBReader[mycollection].run()`` * ``Hive[cluster].execute()`` If user already set custom ``jobDescription``, it will be left intact. (:github:pull:`304`) diff --git a/onetl/connection/db_connection/mongodb/connection.py b/onetl/connection/db_connection/mongodb/connection.py index f406a232..f5495238 100644 --- a/onetl/connection/db_connection/mongodb/connection.py +++ b/onetl/connection/db_connection/mongodb/connection.py @@ -348,10 +348,7 @@ def pipeline( read_options["aggregation.pipeline"] = json.dumps(pipeline) read_options["connection.uri"] = self.connection_url - with override_job_description( - self.spark, - f"{self}.pipeline()", - ): + with override_job_description(self.spark, f"{self}.pipeline()"): spark_reader = self.spark.read.format("mongodb").options(**read_options) if df_schema: diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index a4f45ab0..42f17db3 100644 --- a/onetl/db/db_reader/db_reader.py +++ b/onetl/db/db_reader/db_reader.py @@ -542,10 +542,8 @@ def has_data(self) -> bool: """ self._check_strategy() - with override_job_description( - self.connection.spark, - f"{self.connection} -> {self.__class__.__name__}.has_data()", - ): + job_description = f"{self}.has_data()" + with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() self.connection.check() @@ -637,10 +635,8 @@ def run(self) -> DataFrame: self._check_strategy() - with override_job_description( - self.connection.spark, - f"{self.connection} -> {self.__class__.__name__}.run()", - ): + job_description = f"{self}.run() -> {self.connection}" + with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() self.connection.check() @@ -667,6 +663,9 @@ def run(self) -> DataFrame: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df + def __str__(self): + return f"{self.__class__.__name__}[{self.source}]" + def _check_strategy(self): strategy = StrategyManager.get_current() class_name = type(self).__name__ diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index c261ad23..5206c07d 100644 --- a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -202,10 +202,9 @@ def run(self, df: DataFrame) -> None: raise ValueError(f"DataFrame is streaming.
{self.__class__.__name__} supports only batch DataFrames.") entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() starts") - with override_job_description( - self.connection.spark, - f"{self.__class__.__name__}.run() -> {self.connection}", - ): + + job_description = f"{self}.run() -> {self.connection}" + with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() log_dataframe_schema(log, df) @@ -214,10 +213,7 @@ def run(self, df: DataFrame) -> None: with SparkMetricsRecorder(self.connection.spark) as recorder: try: - with override_job_description( - self.connection.spark, - f"{self.__class__.__name__}.run() -> {self.connection}", - ): + with override_job_description(self.connection.spark, job_description): self.connection.write_df_to_target( df=df, target=str(self.target), @@ -244,6 +240,9 @@ def run(self, df: DataFrame) -> None: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") + def __str__(self): + return f"{self.__class__.__name__}[{self.target}]" + def _log_parameters(self) -> None: log.info("|Spark| -> |%s| Writing DataFrame to target using parameters:", self.connection.__class__.__name__) log_with_indent(log, "target = '%s'", self.target) diff --git a/onetl/file/file_df_reader/file_df_reader.py b/onetl/file/file_df_reader/file_df_reader.py index 517e23a9..ed83bbc5 100644 --- a/onetl/file/file_df_reader/file_df_reader.py +++ b/onetl/file/file_df_reader/file_df_reader.py @@ -211,10 +211,8 @@ def run(self, files: Iterable[str | os.PathLike] | None = None) -> DataFrame: if not self._connection_checked: self._log_parameters(files) - with override_job_description( - self.connection.spark, - f"{self.connection} -> {self.__class__.__name__}.run()", - ): + job_description = f"{self}.run() -> {self.connection}" + with override_job_description(self.connection.spark, job_description): paths: FileSet[PurePathProtocol] = FileSet() if files is not None: paths = FileSet(self._validate_files(files)) @@ -231,6 +229,11 @@ def run(self, files: Iterable[str | os.PathLike] | None = None) -> DataFrame: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df + def __str__(self): + if self.source_path: + return f"{self.__class__.__name__}[{os.fspath(self.source_path)}]" + return f"{self.__class__.__name__}" + def _read_files(self, paths: FileSet[PurePathProtocol]) -> DataFrame: log.info("|%s| Paths to be read:", self.__class__.__name__) log_lines(log, str(paths)) diff --git a/onetl/file/file_df_writer/file_df_writer.py b/onetl/file/file_df_writer/file_df_writer.py index 0daea008..aeda5f7b 100644 --- a/onetl/file/file_df_writer/file_df_writer.py +++ b/onetl/file/file_df_writer/file_df_writer.py @@ -3,6 +3,7 @@ from __future__ import annotations import logging +import os from typing import TYPE_CHECKING try: @@ -124,10 +125,8 @@ def run(self, df: DataFrame) -> None: if df.isStreaming: raise ValueError(f"DataFrame is streaming. 
{self.__class__.__name__} supports only batch DataFrames.") - with override_job_description( - self.connection.spark, - f"{self.__class__.__name__}.run() -> {self.connection}", - ): + job_description = f"{self}).run() -> {self.connection}" + with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters(df) self.connection.check() @@ -135,10 +134,7 @@ def run(self, df: DataFrame) -> None: with SparkMetricsRecorder(self.connection.spark) as recorder: try: - with override_job_description( - self.connection.spark, - f"{self.__class__.__name__}.run() -> {self.connection}", - ): + with override_job_description(self.connection.spark, job_description): self.connection.write_df_as_files( df=df, path=self.target_path, @@ -166,6 +162,9 @@ def run(self, df: DataFrame) -> None: entity_boundary_log(log, f"{self.__class__.__name__}.run() ends", char="-") + def __str__(self): + return f"{self.__class__.__name__}[{os.fspath(self.target_path)}]" + def _log_parameters(self, df: DataFrame) -> None: log.info("|Spark| -> |%s| Writing dataframe using parameters:", self.connection.__class__.__name__) log_with_indent(log, "target_path = '%s'", self.target_path) From 6aa7cdf83f68e3b8bc84a9963a9d37c949db2ccb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 08:08:33 +0000 Subject: [PATCH 58/64] [DOP-18743] Update setup.py --- docs/changelog/0.12.0.rst | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index 4a09682a..7aca258a 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -9,7 +9,7 @@ Breaking Changes * ``s3://host:port`` -> ``s3://host:port/bucket`` (:github:pull:`304`) - Update DB connectors/drivers to latest versions: - * Clickhouse ``0.6.0-patch5`` → ``0.6.4`` + * Clickhouse ``0.6.0-patch5`` → ``0.6.5`` * MongoDB ``10.3.0`` → ``10.4.0`` * MSSQL ``12.6.2`` → ``12.8.1`` * MySQL ``8.4.0`` → ``9.0.0`` diff --git a/setup.py b/setup.py index c7ce5d0d..2c1126f0 100644 --- a/setup.py +++ b/setup.py @@ -67,6 +67,7 @@ def parse_requirements(file: Path) -> list[str]: "Development Status :: 3 - Alpha", "Framework :: Pydantic", "Framework :: Pydantic :: 1", + "Framework :: Pydantic :: 2", "Intended Audience :: Developers", "License :: OSI Approved :: Apache Software License", "Operating System :: OS Independent", From 61695dc20bdba035db1725bb7e27fed7958abb77 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 08:40:13 +0000 Subject: [PATCH 59/64] [DOP-18948] Fix CHANGELOG --- docs/changelog/0.12.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index 7aca258a..4a09682a 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -9,7 +9,7 @@ Breaking Changes * ``s3://host:port`` -> ``s3://host:port/bucket`` (:github:pull:`304`) - Update DB connectors/drivers to latest versions: - * Clickhouse ``0.6.0-patch5`` → ``0.6.5`` + * Clickhouse ``0.6.0-patch5`` → ``0.6.4`` * MongoDB ``10.3.0`` → ``10.4.0`` * MSSQL ``12.6.2`` → ``12.8.1`` * MySQL ``8.4.0`` → ``9.0.0`` From 
3d5bfe51d219936945cf423e1b0a242779d5f924 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 09:32:49 +0000 Subject: [PATCH 60/64] [DOP-18743] Update jobDescription format --- docs/changelog/0.12.0.rst | 4 ++-- onetl/db/db_reader/db_reader.py | 7 ++----- onetl/db/db_writer/db_writer.py | 5 +---- onetl/file/file_df_reader/file_df_reader.py | 11 +++++------ onetl/file/file_df_writer/file_df_writer.py | 6 +----- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index 4a09682a..34c9c18b 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -39,8 +39,8 @@ Features are not returned as method call result. (:github:pull:`303`) - Generate default ``jobDescription`` based on currently executed method. Examples: - * ``DBWriter[schema.table].run() -> Postgres[host:5432/database]`` - * ``MongoDB[localhost:27017/admin] -> DBReader[mycollection].run()`` + * ``DBWriter.run(schema.table) -> Postgres[host:5432/database]`` + * ``MongoDB[localhost:27017/admin] -> DBReader.has_data(mycollection)`` * ``Hive[cluster].execute()`` If user already set custom ``jobDescription``, it will be left intact. (:github:pull:`304`) diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index 42f17db3..4ad3d236 100644 --- a/onetl/db/db_reader/db_reader.py +++ b/onetl/db/db_reader/db_reader.py @@ -542,7 +542,7 @@ def has_data(self) -> bool: """ self._check_strategy() - job_description = f"{self}.has_data()" + job_description = f"{self.__class__.__name__}.has_data({self.source})" with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() @@ -635,7 +635,7 @@ def run(self) -> DataFrame: self._check_strategy() - job_description = f"{self}.run() -> {self.connection}" + job_description = f"{self.__class__.__name__}.run({self.source}) -> {self.connection}" with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() self.connection.check() @@ -663,9 +663,6 @@ def run(self) -> DataFrame: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df - def __str__(self): - return f"{self.__class__.__name__}[{self.source}]" - def _check_strategy(self): strategy = StrategyManager.get_current() class_name = type(self).__name__ diff --git a/onetl/db/db_writer/db_writer.py b/onetl/db/db_writer/db_writer.py index 5206c07d..3bcf63aa 100644 --- a/onetl/db/db_writer/db_writer.py +++ b/onetl/db/db_writer/db_writer.py @@ -203,7 +203,7 @@ def run(self, df: DataFrame) -> None: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() starts") - job_description = f"{self}.run() -> {self.connection}" + job_description = f"{self.__class__.__name__}.run({self.target}) -> {self.connection}" with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() @@ -240,9 +240,6 @@ def run(self, df: DataFrame) -> None: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") - def __str__(self): - return f"{self.__class__.__name__}[{self.target}]" - def _log_parameters(self) -> None: log.info("|Spark| -> |%s| Writing DataFrame to target using parameters:", self.connection.__class__.__name__) log_with_indent(log, "target = '%s'", self.target) diff
--git a/onetl/file/file_df_reader/file_df_reader.py b/onetl/file/file_df_reader/file_df_reader.py index ed83bbc5..36aab796 100644 --- a/onetl/file/file_df_reader/file_df_reader.py +++ b/onetl/file/file_df_reader/file_df_reader.py @@ -211,7 +211,11 @@ def run(self, files: Iterable[str | os.PathLike] | None = None) -> DataFrame: if not self._connection_checked: self._log_parameters(files) - job_description = f"{self}.run() -> {self.connection}" + if files: + job_description = f"{self.__class__.__name__}.run([..files..]) -> {self.connection}" + else: + job_description = f"{self.__class__.__name__}.run({self.source_path}) -> {self.connection}" + with override_job_description(self.connection.spark, job_description): paths: FileSet[PurePathProtocol] = FileSet() if files is not None: @@ -229,11 +233,6 @@ def run(self, files: Iterable[str | os.PathLike] | None = None) -> DataFrame: entity_boundary_log(log, msg=f"{self.__class__.__name__}.run() ends", char="-") return df - def __str__(self): - if self.source_path: - return f"{self.__class__.__name__}[{os.fspath(self.source_path)}]" - return f"{self.__class__.__name__}" - def _read_files(self, paths: FileSet[PurePathProtocol]) -> DataFrame: log.info("|%s| Paths to be read:", self.__class__.__name__) log_lines(log, str(paths)) diff --git a/onetl/file/file_df_writer/file_df_writer.py b/onetl/file/file_df_writer/file_df_writer.py index aeda5f7b..037fc7ee 100644 --- a/onetl/file/file_df_writer/file_df_writer.py +++ b/onetl/file/file_df_writer/file_df_writer.py @@ -3,7 +3,6 @@ from __future__ import annotations import logging -import os from typing import TYPE_CHECKING try: @@ -125,7 +124,7 @@ def run(self, df: DataFrame) -> None: if df.isStreaming: raise ValueError(f"DataFrame is streaming. {self.__class__.__name__} supports only batch DataFrames.") - job_description = f"{self}).run() -> {self.connection}" + job_description = f"{self.__class__.__name__}.run({self.target_path}) -> {self.connection}" with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters(df) @@ -162,9 +161,6 @@ def run(self, df: DataFrame) -> None: entity_boundary_log(log, f"{self.__class__.__name__}.run() ends", char="-") - def __str__(self): - return f"{self.__class__.__name__}[{os.fspath(self.target_path)}]" - def _log_parameters(self, df: DataFrame) -> None: log.info("|Spark| -> |%s| Writing dataframe using parameters:", self.connection.__class__.__name__) log_with_indent(log, "target_path = '%s'", self.target_path) From fb86f9222f6c548e6e79a984228240ad74bfa120 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Mon, 2 Sep 2024 15:00:28 +0000 Subject: [PATCH 61/64] [DOP-18948] Fix CI tests --- .github/workflows/test-clickhouse.yml | 14 ++++++++------ .github/workflows/test-core.yml | 2 ++ .github/workflows/test-ftp.yml | 2 ++ .github/workflows/test-ftps.yml | 2 ++ .github/workflows/test-greenplum.yml | 2 ++ .github/workflows/test-hdfs.yml | 2 ++ .github/workflows/test-hive.yml | 2 ++ .github/workflows/test-kafka.yml | 2 ++ .github/workflows/test-local-fs.yml | 2 ++ .github/workflows/test-mongodb.yml | 2 ++ .github/workflows/test-mssql.yml | 2 ++ .github/workflows/test-mysql.yml | 2 ++ .github/workflows/test-oracle.yml | 2 ++ .github/workflows/test-postgres.yml | 2 ++ .github/workflows/test-s3.yml | 2 ++ .github/workflows/test-samba.yml | 2 ++ 
.github/workflows/test-sftp.yml | 2 ++ .github/workflows/test-teradata.yml | 2 ++ .github/workflows/test-webdav.yml | 2 ++ 19 files changed, 44 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test-clickhouse.yml b/.github/workflows/test-clickhouse.yml index db05402d..32dead8f 100644 --- a/.github/workflows/test-clickhouse.yml +++ b/.github/workflows/test-clickhouse.yml @@ -90,12 +90,6 @@ jobs: source ./env ./pytest_runner.sh -m clickhouse - - name: Upload coverage results - uses: actions/upload-artifact@v4 - with: - name: coverage-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} - path: reports/* - - name: Dump Clickhouse logs on failure if: failure() uses: jwalton/gh-docker-logs@v2 @@ -109,3 +103,11 @@ jobs: with: name: container-logs-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }} path: logs/* + + - name: Upload coverage results + uses: actions/upload-artifact@v4 + with: + name: coverage-clickhouse-${{ inputs.clickhouse-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} + path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-core.yml b/.github/workflows/test-core.yml index 8a0b3b7a..94fe3075 100644 --- a/.github/workflows/test-core.yml +++ b/.github/workflows/test-core.yml @@ -79,3 +79,5 @@ jobs: with: name: coverage-core-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-ftp.yml b/.github/workflows/test-ftp.yml index 8e45ec32..e91c31e3 100644 --- a/.github/workflows/test-ftp.yml +++ b/.github/workflows/test-ftp.yml @@ -80,6 +80,8 @@ jobs: with: name: container-logs-ftp-${{ inputs.ftp-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: logs/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true - name: Shutdown FTP if: always() diff --git a/.github/workflows/test-ftps.yml b/.github/workflows/test-ftps.yml index dfe8ffed..89eff2e9 100644 --- a/.github/workflows/test-ftps.yml +++ b/.github/workflows/test-ftps.yml @@ -91,3 +91,5 @@ jobs: with: name: coverage-ftps-${{ inputs.ftps-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-greenplum.yml b/.github/workflows/test-greenplum.yml index 5f24f779..297a52cc 100644 --- a/.github/workflows/test-greenplum.yml +++ b/.github/workflows/test-greenplum.yml @@ -137,3 +137,5 @@ jobs: with: name: coverage-greenplum-${{ inputs.greenplum-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-hdfs.yml b/.github/workflows/test-hdfs.yml index e06f01ed..98398f52 100644 --- a/.github/workflows/test-hdfs.yml +++ b/.github/workflows/test-hdfs.yml @@ -114,3 +114,5 @@ jobs: with: name: coverage-hdfs-${{ inputs.hadoop-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git 
a/.github/workflows/test-hive.yml b/.github/workflows/test-hive.yml index 17af1d93..7c74ed7a 100644 --- a/.github/workflows/test-hive.yml +++ b/.github/workflows/test-hive.yml @@ -81,3 +81,5 @@ jobs: with: name: coverage-hive-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-kafka.yml b/.github/workflows/test-kafka.yml index b1f06552..127498d1 100644 --- a/.github/workflows/test-kafka.yml +++ b/.github/workflows/test-kafka.yml @@ -135,3 +135,5 @@ jobs: with: name: coverage-kafka-${{ inputs.kafka-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-local-fs.yml b/.github/workflows/test-local-fs.yml index 2672afe4..27e2fc03 100644 --- a/.github/workflows/test-local-fs.yml +++ b/.github/workflows/test-local-fs.yml @@ -81,3 +81,5 @@ jobs: with: name: coverage-local-fs-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-mongodb.yml b/.github/workflows/test-mongodb.yml index 334bbfc1..e1c03774 100644 --- a/.github/workflows/test-mongodb.yml +++ b/.github/workflows/test-mongodb.yml @@ -107,3 +107,5 @@ jobs: with: name: coverage-mongodb-${{ inputs.mongodb-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-mssql.yml b/.github/workflows/test-mssql.yml index 0865492a..cb7252f9 100644 --- a/.github/workflows/test-mssql.yml +++ b/.github/workflows/test-mssql.yml @@ -112,3 +112,5 @@ jobs: with: name: coverage-mssql-${{ inputs.mssql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-mysql.yml b/.github/workflows/test-mysql.yml index e305af6d..8f0b7871 100644 --- a/.github/workflows/test-mysql.yml +++ b/.github/workflows/test-mysql.yml @@ -109,3 +109,5 @@ jobs: with: name: coverage-mysql-${{ inputs.mysql-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-oracle.yml b/.github/workflows/test-oracle.yml index 38a21daf..37ee14e4 100644 --- a/.github/workflows/test-oracle.yml +++ b/.github/workflows/test-oracle.yml @@ -129,3 +129,5 @@ jobs: with: name: coverage-oracle-${{ inputs.oracle-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-postgres.yml b/.github/workflows/test-postgres.yml index 68236134..cd668a00 100644 --- a/.github/workflows/test-postgres.yml +++ b/.github/workflows/test-postgres.yml @@ -108,3 +108,5 @@ jobs: with: name: coverage-postgres-${{ inputs.postgres-version }}-spark-${{ inputs.spark-version }}-python-${{ 
inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-s3.yml b/.github/workflows/test-s3.yml index 1ef595e6..af25f71e 100644 --- a/.github/workflows/test-s3.yml +++ b/.github/workflows/test-s3.yml @@ -109,3 +109,5 @@ jobs: with: name: coverage-s3-${{ inputs.minio-version }}-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-samba.yml b/.github/workflows/test-samba.yml index f7b07131..f493b0a5 100644 --- a/.github/workflows/test-samba.yml +++ b/.github/workflows/test-samba.yml @@ -89,3 +89,5 @@ jobs: with: name: coverage-samba-${{ inputs.server-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-sftp.yml b/.github/workflows/test-sftp.yml index 2ab8de1c..0b92c423 100644 --- a/.github/workflows/test-sftp.yml +++ b/.github/workflows/test-sftp.yml @@ -86,3 +86,5 @@ jobs: with: name: coverage-sftp-${{ inputs.openssh-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-teradata.yml b/.github/workflows/test-teradata.yml index 8ba3ff60..482e5fbb 100644 --- a/.github/workflows/test-teradata.yml +++ b/.github/workflows/test-teradata.yml @@ -81,3 +81,5 @@ jobs: with: name: coverage-teradata-spark-${{ inputs.spark-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true diff --git a/.github/workflows/test-webdav.yml b/.github/workflows/test-webdav.yml index 2ce0e4ef..22c55b44 100644 --- a/.github/workflows/test-webdav.yml +++ b/.github/workflows/test-webdav.yml @@ -91,3 +91,5 @@ jobs: with: name: coverage-webdav-${{ inputs.webdav-version }}-python-${{ inputs.python-version }}-os-${{ inputs.os }} path: reports/* + # https://github.com/actions/upload-artifact/issues/602 + include-hidden-files: true From ec4937dc33f7d73fea5825e54e6c549e6eb775fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 3 Sep 2024 09:54:46 +0000 Subject: [PATCH 62/64] Update Clickhouse package --- docs/changelog/0.12.0.rst | 2 +- onetl/connection/db_connection/clickhouse/connection.py | 8 ++++---- .../tests_db_connection_unit/test_clickhouse_unit.py | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index 34c9c18b..06491fd2 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -9,7 +9,7 @@ Breaking Changes * ``s3://host:port`` -> ``s3://host:port/bucket`` (:github:pull:`304`) - Update DB connectors/drivers to latest versions: - * Clickhouse ``0.6.0-patch5`` → ``0.6.4`` + * Clickhouse ``0.6.0-patch5`` → ``0.6.5`` * MongoDB ``10.3.0`` → ``10.4.0`` * MSSQL ``12.6.2`` → ``12.8.1`` * MySQL ``8.4.0`` → ``9.0.0`` diff --git a/onetl/connection/db_connection/clickhouse/connection.py b/onetl/connection/db_connection/clickhouse/connection.py index 
fad82942..95756e2b 100644 --- a/onetl/connection/db_connection/clickhouse/connection.py +++ b/onetl/connection/db_connection/clickhouse/connection.py @@ -37,7 +37,7 @@ class Config: class Clickhouse(JDBCConnection): """Clickhouse JDBC connection. |support_hooks| - Based on Maven package `com.clickhouse:clickhouse-jdbc:0.6.4 `_ + Based on Maven package `com.clickhouse:clickhouse-jdbc:0.6.5 `_ (`official Clickhouse JDBC driver `_). .. seealso:: @@ -139,7 +139,7 @@ def get_packages( Parameters ---------- package_version : str, optional - ClickHouse JDBC version client packages. Defaults to ``0.6.4``. + ClickHouse JDBC version client packages. Defaults to ``0.6.5``. .. versionadded:: 0.11.0 @@ -158,7 +158,7 @@ def get_packages( Clickhouse.get_packages(package_version="0.6.0", apache_http_client_version="5.3.1") """ - default_jdbc_version = "0.6.4" + default_jdbc_version = "0.6.5" default_http_version = "5.3.1" jdbc_version = Version(package_version or default_jdbc_version).min_digits(3) @@ -177,7 +177,7 @@ def get_packages( @classproperty def package(self) -> str: """Get a single string of package names to be downloaded by Spark for establishing a Clickhouse connection.""" - return "com.clickhouse:clickhouse-jdbc:0.6.4,com.clickhouse:clickhouse-http-client:0.6.4,org.apache.httpcomponents.client5:httpclient5:5.3.1" + return "com.clickhouse:clickhouse-jdbc:0.6.5,com.clickhouse:clickhouse-http-client:0.6.5,org.apache.httpcomponents.client5:httpclient5:5.3.1" @property def jdbc_url(self) -> str: diff --git a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py index 9bf7a068..d8ccf7f4 100644 --- a/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py +++ b/tests/tests_unit/tests_db_connection_unit/test_clickhouse_unit.py @@ -11,7 +11,7 @@ def test_clickhouse_driver(): def test_clickhouse_package(): expected_packages = ( - "com.clickhouse:clickhouse-jdbc:0.6.4,com.clickhouse:clickhouse-http-client:0.6.4," + "com.clickhouse:clickhouse-jdbc:0.6.5,com.clickhouse:clickhouse-http-client:0.6.5," "org.apache.httpcomponents.client5:httpclient5:5.3.1" ) assert Clickhouse.package == expected_packages @@ -24,8 +24,8 @@ def test_clickhouse_package(): None, None, [ - "com.clickhouse:clickhouse-jdbc:0.6.4", - "com.clickhouse:clickhouse-http-client:0.6.4", + "com.clickhouse:clickhouse-jdbc:0.6.5", + "com.clickhouse:clickhouse-http-client:0.6.5", "org.apache.httpcomponents.client5:httpclient5:5.3.1", ], ), From dc439c34c02509f2f909f563646f589f3ed31de8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 3 Sep 2024 12:04:44 +0000 Subject: [PATCH 63/64] [DOP-18743] Update jobDescription format --- onetl/db/db_reader/db_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onetl/db/db_reader/db_reader.py b/onetl/db/db_reader/db_reader.py index 4ad3d236..dd79876a 100644 --- a/onetl/db/db_reader/db_reader.py +++ b/onetl/db/db_reader/db_reader.py @@ -542,7 +542,7 @@ def has_data(self) -> bool: """ self._check_strategy() - job_description = f"{self.__class__.__name__}.has_data({self.source})" + job_description = f"{self.connection} -> {self.__class__.__name__}.has_data({self.source})" with override_job_description(self.connection.spark, job_description): if not self._connection_checked: self._log_parameters() From 
fe9048d79d249e480efe7e3a0598ced96ced7b0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9C=D0=B0=D1=80=D1=82=D1=8B=D0=BD=D0=BE=D0=B2=20=D0=9C?= =?UTF-8?q?=D0=B0=D0=BA=D1=81=D0=B8=D0=BC=20=D0=A1=D0=B5=D1=80=D0=B3=D0=B5?= =?UTF-8?q?=D0=B5=D0=B2=D0=B8=D1=87?= Date: Tue, 3 Sep 2024 12:17:11 +0000 Subject: [PATCH 64/64] [DOP-18743] Update CHANGELOG --- docs/changelog/0.12.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/changelog/0.12.0.rst b/docs/changelog/0.12.0.rst index 06491fd2..8edd4a49 100644 --- a/docs/changelog/0.12.0.rst +++ b/docs/changelog/0.12.0.rst @@ -1,4 +1,4 @@ -0.12.0 (2024-08-29) +0.12.0 (2024-09-03) =================== Breaking Changes