From e3f6b0aa431d886c92d3324080dc7460950dabb7 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Sun, 24 Oct 2021 17:33:36 +0200 Subject: [PATCH 01/34] docs: specify requirements --- docs/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 docs/requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 00000000..0b27c37e --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,2 @@ +sphinx_rtd_theme +myst_parser From 0606c566c4c8a3a711de08ad5c6acf01751a99af Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Sun, 24 Oct 2021 17:34:38 +0200 Subject: [PATCH 02/34] ci: docs requirements --- .readthedocs.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yml b/.readthedocs.yml index d2482943..caf5186a 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -7,4 +7,5 @@ build: python: version: 3.8 setup_py_install: true - + install: + - requirements: docs/requirements.txt From 5dd4a70261488febee4ee1f3550c81c5487fa84c Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Mon, 25 Oct 2021 17:39:52 +0200 Subject: [PATCH 03/34] ci: enable codeql --- .github/workflows/codeql-analysis.yml | 60 +++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 00000000..f8cbd885 --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,60 @@ +name: "CodeQL" + +on: + push: + branches: [ master, develop ] + pull_request: + # The branches below must be a subset of the branches above + branches: [ master ] + schedule: + - cron: '22 11 * * 2' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python' ] + # Learn more: + # https://docs.github.com/en/free-pro-team@latest/github/finding-security-vulnerabilities-and-errors-in-your-code/configuring-code-scanning#changing-the-languages-that-are-analyzed + + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v1 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # queries: ./path/to/local/query, your-org/your-repo/queries@main + + # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v1 + + # ℹī¸ Command-line programs to run using the OS shell. 
+ # 📚 https://git.io/JvXDl + + # ✏ī¸ If the Autobuild fails above, remove it and uncomment the following three lines + # and modify them (or add more) to build your code if your project + # uses a compiled language + + #- run: | + # make bootstrap + # make release + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v1 From b187d360bc303b347826c77eb356d2d4dcc5ad38 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 13:03:15 +0200 Subject: [PATCH 04/34] docs: changelog md syntax --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 93e40737..b13d51ae 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,4 +66,4 @@ ## v0.4.0 and before -The release notes for preceding versions are available `here `_ +The release notes for preceding versions are available [here](https://github.com/ing-bank/popmon/blob/master/CHANGES.rst>). From d5caf4e0d9ac023ee40ae9669886033da4ed96a6 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 22:13:44 +0200 Subject: [PATCH 05/34] ci: black on docs --- .pre-commit-config.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 83925539..45222b48 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,3 +21,7 @@ repos: hooks: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] +- repo: https://github.com/asottile/blacken-docs + rev: v1.0.0 + hooks: + - id: blacken-docs From fb9642c30d0316fe4196141fde416c09a83877a4 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 20:15:04 +0000 Subject: [PATCH 06/34] ci: dependency update --- .pre-commit-config.yaml | 2 +- README.rst | 38 ++++++++++++++------- docs/source/configuration.rst | 64 +++++++++++++++++++++++------------ popmon/pipeline/metrics.py | 46 +++++++++++++++---------- popmon/pipeline/report.py | 46 +++++++++++++++---------- 5 files changed, 124 insertions(+), 72 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 45222b48..010c763a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,6 +22,6 @@ repos: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] - repo: https://github.com/asottile/blacken-docs - rev: v1.0.0 + rev: v1.11.0 hooks: - id: blacken-docs diff --git a/README.rst b/README.rst index b74375c5..994a531b 100644 --- a/README.rst +++ b/README.rst @@ -29,7 +29,10 @@ With Spark 3.0, based on Scala 2.12, make sure to pick up the correct `histogram .. code-block:: python - spark = SparkSession.builder.config("spark.jars.packages", "io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20").getOrCreate() + spark = SparkSession.builder.config( + "spark.jars.packages", + "io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20", + ).getOrCreate() For Spark 2.X compiled against scala 2.11, in the string above simply replace 2.12 with 2.11. 
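For instance, with Scala 2.11 the same call becomes:

.. code-block:: python

    spark = SparkSession.builder.config(
        "spark.jars.packages",
        "io.github.histogrammar:histogrammar_2.11:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.11:1.0.20",
    ).getOrCreate()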
@@ -101,12 +104,12 @@ As a quick example, you can do: from popmon import resources # open synthetic data - df = pd.read_csv(resources.data('test.csv.gz'), parse_dates=['date']) + df = pd.read_csv(resources.data("test.csv.gz"), parse_dates=["date"]) df.head() # generate stability report using automatic binning of all encountered features # (importing popmon automatically adds this functionality to a dataframe) - report = df.pm_stability_report(time_axis='date', features=['date:age', 'date:gender']) + report = df.pm_stability_report(time_axis="date", features=["date:age", "date:gender"]) # to show the output of the report in a Jupyter notebook you can simply run: report @@ -119,23 +122,32 @@ To specify your own binning specifications and features you want to report on, y .. code-block:: python # time-axis specifications alone; all other features are auto-binned. - report = df.pm_stability_report(time_axis='date', time_width='1w', time_offset='2020-1-6') + report = df.pm_stability_report( + time_axis="date", time_width="1w", time_offset="2020-1-6" + ) # histogram selections. Here 'date' is the first axis of each histogram. - features=[ - 'date:isActive', 'date:age', 'date:eyeColor', 'date:gender', - 'date:latitude', 'date:longitude', 'date:isActive:age' + features = [ + "date:isActive", + "date:age", + "date:eyeColor", + "date:gender", + "date:latitude", + "date:longitude", + "date:isActive:age", ] # Specify your own binning specifications for individual features or combinations thereof. # This bin specification uses open-ended ("sparse") histograms; unspecified features get # auto-binned. The time-axis binning, when specified here, needs to be in nanoseconds. - bin_specs={ - 'longitude': {'bin_width': 5.0, 'bin_offset': 0.0}, - 'latitude': {'bin_width': 5.0, 'bin_offset': 0.0}, - 'age': {'bin_width': 10.0, 'bin_offset': 0.0}, - 'date': {'bin_width': pd.Timedelta('4w').value, - 'bin_offset': pd.Timestamp('2015-1-1').value} + bin_specs = { + "longitude": {"bin_width": 5.0, "bin_offset": 0.0}, + "latitude": {"bin_width": 5.0, "bin_offset": 0.0}, + "age": {"bin_width": 10.0, "bin_offset": 0.0}, + "date": { + "bin_width": pd.Timedelta("4w").value, + "bin_offset": pd.Timestamp("2015-1-1").value, + }, } # generate stability report diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst index 5ec9dc04..4ed8342b 100644 --- a/docs/source/configuration.rst +++ b/docs/source/configuration.rst @@ -55,7 +55,9 @@ To specify the time-axis binning alone, do: .. code-block:: python - report = df.pm_stability_report(time_axis='date', time_width='1w', time_offset='2020-1-6') + report = df.pm_stability_report( + time_axis="date", time_width="1w", time_offset="2020-1-6" + ) The ``time_axis`` argument should be the name of a column that is of type **numeric (e.g. batch id, time in ns) or date(time)**. The default time width is 30 days ('30d'), with time offset 2010-1-4 (a Monday). @@ -72,11 +74,15 @@ An example bin_specs dictionary is: .. 
code-block:: python - bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0}, - 'y': {'num': 10, 'low': 0.0, 'high': 2.0}, - 'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}], - 'date': {'bin_width': pd.Timedelta('4w').value, - 'bin_offset': pd.Timestamp('2015-1-1').value}} + bin_specs = { + "x": {"bin_width": 1, "bin_offset": 0}, + "y": {"num": 10, "low": 0.0, "high": 2.0}, + "x:y": [{}, {"num": 5, "low": 0.0, "high": 1.0}], + "date": { + "bin_width": pd.Timedelta("4w").value, + "bin_offset": pd.Timestamp("2015-1-1").value, + }, + } In the bin specs for 'x:y', 'x' is not provided (here) and reverts to the 1-dim setting. Any time-axis, when specified here ('date'), needs to be specified in nanoseconds. This takes precedence over @@ -112,9 +118,11 @@ When not provided, the default setting is: .. code-block:: python - monitoring_rules = {"*_pull": [7, 4, -4, -7], - "*_zscore": [7, 4, -4, -7], - "[!p]*_unknown_labels": [0.5, 0.5, 0, 0]} + monitoring_rules = { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + "[!p]*_unknown_labels": [0.5, 0.5, 0, 0], + } Note that the (filename based) wildcards such as * apply to all statistic names matching that pattern. For example, ``"*_pull"`` applies for all features to all statistics ending on "_pull". Same for ``"*_zscore"``. @@ -132,11 +140,13 @@ feature name in front. This also works for a combinations of two features. E.g. .. code-block:: python - monitoring_rules = {"featureA:*_pull": [5, 3, -3, -5], - "featureA:featureB:*_pull": [6, 3, -3, -6], - "featureA:nan": [4, 1, 0, 0], - "*_pull": [7, 4, -4, -7], - "nan": [8, 1, 0, 0]} + monitoring_rules = { + "featureA:*_pull": [5, 3, -3, -5], + "featureA:featureB:*_pull": [6, 3, -3, -6], + "featureA:nan": [4, 1, 0, 0], + "*_pull": [7, 4, -4, -7], + "nan": [8, 1, 0, 0], + } In the case where multiple rules could apply for a feature's statistic, the most specific one gets applied. So in case of the statistic "nan": "featureA:nan" is used for "featureA", and the other "nan" rule @@ -204,13 +214,16 @@ Spark usage from pyspark.sql import SparkSession # downloads histogrammar jar files if not already installed, used for histogramming of spark dataframe - spark = SparkSession.builder.config("spark.jars.packages", "io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20").getOrCreate() + spark = SparkSession.builder.config( + "spark.jars.packages", + "io.github.histogrammar:histogrammar_2.12:1.0.20,io.github.histogrammar:histogrammar-sparksql_2.12:1.0.20", + ).getOrCreate() # load a dataframe - spark_df = spark.read.format('csv').options(header='true').load('file.csv') + spark_df = spark.read.format("csv").options(header="true").load("file.csv") # generate the report - report = spark_df.pm_stability_report(time_axis='timestamp') + report = spark_df.pm_stability_report(time_axis="timestamp") Spark example on Google Colab @@ -231,16 +244,23 @@ Now that spark is installed, restart the runtime. .. 
code-block:: python import os + os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64" os.environ["SPARK_HOME"] = "/content/spark-2.4.7-bin-hadoop2.7" import findspark + findspark.init() from pyspark.sql import SparkSession - spark = SparkSession.builder.master("local[*]") \ - .config("spark.jars", "/content/jars/histogrammar_2.12-1.0.20.jar,/content/jars/histogrammar-sparksql_2.12-1.0.20.jar") \ - .config("spark.sql.execution.arrow.enabled", "false") \ - .config("spark.sql.session.timeZone", "GMT") \ - .getOrCreate() + spark = ( + SparkSession.builder.master("local[*]") + .config( + "spark.jars", + "/content/jars/histogrammar_2.12-1.0.20.jar,/content/jars/histogrammar-sparksql_2.12-1.0.20.jar", + ) + .config("spark.sql.execution.arrow.enabled", "false") + .config("spark.sql.session.timeZone", "GMT") + .getOrCreate() + ) diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py index aa79604b..47135dab 100644 --- a/popmon/pipeline/metrics.py +++ b/popmon/pipeline/metrics.py @@ -73,9 +73,11 @@ def stability_metrics( .. code-block:: python - monitoring_rules = {"*_pull": [7, 4, -4, -7], - "*_zscore": [7, 4, -4, -7], - "[!p]*_unknown_labels": [0.5, 0.5, 0, 0]} + monitoring_rules = { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + "[!p]*_unknown_labels": [0.5, 0.5, 0, 0], + } Note that the (filename based) wildcards such as * apply to all statistic names matching that pattern. For example, ``"*_pull"`` applies for all features to all statistics ending on "_pull". @@ -84,10 +86,12 @@ def stability_metrics( .. code-block:: python - monitoring_rules = {"featureA:*_pull": [5, 3, -3, -5], - "featureA:nan": [4, 1, 0, 0], - "*_pull": [7, 4, -4, -7], - "nan": [8, 1, 0, 0]} + monitoring_rules = { + "featureA:*_pull": [5, 3, -3, -5], + "featureA:nan": [4, 1, 0, 0], + "*_pull": [7, 4, -4, -7], + "nan": [8, 1, 0, 0], + } In case of multiple rules could apply for a feature's statistic, the most specific one applies. So in case of the statistic "nan": "featureA:nan" is used for "featureA", and the other "nan" rule @@ -182,7 +186,7 @@ def df_stability_metrics( .. code-block:: python - features = ['x', 'date', 'date:x', 'date:y', 'date:x:y'] + features = ["x", "date", "date:x", "date:y", "date:x:y"] :param str binning: default binning to revert to in case bin_specs not supplied. options are: "unit" or "auto", default is "auto". When using "auto", semi-clever binning is automatically done. @@ -191,9 +195,11 @@ def df_stability_metrics( .. code-block:: python - bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0}, - 'y': {'num': 10, 'low': 0.0, 'high': 2.0}, - 'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]} + bin_specs = { + "x": {"bin_width": 1, "bin_offset": 0}, + "y": {"num": 10, "low": 0.0, "high": 2.0}, + "x:y": [{}, {"num": 5, "low": 0.0, "high": 1.0}], + } In the bin specs for x:y, x is not provided (here) and reverts to the 1-dim setting. The 'bin_width', 'bin_offset' notation makes an open-ended histogram (for that feature) with given bin width @@ -224,9 +230,11 @@ def df_stability_metrics( .. code-block:: python - monitoring_rules = {"*_pull": [7, 4, -4, -7], - "*_zscore": [7, 4, -4, -7], - "[!p]*_unknown_labels": [0.5, 0.5, 0, 0]} + monitoring_rules = { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + "[!p]*_unknown_labels": [0.5, 0.5, 0, 0], + } Note that the (filename based) wildcards such as * apply to all statistic names matching that pattern. For example, ``"*_pull"`` applies for all features to all statistics ending on "_pull". 
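These (filename based) wildcards follow Python's glob semantics; popmon matches them with the standard-library ``fnmatch`` module (see ``compute_tl_bounds.py``). A minimal sketch of how the default patterns above behave:

.. code-block:: python

    from fnmatch import fnmatch

    # "*_pull" matches any statistic name ending on "_pull"
    assert fnmatch("mean_pull", "*_pull")
    assert not fnmatch("mean_zscore", "*_pull")

    # in "[!p]*_unknown_labels", "[!p]" requires the first character to not be "p"
    assert fnmatch("fraction_unknown_labels", "[!p]*_unknown_labels")
    assert not fnmatch("phik_unknown_labels", "[!p]*_unknown_labels")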
@@ -235,10 +243,12 @@ def df_stability_metrics( .. code-block:: python - monitoring_rules = {"featureA:*_pull": [5, 3, -3, -5], - "featureA:nan": [4, 1, 0, 0], - "*_pull": [7, 4, -4, -7], - "nan": [8, 1, 0, 0]} + monitoring_rules = { + "featureA:*_pull": [5, 3, -3, -5], + "featureA:nan": [4, 1, 0, 0], + "*_pull": [7, 4, -4, -7], + "nan": [8, 1, 0, 0], + } In case of multiple rules could apply for a feature's statistic, the most specific one applies. So in case of the statistic "nan": "featureA:nan" is used for "featureA", and the other "nan" rule diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index f78aa572..9329924b 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -83,9 +83,11 @@ def stability_report( .. code-block:: python - monitoring_rules = {"*_pull": [7, 4, -4, -7], - "*_zscore": [7, 4, -4, -7], - "[!p]*_unknown_labels": [0.5, 0.5, 0, 0]} + monitoring_rules = { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + "[!p]*_unknown_labels": [0.5, 0.5, 0, 0], + } Note that the (filename based) wildcards such as * apply to all statistic names matching that pattern. For example, ``"*_pull"`` applies for all features to all statistics ending on "_pull". @@ -94,10 +96,12 @@ def stability_report( .. code-block:: python - monitoring_rules = {"featureA:*_pull": [5, 3, -3, -5], - "featureA:nan": [4, 1, 0, 0], - "*_pull": [7, 4, -4, -7], - "nan": [8, 1, 0, 0]} + monitoring_rules = { + "featureA:*_pull": [5, 3, -3, -5], + "featureA:nan": [4, 1, 0, 0], + "*_pull": [7, 4, -4, -7], + "nan": [8, 1, 0, 0], + } In case of multiple rules could apply for a feature's statistic, the most specific one applies. So in case of the statistic "nan": "featureA:nan" is used for "featureA", and the other "nan" rule @@ -212,7 +216,7 @@ def df_stability_report( .. code-block:: python - features = ['x', 'date', 'date:x', 'date:y', 'date:x:y'] + features = ["x", "date", "date:x", "date:y", "date:x:y"] :param str binning: default binning to revert to in case bin_specs not supplied. options are: "unit" or "auto", default is "auto". When using "auto", semi-clever binning is automatically done. @@ -221,9 +225,11 @@ def df_stability_report( .. code-block:: python - bin_specs = {'x': {'bin_width': 1, 'bin_offset': 0}, - 'y': {'num': 10, 'low': 0.0, 'high': 2.0}, - 'x:y': [{}, {'num': 5, 'low': 0.0, 'high': 1.0}]} + bin_specs = { + "x": {"bin_width": 1, "bin_offset": 0}, + "y": {"num": 10, "low": 0.0, "high": 2.0}, + "x:y": [{}, {"num": 5, "low": 0.0, "high": 1.0}], + } In the bin specs for x:y, x is not provided (here) and reverts to the 1-dim setting. The 'bin_width', 'bin_offset' notation makes an open-ended histogram (for that feature) with given bin width @@ -254,9 +260,11 @@ def df_stability_report( .. code-block:: python - monitoring_rules = {"*_pull": [7, 4, -4, -7], - "*_zscore": [7, 4, -4, -7], - "[!p]*_unknown_labels": [0.5, 0.5, 0, 0]} + monitoring_rules = { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + "[!p]*_unknown_labels": [0.5, 0.5, 0, 0], + } Note that the (filename based) wildcards such as * apply to all statistic names matching that pattern. For example, ``"*_pull"`` applies for all features to all statistics ending on "_pull". @@ -265,10 +273,12 @@ def df_stability_report( .. 
code-block:: python - monitoring_rules = {"featureA:*_pull": [5, 3, -3, -5], - "featureA:nan": [4, 1, 0, 0], - "*_pull": [7, 4, -4, -7], - "nan": [8, 1, 0, 0]} + monitoring_rules = { + "featureA:*_pull": [5, 3, -3, -5], + "featureA:nan": [4, 1, 0, 0], + "*_pull": [7, 4, -4, -7], + "nan": [8, 1, 0, 0], + } In case of multiple rules could apply for a feature's statistic, the most specific one applies. So in case of the statistic "nan": "featureA:nan" is used for "featureA", and the other "nan" rule From 29dc36466847fd1d7d72e02906e0d1f5586b0cf3 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Mon, 1 Nov 2021 17:51:30 +0000 Subject: [PATCH 07/34] ci: upgrading packages --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 010c763a..d77e65fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.9b0 + rev: 21.10b0 hooks: - id: black - repo: https://github.com/pycqa/isort From 721572e69ca7cf69758f4e52761b782ddbec28fe Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 3 Nov 2021 17:51:56 +0000 Subject: [PATCH 08/34] ci: update dependencies --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d77e65fc..7245ef66 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.9.3 + rev: 5.10.0 hooks: - id: isort files: '.*' From 9494f17bda5ee407e0d0e966fe9b8e50c003ecc5 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 9 Nov 2021 17:51:17 +0000 Subject: [PATCH 09/34] ci: dependency update --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7245ef66..edb82db5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,7 +4,7 @@ repos: hooks: - id: black - repo: https://github.com/pycqa/isort - rev: 5.10.0 + rev: 5.10.1 hooks: - id: isort files: '.*' From add2936c52b44262b8ec7e3489ee9c7cce7e527c Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 12:36:54 +0200 Subject: [PATCH 10/34] refactor: move parallel processing to utils centralize configuration less duplicate code --- popmon/analysis/apply_func.py | 43 ++++++++++--------- popmon/config.py | 5 ++- popmon/utils.py | 21 +++++++++ .../visualization/alert_section_generator.py | 14 +++--- popmon/visualization/histogram_section.py | 14 ++---- popmon/visualization/section_generator.py | 15 +++---- .../traffic_light_section_generator.py | 14 +++--- 7 files changed, 68 insertions(+), 58 deletions(-) diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 7c9e62da..0ed7ae4e 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -18,15 +18,13 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
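The pattern this commit applies across the modules below: per-module joblib boilerplate is replaced by a single shared helper in ``popmon.utils``. Schematically, with ``square`` as an illustrative stand-in for the real worker functions:

.. code-block:: python

    import multiprocessing

    from joblib import Parallel, delayed

    from popmon.utils import parallel

    def square(x):  # stand-in for e.g. _plot_metric or apply_func_array
        return x * x

    args_list = [(1,), (2,), (3,)]

    # before: every module sized and created its own worker pool
    num_cores = multiprocessing.cpu_count()
    out_old = Parallel(n_jobs=num_cores)(delayed(square)(*args) for args in args_list)

    # after: one shared entry point
    out_new = parallel(square, args_list)

    assert out_old == out_new == [1, 4, 9]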
-import collections -import multiprocessing import warnings import numpy as np import pandas as pd -from joblib import Parallel, delayed from ..base import Module +from ..utils import parallel class ApplyFunc(Module): @@ -157,27 +155,32 @@ def transform(self, datastore): features = self.get_features(apply_to_data.keys()) - num_cores = multiprocessing.cpu_count() same_key = self.assign_to_key == self.apply_to_key - res = Parallel(n_jobs=num_cores)( - delayed(apply_func_array)( - feature=feature, - metrics=self.metrics, - apply_to_df=self.get_datastore_object( + args = [ + { + "feature": feature, + "metrics": self.metrics, + "apply_to_df": self.get_datastore_object( apply_to_data, feature, dtype=pd.DataFrame ), - assign_to_df=None - if same_key - else self.get_datastore_object( - assign_to_data, feature, dtype=pd.DataFrame, default=pd.DataFrame() + "assign_to_df": ( + None + if same_key + else self.get_datastore_object( + assign_to_data, + feature, + dtype=pd.DataFrame, + default=pd.DataFrame(), + ) ), - apply_funcs=self.apply_funcs, - same_key=same_key, - ) + "apply_funcs": self.apply_funcs, + "same_key": same_key, + } for feature in features - ) - new_metrics = {r[0]: r[1] for r in res} + ] + result = parallel(apply_func_array, args, mode="kwargs") + new_metrics = dict(result) # storage datastore[self.store_key] = new_metrics @@ -189,7 +192,7 @@ def apply_func_array( ): """Apply list of functions to dataframe - Split off for parallellization reasons + Split off for parallelization reasons :param str feature: feature currently looping over :param list metrics: list of selected metrics to apply functions to @@ -197,7 +200,7 @@ def apply_func_array( :param assign_to_df: pandas data frame the output of function is assigned to :param apply_funcs: list of functions to apply to :param same_key: if True, merge apply_to_df and assign_to_df before returning assign_to_df - :return: untion of feature and assign_to_df + :return: union of feature and assign_to_df """ if not isinstance(apply_to_df, pd.DataFrame): raise TypeError( diff --git a/popmon/config.py b/popmon/config.py index b25b4200..cc7f6a48 100644 --- a/popmon/config.py +++ b/popmon/config.py @@ -17,7 +17,7 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- +import multiprocessing from fnmatch import fnmatch profiles = { @@ -130,3 +130,6 @@ def get_stat_description(name: str): return f"{int(name[1:])}% percentile" return "" + + +num_jobs = multiprocessing.cpu_count() diff --git a/popmon/utils.py b/popmon/utils.py index 35bbbeab..04fe848c 100644 --- a/popmon/utils.py +++ b/popmon/utils.py @@ -21,6 +21,10 @@ from textwrap import shorten from typing import Iterable, Optional +from joblib import Parallel, delayed + +from popmon.config import num_jobs + def short_date(date: str): return shorten(date, width=22, placeholder="") @@ -37,3 +41,20 @@ def filter_metrics(metrics, ignore_stat_endswith, show_stats: Optional[Iterable] if any(fnmatch.fnmatch(m, pattern) for pattern in show_stats) ] return metrics + + +def parallel(func, args_list, mode="args"): + """ + Routine for parallel processing + """ + + if num_jobs == 1: + results = [ + func(*args) if mode == "args" else func(**args) for args in args_list + ] + else: + results = Parallel(n_jobs=num_jobs)( + delayed(func)(*args) if mode == "args" else delayed(func)(**args) + for args in args_list + ) + return results diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index 5d2daac6..23cc5a58 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -18,16 +18,13 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import multiprocessing - import numpy as np import pandas as pd -from joblib import Parallel, delayed from tqdm import tqdm from ..base import Module from ..config import get_stat_description -from ..utils import filter_metrics, short_date +from ..utils import filter_metrics, parallel, short_date from ..visualization.utils import _prune, plot_bars_b64 from .traffic_light_section_generator import _plot_metrics @@ -109,8 +106,6 @@ def transform(self, datastore): features = self.get_features(data_obj.keys()) features_w_metrics = [] - num_cores = multiprocessing.cpu_count() - self.logger.info( f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}' ) @@ -149,8 +144,8 @@ def transform(self, datastore): ) ) if self.plot_metrics: - plots += Parallel(n_jobs=num_cores)( - delayed(_plot_metric)( + args = [ + ( feature, metric, dates, @@ -165,7 +160,8 @@ def transform(self, datastore): self.skip_empty_plots, ) for metric in metrics - ) + ] + plots += parallel(_plot_metric, args) # filter out potential empty plots (from skip empty plots) if self.skip_empty_plots: plots = [e for e in plots if len(e["plot"])] diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index 2ec5d348..3be3dc38 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -18,11 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
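Call sites pass the new helper either positional tuples (the default ``mode="args"``, as in the section generators below) or keyword dicts (``mode="kwargs"``, as in ``ApplyFunc``); it falls back to a serial loop when only one job is available. A minimal sketch, where ``add`` is purely illustrative:

.. code-block:: python

    from popmon.utils import parallel

    def add(a, b):  # illustrative stand-in for the real worker functions
        return a + b

    assert parallel(add, [(1, 2), (3, 4)]) == [3, 7]
    assert parallel(add, [{"a": 5, "b": 6}], mode="kwargs") == [11]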
-import multiprocessing - import pandas as pd from histogrammar.util import get_hist_props -from joblib import Parallel, delayed from tqdm import tqdm from ..analysis.hist_numpy import ( @@ -32,7 +29,7 @@ ) from ..base import Module from ..config import get_stat_description -from ..utils import short_date +from ..utils import parallel, short_date from ..visualization.utils import plot_overlay_1d_histogram_b64 @@ -80,8 +77,6 @@ def transform(self, datastore): features = self.get_features(data_obj.keys()) features_w_metrics = [] - num_cores = multiprocessing.cpu_count() - self.logger.info(f'Generating section "{self.section_name}".') for feature in tqdm(features, ncols=100): @@ -106,10 +101,9 @@ def transform(self, datastore): df[hist_names].iloc[-i].values for i in reversed(range(1, last_n + 1)) ] - plots = Parallel(n_jobs=num_cores)( - delayed(_plot_histograms)(feature, dates[i], hists[i], hist_names) - for i in range(last_n) - ) + args = [(feature, dates[i], hists[i], hist_names) for i in range(last_n)] + plots = parallel(_plot_histograms, args) + # filter out potential empty plots plots = [e for e in plots if len(e["plot"])] features_w_metrics.append( diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index dd7ba4b2..3f94bca3 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -18,16 +18,13 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import multiprocessing - import numpy as np import pandas as pd -from joblib import Parallel, delayed from tqdm import tqdm from ..base import Module from ..config import get_stat_description -from ..utils import filter_metrics, short_date +from ..utils import filter_metrics, parallel, short_date from ..visualization.utils import _prune, plot_bars_b64 @@ -106,8 +103,6 @@ def transform(self, datastore): features = self.get_features(data_obj.keys()) features_w_metrics = [] - num_cores = multiprocessing.cpu_count() - self.logger.info( f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}' ) @@ -130,8 +125,8 @@ def transform(self, datastore): df.columns, self.ignore_stat_endswith, self.show_stats ) - plots = Parallel(n_jobs=num_cores)( - delayed(_plot_metric)( + args = [ + ( feature, metric, dates, @@ -146,7 +141,9 @@ def transform(self, datastore): self.skip_empty_plots, ) for metric in metrics - ) + ] + plots = parallel(_plot_metric, args) + # filter out potential empty plots (from skip empty plots) if self.skip_empty_plots: plots = [e for e in plots if len(e["plot"])] diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 503fd027..ca5ce1d7 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -18,16 +18,13 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-import multiprocessing - import numpy as np import pandas as pd -from joblib import Parallel, delayed from tqdm import tqdm from ..base import Module from ..config import get_stat_description -from ..utils import filter_metrics, short_date +from ..utils import filter_metrics, parallel, short_date from ..visualization.utils import ( _prune, plot_traffic_lights_alerts_b64, @@ -114,8 +111,6 @@ def transform(self, datastore): features = self.get_features(data_obj.keys()) features_w_metrics = [] - num_cores = multiprocessing.cpu_count() - self.logger.info( f'Generating section "{self.section_name}". skip empty plots: {self.skip_empty_plots}' ) @@ -154,8 +149,8 @@ def transform(self, datastore): ) if self.plot_metrics: - plots += Parallel(n_jobs=num_cores)( - delayed(_plot_metric)( + args = [ + ( metric, dates, df[metric], @@ -165,7 +160,8 @@ def transform(self, datastore): self.skip_empty_plots, ) for metric in metrics - ) + ] + plots += parallel(_plot_metric, args) # filter out potential empty plots (from skip empty plots) if self.skip_empty_plots: From 4076d5eac45127b427111ac31b4d60963dc9a9ba Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 16 Nov 2021 17:51:27 +0000 Subject: [PATCH 11/34] ci: update dependencies --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index edb82db5..a7540272 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - flake8-comprehensions args: [ "--select=E9,F63,F7,F82,C4"] - repo: https://github.com/asottile/pyupgrade - rev: v2.29.0 + rev: v2.29.1 hooks: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] From 10a3f0d8221863f2036ba3cc53686b03bc07a983 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 17 Nov 2021 17:51:43 +0000 Subject: [PATCH 12/34] ci: upgrading packages --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a7540272..634044a9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.10b0 + rev: 21.11b0 hooks: - id: black - repo: https://github.com/pycqa/isort From 9cd7744458a462c40539dad8094f700071c237da Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 18 Nov 2021 17:51:38 +0000 Subject: [PATCH 13/34] ci: upgrading packages --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 634044a9..9fd4be2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.11b0 + rev: 21.11b1 hooks: - id: black - repo: https://github.com/pycqa/isort From 8977b1e4ea7305b1858fb4dbfe28bfb27c78d30e Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Fri, 19 Nov 2021 17:51:35 +0000 Subject: [PATCH 14/34] ci: upgrading packages --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9fd4be2f..2918d023 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,6 +22,6 @@ repos: - id: pyupgrade args: ['--py36-plus','--exit-zero-even-if-changed'] - repo: https://github.com/asottile/blacken-docs - rev: v1.11.0 + rev: v1.12.0 hooks: - id: blacken-docs From 2534cea19f5cc4ec2a99f387be77ab4f25a61ebc Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 14 Oct 2021 
01:22:50 +0200 Subject: [PATCH 15/34] refactor: pipeline transformation structure --- popmon/alerting/alerts_summary.py | 22 ++--- popmon/alerting/compute_tl_bounds.py | 68 ++++++------- popmon/analysis/apply_func.py | 45 +++++---- popmon/analysis/comparison/hist_comparer.py | 25 +++-- popmon/analysis/functions.py | 2 +- popmon/analysis/merge_statistics.py | 17 ++-- popmon/analysis/profiling/hist_profiler.py | 16 ++- popmon/analysis/profiling/pull_calculator.py | 13 ++- popmon/base/module.py | 98 ++++++++++++++++--- popmon/base/pipeline.py | 79 ++++++++++++++- popmon/hist/hist_splitter.py | 26 ++--- popmon/io/file_reader.py | 15 +-- popmon/io/file_writer.py | 23 ++--- popmon/io/json_reader.py | 4 +- popmon/pipeline/metrics_pipelines.py | 8 +- popmon/pipeline/report.py | 4 +- popmon/pipeline/report_pipelines.py | 2 + popmon/stitching/hist_stitcher.py | 26 ++--- .../visualization/alert_section_generator.py | 60 +++++++----- popmon/visualization/histogram_section.py | 35 ++++--- popmon/visualization/report_generator.py | 10 +- popmon/visualization/section_generator.py | 48 ++++----- .../traffic_light_section_generator.py | 46 +++++---- .../popmon/alerting/test_compute_tl_bounds.py | 5 +- .../analysis/profiling/test_apply_func.py | 2 +- .../popmon/analysis/test_merge_statistics.py | 2 +- tests/popmon/base/test_pipeline.py | 51 +++++----- tests/popmon/io/test_file_reader.py | 2 +- tests/popmon/io/test_file_writer.py | 8 +- tests/popmon/io/test_json_reader.py | 2 +- tools/pipeline_viz.py | 97 ++++++++++++++++++ 31 files changed, 572 insertions(+), 289 deletions(-) create mode 100644 tools/pipeline_viz.py diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 88a35343..870abcce 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -19,6 +19,7 @@ import fnmatch +from typing import Optional import numpy as np import pandas as pd @@ -31,6 +32,8 @@ class AlertsSummary(Module): It combines the alerts-summaries of all individual features into an artificial feature "_AGGREGATE_". """ + _input_keys = ("read_key", ) + _output_keys = ("store_key", ) def __init__( self, @@ -50,21 +53,16 @@ def __init__( """ super().__init__() self.read_key = read_key - self.store_key = store_key - if not self.store_key: - self.store_key = self.read_key + self.store_key = store_key or self.read_key self.features = features or [] self.ignore_features = ignore_features or [] self.combined_variable = combined_variable - def transform(self, datastore): - # fetch and check input data - data = self.get_datastore_object(datastore, self.read_key, dtype=dict) - + def transform(self, data: dict) -> Optional[dict]: # determine all possible features, used for the comparison below - features = self.get_features(data.keys()) + features = self.get_features(list(data.keys())) if len(features) == 0: - return datastore + return None self.logger.info( f'Combining alerts into artificial variable "{self.combined_variable}"' @@ -88,7 +86,7 @@ def transform(self, datastore): self.logger.warning( "indices of features are different. no alerts summary generated." ) - return datastore + return None # STEP 2: Concatenate the dataframes, there was one for each original feature. 
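        # the per-feature indices were just verified to be identical, so axis=1 aligns rows one-to-one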
tlv = pd.concat(df_list, axis=1) @@ -104,6 +102,4 @@ def transform(self, datastore): # store combination of traffic alerts data[self.combined_variable] = dfc - datastore[self.store_key] = data - - return datastore + return data diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index ef7fb5bb..e8c77480 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -18,11 +18,10 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import collections import copy import fnmatch -import uuid from collections import defaultdict +from typing import Tuple, Any import numpy as np import pandas as pd @@ -117,6 +116,8 @@ class ComputeTLBounds(Module): meant to be generic. Then bounds can be stored as either raw values or as directly calculated values on the statistics of the data. """ + _input_keys = ("read_key", ) + _output_keys = ("store_key", "apply_funcs_key") def __init__( self, @@ -133,7 +134,7 @@ def __init__( entire=False, **kwargs, ): - """Initialize an instance of TafficLightBounds module. + """Initialize an instance of TrafficLightBounds module. :param str read_key: key of input data to read from datastore :param str store_key: key of output data to store in datastore (optional) @@ -152,12 +153,13 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.apply_funcs_key = apply_funcs_key + self.monitoring_rules = monitoring_rules or {} self.features = features or [] self.ignore_features = ignore_features or [] self.traffic_lights = {} self.traffic_light_funcs = [] - self.apply_funcs_key = apply_funcs_key self.traffic_light_func = func if func is not None else traffic_light self.metrics_wide = metrics_wide self.prefix = prefix @@ -165,10 +167,12 @@ def __init__( self.entire = entire self.kwargs = copy.copy(kwargs) - # check inputs - if not isinstance(self.traffic_light_func, collections.Callable): + if not callable(self.traffic_light_func): raise TypeError("supplied function must be callable object") + def get_description(self): + return self.traffic_light_func.__name__ + def _set_traffic_lights(self, feature, cols, pattern, rule_name): process_cols = fnmatch.filter(cols, pattern) @@ -195,12 +199,9 @@ def _set_traffic_lights(self, feature, cols, pattern, rule_name): } ) - def transform(self, datastore): - # fetch and check input data - test_data = self.get_datastore_object(datastore, self.read_key, dtype=dict) - + def transform(self, test_data: dict) -> Tuple[Any, Any]: # determine all possible features, used for the comparison below - features = self.get_features(test_data.keys()) + features = self.get_features(list(test_data.keys())) pkeys, nkeys = collect_traffic_light_bounds(self.monitoring_rules) @@ -212,7 +213,9 @@ def transform(self, datastore): # --- 1. 
tl bounds explicitly defined for a particular feature if feature in pkeys: explicit_cols = [ - pcol for pcol in pkeys[feature] if pcol in test_df.columns + pcol + for pcol in pkeys[feature] + if pcol in test_df.columns ] implicit_cols = set(pkeys[feature]) - set(explicit_cols) @@ -237,13 +240,7 @@ def transform(self, datastore): feature, test_df.columns, pattern, rule_name="pattern" ) - # storage - if self.store_key: - datastore[self.store_key] = self.traffic_lights - if self.apply_funcs_key: - datastore[self.apply_funcs_key] = self.traffic_light_funcs - - return datastore + return self.traffic_lights, self.traffic_light_funcs def pull_bounds( @@ -338,7 +335,12 @@ class DynamicBounds(Pipeline): """Calculate dynamic traffic light bounds based on pull thresholds and dynamic mean and std.deviation.""" def __init__( - self, read_key, rules, store_key="", suffix_mean="_mean", suffix_std="_std" + self, + read_key, + rules, + store_key="", + suffix_mean="_mean", + suffix_std="_std", ): """Initialize an instance of DynamicTrafficLightBounds. @@ -348,10 +350,8 @@ def __init__( :param str suffix_mean: suffix of mean. mean column = metric + suffix_mean :param str suffix_std: suffix of std. std column = metric + suffix_std """ - super().__init__(modules=[]) self.read_key = read_key - - apply_funcs_key = str(uuid.uuid4()) + apply_funcs_key = f"{read_key}__{store_key}" expand_bounds = ComputeTLBounds( read_key=read_key, @@ -368,8 +368,7 @@ def __init__( assign_to_key=store_key, apply_funcs_key=apply_funcs_key, ) - - self.modules = [expand_bounds, calc_bounds] + super().__init__(modules=[expand_bounds, calc_bounds]) def transform(self, datastore): self.logger.info(f'Calculating dynamic bounds for "{self.read_key}"') @@ -380,7 +379,12 @@ class StaticBounds(Pipeline): """Calculate static traffic light bounds based on pull thresholds and static mean and std.deviation.""" def __init__( - self, read_key, rules, store_key="", suffix_mean="_mean", suffix_std="_std" + self, + read_key, + rules, + store_key="", + suffix_mean="_mean", + suffix_std="_std", ): """Initialize an instance of StaticBounds. @@ -390,10 +394,8 @@ def __init__( :param str suffix_mean: suffix of mean. mean column = metric + suffix_mean :param str suffix_std: suffix of std. std column = metric + suffix_std """ - super().__init__(modules=[]) self.read_key = read_key - - apply_funcs_key = str(uuid.uuid4()) + apply_funcs_key = f"{read_key}__{store_key}" expand_bounds = ComputeTLBounds( read_key=read_key, @@ -411,7 +413,7 @@ def __init__( apply_funcs_key=apply_funcs_key, ) - self.modules = [expand_bounds, calc_bounds] + super().__init__(modules=[expand_bounds, calc_bounds]) def transform(self, datastore): self.logger.info(f'Calculating static bounds for "{self.read_key}"') @@ -437,10 +439,8 @@ def __init__(self, read_key, store_key, rules, expanded_rules_key=""): :param str expanded_rules_key: store key of expanded monitoring rules to store in data store, eg. these can be used for plotting. 
(optional) """ - super().__init__(modules=[]) self.read_key = read_key - - apply_funcs_key = str(uuid.uuid4()) + apply_funcs_key = f"{read_key}__{store_key}" # generate static traffic light bounds by expanding the wildcarded monitoring rules expand_bounds = ComputeTLBounds( @@ -457,7 +457,7 @@ def __init__(self, read_key, store_key, rules, expanded_rules_key=""): apply_funcs_key=apply_funcs_key, ) - self.modules = [expand_bounds, apply_bounds] + super().__init__(modules=[expand_bounds, apply_bounds]) def transform(self, datastore): self.logger.info(f'Calculating traffic light alerts for "{self.read_key}"') diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 0ed7ae4e..617fc669 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -19,6 +19,7 @@ import warnings +from typing import Optional import numpy as np import pandas as pd @@ -32,6 +33,8 @@ class ApplyFunc(Module): Extra parameters (kwargs) can be passed to the apply function. """ + _input_keys = ("apply_to_key", "assign_to_key", "apply_funcs_key") + _output_keys = ("store_key", ) def __init__( self, @@ -67,9 +70,10 @@ def __init__( """ super().__init__() self.apply_to_key = apply_to_key - self.assign_to_key = self.apply_to_key if not assign_to_key else assign_to_key - self.store_key = self.assign_to_key if not store_key else store_key + self.assign_to_key = assign_to_key or apply_to_key self.apply_funcs_key = apply_funcs_key + self.store_key = store_key or self.assign_to_key + self.features = features or [] self.metrics = metrics or [] self.msg = msg @@ -79,6 +83,14 @@ def __init__( for af in apply_funcs: self.add_apply_func(**af) + def get_description(self): + if len(self.apply_funcs) > 0: + return " and ".join([x['func'].__name__ for x in self.apply_funcs]) + elif self.apply_funcs_key: + return f"functions from arg '{self.apply_funcs_key}'" + else: + raise NotImplementedError + def add_apply_func( self, func, @@ -127,7 +139,7 @@ def add_apply_func( } ) - def transform(self, datastore): + def transform(self, apply_to_data: dict, assign_to_data: Optional[dict] = None, apply_funcs: Optional[list] = None): """ Apply functions to specified feature and metrics @@ -137,23 +149,17 @@ def transform(self, datastore): :return: updated datastore :rtype: dict """ - if self.msg: - self.logger.info(self.msg) + assert isinstance(apply_to_data, dict) + if assign_to_data is None: + assign_to_data = {} - apply_to_data = self.get_datastore_object( - datastore, self.apply_to_key, dtype=dict - ) - assign_to_data = self.get_datastore_object( - datastore, self.assign_to_key, dtype=dict, default={} - ) - - if self.apply_funcs_key: - apply_funcs = self.get_datastore_object( - datastore, self.apply_funcs_key, dtype=list - ) + if apply_funcs is not None: self.apply_funcs += apply_funcs - features = self.get_features(apply_to_data.keys()) + if self.msg: + self.logger.info(self.msg) + + features = self.get_features(list(apply_to_data.keys())) same_key = self.assign_to_key == self.apply_to_key @@ -181,10 +187,7 @@ def transform(self, datastore): ] result = parallel(apply_func_array, args, mode="kwargs") new_metrics = dict(result) - - # storage - datastore[self.store_key] = new_metrics - return datastore + return new_metrics def apply_func_array( diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index abdbc6ef..9f51fb36 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -162,13 +162,14 @@ def 
__init__( :param args: (tuple, optional): residual args passed on to func_mean and func_std :param kwargs: (dict, optional): residual kwargs passed on to func_mean and func_std """ - super().__init__(modules=[]) - if assign_to_key is None: assign_to_key = read_key # make reference histogram(s) - hist_collector = ApplyFunc(apply_to_key=read_key, assign_to_key=assign_to_key) + hist_collector = ApplyFunc( + apply_to_key=read_key, + assign_to_key=assign_to_key, + ) hist_collector.add_apply_func( func=func_hist_collector, entire=True, suffix=suffix, *args, **kwargs ) @@ -187,7 +188,8 @@ def __init__( } ], ) - self.modules = [hist_collector, hist_comparer] + + super().__init__(modules=[hist_collector, hist_comparer]) class RollingHistComparer(HistComparer): @@ -374,15 +376,20 @@ def __init__( :param args: (tuple, optional): residual args passed on to func_hist_collector :param kwargs: (dict, optional): residual kwargs passed on to func_hist_collector """ - super().__init__(modules=[]) - if assign_to_key is None: assign_to_key = read_key # make reference histogram(s) - hist_collector = ApplyFunc(apply_to_key=read_key, assign_to_key=assign_to_key) + hist_collector = ApplyFunc( + apply_to_key=read_key, + assign_to_key=assign_to_key + ) hist_collector.add_apply_func( - func=func_hist_collector, hist_name=hist_col, suffix="", *args, **kwargs + func=func_hist_collector, + hist_name=hist_col, + suffix="", + *args, + **kwargs ) # do histogram comparison @@ -399,7 +406,7 @@ def __init__( ], ) - self.modules = [hist_collector, hist_comparer] + super().__init__(modules=[hist_collector, hist_comparer]) class RollingNormHistComparer(NormHistComparer): diff --git a/popmon/analysis/functions.py b/popmon/analysis/functions.py index 75a2938b..bc7054e6 100644 --- a/popmon/analysis/functions.py +++ b/popmon/analysis/functions.py @@ -46,7 +46,7 @@ def pull(row, suffix_mean="_mean", suffix_std="_std", cols=None): """ x = pd.Series() if cols is None or len(cols) == 0: - # if no columns are given, find colums for which pulls can be calculated. + # if no columns are given, find columns for which pulls can be calculated. # e.g. to calculate x_pull, need to have [x, x_mean, x_std] present. If so, put x in cols. cols = [] for m in row.index.to_list()[:]: diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 188158b3..3d6eb3be 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -18,6 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from typing import List + import pandas as pd from ..base import Module @@ -25,22 +27,20 @@ class MergeStatistics(Module): """Merging dictionaries of features containing dataframes with statistics as its values.""" + _input_keys = ("read_keys", ) + _output_keys = ("store_key", ) - def __init__(self, read_keys, store_key): + def __init__(self, read_keys: List[str], store_key: str): """Initialize an instance of MergeStatistics. 
- :param str read_keys: list of keys of input data to read from the datastore + :param list read_keys: list of keys of input data to read from the datastore :param str store_key: key of output data to store in the datastore """ super().__init__() self.read_keys = read_keys self.store_key = store_key - def transform(self, datastore): - dicts = [ - self.get_datastore_object(datastore, read_key, dtype=dict) - for read_key in self.read_keys - ] + def transform(self, dicts: list): merged_stats = {} for dict_ in dicts: for feature in dict_.keys(): @@ -53,5 +53,4 @@ def transform(self, datastore): ) else: merged_stats[feature] = dict_[feature] - datastore[self.store_key] = merged_stats - return datastore + return merged_stats diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index afd5862a..45571ac8 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -57,6 +57,8 @@ class HistProfiler(Module): :param str index_col: key for index in split dictionary :param dict stats_functions: function_name, function(bin_labels, bin_counts) dictionary """ + _input_keys = ("read_key", ) + _output_keys = ("store_key", ) def __init__( self, @@ -72,12 +74,12 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.features = features or [] self.ignore_features = ignore_features or [] self.var_timestamp = var_timestamp or [] self.hist_col = hist_col self.index_col = index_col - self.general_stats_1d = [ "count", "filled", @@ -89,7 +91,6 @@ def __init__( ] self.general_stats_2d = ["count", "phik"] self.category_stats_1d = ["fraction_true"] - self.stats_functions = stats_functions if self.stats_functions is None: self.stats_functions = DEFAULT_STATS @@ -222,15 +223,13 @@ def _profile_hist(self, split, hist_name): return profile_list - def transform(self, datastore): + def transform(self, data: dict) -> dict: self.logger.info( f'Profiling histograms "{self.read_key}" as "{self.store_key}"' ) - data = self.get_datastore_object(datastore, self.read_key, dtype=dict) - profiled = {} - - features = self.get_features(data.keys()) + features = self.get_features(list(data.keys())) + profiled = {} for feature in features[:]: df = self.get_datastore_object(data, feature, dtype=pd.DataFrame) hist_split_list = df.reset_index().to_dict("records") @@ -242,5 +241,4 @@ def transform(self, datastore): [self.index_col] ) - datastore[self.store_key] = profiled - return datastore + return profiled diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index 17936872..3e266545 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -131,8 +131,11 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = {"window": window, "shift": shift} - kws.update(kwargs) + kws = { + "window": window, + "shift": shift, + **kwargs + } super().__init__( rolling_mean, rolling_std, @@ -183,8 +186,10 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = {"shift": shift} - kws.update(kwargs) + kws = { + "shift": shift, + **kwargs + } super().__init__( expanding_mean, expanding_std, diff --git a/popmon/base/module.py b/popmon/base/module.py 
index 150db6b6..56b5f33c 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -19,10 +19,13 @@ import logging +from abc import ABC, abstractmethod -class Module: +class Module(ABC): """Base class used for modules in a pipeline.""" + _input_keys = None + _output_keys = None def __init__(self): """Module initialization""" @@ -31,6 +34,26 @@ def __init__(self): self.feature_begins_with = [] self.ignore_features = [] + def get_inputs(self): + in_keys = {} + for x in self._input_keys: + in_key = self.__dict__[x] + if in_key != "" and in_key is not None and in_key not in in_keys: + in_keys[x] = in_key + return in_keys + + def get_outputs(self): + out_keys = {} + for x in self._output_keys: + out_key = self.__dict__[x] + if out_key != "" and out_key is not None and out_key not in out_keys: + out_keys[x] = out_key + return out_keys + + # @abstractmethod + def get_description(self): + return "" + def set_logger(self, logger): """Set logger of module @@ -38,7 +61,8 @@ def set_logger(self, logger): """ self.logger = logger - def get_datastore_object(self, datastore, feature, dtype, default=None): + @staticmethod + def get_datastore_object(datastore, feature, dtype, default=None): """Get object from datastore. Bit more advanced than dict.get() @@ -49,17 +73,19 @@ def get_datastore_object(self, datastore, feature, dtype, default=None): :param obj default: object to default to in case key not found. :return: retrieved object """ - obj = datastore.get(feature) - if obj is None: - if default is not None: - obj = default - else: + if default is not None: + obj = datastore.get(feature, default) + else: + try: + obj = datastore[feature] + except KeyError: raise ValueError(f"`{feature}` not found in the datastore!") + if not isinstance(obj, dtype): raise TypeError(f"obj `{feature}` is not an instance of `{dtype}`!") return obj - def get_features(self, all_features): + def get_features(self, all_features: list) -> list: """Get all features that meet feature_begins_with and ignore_features requirements :param list all_features: input features list @@ -67,25 +93,65 @@ def get_features(self, all_features): :rtype: list """ all_features = sorted(all_features) - features = self.features - if not self.features: - features = all_features + features = self.features or all_features + if self.feature_begins_with: features = [k for k in features if k.startswith(self.feature_begins_with)] if self.ignore_features: features = [k for k in features if k not in self.ignore_features] features_not_in_input = [ - feature for feature in features if feature not in all_features + feature + for feature in features + if feature not in all_features ] - features = [feature for feature in features if feature in all_features] - for feature in features_not_in_input: self.logger.warning(f'Feature "{feature}" not in input data; skipping.') + features = [ + feature + for feature in features + if feature in all_features + ] return features - def transform(self, datastore): + def _transform(self, datastore): + """Transformation helper function""" + + inputs = {} + self.logger.debug(f"load from: {type(self)}") + for key in self._input_keys: + key_value = self.__dict__[key] + if key_value and len(key_value) > 0: + if isinstance(key_value, list): + inputs[key] = [datastore.get(k) for k in key_value] + else: + inputs[key] = datastore.get(key_value) + else: + inputs[key] = None + + self.logger.debug(f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})") + + # cache datastore + self._datastore = datastore + + # 
transformation + outputs = self.transform(*list(inputs.values())) + + # transform returns None if no update needs to be made + if outputs is not None: + if len(self._output_keys) == 1: + outputs = (outputs,) + + for k, v in zip(self._output_keys, outputs): + key_value = self.__dict__[k] + self.logger.debug(f"store(key={k}, key_value={key_value}, value={str(v):.100s})") + if key_value and len(key_value) > 0: # and v is not None: + datastore[key_value] = v + + return datastore + + def transform(self, *args): """Central function of the module. Typically transform() takes something from the datastore, does something to it, and puts the results @@ -95,4 +161,4 @@ def transform(self, datastore): :return: updated output datastore :rtype: dict """ - return datastore + raise NotImplementedError diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index 31a83afe..3995235a 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -17,13 +17,14 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - +import json import logging +from pathlib import Path from ..base import Module -class Pipeline(Module): +class Pipeline: """Base class used for to run modules in a pipeline.""" def __init__(self, modules, logger=None): @@ -32,7 +33,6 @@ def __init__(self, modules, logger=None): :param list modules: modules of the pipeline. :param logger: logger to be used by each module. """ - super().__init__() self.modules = modules self.set_logger(logger) @@ -68,5 +68,76 @@ def transform(self, datastore): """ for module in self.modules: - datastore = module.transform(datastore) + self.logger.debug(f"transform {module.__class__.__name__}") + if isinstance(module, Pipeline): + datastore = module.transform(datastore) + else: + datastore = module._transform(datastore) return datastore + + def visualize(self, versioned=True, funcs=None, dsets=None): + if dsets is None: + dsets = {} + if funcs is None: + funcs = {} + + modules = [] + for module in self.modules: + name = module.__class__.__name__ + if isinstance(module, Pipeline): + modules.append( + module.visualize(versioned, funcs, dsets) + ) + else: + in_keys = module.get_inputs() + + if versioned: + new_ins = {} + for k, in_key in in_keys.items(): + if in_key not in dsets: + dsets[in_key] = 1 + in_key += f" (v{dsets[in_key]})" + new_ins[k] = in_key + in_keys = new_ins + + out_keys = module.get_outputs() + if versioned: + new_outs = {} + for k, out_key in out_keys.items(): + if out_key in dsets: + dsets[out_key] += 1 + else: + dsets[out_key] = 1 + out_key += f" (v{dsets[out_key]})" + new_outs[k] = out_key + out_keys = new_outs + + self.logger.debug(f"{name}(inputs={in_keys}, outputs={out_keys})") + + # add unique id + if name not in funcs: + funcs[name] = {} + if id(module) not in funcs[name]: + funcs[name][id(module)] = len(funcs[name]) + 1 + + modules.append( + { + 'type': 'module', + 'name': f'{name}', + 'i': f'{funcs[name][id(module)]}', + 'desc': module.get_description(), + 'in': in_keys, + 'out': out_keys + } + ) + data = { + 'type': 'subgraph', + 'name': self.__class__.__name__, + 'modules': modules + } + return data + + def to_json(self, file_name, versioned=True): + d = self.visualize(versioned=versioned) + data = json.dumps(d, indent=4, sort_keys=True) + Path(file_name).write_text(data) diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index 4d11260e..43163414 100644 --- a/popmon/hist/hist_splitter.py +++ 
b/popmon/hist/hist_splitter.py @@ -37,6 +37,9 @@ class HistSplitter(Module): where time is the index and each row is a x:y histogram. """ + _input_keys = ("read_key", ) + _output_keys = ("store_key", ) + def __init__( self, read_key, @@ -70,6 +73,7 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.features = features or [] self.ignore_features = ignore_features or [] self.feature_begins_with = feature_begins_with @@ -86,6 +90,9 @@ def __init__( "flatten_output requires short_keys attribute to be False." ) + def get_description(self): + return "" + def update_divided(self, divided, split, yname): if self.flatten_output: divided.update(split) @@ -95,18 +102,16 @@ def update_divided(self, divided, split, yname): ] return divided - def transform(self, datastore): - divided = {} - + def transform(self, data: dict) -> dict: self.logger.info( f'Splitting histograms "{self.read_key}" as "{self.store_key}"' ) - data = self.get_datastore_object(datastore, self.read_key, dtype=dict) # determine all possible features, used for comparison below - features = self.get_features(data.keys()) + features = self.get_features(list(data.keys())) # if so requested split selected histograms along first axis, and then divide + divided = {} for feature in features[:]: self.logger.debug(f'Now splitting histogram "{feature}"') hist = get_histogram(data[feature]) @@ -147,9 +152,8 @@ def transform(self, datastore): self.update_divided(divided=divided, split=split, yname=yname) # turn divided dicts into dataframes with index - keys = list(divided.keys()) - for k in keys: - divided[k] = pd.DataFrame(divided.pop(k)).set_index(self.index_col) - - datastore[self.store_key] = divided - return datastore + divided = { + k: pd.DataFrame(v).set_index(self.index_col) + for k, v in divided.items() + } + return divided diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 06c5e0f6..929bec0b 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -28,6 +28,9 @@ class FileReader(Module): """Module to read contents from a file, transform the contents with a function and write them to the datastore.""" + _input_keys = tuple() + _output_keys = ("store_key", ) + def __init__( self, store_key: str, @@ -45,9 +48,7 @@ def __init__( super().__init__() if not isinstance(file_path, (str, Path)): raise TypeError("file_path should be of type `str` or `pathlib.Path`") - if apply_func is not None and not isinstance( - apply_func, collections.abc.Callable - ): + if apply_func is not None and not callable(apply_func): raise TypeError("transformation function must be a callable object") self.store_key = store_key @@ -55,7 +56,10 @@ def __init__( self.apply_func = apply_func self.kwargs = kwargs - def transform(self, datastore): + def get_description(self): + return self.file_path + + def transform(self): with open(self.file_path) as file: data = file.read() @@ -68,5 +72,4 @@ def transform(self, datastore): ) # store the transformed/original contents - datastore[self.store_key] = data - return datastore + return data diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 2408b032..800729c2 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -28,6 +28,8 @@ class FileWriter(Module): """Module transforms specific datastore content and writes it to a file.""" + _input_keys = ("read_key", ) + _output_keys = ("store_key", ) def __init__( self, @@ -48,18 +50,20 @@ def __init__( super().__init__() if file_path is not None and not 
isinstance(file_path, (str, Path)): raise TypeError("file_path should be of type `str` or `pathlib.Path`") - if apply_func is not None and not isinstance( - apply_func, collections.abc.Callable - ): + if apply_func is not None and not callable(apply_func): raise TypeError("transformation function must be a callable object") self.read_key = read_key - self.store_key = store_key + self.store_key = store_key or read_key + self.file_path = file_path self.apply_func = apply_func self.kwargs = kwargs - def transform(self, datastore): - data = copy.deepcopy(datastore[self.read_key]) + def get_description(self): + return self.file_path + + def transform(self, data): + data = copy.deepcopy(data) # if a transformation function is provided, transform the data if self.apply_func is not None: @@ -67,14 +71,11 @@ def transform(self, datastore): # if file path is provided, write data to a file. Otherwise, write data into the datastore if self.file_path is None: - datastore[ - self.read_key if self.store_key is None else self.store_key - ] = data + return data else: with open(self.file_path, "w+") as file: file.write(data) self.logger.info( f'Object "{self.read_key}" written to file "{self.file_path}".' ) - - return datastore + return None diff --git a/popmon/io/json_reader.py b/popmon/io/json_reader.py index 6fe4f7f2..aaf0c492 100644 --- a/popmon/io/json_reader.py +++ b/popmon/io/json_reader.py @@ -36,5 +36,5 @@ def __init__(self, file_path: Union[str, Path], store_key: str): """ super().__init__(store_key, file_path, apply_func=json.loads) - def transform(self, datastore): - return super().transform(datastore) + def transform(self, *args): + return super().transform(*args) diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index 3de19b23..ba0bff9d 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -382,7 +382,13 @@ def metrics_rolling_reference( ), ApplyFunc( apply_to_key="traffic_lights", - apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], + apply_funcs=[ + { + "func": traffic_light_summary, + "axis": 1, + "suffix": "" + } + ], assign_to_key="alerts", msg="Generating traffic light alerts summary.", ), diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 9329924b..7b31a9bf 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -27,7 +27,7 @@ make_histograms, ) -from ..base import Module +from ..base import Module, Pipeline from ..config import config from ..pipeline.report_pipelines import ( ReportPipe, @@ -425,6 +425,8 @@ class StabilityReport(Module): after running the pipeline and generating the report. Report can be represented as a HTML string, HTML file or Jupyter notebook's cell output. """ + _input_keys = ("read_key", ) + _output_keys = tuple() def __init__(self, read_key="html_report"): """Initialize an instance of StabilityReport. 
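The StabilityReport change above completes the pattern that this series rolls out to every module: a subclass declares `_input_keys` and `_output_keys` with the names of the instance attributes that hold its datastore keys, implements `transform()` over the already-resolved values, and leaves the datastore plumbing to `Module._transform()`. A minimal sketch of a custom module under this contract (the class and key names are illustrative, not popmon API):

.. code-block:: python

    import numpy as np

    from popmon.base import Module


    class ScaleValues(Module):
        """Hypothetical module: scales an array from the datastore by a constant."""

        _input_keys = ("read_key",)  # attributes holding the input datastore keys
        _output_keys = ("store_key",)  # attributes holding the output datastore keys

        def __init__(self, read_key, store_key, factor=2.0):
            super().__init__()
            self.read_key = read_key
            self.store_key = store_key
            self.factor = factor

        def transform(self, values: np.ndarray) -> np.ndarray:
            # receives datastore[self.read_key]; the return value is written
            # back to datastore[self.store_key] by Module._transform()
            return values * self.factor


    datastore = ScaleValues("raw", "scaled", factor=3.0)._transform({"raw": np.arange(3)})
    # datastore["scaled"] -> array([0., 3., 6.])

The test modules in `tests/popmon/base/test_pipeline.py`, updated later in this series, follow exactly this shape.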
diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index 2b66c8ac..ad71727f 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -90,6 +90,8 @@ def self_reference( ] pipeline = Pipeline(modules) + # pipeline.to_json("pipeline_self_reference_versioned.json", versioned=True) + # pipeline.to_json("pipeline_self_reference_unversioned.json", versioned=False) return pipeline diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index 7843842e..8b482682 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -28,6 +28,9 @@ class HistStitcher(Module): """Module stitches histograms by date""" + _input_keys = ("read_key", "delta_key") + _output_keys = ("store_key", ) + def __init__( self, mode="add", @@ -51,28 +54,25 @@ def __init__( (only required when calling transform(datastore) as module) """ super().__init__() - self.mode = mode - self.time_axis = time_axis - self.time_bin_idx = time_bin_idx self.read_key = read_key self.delta_key = delta_key self.store_key = store_key + self.mode = mode + self.time_axis = time_axis + self.time_bin_idx = time_bin_idx self.allowed_modes = ["add", "replace"] - assert self.mode in self.allowed_modes + if self.mode not in self.allowed_modes: + raise ValueError("mode should be either 'add' or 'replace'") + + def get_description(self): + return f"{self.mode}" - def transform(self, datastore): - # --- get input dict lists + def transform(self, hists_basis: dict, hists_delta: dict) -> dict: self.logger.info( f'Stitching histograms "{self.read_key}" and "{self.delta_key}" as "{self.store_key}"' ) - - hists_basis = self.get_datastore_object(datastore, self.read_key, dtype=dict) - hists_delta = self.get_datastore_object(datastore, self.delta_key, dtype=dict) - stitched = self.stitch_histograms(self.mode, hists_basis, hists_delta) - - datastore[self.store_key] = stitched - return datastore + return stitched def stitch_histograms( self, diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index 23cc5a58..ca5712da 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -18,6 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from typing import Optional + import numpy as np import pandas as pd from tqdm import tqdm @@ -34,6 +36,8 @@ class AlertSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. 
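One behavioural detail in the HistStitcher change above: the bare `assert` on `mode` is now an explicit `ValueError`, so a misconfigured pipeline fails with a clear message even under `python -O`, which strips asserts. A small usage sketch (importing the class from its module path):

.. code-block:: python

    from popmon.stitching.hist_stitcher import HistStitcher

    try:
        HistStitcher(mode="subtract")  # only "add" and "replace" are accepted
    except ValueError as err:
        print(err)  # mode should be either 'add' or 'replace'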
""" + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") + _output_keys = ("store_key", ) def __init__( self, @@ -76,14 +80,15 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.dynamic_bounds = dynamic_bounds + self.static_bounds = static_bounds + self.features = features or [] self.ignore_features = ignore_features or [] self.section_name = section_name self.last_n = last_n self.skip_first_n = skip_first_n self.skip_last_n = skip_last_n - self.dynamic_bounds = dynamic_bounds - self.static_bounds = static_bounds self.prefix = prefix self.suffices = suffices self.ignore_stat_endswith = ignore_stat_endswith or [] @@ -93,17 +98,28 @@ def __init__( self.plot_overview = True self.plot_metrics = False - def transform(self, datastore): - data_obj = self.get_datastore_object(datastore, self.read_key, dtype=dict) - - static_bounds = self.get_datastore_object( - datastore, self.static_bounds, dtype=dict, default={} - ) - dynamic_bounds = self.get_datastore_object( - datastore, self.dynamic_bounds, dtype=dict, default={} - ) + def get_description(self): + return self.section_name - features = self.get_features(data_obj.keys()) + def transform( + self, + data_obj: dict, + static_bounds: Optional[dict] = None, + dynamic_bounds: Optional[dict] = None, + sections: Optional[list] = None + ): + assert isinstance(data_obj, dict) + if static_bounds is None: + static_bounds = {} + assert isinstance(static_bounds, dict) + if dynamic_bounds is None: + dynamic_bounds = {} + assert isinstance(dynamic_bounds, dict) + if sections is None: + sections = [] + assert isinstance(sections, list) + + features = self.get_features(list(data_obj.keys())) features_w_metrics = [] self.logger.info( @@ -170,18 +186,14 @@ def transform(self, datastore): {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])} ) - params = { - "section_title": self.section_name, - "section_description": self.description, - "features": features_w_metrics, - } - - if self.store_key in datastore: - datastore[self.store_key].append(params) - else: - datastore[self.store_key] = [params] - - return datastore + sections.append( + { + "section_title": self.section_name, + "section_description": self.description, + "features": features_w_metrics, + } + ) + return sections def _plot_metric( diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index 3be3dc38..e5cb75ee 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -18,6 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+from typing import Optional + import pandas as pd from histogrammar.util import get_hist_props from tqdm import tqdm @@ -35,6 +37,8 @@ class HistogramSection(Module): """This module plots histograms of all selected features for the last 'n' periods.""" + _input_keys = ("read_key", "store_key") + _output_keys = ("store_key", ) def __init__( self, @@ -63,6 +67,7 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.features = features or [] self.ignore_features = ignore_features or [] self.section_name = section_name @@ -71,10 +76,14 @@ def __init__( self.hist_name_starts_with = hist_name_starts_with self.description = description - def transform(self, datastore): - data_obj = self.get_datastore_object(datastore, self.read_key, dtype=dict) + def get_description(self): + return self.section_name - features = self.get_features(data_obj.keys()) + def transform(self, data_obj: dict, sections: Optional[list] = None): + if sections is None: + sections = [] + + features = self.get_features(list(data_obj.keys())) features_w_metrics = [] self.logger.info(f'Generating section "{self.section_name}".') @@ -110,18 +119,14 @@ def transform(self, datastore): {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])} ) - params = { - "section_title": self.section_name, - "section_description": self.description, - "features": features_w_metrics, - } - - if self.store_key in datastore: - datastore[self.store_key].append(params) - else: - datastore[self.store_key] = [params] - - return datastore + sections.append( + { + "section_title": self.section_name, + "section_description": self.description, + "features": features_w_metrics, + } + ) + return sections def _plot_histograms(feature, date, hc_list, hist_names): diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index eec0f158..b95ac0b3 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -29,6 +29,8 @@ class ReportGenerator(Module): """This module takes already prepared section data, renders HTML section template with the data and glues sections together into one compressed report which is created based on the provided template. """ + _input_keys = ("read_key", ) + _output_keys = ("store_key", ) def __init__(self, read_key, store_key): """Initialize an instance of ReportGenerator. @@ -40,9 +42,10 @@ def __init__(self, read_key, store_key): self.read_key = read_key self.store_key = store_key - def transform(self, datastore): - sections = self.get_datastore_object(datastore, self.read_key, dtype=list) + def get_description(self): + return "HTML Report" + def transform(self, sections: list) -> str: # concatenate HTML sections' code sections_html = "" for i, section_info in enumerate(sections): @@ -51,11 +54,10 @@ def transform(self, datastore): ) # get HTML template for the final report, insert placeholder data and compress the code - datastore[self.store_key] = htmlmin.minify( + return htmlmin.minify( templates_env( filename="core.html", generator=f"popmon {version}", sections=sections_html, ) ) - return datastore diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index 3f94bca3..f1049858 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -18,6 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
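With the datastore handling lifted into the base class, `ReportGenerator.transform()` above becomes a pure function from a list of section dicts to a minified HTML string, which makes the module easy to exercise on its own. A smoke-test sketch (it assumes the bundled `core.html` template renders an empty section list):

.. code-block:: python

    from popmon.visualization.report_generator import ReportGenerator

    module = ReportGenerator(read_key="report_sections", store_key="html_report")
    datastore = module._transform({"report_sections": []})
    assert isinstance(datastore["html_report"], str)  # minified report skeleton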
+from typing import Optional + import numpy as np import pandas as pd from tqdm import tqdm @@ -33,6 +35,8 @@ class SectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") + _output_keys = ("store_key", ) def __init__( self, @@ -75,14 +79,15 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.dynamic_bounds = dynamic_bounds + self.static_bounds = static_bounds + self.features = features or [] self.ignore_features = ignore_features or [] self.section_name = section_name self.last_n = last_n self.skip_first_n = skip_first_n self.skip_last_n = skip_last_n - self.dynamic_bounds = dynamic_bounds - self.static_bounds = static_bounds self.prefix = prefix self.suffices = suffices self.ignore_stat_endswith = ignore_stat_endswith or [] @@ -90,17 +95,18 @@ def __init__( self.description = description self.show_stats = show_stats - def transform(self, datastore): - data_obj = self.get_datastore_object(datastore, self.read_key, dtype=dict) + def get_description(self): + return self.section_name - static_bounds = self.get_datastore_object( - datastore, self.static_bounds, dtype=dict, default={} - ) - dynamic_bounds = self.get_datastore_object( - datastore, self.dynamic_bounds, dtype=dict, default={} - ) + def transform(self, data_obj: dict, static_bounds: Optional[dict] = None, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + if static_bounds is None: + static_bounds = {} + if dynamic_bounds is None: + dynamic_bounds = {} + if sections is None: + sections = [] - features = self.get_features(data_obj.keys()) + features = self.get_features(list(data_obj.keys())) features_w_metrics = [] self.logger.info( @@ -151,18 +157,14 @@ def transform(self, datastore): {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])} ) - params = { - "section_title": self.section_name, - "section_description": self.description, - "features": features_w_metrics, - } - - if self.store_key not in datastore: - datastore[self.store_key] = [] - - datastore[self.store_key].append(params) - - return datastore + sections.append( + { + "section_title": self.section_name, + "section_description": self.description, + "features": features_w_metrics, + } + ) + return sections def _plot_metric( diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index ca5ce1d7..56d19d26 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -18,6 +18,8 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +from typing import Optional + import numpy as np import pandas as pd from tqdm import tqdm @@ -38,6 +40,8 @@ class TrafficLightSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. 
""" + _input_keys = ("read_key", "dynamic_bounds", "store_key") + _output_keys = ("store_key", ) def __init__( self, @@ -84,14 +88,15 @@ def __init__( super().__init__() self.read_key = read_key self.store_key = store_key + self.dynamic_bounds = dynamic_bounds + self.static_bounds = static_bounds + self.features = features or [] self.ignore_features = ignore_features or [] self.section_name = section_name self.last_n = last_n self.skip_first_n = skip_first_n self.skip_last_n = skip_last_n - self.dynamic_bounds = dynamic_bounds - self.static_bounds = static_bounds self.prefix = prefix self.suffices = suffices self.ignore_stat_endswith = ignore_stat_endswith or [] @@ -101,14 +106,19 @@ def __init__( self.plot_overview = plot_overview self.plot_metrics = plot_metrics - def transform(self, datastore): - data_obj = self.get_datastore_object(datastore, self.read_key, dtype=dict) + def get_description(self): + return self.section_name - dynamic_bounds = self.get_datastore_object( - datastore, self.dynamic_bounds, dtype=dict, default={} - ) + def transform(self, data_obj: dict, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + assert isinstance(data_obj, dict) + if dynamic_bounds is None: + dynamic_bounds = {} + assert isinstance(dynamic_bounds, dict) + if sections is None: + sections = [] + assert isinstance(sections, list) - features = self.get_features(data_obj.keys()) + features = self.get_features(list(data_obj.keys())) features_w_metrics = [] self.logger.info( @@ -170,18 +180,14 @@ def transform(self, datastore): {"name": feature, "plots": sorted(plots, key=lambda plot: plot["name"])} ) - params = { - "section_title": self.section_name, - "section_description": self.description, - "features": features_w_metrics, - } - - if self.store_key in datastore: - datastore[self.store_key].append(params) - else: - datastore[self.store_key] = [params] - - return datastore + sections.append( + { + "section_title": self.section_name, + "section_description": self.description, + "features": features_w_metrics, + } + ) + return sections def _plot_metric(metric, dates, values, last_n, skip_first_n, skip_last_n, skip_empty): diff --git a/tests/popmon/alerting/test_compute_tl_bounds.py b/tests/popmon/alerting/test_compute_tl_bounds.py index c9b392dc..b2211866 100644 --- a/tests/popmon/alerting/test_compute_tl_bounds.py +++ b/tests/popmon/alerting/test_compute_tl_bounds.py @@ -35,14 +35,13 @@ def test_compute_traffic_light_bounds(): monitoring_rules=conf["monitoring_rules"], ) - output = module.transform(datastore)["output_data"] + output = module._transform(datastore)["output_data"] assert "dummy_feature:mae" not in output.keys() assert output["the_feature:mae"] == [8, 4, 2, 2] assert output["the_feature:mse"] == [0.2, 0.11, 0.09, 0] def test_compute_traffic_light_funcs(): - datastore = {"test_data": pytest.test_comparer_df} conf = { @@ -61,7 +60,7 @@ def test_compute_traffic_light_funcs(): monitoring_rules=conf["monitoring_rules"], ) - output = module.transform(datastore)["output_data"] + output = module._transform(datastore)["output_data"] assert len(output) == 3 assert output[0]["features"] == ["dummy_feature"] diff --git a/tests/popmon/analysis/profiling/test_apply_func.py b/tests/popmon/analysis/profiling/test_apply_func.py index 8a53e87e..4adff82e 100644 --- a/tests/popmon/analysis/profiling/test_apply_func.py +++ b/tests/popmon/analysis/profiling/test_apply_func.py @@ -60,7 +60,7 @@ def func(x): module.add_apply_func(np.mean, entire=True) module.add_apply_func(func) - datastore 
= module.transform(datastore) + datastore = module._transform(datastore) p = datastore["profiled"]["asc_numbers"] diff --git a/tests/popmon/analysis/test_merge_statistics.py b/tests/popmon/analysis/test_merge_statistics.py index cc7c1a54..ff474311 100644 --- a/tests/popmon/analysis/test_merge_statistics.py +++ b/tests/popmon/analysis/test_merge_statistics.py @@ -40,7 +40,7 @@ def test_merge_statistics(): } datastore = MergeStatistics( read_keys=["first_df", "second_df"], store_key="output_df" - ).transform(datastore) + )._transform(datastore) pd.testing.assert_frame_equal(df1.combine_first(df2), out) pd.testing.assert_frame_equal(datastore["output_df"]["feature_1"], out) diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index 613182e1..650a1c71 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -6,66 +6,63 @@ class LogTransformer(Module): + _input_keys = ("input_key", ) + _output_keys = ("output_key", ) + def __init__(self, input_key, output_key): super().__init__() self.input_key = input_key self.output_key = output_key - def transform(self, datastore): - input_array = self.get_datastore_object( - datastore, self.input_key, dtype=np.ndarray - ) - datastore[self.output_key] = np.log(input_array) + def transform(self, input_array: np.ndarray): + output = np.log(input_array) self.logger.info(f"{self.__class__.__name__} is calculated.") - return datastore + return output class PowerTransformer(Module): + _input_keys = ("input_key",) + _output_keys = ("output_key",) + def __init__(self, input_key, output_key, power): super().__init__() self.input_key = input_key self.output_key = output_key self.power = power - def transform(self, datastore): - input_array = self.get_datastore_object( - datastore, self.input_key, dtype=np.ndarray - ) - datastore[self.output_key] = np.power(input_array, self.power) - return datastore + def transform(self, input_array: np.ndarray): + result = np.power(input_array, self.power) + return result class SumNormalizer(Module): + _input_keys = ("input_key",) + _output_keys = ("output_key",) + def __init__(self, input_key, output_key): super().__init__() self.input_key = input_key self.output_key = output_key - def transform(self, datastore): - input_array = self.get_datastore_object( - datastore, self.input_key, dtype=np.ndarray - ) - datastore[self.output_key] = input_array / input_array.sum() - return datastore + def transform(self, input_array: np.ndarray): + result = input_array / input_array.sum() + return result class WeightedSum(Module): + _input_keys = ("input_key", "weight_key") + _output_keys = ("output_key",) + def __init__(self, input_key, weight_key, output_key): super().__init__() self.input_key = input_key self.weight_key = weight_key self.output_key = output_key - def transform(self, datastore): - input_array = self.get_datastore_object( - datastore, self.input_key, dtype=np.ndarray - ) - weights = self.get_datastore_object( - datastore, self.weight_key, dtype=np.ndarray - ) - datastore[self.output_key] = np.sum(input_array * weights) + def transform(self, input_array: np.ndarray, weights: np.ndarray): + result = np.sum(input_array * weights) self.logger.info(f"{self.__class__.__name__} is calculated.") - return datastore + return result def test_popmon_pipeline(): diff --git a/tests/popmon/io/test_file_reader.py b/tests/popmon/io/test_file_reader.py index 9ad91703..d953d3d2 100644 --- a/tests/popmon/io/test_file_reader.py +++ b/tests/popmon/io/test_file_reader.py @@ -10,7 +10,7 
@@ def test_file_reader_json(): store_key="example", apply_func=json.loads, ) - datastore = fr.transform(datastore={}) + datastore = fr._transform(datastore={}) assert datastore["example"]["boolean"] assert len(datastore["example"]["array"]) == 3 diff --git a/tests/popmon/io/test_file_writer.py b/tests/popmon/io/test_file_writer.py index c00fa308..b505b4d0 100644 --- a/tests/popmon/io/test_file_writer.py +++ b/tests/popmon/io/test_file_writer.py @@ -23,25 +23,25 @@ def to_pandas(data): def test_file_writer_json(): datastore = get_ready_ds() - FileWriter("my_data", apply_func=to_json).transform(datastore) + FileWriter("my_data", apply_func=to_json)._transform(datastore) assert datastore["my_data"] == to_json(DATA) def test_file_writer_json_with_kwargument(): datastore = get_ready_ds() - FileWriter("my_data", apply_func=to_json, indent=4).transform(datastore) + FileWriter("my_data", apply_func=to_json, indent=4)._transform(datastore) assert datastore["my_data"] == to_json(DATA, indent=4) def test_file_writer_not_a_func(): datastore = get_ready_ds() with pytest.raises(TypeError): - FileWriter("my_data", apply_func={}).transform(datastore) + FileWriter("my_data", apply_func={})._transform(datastore) def test_file_writer_df(): datastore = get_ready_ds() - FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas).transform( + FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas)._transform( datastore ) assert datastore["my_data"] == DATA diff --git a/tests/popmon/io/test_json_reader.py b/tests/popmon/io/test_json_reader.py index 4a46651b..d47e155b 100644 --- a/tests/popmon/io/test_json_reader.py +++ b/tests/popmon/io/test_json_reader.py @@ -4,7 +4,7 @@ def test_json_reader(): jr = JsonReader(file_path=resources.data("example.json"), store_key="example") - datastore = jr.transform(datastore={}) + datastore = jr._transform(datastore={}) assert datastore["example"]["boolean"] assert len(datastore["example"]["array"]) == 3 diff --git a/tools/pipeline_viz.py b/tools/pipeline_viz.py new file mode 100644 index 00000000..69f2f117 --- /dev/null +++ b/tools/pipeline_viz.py @@ -0,0 +1,97 @@ +import json +from pathlib import Path + +import networkx as nx +import pygraphviz +from networkx.drawing.nx_agraph import to_agraph + + +def generate_pipeline_vizualisation(input_file, output_file, include_subgraphs: bool = False, include_labels: bool = False): + data = Path(input_file).read_text() + data = json.loads(data) + + subgraphs = [] + modules = [] + + def populate(item): + if item['type'] == 'subgraph': + mods = [] + for m in item['modules']: + mods += populate(m) + + subgraphs.append( + { + 'modules': mods, + 'name': item['name'] + } + ) + return mods + elif item['type'] == 'module': + modules.append(item) + name = f"{item['name']}_{item['i']}" + return [name]+list(item["out"].values()) + else: + raise ValueError() + + populate(data) + + G = nx.DiGraph() + for module in modules: + label = f"<{module['name']}" + d = module.get('desc', '') + if len(d) > 0: + label += f"
{d}" + label += ">" + + # unique name + name = f"{module['name']}_{module['i']}" + + G.add_node(name, shape='rectangle', fillcolor='chartreuse', style='filled', label=label) + + + for k, v in module['in'].items(): + kwargs = {} + if include_labels: + kwargs['headlabel'] = k + G.add_edge(v, name, **kwargs) + for k, v in module['out'].items(): + kwargs = {} + if include_labels: + kwargs['taillabel'] = k + G.add_edge(name, v, **kwargs) + + # set defaults + G.graph['graph'] = {'rankdir':'TD'} + G.graph['node'] = {'shape':'oval', 'fillcolor': 'orange', 'style': 'filled'} + G.graph['edge'] = {'fontcolor':"gray50"} + + A = to_agraph(G) + if include_subgraphs: + for idx, subgraph in enumerate(subgraphs): + H = A.subgraph(subgraph["modules"], name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}') + H.graph_attr["color"] = "blue" + H.graph_attr["label"] = subgraph["name"] + H.graph_attr["style"] = "dotted" + + A.layout('dot') + A.draw(output_file) + + +if __name__ == "__main__": + data_path = Path("<...>") + + input_file = data_path / "pipeline_self_reference_unversioned.json" + output_file = 'popmon-report-pipeline-subgraphs-unversioned.pdf' + generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) + + input_file = data_path / "pipeline_self_reference_unversioned.json" + output_file = 'popmon-report-pipeline-unversioned.pdf' + generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False) + + input_file = data_path / "pipeline_self_reference_versioned.json" + output_file = 'popmon-report-pipeline-subgraphs-versioned.pdf' + generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) + + input_file = data_path / "pipeline_self_reference_versioned.json" + output_file = 'popmon-report-pipeline-versioned.pdf' + generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False) From aa663b0f8c14e0361bfa2914daef17f45ceb09cf Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 14 Oct 2021 01:22:59 +0200 Subject: [PATCH 16/34] chore: version bump --- bump.py | 4 ++-- popmon/version.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bump.py b/bump.py index 8a8e803f..50c8df52 100644 --- a/bump.py +++ b/bump.py @@ -2,8 +2,8 @@ from pathlib import Path MAJOR = 0 -REVISION = 4 -PATCH = 4 +REVISION = 5 +PATCH = 0 VERSION = f"{MAJOR}.{REVISION}.{PATCH}" diff --git a/popmon/version.py b/popmon/version.py index e143821c..5aa30b4d 100644 --- a/popmon/version.py +++ b/popmon/version.py @@ -1,3 +1,3 @@ """THIS FILE IS AUTO-GENERATED BY SETUP.PY.""" -version = "0.4.4" +version = "0.5.0" From c9d861ff9e633b598f9daa399d356f4cb3005540 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 14 Oct 2021 12:04:50 +0200 Subject: [PATCH 17/34] style: lint --- popmon/alerting/alerts_summary.py | 5 +- popmon/alerting/compute_tl_bounds.py | 9 ++- popmon/analysis/apply_func.py | 12 +++- popmon/analysis/comparison/hist_comparer.py | 11 +--- popmon/analysis/merge_statistics.py | 5 +- popmon/analysis/profiling/hist_profiler.py | 5 +- popmon/analysis/profiling/pull_calculator.py | 11 +--- popmon/base/module.py | 21 ++++--- popmon/base/pipeline.py | 22 +++---- popmon/hist/hist_splitter.py | 7 +-- popmon/io/file_reader.py | 4 +- popmon/io/file_writer.py | 5 +- popmon/pipeline/metrics_pipelines.py | 8 +-- popmon/pipeline/report.py | 5 +- popmon/stitching/hist_stitcher.py | 2 +- .../visualization/alert_section_generator.py | 5 +- popmon/visualization/histogram_section.py | 3 +- popmon/visualization/report_generator.py | 5 +- 
popmon/visualization/section_generator.py | 11 +++- .../traffic_light_section_generator.py | 10 +++- tests/popmon/base/test_pipeline.py | 4 +- tests/popmon/io/test_file_writer.py | 6 +- tools/pipeline_viz.py | 58 ++++++++++--------- 23 files changed, 118 insertions(+), 116 deletions(-) diff --git a/popmon/alerting/alerts_summary.py b/popmon/alerting/alerts_summary.py index 870abcce..ff21e568 100644 --- a/popmon/alerting/alerts_summary.py +++ b/popmon/alerting/alerts_summary.py @@ -32,8 +32,9 @@ class AlertsSummary(Module): It combines the alerts-summaries of all individual features into an artificial feature "_AGGREGATE_". """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py index e8c77480..1eb1f3db 100644 --- a/popmon/alerting/compute_tl_bounds.py +++ b/popmon/alerting/compute_tl_bounds.py @@ -21,7 +21,7 @@ import copy import fnmatch from collections import defaultdict -from typing import Tuple, Any +from typing import Any, Tuple import numpy as np import pandas as pd @@ -116,7 +116,8 @@ class ComputeTLBounds(Module): meant to be generic. Then bounds can be stored as either raw values or as directly calculated values on the statistics of the data. """ - _input_keys = ("read_key", ) + + _input_keys = ("read_key",) _output_keys = ("store_key", "apply_funcs_key") def __init__( @@ -213,9 +214,7 @@ def transform(self, test_data: dict) -> Tuple[Any, Any]: # --- 1. tl bounds explicitly defined for a particular feature if feature in pkeys: explicit_cols = [ - pcol - for pcol in pkeys[feature] - if pcol in test_df.columns + pcol for pcol in pkeys[feature] if pcol in test_df.columns ] implicit_cols = set(pkeys[feature]) - set(explicit_cols) diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 617fc669..37a4296c 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -33,8 +33,9 @@ class ApplyFunc(Module): Extra parameters (kwargs) can be passed to the apply function. 
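The `get_description()` hook that modules gain across this series supplies the node labels for the pipeline visualisation; ApplyFunc's implementation, just below, joins the names of its apply functions. A quick illustration (assuming the import path `popmon.analysis.apply_func`):

.. code-block:: python

    import numpy as np

    from popmon.analysis.apply_func import ApplyFunc

    module = ApplyFunc(apply_to_key="profiled")
    module.add_apply_func(np.mean, entire=True)
    module.add_apply_func(np.std, entire=True)
    print(module.get_description())  # -> "mean and std"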
""" + _input_keys = ("apply_to_key", "assign_to_key", "apply_funcs_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -85,7 +86,7 @@ def __init__( def get_description(self): if len(self.apply_funcs) > 0: - return " and ".join([x['func'].__name__ for x in self.apply_funcs]) + return " and ".join([x["func"].__name__ for x in self.apply_funcs]) elif self.apply_funcs_key: return f"functions from arg '{self.apply_funcs_key}'" else: @@ -139,7 +140,12 @@ def add_apply_func( } ) - def transform(self, apply_to_data: dict, assign_to_data: Optional[dict] = None, apply_funcs: Optional[list] = None): + def transform( + self, + apply_to_data: dict, + assign_to_data: Optional[dict] = None, + apply_funcs: Optional[list] = None, + ): """ Apply functions to specified feature and metrics diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index 9f51fb36..e542c6a5 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -380,16 +380,9 @@ def __init__( assign_to_key = read_key # make reference histogram(s) - hist_collector = ApplyFunc( - apply_to_key=read_key, - assign_to_key=assign_to_key - ) + hist_collector = ApplyFunc(apply_to_key=read_key, assign_to_key=assign_to_key) hist_collector.add_apply_func( - func=func_hist_collector, - hist_name=hist_col, - suffix="", - *args, - **kwargs + func=func_hist_collector, hist_name=hist_col, suffix="", *args, **kwargs ) # do histogram comparison diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 3d6eb3be..232f8b98 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -27,8 +27,9 @@ class MergeStatistics(Module): """Merging dictionaries of features containing dataframes with statistics as its values.""" - _input_keys = ("read_keys", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_keys",) + _output_keys = ("store_key",) def __init__(self, read_keys: List[str], store_key: str): """Initialize an instance of MergeStatistics. 
diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 45571ac8..4e4a7022 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -57,8 +57,9 @@ class HistProfiler(Module): :param str index_col: key for index in split dictionary :param dict stats_functions: function_name, function(bin_labels, bin_counts) dictionary """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/analysis/profiling/pull_calculator.py b/popmon/analysis/profiling/pull_calculator.py index 3e266545..63290d8b 100644 --- a/popmon/analysis/profiling/pull_calculator.py +++ b/popmon/analysis/profiling/pull_calculator.py @@ -131,11 +131,7 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = { - "window": window, - "shift": shift, - **kwargs - } + kws = {"window": window, "shift": shift, **kwargs} super().__init__( rolling_mean, rolling_std, @@ -186,10 +182,7 @@ def __init__( :param args: (tuple, optional): residual args passed on to mean and std functions :param kwargs: (dict, optional): residual kwargs passed on to mean and std functions """ - kws = { - "shift": shift, - **kwargs - } + kws = {"shift": shift, **kwargs} super().__init__( expanding_mean, expanding_std, diff --git a/popmon/base/module.py b/popmon/base/module.py index 56b5f33c..13208c8d 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -24,6 +24,7 @@ class Module(ABC): """Base class used for modules in a pipeline.""" + _input_keys = None _output_keys = None @@ -101,18 +102,12 @@ def get_features(self, all_features: list) -> list: features = [k for k in features if k not in self.ignore_features] features_not_in_input = [ - feature - for feature in features - if feature not in all_features + feature for feature in features if feature not in all_features ] for feature in features_not_in_input: self.logger.warning(f'Feature "{feature}" not in input data; skipping.') - features = [ - feature - for feature in features - if feature in all_features - ] + features = [feature for feature in features if feature in all_features] return features def _transform(self, datastore): @@ -130,7 +125,9 @@ def _transform(self, datastore): else: inputs[key] = None - self.logger.debug(f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})") + self.logger.debug( + f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})" + ) # cache datastore self._datastore = datastore @@ -145,8 +142,10 @@ def _transform(self, datastore): for k, v in zip(self._output_keys, outputs): key_value = self.__dict__[k] - self.logger.debug(f"store(key={k}, key_value={key_value}, value={str(v):.100s})") - if key_value and len(key_value) > 0: # and v is not None: + self.logger.debug( + f"store(key={k}, key_value={key_value}, value={str(v):.100s})" + ) + if key_value and len(key_value) > 0: # and v is not None: datastore[key_value] = v return datastore diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index 3995235a..18b02146 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -85,9 +85,7 @@ def visualize(self, versioned=True, funcs=None, dsets=None): for module in self.modules: name = module.__class__.__name__ if isinstance(module, Pipeline): - modules.append( - 
module.visualize(versioned, funcs, dsets) - ) + modules.append(module.visualize(versioned, funcs, dsets)) else: in_keys = module.get_inputs() @@ -122,19 +120,15 @@ def visualize(self, versioned=True, funcs=None, dsets=None): modules.append( { - 'type': 'module', - 'name': f'{name}', - 'i': f'{funcs[name][id(module)]}', - 'desc': module.get_description(), - 'in': in_keys, - 'out': out_keys + "type": "module", + "name": f"{name}", + "i": f"{funcs[name][id(module)]}", + "desc": module.get_description(), + "in": in_keys, + "out": out_keys, } ) - data = { - 'type': 'subgraph', - 'name': self.__class__.__name__, - 'modules': modules - } + data = {"type": "subgraph", "name": self.__class__.__name__, "modules": modules} return data def to_json(self, file_name, versioned=True): diff --git a/popmon/hist/hist_splitter.py b/popmon/hist/hist_splitter.py index 43163414..27a1a883 100644 --- a/popmon/hist/hist_splitter.py +++ b/popmon/hist/hist_splitter.py @@ -37,8 +37,8 @@ class HistSplitter(Module): where time is the index and each row is a x:y histogram. """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, @@ -153,7 +153,6 @@ def transform(self, data: dict) -> dict: # turn divided dicts into dataframes with index divided = { - k: pd.DataFrame(v).set_index(self.index_col) - for k, v in divided.items() + k: pd.DataFrame(v).set_index(self.index_col) for k, v in divided.items() } return divided diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 929bec0b..19353cb3 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -28,8 +28,8 @@ class FileReader(Module): """Module to read contents from a file, transform the contents with a function and write them to the datastore.""" - _input_keys = tuple() - _output_keys = ("store_key", ) + _input_keys = () + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 800729c2..6342291f 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -28,8 +28,9 @@ class FileWriter(Module): """Module transforms specific datastore content and writes it to a file.""" - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index ba0bff9d..3de19b23 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -382,13 +382,7 @@ def metrics_rolling_reference( ), ApplyFunc( apply_to_key="traffic_lights", - apply_funcs=[ - { - "func": traffic_light_summary, - "axis": 1, - "suffix": "" - } - ], + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], assign_to_key="alerts", msg="Generating traffic light alerts summary.", ), diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 7b31a9bf..a25789e1 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -425,8 +425,9 @@ class StabilityReport(Module): after running the pipeline and generating the report. Report can be represented as a HTML string, HTML file or Jupyter notebook's cell output. """ - _input_keys = ("read_key", ) - _output_keys = tuple() + + _input_keys = ("read_key",) + _output_keys = () def __init__(self, read_key="html_report"): """Initialize an instance of StabilityReport. 
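The `visualize()`/`to_json()` machinery reformatted above is what the commented-out `to_json` calls in `report_pipelines.py` and the new `tools/pipeline_viz.py` script hook into: each module contributes its name, `get_description()` and resolved input/output keys, and with `versioned=True` every write to a dataset key bumps a `(vN)` suffix, so repeated writes stay distinct nodes in the rendered graph. A self-contained sketch with toy modules (only `Pipeline.to_json` is popmon API; after the later abstract-class patch one would subclass `Pipeline` rather than instantiate it directly):

.. code-block:: python

    from popmon.base import Module, Pipeline


    class Source(Module):
        _input_keys = ()
        _output_keys = ("store_key",)

        def __init__(self, store_key):
            super().__init__()
            self.store_key = store_key

        def transform(self):
            return [1, 2, 3]


    class Doubler(Module):
        _input_keys = ("read_key",)
        _output_keys = ("store_key",)

        def __init__(self, read_key, store_key):
            super().__init__()
            self.read_key = read_key
            self.store_key = store_key

        def transform(self, values):
            return [2 * v for v in values]


    # "raw" is written twice, so the versioned graph shows "raw (v1)" and "raw (v2)"
    pipeline = Pipeline(modules=[Source("raw"), Doubler("raw", "raw")])
    pipeline.to_json("pipeline_graph.json", versioned=True)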
diff --git a/popmon/stitching/hist_stitcher.py b/popmon/stitching/hist_stitcher.py index 8b482682..2b79e91f 100644 --- a/popmon/stitching/hist_stitcher.py +++ b/popmon/stitching/hist_stitcher.py @@ -29,7 +29,7 @@ class HistStitcher(Module): """Module stitches histograms by date""" _input_keys = ("read_key", "delta_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/visualization/alert_section_generator.py b/popmon/visualization/alert_section_generator.py index ca5712da..89ec31e4 100644 --- a/popmon/visualization/alert_section_generator.py +++ b/popmon/visualization/alert_section_generator.py @@ -36,8 +36,9 @@ class AlertSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -106,7 +107,7 @@ def transform( data_obj: dict, static_bounds: Optional[dict] = None, dynamic_bounds: Optional[dict] = None, - sections: Optional[list] = None + sections: Optional[list] = None, ): assert isinstance(data_obj, dict) if static_bounds is None: diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index e5cb75ee..2f685ac2 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -37,8 +37,9 @@ class HistogramSection(Module): """This module plots histograms of all selected features for the last 'n' periods.""" + _input_keys = ("read_key", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, diff --git a/popmon/visualization/report_generator.py b/popmon/visualization/report_generator.py index b95ac0b3..d5d806fb 100644 --- a/popmon/visualization/report_generator.py +++ b/popmon/visualization/report_generator.py @@ -29,8 +29,9 @@ class ReportGenerator(Module): """This module takes already prepared section data, renders HTML section template with the data and glues sections together into one compressed report which is created based on the provided template. """ - _input_keys = ("read_key", ) - _output_keys = ("store_key", ) + + _input_keys = ("read_key",) + _output_keys = ("store_key",) def __init__(self, read_key, store_key): """Initialize an instance of ReportGenerator. diff --git a/popmon/visualization/section_generator.py b/popmon/visualization/section_generator.py index f1049858..342892ff 100644 --- a/popmon/visualization/section_generator.py +++ b/popmon/visualization/section_generator.py @@ -35,8 +35,9 @@ class SectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. 
""" + _input_keys = ("read_key", "static_bounds", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -98,7 +99,13 @@ def __init__( def get_description(self): return self.section_name - def transform(self, data_obj: dict, static_bounds: Optional[dict] = None, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + def transform( + self, + data_obj: dict, + static_bounds: Optional[dict] = None, + dynamic_bounds: Optional[dict] = None, + sections: Optional[list] = None, + ): if static_bounds is None: static_bounds = {} if dynamic_bounds is None: diff --git a/popmon/visualization/traffic_light_section_generator.py b/popmon/visualization/traffic_light_section_generator.py index 56d19d26..662b9a22 100644 --- a/popmon/visualization/traffic_light_section_generator.py +++ b/popmon/visualization/traffic_light_section_generator.py @@ -40,8 +40,9 @@ class TrafficLightSectionGenerator(Module): combines all the plots into a list which is stored together with the section name in a dictionary which later will be used for the report generation. """ + _input_keys = ("read_key", "dynamic_bounds", "store_key") - _output_keys = ("store_key", ) + _output_keys = ("store_key",) def __init__( self, @@ -109,7 +110,12 @@ def __init__( def get_description(self): return self.section_name - def transform(self, data_obj: dict, dynamic_bounds: Optional[dict] = None, sections: Optional[list] = None): + def transform( + self, + data_obj: dict, + dynamic_bounds: Optional[dict] = None, + sections: Optional[list] = None, + ): assert isinstance(data_obj, dict) if dynamic_bounds is None: dynamic_bounds = {} diff --git a/tests/popmon/base/test_pipeline.py b/tests/popmon/base/test_pipeline.py index 650a1c71..79c22908 100644 --- a/tests/popmon/base/test_pipeline.py +++ b/tests/popmon/base/test_pipeline.py @@ -6,8 +6,8 @@ class LogTransformer(Module): - _input_keys = ("input_key", ) - _output_keys = ("output_key", ) + _input_keys = ("input_key",) + _output_keys = ("output_key",) def __init__(self, input_key, output_key): super().__init__() diff --git a/tests/popmon/io/test_file_writer.py b/tests/popmon/io/test_file_writer.py index b505b4d0..7471a067 100644 --- a/tests/popmon/io/test_file_writer.py +++ b/tests/popmon/io/test_file_writer.py @@ -41,8 +41,8 @@ def test_file_writer_not_a_func(): def test_file_writer_df(): datastore = get_ready_ds() - FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas)._transform( - datastore - ) + FileWriter( + "my_data", store_key="transformed_data", apply_func=to_pandas + )._transform(datastore) assert datastore["my_data"] == DATA assert datastore["transformed_data"].to_dict() == to_pandas(DATA).to_dict() diff --git a/tools/pipeline_viz.py b/tools/pipeline_viz.py index 69f2f117..8e50ad43 100644 --- a/tools/pipeline_viz.py +++ b/tools/pipeline_viz.py @@ -6,7 +6,12 @@ from networkx.drawing.nx_agraph import to_agraph -def generate_pipeline_vizualisation(input_file, output_file, include_subgraphs: bool = False, include_labels: bool = False): +def generate_pipeline_vizualisation( + input_file, + output_file, + include_subgraphs: bool = False, + include_labels: bool = False, +): data = Path(input_file).read_text() data = json.loads(data) @@ -14,22 +19,17 @@ def generate_pipeline_vizualisation(input_file, output_file, include_subgraphs: modules = [] def populate(item): - if item['type'] == 'subgraph': + if item["type"] == "subgraph": mods = [] - for m in item['modules']: + for m in item["modules"]: 
                mods += populate(m)

-            subgraphs.append(
-                {
-                    'modules': mods,
-                    'name': item['name']
-                }
-            )
+            subgraphs.append({"modules": mods, "name": item["name"]})
             return mods
-        elif item['type'] == 'module':
+        elif item["type"] == "module":
             modules.append(item)
             name = f"{item['name']}_{item['i']}"
-            return [name]+list(item["out"].values())
+            return [name] + list(item["out"].values())
         else:
             raise ValueError()

@@ -38,7 +38,7 @@ def populate(item):
     G = nx.DiGraph()
     for module in modules:
         label = f"<{module['name']}"
-        d = module.get('desc', '')
+        d = module.get("desc", "")
         if len(d) > 0:
             label += f"<br/>{d}"
         label += ">"

@@ -46,34 +46,38 @@ def populate(item):
         # unique name
         name = f"{module['name']}_{module['i']}"

-        G.add_node(name, shape='rectangle', fillcolor='chartreuse', style='filled', label=label)
-
+        G.add_node(
+            name, shape="rectangle", fillcolor="chartreuse", style="filled", label=label
+        )

-        for k, v in module['in'].items():
+        for k, v in module["in"].items():
             kwargs = {}
             if include_labels:
-                kwargs['headlabel'] = k
+                kwargs["headlabel"] = k
             G.add_edge(v, name, **kwargs)
-        for k, v in module['out'].items():
+        for k, v in module["out"].items():
             kwargs = {}
             if include_labels:
-                kwargs['taillabel'] = k
+                kwargs["taillabel"] = k
             G.add_edge(name, v, **kwargs)

     # set defaults
-    G.graph['graph'] = {'rankdir':'TD'}
-    G.graph['node'] = {'shape':'oval', 'fillcolor': 'orange', 'style': 'filled'}
-    G.graph['edge'] = {'fontcolor':"gray50"}
+    G.graph["graph"] = {"rankdir": "TD"}
+    G.graph["node"] = {"shape": "oval", "fillcolor": "orange", "style": "filled"}
+    G.graph["edge"] = {"fontcolor": "gray50"}

     A = to_agraph(G)
     if include_subgraphs:
         for idx, subgraph in enumerate(subgraphs):
-            H = A.subgraph(subgraph["modules"], name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}')
+            H = A.subgraph(
+                subgraph["modules"],
+                name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}',
+            )
             H.graph_attr["color"] = "blue"
             H.graph_attr["label"] = subgraph["name"]
             H.graph_attr["style"] = "dotted"

-    A.layout('dot')
+    A.layout("dot")
     A.draw(output_file)


@@ -81,17 +85,17 @@ def populate(item):
     data_path = Path("<...>")

     input_file = data_path / "pipeline_self_reference_unversioned.json"
-    output_file = 'popmon-report-pipeline-subgraphs-unversioned.pdf'
+    output_file = "popmon-report-pipeline-subgraphs-unversioned.pdf"
     generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True)

     input_file = data_path / "pipeline_self_reference_unversioned.json"
-    output_file = 'popmon-report-pipeline-unversioned.pdf'
+    output_file = "popmon-report-pipeline-unversioned.pdf"
     generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False)

     input_file = data_path / "pipeline_self_reference_versioned.json"
-    output_file = 'popmon-report-pipeline-subgraphs-versioned.pdf'
+    output_file = "popmon-report-pipeline-subgraphs-versioned.pdf"
     generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True)

     input_file = data_path / "pipeline_self_reference_versioned.json"
-    output_file = 'popmon-report-pipeline-versioned.pdf'
+    output_file = "popmon-report-pipeline-versioned.pdf"
     generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False)

From ba98c973c8e27fe69ce1c3a82c4fa14abba3d818 Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Thu, 14 Oct 2021 17:38:22 +0200
Subject: [PATCH 18/34] fix: ensure uniqueness of apply_funcs_key

---
 popmon/alerting/compute_tl_bounds.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/popmon/alerting/compute_tl_bounds.py b/popmon/alerting/compute_tl_bounds.py
index 1eb1f3db..5269bb4d 100644
--- a/popmon/alerting/compute_tl_bounds.py
+++ b/popmon/alerting/compute_tl_bounds.py
@@ -440,6 +440,8 @@ def __init__(self, read_key, store_key, rules, expanded_rules_key=""):
         """
         self.read_key = read_key
         apply_funcs_key = f"{read_key}__{store_key}"
+        if len(expanded_rules_key) > 0:
+            apply_funcs_key += f"__{expanded_rules_key}"

         # generate static traffic light bounds by expanding the wildcarded monitoring rules
         expand_bounds = ComputeTLBounds(

From 66141b2b96dbd302d2472455b87a70d01070b375 Mon Sep 17 00:00:00 2001
From: Simon
Brugman Date: Thu, 21 Oct 2021 13:23:33 +0200 Subject: [PATCH 19/34] refactor: remove unused imports --- popmon/base/pipeline.py | 2 -- popmon/io/file_reader.py | 1 - popmon/io/file_writer.py | 1 - 3 files changed, 4 deletions(-) diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index 18b02146..8f042250 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -21,8 +21,6 @@ import logging from pathlib import Path -from ..base import Module - class Pipeline: """Base class used for to run modules in a pipeline.""" diff --git a/popmon/io/file_reader.py b/popmon/io/file_reader.py index 19353cb3..09e6a90b 100644 --- a/popmon/io/file_reader.py +++ b/popmon/io/file_reader.py @@ -18,7 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import collections.abc from pathlib import Path from typing import Callable, Optional, Union diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 6342291f..2bbe37c0 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -18,7 +18,6 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import collections.abc import copy from pathlib import Path from typing import Callable, Optional, Union From 98acf6875fd791d22e12568495e87e176531ac39 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Thu, 21 Oct 2021 13:25:26 +0200 Subject: [PATCH 20/34] refactor: simplification --- popmon/analysis/comparison/hist_comparer.py | 48 +++++++++++---------- popmon/analysis/profiling/hist_profiler.py | 5 ++- popmon/pipeline/metrics.py | 2 +- 3 files changed, 29 insertions(+), 26 deletions(-) diff --git a/popmon/analysis/comparison/hist_comparer.py b/popmon/analysis/comparison/hist_comparer.py index e542c6a5..254e191e 100644 --- a/popmon/analysis/comparison/hist_comparer.py +++ b/popmon/analysis/comparison/hist_comparer.py @@ -57,20 +57,20 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0): Default is 7.0. :return: pandas Series with popular comparison metrics. 
""" - x = pd.Series() - x["ks"] = np.nan - x["ks_zscore"] = np.nan - x["ks_pvalue"] = np.nan - x["pearson"] = np.nan - x["chi2"] = np.nan - x["chi2_norm"] = np.nan - x["chi2_zscore"] = np.nan - x["chi2_pvalue"] = np.nan - x["chi2_max_residual"] = np.nan - x["chi2_spike_count"] = np.nan - x["max_prob_diff"] = np.nan - unknown_labels = np.nan - x["unknown_labels"] = unknown_labels + x = { + "ks": np.nan, + "ks_zscore": np.nan, + "ks_pvalue": np.nan, + "pearson": np.nan, + "chi2": np.nan, + "chi2_norm": np.nan, + "chi2_zscore": np.nan, + "chi2_pvalue": np.nan, + "chi2_max_residual": np.nan, + "chi2_spike_count": np.nan, + "max_prob_diff": np.nan, + "unknown_labels": np.nan, + } # basic name checks cols = row.index.to_list() @@ -83,15 +83,14 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0): # basic histogram checks hist1 = row[hist_name1] hist2 = row[hist_name2] - if not all([isinstance(hist, COMMON_HIST_TYPES) for hist in [hist1, hist2]]): - return x - if not check_similar_hists([hist1, hist2]): - return x + if not all( + [isinstance(hist, COMMON_HIST_TYPES) for hist in [hist1, hist2]] + ) or not check_similar_hists([hist1, hist2]): + return pd.Series(x) # compare - is_num = is_numeric(hist1) if hist1.n_dim == 1: - if is_num: + if is_numeric(hist1): numpy_1dhists = get_consistent_numpy_1dhists([hist1, hist2]) entries_list = [nphist[0] for nphist in numpy_1dhists] # KS-test only properly defined for (ordered) 1D interval variables @@ -106,10 +105,14 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0): labels1 = hist1.bin_labels() labels2 = hist2.bin_labels() subset = set(labels1) <= set(labels2) - unknown_labels = int(not subset) + x["unknown_labels"] = int(not subset) elif hist1.n_dim == 2: numpy_2dgrids = get_consistent_numpy_2dgrids([hist1, hist2]) entries_list = [entry.flatten() for entry in numpy_2dgrids] + else: + raise NotImplementedError( + f"histogram with dimension {hist1.n_dim} is not supported" + ) # calculate pearson coefficient pearson, pvalue = (np.nan, np.nan) @@ -130,8 +133,7 @@ def hist_compare(row, hist_name1="", hist_name2="", max_res_bound=7.0): x["chi2_max_residual"] = max(list(map(abs, res))) x["chi2_spike_count"] = sum(abs(r) > max_res_bound for r in res) x["max_prob_diff"] = googl_test(*entries_list) - x["unknown_labels"] = unknown_labels - return x + return pd.Series(x) class HistComparer(Pipeline): diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 4e4a7022..86c63ff3 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -148,7 +148,7 @@ def _profile_1d_histogram(self, name, hist): for f_name, result in zip(name, results) ] - profile.update({k: v for k, v in zip(names, results)}) + profile.update(dict(zip(names, results))) elif not is_num: profile["fraction_true"] = pm_np.fraction_of_true(bin_labels, bin_counts) @@ -190,7 +190,6 @@ def _profile_hist(self, split, hist_name): is_num = is_numeric(hist0) # these are the profiled quantities we will monitor - fields = [] if dimension == 1: fields = list(self.general_stats_1d) fields += ( @@ -200,6 +199,8 @@ def _profile_hist(self, split, hist_name): ) elif dimension == 2: fields = list(self.general_stats_2d) + else: + fields = [] # now loop over split-axis, e.g. 
time index, and profile each sub-hist x:y
        profile_list = []
diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py
index 47135dab..27d4093c 100644
--- a/popmon/pipeline/metrics.py
+++ b/popmon/pipeline/metrics.py
@@ -147,8 +147,8 @@ def stability_metrics(
         "monitoring_rules": monitoring_rules,
         "pull_rules": pull_rules,
         "features": features,
+        **kwargs,
     }
-    cfg.update(kwargs)
 
     datastore = {"hists": hists}
     if reference_type == "external":

From 540d8a54539da00fe74177b6be7f62c989a29822 Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Tue, 26 Oct 2021 12:20:01 +0200
Subject: [PATCH 21/34] refactor: pipeline abstract class

Refactor the code in order for the Pipeline class to be abstract.
This ensures that each pipeline is named and improves the code quality
in general.
---
 popmon/base/pipeline.py                      |   5 +-
 popmon/pipeline/amazing_pipeline.py          |  24 +-
 popmon/pipeline/metrics.py                   |  45 +-
 popmon/pipeline/metrics_pipelines.py         | 939 ++++++++++--------
 popmon/pipeline/report.py                    |  24 +-
 popmon/pipeline/report_pipelines.py          | 514 +++++-----
 .../popmon/pipeline/test_report_pipelines.py |  24 +-
 7 files changed, 817 insertions(+), 758 deletions(-)

diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py
index 18b02146..ae18013c 100644
--- a/popmon/base/pipeline.py
+++ b/popmon/base/pipeline.py
@@ -19,11 +19,12 @@
 
 import json
 import logging
+from abc import ABC
 from pathlib import Path
 
 
-class Pipeline:
-    """Base class used for to run modules in a pipeline."""
+class Pipeline(ABC):
+    """Abstract base class used to run modules in a pipeline."""
 
     def __init__(self, modules, logger=None):
         """Initialization of the pipeline
diff --git a/popmon/pipeline/amazing_pipeline.py b/popmon/pipeline/amazing_pipeline.py
index a5dbb0ac..3324dd90 100644
--- a/popmon/pipeline/amazing_pipeline.py
+++ b/popmon/pipeline/amazing_pipeline.py
@@ -25,7 +25,19 @@
 from ..base import Pipeline
 from ..config import config
 from ..io import JsonReader
-from ..pipeline.report_pipelines import self_reference
+from ..pipeline.report_pipelines import SelfReference
+
+
+class AmazingPipeline(Pipeline):
+    def __init__(self, **kwargs):
+        modules = [
+            JsonReader(
+                file_path=kwargs["histograms_path"], store_key=kwargs["hists_key"]
+            ),
+            # Or ExternalReference, RollingReference etc.
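+            # For instance, a hypothetical external-reference variant could be
+            # plugged in instead (sketch only; assumes reference histograms are
+            # provided under a "ref_hists_key" entry in kwargs):
+            #   ExternalReference(
+            #       hists_key=kwargs["hists_key"],
+            #       ref_hists_key=kwargs["ref_hists_key"],
+            #   ),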
+ SelfReference(**kwargs), + ] + super().__init__(modules) def run(): @@ -51,15 +63,7 @@ def run(): "show_stats": config["limited_stats"], } - pipeline = Pipeline( - modules=[ - JsonReader(file_path=cfg["histograms_path"], store_key=cfg["hists_key"]), - self_reference(**cfg), - # fixed_reference(**config), - # rolling_reference(**config), - # expanding_reference(**config), - ] - ) + pipeline = AmazingPipeline(**cfg) pipeline.transform(datastore={}) diff --git a/popmon/pipeline/metrics.py b/popmon/pipeline/metrics.py index 27d4093c..84b9dc62 100644 --- a/popmon/pipeline/metrics.py +++ b/popmon/pipeline/metrics.py @@ -27,25 +27,13 @@ make_histograms, ) -from ..pipeline.metrics_pipelines import ( - metrics_expanding_reference, - metrics_external_reference, - metrics_rolling_reference, - metrics_self_reference, -) +from ..pipeline.metrics_pipelines import create_metrics_pipeline logging.basicConfig( level=logging.INFO, format="%(asctime)s %(levelname)s [%(module)s]: %(message)s" ) logger = logging.getLogger() -_metrics_pipeline = { - "self": metrics_self_reference, - "external": metrics_external_reference, - "rolling": metrics_rolling_reference, - "expanding": metrics_expanding_reference, -} - def stability_metrics( hists, @@ -111,15 +99,9 @@ def stability_metrics( :param kwargs: residual keyword arguments passed on to report pipeline. :return: dict with results of metrics pipeline """ - # perform basic input checks - reference_types = list(_metrics_pipeline.keys()) - if reference_type not in reference_types: - raise TypeError(f"reference_type should be one of {str(reference_types)}.") if not isinstance(hists, dict): raise TypeError("hists should be a dict of histogrammar histograms.") - if reference_type == "external" and not isinstance(reference, dict): - raise TypeError("reference should be a dict of histogrammar histograms.") if not isinstance(monitoring_rules, dict): monitoring_rules = { @@ -137,25 +119,24 @@ def stability_metrics( first_cols = [k.split(":")[0] for k in list(hists.keys())] time_axis = max(set(first_cols), key=first_cols.count) - # configuration and datastore for report pipeline - cfg = { - "hists_key": "hists", - "ref_hists_key": "ref_hists", - "time_axis": time_axis, - "window": window, - "shift": shift, - "monitoring_rules": monitoring_rules, - "pull_rules": pull_rules, - "features": features, + pipeline = create_metrics_pipeline( + reference_type=reference_type, + reference=reference, + hists_key="hists", + ref_hists_key="ref_hists", + time_axis=time_axis, + window=window, + shift=shift, + monitoring_rules=monitoring_rules, + pull_rules=pull_rules, + features=features, **kwargs, - } + ) datastore = {"hists": hists} if reference_type == "external": datastore["ref_hists"] = reference - # execute reporting pipeline - pipeline = _metrics_pipeline[reference_type](**cfg) return pipeline.transform(datastore) diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py index 3de19b23..170695af 100644 --- a/popmon/pipeline/metrics_pipelines.py +++ b/popmon/pipeline/metrics_pipelines.py @@ -44,464 +44,521 @@ from ..hist.hist_splitter import HistSplitter -def metrics_self_reference( - hists_key="test_hists", - time_axis="date", - window=10, - monitoring_rules={}, - pull_rules={}, - features=None, - **kwargs, -): - """Example metrics pipeline for comparing test data with itself (full test set) +def get_metrics_pipeline_class(reference_type, reference): + _metrics_pipeline_register = { + "self": SelfReferenceMetricsPipeline, + "external": 
ExternalReferenceMetricsPipeline,
+        "rolling": RollingReferenceMetricsPipeline,
+        "expanding": ExpandingReferenceMetricsPipeline,
+    }
 
+    if reference_type not in _metrics_pipeline_register:
+        raise ValueError(
+            f"reference_type should be in {str(_metrics_pipeline_register.keys())}."
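+            # e.g. an unknown value renders as "reference_type should be in
+            # dict_keys(['self', 'external', 'rolling', 'expanding'])." (illustrative)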
+ ) + if ( + reference_type == "external" + and not isinstance(reference, dict) + and reference is not None + ): + raise TypeError("reference should be a dict of histogrammar histograms.") - pipeline = Pipeline(modules) - return pipeline + return _metrics_pipeline_register[reference_type] -def metrics_external_reference( - hists_key="test_hists", - ref_hists_key="ref_hists", - time_axis="date", +def create_metrics_pipeline( + reference_type="self", + reference=None, + hists_key="hists", + # ref_hists_key="ref_hists", + time_axis="", window=10, monitoring_rules={}, pull_rules={}, features=None, + # shift=1, **kwargs, ): - """Example metrics pipeline for comparing test data with other (full) external reference set + # configuration and datastore for report pipeline + cfg = { + "hists_key": hists_key, + "time_axis": time_axis, + "window": window, + "monitoring_rules": monitoring_rules, + "pull_rules": pull_rules, + "features": features, + # "ref_hists_key": ref_hists_key, + # "shift": shift, + **kwargs, + } - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str ref_hists_key: key to reference histograms in datastore. default is 'ref_hists' - :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) - :param int window: window size for trend detection. default is 10 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param kwargs: residual keyword arguments - :return: assembled external reference pipeline - """ - modules = [ - # --- 1. splitting of test histograms - HistSplitter( - read_key=hists_key, - store_key="split_hists", - features=features, - feature_begins_with=f"{time_axis}:", - ), - # --- 2. for each histogram with datetime i, comparison of histogram i with histogram i-1, results in - # chi2 comparison of histograms - PreviousHistComparer(read_key="split_hists", store_key="comparisons"), - # --- 3. Profiling of split reference histograms, then chi2 comparison with test histograms - HistSplitter( - read_key=ref_hists_key, - store_key="split_ref_hists", - features=features, - feature_begins_with=f"{time_axis}:", - ), - ReferenceHistComparer( - reference_key="split_ref_hists", - assign_to_key="split_hists", - store_key="comparisons", - ), - RefMedianMadPullCalculator( - reference_key="comparisons", - assign_to_key="comparisons", - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - metrics=["ref_max_prob_diff"], - ), - # --- 4. pull calculation compared with reference mean and std, to obtain normalized residuals of profiles - HistProfiler(read_key="split_hists", store_key="profiles"), - HistProfiler(read_key="split_ref_hists", store_key="ref_profiles"), - ReferencePullCalculator( - reference_key="ref_profiles", - assign_to_key="profiles", - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - ), - # --- 5. looking for significant rolling linear trends in selected features/metrics - ApplyFunc( - apply_to_key="profiles", - assign_to_key="comparisons", - apply_funcs=[ - { - "func": rolling_lr_zscore, - "suffix": f"_trend{window}_zscore", - "entire": True, - "window": window, - "metrics": ["mean", "phik", "fraction_true"], - } - ], - msg="Computing significance of (rolling) trend in means of features", - ), - # --- 6. 
generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, - # used for plotting in popmon_profiles report. - StaticBounds( - read_key="profiles", - rules=pull_rules, - store_key="dynamic_bounds", - suffix_mean="_mean", - suffix_std="_std", - ), - StaticBounds( - read_key="comparisons", - rules=pull_rules, - store_key="dynamic_bounds_comparisons", - suffix_mean="_mean", - suffix_std="_std", - ), - # --- 7. expand all (wildcard) static traffic light bounds and apply them. - # Applied to both profiles and comparisons datasets - TrafficLightAlerts( - read_key="profiles", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds", - ), - TrafficLightAlerts( - read_key="comparisons", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds_comparisons", - ), - ApplyFunc( - apply_to_key="traffic_lights", - apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], - assign_to_key="alerts", - msg="Generating traffic light alerts summary.", - ), - AlertsSummary(read_key="alerts"), - ] - - pipeline = Pipeline(modules) + # execute reporting pipeline + cls = get_metrics_pipeline_class(reference_type, reference) + pipeline = cls(**cfg) return pipeline -def metrics_rolling_reference( - hists_key="test_hists", - time_axis="date", - window=10, - shift=1, - monitoring_rules={}, - pull_rules={}, - features=None, - **kwargs, -): - """Example metrics pipeline for comparing test data with itself (rolling test set) +class SelfReferenceMetricsPipeline(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + monitoring_rules={}, + pull_rules={}, + features=None, + **kwargs, + ): + """Example metrics pipeline for comparing test data with itself (full test set) - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str time_axis: name of datetime feature. default is 'date' - :param int window: size of rolling window and for trend detection. default is 10 - :param int shift: shift in rolling window. default is 1 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param kwargs: residual keyword arguments - :return: assembled rolling reference pipeline - """ - modules = [ - # --- 1. splitting of test histograms - HistSplitter( - read_key=hists_key, - store_key="split_hists", - features=features, - feature_begins_with=f"{time_axis}:", - ), - # --- 2. for each histogram with datetime i, comparison of histogram i with histogram i-1, results in - # chi2 comparison of histograms - PreviousHistComparer(read_key="split_hists", store_key="comparisons"), - # --- 3. profiling of reference histograms, then comparison of with profiled test histograms - # results in chi2 comparison of histograms - RollingHistComparer( - read_key="split_hists", window=window, shift=shift, store_key="comparisons" - ), - RefMedianMadPullCalculator( - reference_key="comparisons", - assign_to_key="comparisons", - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - metrics=["roll_max_prob_diff"], - ), - # --- 4. 
profiling of histograms, then pull calculation compared with reference mean and std, - # to obtain normalized residuals of profiles - HistProfiler(read_key="split_hists", store_key="profiles"), - RollingPullCalculator( - read_key="profiles", - window=window, - shift=shift, - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - ), - # --- 5. looking for significant rolling linear trends in selected features/metrics - ApplyFunc( - apply_to_key="profiles", - assign_to_key="comparisons", - apply_funcs=[ - { - "func": rolling_lr_zscore, - "suffix": f"_trend{window}_zscore", - "entire": True, - "window": window, - "metrics": ["mean", "phik", "fraction_true"], - } - ], - msg="Computing significance of (rolling) trend in means of features", - ), - # --- 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, - # used for plotting in popmon_profiles report. - DynamicBounds( - read_key="profiles", - rules=pull_rules, - store_key="dynamic_bounds", - suffix_mean="_mean", - suffix_std="_std", - ), - DynamicBounds( - read_key="comparisons", - rules=pull_rules, - store_key="dynamic_bounds_comparisons", - suffix_mean="_mean", - suffix_std="_std", - ), - # --- 7. expand all (wildcard) static traffic light bounds and apply them. - # Applied to both profiles and comparisons datasets - TrafficLightAlerts( - read_key="profiles", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds", - ), - TrafficLightAlerts( - read_key="comparisons", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds_comparisons", - ), - ApplyFunc( - apply_to_key="traffic_lights", - apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], - assign_to_key="alerts", - msg="Generating traffic light alerts summary.", - ), - AlertsSummary(read_key="alerts"), - ] + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' + :param int window: window size for trend detection. default is 10 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param kwargs: residual keyword arguments + :return: assembled self reference pipeline + """ + modules = [ + # 1. splitting of test histograms + HistSplitter( + read_key=hists_key, + store_key="split_hists", + features=features, + feature_begins_with=f"{time_axis}:", + ), + # 2. for each histogram with datetime i, comparison of histogram i with histogram i-1, results in + # chi2 comparison of histograms + PreviousHistComparer(read_key="split_hists", store_key="comparisons"), + # 3. Comparison of with profiled test histograms, results in chi2 comparison of histograms + ReferenceHistComparer( + reference_key="split_hists", + assign_to_key="split_hists", + store_key="comparisons", + ), + RefMedianMadPullCalculator( + reference_key="comparisons", + assign_to_key="comparisons", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + metrics=["ref_max_prob_diff"], + ), + # 4. 
profiling of histograms, then pull calculation compared with reference mean and std, + # to obtain normalized residuals of profiles + HistProfiler(read_key="split_hists", store_key="profiles"), + RefMedianMadPullCalculator( + reference_key="profiles", + assign_to_key="profiles", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + ), + # 5. looking for significant rolling linear trends in selected features/metrics + ApplyFunc( + apply_to_key="profiles", + assign_to_key="comparisons", + apply_funcs=[ + { + "func": rolling_lr_zscore, + "suffix": f"_trend{window}_zscore", + "entire": True, + "window": window, + "metrics": ["mean", "phik", "fraction_true"], + } + ], + msg="Computing significance of (rolling) trend in means of features", + ), + # 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, + # used for plotting in popmon_profiles report. + StaticBounds( + read_key="profiles", + rules=pull_rules, + store_key="dynamic_bounds", + suffix_mean="_mean", + suffix_std="_std", + ), + StaticBounds( + read_key="comparisons", + rules=pull_rules, + store_key="dynamic_bounds_comparisons", + suffix_mean="_mean", + suffix_std="_std", + ), + # 7. expand all (wildcard) static traffic light bounds and apply them. + # Applied to both profiles and comparisons datasets + TrafficLightAlerts( + read_key="profiles", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds", + ), + TrafficLightAlerts( + read_key="comparisons", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds_comparisons", + ), + ApplyFunc( + apply_to_key="traffic_lights", + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], + assign_to_key="alerts", + msg="Generating traffic light alerts summary.", + ), + AlertsSummary(read_key="alerts"), + ] + super().__init__(modules) - pipeline = Pipeline(modules) - return pipeline +class ExternalReferenceMetricsPipeline(Pipeline): + def __init__( + self, + hists_key="test_hists", + ref_hists_key="ref_hists", + time_axis="date", + window=10, + monitoring_rules={}, + pull_rules={}, + features=None, + **kwargs, + ): + """Example metrics pipeline for comparing test data with other (full) external reference set -def metrics_expanding_reference( - hists_key="test_hists", - time_axis="date", - window=10, - shift=1, - monitoring_rules={}, - pull_rules={}, - features=None, - **kwargs, -): - """Example metrics pipeline for comparing test data with itself (expanding test set) + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str ref_hists_key: key to reference histograms in datastore. default is 'ref_hists' + :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) + :param int window: window size for trend detection. default is 10 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param kwargs: residual keyword arguments + :return: assembled external reference pipeline + """ + modules = [ + # 1. splitting of test histograms + HistSplitter( + read_key=hists_key, + store_key="split_hists", + features=features, + feature_begins_with=f"{time_axis}:", + ), + # 2. 
for each histogram with datetime i, comparison of histogram i with histogram i-1, results in + # chi2 comparison of histograms + PreviousHistComparer(read_key="split_hists", store_key="comparisons"), + # 3. Profiling of split reference histograms, then chi2 comparison with test histograms + HistSplitter( + read_key=ref_hists_key, + store_key="split_ref_hists", + features=features, + feature_begins_with=f"{time_axis}:", + ), + ReferenceHistComparer( + reference_key="split_ref_hists", + assign_to_key="split_hists", + store_key="comparisons", + ), + RefMedianMadPullCalculator( + reference_key="comparisons", + assign_to_key="comparisons", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + metrics=["ref_max_prob_diff"], + ), + # 4. pull calculation compared with reference mean and std, to obtain normalized residuals of profiles + HistProfiler(read_key="split_hists", store_key="profiles"), + HistProfiler(read_key="split_ref_hists", store_key="ref_profiles"), + ReferencePullCalculator( + reference_key="ref_profiles", + assign_to_key="profiles", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + ), + # 5. looking for significant rolling linear trends in selected features/metrics + ApplyFunc( + apply_to_key="profiles", + assign_to_key="comparisons", + apply_funcs=[ + { + "func": rolling_lr_zscore, + "suffix": f"_trend{window}_zscore", + "entire": True, + "window": window, + "metrics": ["mean", "phik", "fraction_true"], + } + ], + msg="Computing significance of (rolling) trend in means of features", + ), + # 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, + # used for plotting in popmon_profiles report. + StaticBounds( + read_key="profiles", + rules=pull_rules, + store_key="dynamic_bounds", + suffix_mean="_mean", + suffix_std="_std", + ), + StaticBounds( + read_key="comparisons", + rules=pull_rules, + store_key="dynamic_bounds_comparisons", + suffix_mean="_mean", + suffix_std="_std", + ), + # 7. expand all (wildcard) static traffic light bounds and apply them. + # Applied to both profiles and comparisons datasets + TrafficLightAlerts( + read_key="profiles", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds", + ), + TrafficLightAlerts( + read_key="comparisons", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds_comparisons", + ), + ApplyFunc( + apply_to_key="traffic_lights", + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], + assign_to_key="alerts", + msg="Generating traffic light alerts summary.", + ), + AlertsSummary(read_key="alerts"), + ] + super().__init__(modules) - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str time_axis: name of datetime feature. default is 'date' - :param int window: window size for trend detection. default is 10 - :param int shift: shift in expanding window. default is 1 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param kwargs: residual keyword arguments - :return: assembled expanding reference pipeline - """ - modules = [ - # --- 1. splitting of test histograms - HistSplitter( - read_key=hists_key, - store_key="split_hists", - features=features, - feature_begins_with=f"{time_axis}:", - ), - # --- 2. 
for each histogram with datetime i, comparison of histogram i with histogram i-1, results in - # chi2 comparison of histograms - PreviousHistComparer(read_key="split_hists", store_key="comparisons"), - # --- 3. profiling of reference histograms, then comparison of with profiled test histograms - # results in chi2 comparison of histograms - ExpandingHistComparer( - read_key="split_hists", shift=shift, store_key="comparisons" - ), - # --- 4. profiling of histograms, then pull calculation compared with reference mean and std, - # to obtain normalized residuals of profiles - RefMedianMadPullCalculator( - reference_key="comparisons", - assign_to_key="comparisons", - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - metrics=["expanding_max_prob_diff"], - ), - HistProfiler(read_key="split_hists", store_key="profiles"), - ExpandingPullCalculator( - read_key="profiles", - shift=shift, - suffix_mean="_mean", - suffix_std="_std", - suffix_pull="_pull", - ), - # --- 5. looking for significant rolling linear trends in selected features/metrics - ApplyFunc( - apply_to_key="profiles", - assign_to_key="comparisons", - apply_funcs=[ - { - "func": rolling_lr_zscore, - "suffix": f"_trend{window}_zscore", - "entire": True, - "window": window, - "metrics": ["mean", "phik", "fraction_true"], - } - ], - msg="Computing significance of (rolling) trend in means of features", - ), - # --- 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, - # used for plotting in popmon_profiles report. - DynamicBounds( - read_key="profiles", - rules=pull_rules, - store_key="dynamic_bounds", - suffix_mean="_mean", - suffix_std="_std", - ), - DynamicBounds( - read_key="comparisons", - rules=pull_rules, - store_key="dynamic_bounds_comparisons", - suffix_mean="_mean", - suffix_std="_std", - ), - # --- 7. expand all (wildcard) static traffic light bounds and apply them. - # Applied to both profiles and comparisons datasets - TrafficLightAlerts( - read_key="profiles", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds", - ), - TrafficLightAlerts( - read_key="comparisons", - rules=monitoring_rules, - store_key="traffic_lights", - expanded_rules_key="static_bounds_comparisons", - ), - ApplyFunc( - apply_to_key="traffic_lights", - apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], - assign_to_key="alerts", - msg="Generating traffic light alerts summary.", - ), - AlertsSummary(read_key="alerts"), - ] - pipeline = Pipeline(modules) - return pipeline +class RollingReferenceMetricsPipeline(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + shift=1, + monitoring_rules={}, + pull_rules={}, + features=None, + **kwargs, + ): + """Example metrics pipeline for comparing test data with itself (rolling test set) + + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' + :param int window: size of rolling window and for trend detection. default is 10 + :param int shift: shift in rolling window. default is 1 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param kwargs: residual keyword arguments + :return: assembled rolling reference pipeline + """ + modules = [ + # 1. 
splitting of test histograms + HistSplitter( + read_key=hists_key, + store_key="split_hists", + features=features, + feature_begins_with=f"{time_axis}:", + ), + # 2. for each histogram with datetime i, comparison of histogram i with histogram i-1, results in + # chi2 comparison of histograms + PreviousHistComparer(read_key="split_hists", store_key="comparisons"), + # 3. profiling of reference histograms, then comparison of with profiled test histograms + # results in chi2 comparison of histograms + RollingHistComparer( + read_key="split_hists", + window=window, + shift=shift, + store_key="comparisons", + ), + RefMedianMadPullCalculator( + reference_key="comparisons", + assign_to_key="comparisons", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + metrics=["roll_max_prob_diff"], + ), + # 4. profiling of histograms, then pull calculation compared with reference mean and std, + # to obtain normalized residuals of profiles + HistProfiler(read_key="split_hists", store_key="profiles"), + RollingPullCalculator( + read_key="profiles", + window=window, + shift=shift, + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + ), + # 5. looking for significant rolling linear trends in selected features/metrics + ApplyFunc( + apply_to_key="profiles", + assign_to_key="comparisons", + apply_funcs=[ + { + "func": rolling_lr_zscore, + "suffix": f"_trend{window}_zscore", + "entire": True, + "window": window, + "metrics": ["mean", "phik", "fraction_true"], + } + ], + msg="Computing significance of (rolling) trend in means of features", + ), + # 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, + # used for plotting in popmon_profiles report. + DynamicBounds( + read_key="profiles", + rules=pull_rules, + store_key="dynamic_bounds", + suffix_mean="_mean", + suffix_std="_std", + ), + DynamicBounds( + read_key="comparisons", + rules=pull_rules, + store_key="dynamic_bounds_comparisons", + suffix_mean="_mean", + suffix_std="_std", + ), + # 7. expand all (wildcard) static traffic light bounds and apply them. + # Applied to both profiles and comparisons datasets + TrafficLightAlerts( + read_key="profiles", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds", + ), + TrafficLightAlerts( + read_key="comparisons", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds_comparisons", + ), + ApplyFunc( + apply_to_key="traffic_lights", + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], + assign_to_key="alerts", + msg="Generating traffic light alerts summary.", + ), + AlertsSummary(read_key="alerts"), + ] + super().__init__(modules) + + +class ExpandingReferenceMetricsPipeline(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + shift=1, + monitoring_rules={}, + pull_rules={}, + features=None, + **kwargs, + ): + """Example metrics pipeline for comparing test data with itself (expanding test set) + + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' + :param int window: window size for trend detection. default is 10 + :param int shift: shift in expanding window. 
default is 1 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param kwargs: residual keyword arguments + :return: assembled expanding reference pipeline + """ + modules = [ + # 1. splitting of test histograms + HistSplitter( + read_key=hists_key, + store_key="split_hists", + features=features, + feature_begins_with=f"{time_axis}:", + ), + # 2. for each histogram with datetime i, comparison of histogram i with histogram i-1, results in + # chi2 comparison of histograms + PreviousHistComparer(read_key="split_hists", store_key="comparisons"), + # 3. profiling of reference histograms, then comparison of with profiled test histograms + # results in chi2 comparison of histograms + ExpandingHistComparer( + read_key="split_hists", shift=shift, store_key="comparisons" + ), + # 4. profiling of histograms, then pull calculation compared with reference mean and std, + # to obtain normalized residuals of profiles + RefMedianMadPullCalculator( + reference_key="comparisons", + assign_to_key="comparisons", + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + metrics=["expanding_max_prob_diff"], + ), + HistProfiler(read_key="split_hists", store_key="profiles"), + ExpandingPullCalculator( + read_key="profiles", + shift=shift, + suffix_mean="_mean", + suffix_std="_std", + suffix_pull="_pull", + ), + # 5. looking for significant rolling linear trends in selected features/metrics + ApplyFunc( + apply_to_key="profiles", + assign_to_key="comparisons", + apply_funcs=[ + { + "func": rolling_lr_zscore, + "suffix": f"_trend{window}_zscore", + "entire": True, + "window": window, + "metrics": ["mean", "phik", "fraction_true"], + } + ], + msg="Computing significance of (rolling) trend in means of features", + ), + # 6. generate dynamic traffic light boundaries, based on traffic lights for normalized residuals, + # used for plotting in popmon_profiles report. + DynamicBounds( + read_key="profiles", + rules=pull_rules, + store_key="dynamic_bounds", + suffix_mean="_mean", + suffix_std="_std", + ), + DynamicBounds( + read_key="comparisons", + rules=pull_rules, + store_key="dynamic_bounds_comparisons", + suffix_mean="_mean", + suffix_std="_std", + ), + # 7. expand all (wildcard) static traffic light bounds and apply them. 
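+            # (e.g. a wildcard rule "*_pull": [7, 4, -4, -7], the default pull
+            # rule seen in the report.py context further below, expands to one
+            # bound set per matching feature and metric; values illustrative)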
+ # Applied to both profiles and comparisons datasets + TrafficLightAlerts( + read_key="profiles", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds", + ), + TrafficLightAlerts( + read_key="comparisons", + rules=monitoring_rules, + store_key="traffic_lights", + expanded_rules_key="static_bounds_comparisons", + ), + ApplyFunc( + apply_to_key="traffic_lights", + apply_funcs=[{"func": traffic_light_summary, "axis": 1, "suffix": ""}], + assign_to_key="alerts", + msg="Generating traffic light alerts summary.", + ), + AlertsSummary(read_key="alerts"), + ] + super().__init__(modules) diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index a25789e1..0fbe9377 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -29,13 +29,7 @@ from ..base import Module, Pipeline from ..config import config -from ..pipeline.report_pipelines import ( - ReportPipe, - expanding_reference, - external_reference, - rolling_reference, - self_reference, -) +from ..pipeline.report_pipelines import ReportPipe, get_report_pipeline_class from ..resources import templates_env logging.basicConfig( @@ -43,13 +37,6 @@ ) logger = logging.getLogger() -_report_pipeline = { - "self": self_reference, - "external": external_reference, - "rolling": rolling_reference, - "expanding": expanding_reference, -} - def stability_report( hists, @@ -128,13 +115,8 @@ def stability_report( :return: dict with results of reporting pipeline """ # perform basic input checks - reference_types = list(_report_pipeline.keys()) - if reference_type not in reference_types: - raise ValueError(f"reference_type should be one of {str(reference_types)}.") if not isinstance(hists, dict): raise TypeError("hists should be a dict of histogrammar histograms.") - if reference_type == "external" and not isinstance(reference, dict): - raise TypeError("reference should be a dict of histogrammar histograms.") if not isinstance(monitoring_rules, dict): monitoring_rules = { "*_pull": [7, 4, -4, -7], @@ -177,7 +159,7 @@ def stability_report( datastore["ref_hists"] = reference # execute reporting pipeline - pipeline = _report_pipeline[reference_type](**cfg) + pipeline = get_report_pipeline_class(reference_type, reference)(**cfg) stability_report = StabilityReport() stability_report.transform(pipeline.transform(datastore)) return stability_report @@ -522,7 +504,7 @@ def regenerate( """ # basic checks if not self.datastore: - self.logger.warning("Empty datastore, cannot regenerate report.") + self.logger.warning("Empty datastore, could not regenerate report.") return None # start from clean slate diff --git a/popmon/pipeline/report_pipelines.py b/popmon/pipeline/report_pipelines.py index ad71727f..22f7295b 100644 --- a/popmon/pipeline/report_pipelines.py +++ b/popmon/pipeline/report_pipelines.py @@ -24,10 +24,10 @@ from ..config import config from ..io import FileWriter from ..pipeline.metrics_pipelines import ( - metrics_expanding_reference, - metrics_external_reference, - metrics_rolling_reference, - metrics_self_reference, + ExpandingReferenceMetricsPipeline, + ExternalReferenceMetricsPipeline, + RollingReferenceMetricsPipeline, + SelfReferenceMetricsPipeline, ) from ..visualization import ( AlertSectionGenerator, @@ -38,235 +38,253 @@ ) -def self_reference( - hists_key="test_hists", - time_axis="date", - window=10, - monitoring_rules={}, - pull_rules={}, - features=None, - skip_empty_plots=True, - last_n=0, - plot_hist_n=6, - report_filepath=None, - show_stats=None, - **kwargs, -): - """Example 
pipeline for comparing test data with itself (full test set) +def get_report_pipeline_class(reference_type, reference): + _report_pipeline = { + "self": SelfReference, + "external": ExternalReference, + "rolling": RollingReference, + "expanding": ExpandingReference, + } + reference_types = list(_report_pipeline.keys()) + if reference_type not in reference_types: + raise ValueError(f"reference_type should be one of {str(reference_types)}.") + if reference_type == "external" and not isinstance(reference, dict): + raise TypeError("reference should be a dict of histogrammar histograms.") - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) - :param int window: window size for trend detection. default is 10 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param bool skip_empty_plots: if false, also show empty plots in report with only nans or zeroes (optional) - :param int last_n: plot statistic data for last 'n' periods (optional) - :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) - :param str report_filepath: the file path where to output the report (optional) - :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) - :param kwargs: residual keyword arguments - :return: assembled self reference pipeline - """ - modules = [ - metrics_self_reference( - hists_key, - time_axis, - window, - monitoring_rules, - pull_rules, - features, - **kwargs, - ), - ReportPipe( - sections_key="report_sections", - store_key="html_report", - skip_empty_plots=skip_empty_plots, - last_n=last_n, - plot_hist_n=plot_hist_n, - report_filepath=report_filepath, - show_stats=show_stats, - ), - ] + return _report_pipeline[reference_type] - pipeline = Pipeline(modules) - # pipeline.to_json("pipeline_self_reference_versioned.json", versioned=True) - # pipeline.to_json("pipeline_self_reference_unversioned.json", versioned=False) - return pipeline +class SelfReference(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + monitoring_rules={}, + pull_rules={}, + features=None, + skip_empty_plots=True, + last_n=0, + plot_hist_n=6, + report_filepath=None, + show_stats=None, + **kwargs, + ): + """Example pipeline for comparing test data with itself (full test set) -def external_reference( - hists_key="test_hists", - ref_hists_key="ref_hists", - time_axis="date", - window=10, - monitoring_rules={}, - pull_rules={}, - features=None, - skip_empty_plots=True, - last_n=0, - plot_hist_n=2, - report_filepath=None, - show_stats=None, - **kwargs, -): - """Example pipeline for comparing test data with other (full) external reference set + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) + :param int window: window size for trend detection. 
default is 10 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param bool skip_empty_plots: if false, also show empty plots in report with only nans or zeroes (optional) + :param int last_n: plot statistic data for last 'n' periods (optional) + :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) + :param str report_filepath: the file path where to output the report (optional) + :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) + :param kwargs: residual keyword arguments + :return: assembled self reference pipeline + """ + modules = [ + SelfReferenceMetricsPipeline( + hists_key, + time_axis, + window, + monitoring_rules, + pull_rules, + features, + **kwargs, + ), + ReportPipe( + sections_key="report_sections", + store_key="html_report", + skip_empty_plots=skip_empty_plots, + last_n=last_n, + plot_hist_n=plot_hist_n, + report_filepath=report_filepath, + show_stats=show_stats, + ), + ] - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str ref_hists_key: key to reference histograms in datastore. default is 'ref_hists' - :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) - :param int window: window size for trend detection. default is 10 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) - :param int last_n: plot statistic data for last 'n' periods (optional) - :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) - :param str report_filepath: the file path where to output the report (optional) - :param list show_stats: list of statistic name patterns to show in the report. 
If None, show all (optional) - :param kwargs: residual keyword arguments - :return: assembled external reference pipeline - """ - modules = [ - metrics_external_reference( - hists_key, - ref_hists_key, - time_axis, - window, - monitoring_rules, - pull_rules, - features, - **kwargs, - ), - ReportPipe( - sections_key="report_sections", - store_key="html_report", - skip_empty_plots=skip_empty_plots, - last_n=last_n, - plot_hist_n=plot_hist_n, - report_filepath=report_filepath, - show_stats=show_stats, - ), - ] + super().__init__(modules) - pipeline = Pipeline(modules) - return pipeline +class ExternalReference(Pipeline): + def __init__( + self, + hists_key="test_hists", + ref_hists_key="ref_hists", + time_axis="date", + window=10, + monitoring_rules={}, + pull_rules={}, + features=None, + skip_empty_plots=True, + last_n=0, + plot_hist_n=2, + report_filepath=None, + show_stats=None, + **kwargs, + ): + """Example pipeline for comparing test data with other (full) external reference set -def rolling_reference( - hists_key="test_hists", - time_axis="date", - window=10, - shift=1, - monitoring_rules={}, - pull_rules={}, - features=None, - skip_empty_plots=True, - last_n=0, - plot_hist_n=6, - report_filepath=None, - show_stats=None, - **kwargs, -): - """Example pipeline for comparing test data with itself (rolling test set) + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str ref_hists_key: key to reference histograms in datastore. default is 'ref_hists' + :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) + :param int window: window size for trend detection. default is 10 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) + :param int last_n: plot statistic data for last 'n' periods (optional) + :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) + :param str report_filepath: the file path where to output the report (optional) + :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) + :param kwargs: residual keyword arguments + :return: assembled external reference pipeline + """ + modules = [ + ExternalReferenceMetricsPipeline( + hists_key, + ref_hists_key, + time_axis, + window, + monitoring_rules, + pull_rules, + features, + **kwargs, + ), + ReportPipe( + sections_key="report_sections", + store_key="html_report", + skip_empty_plots=skip_empty_plots, + last_n=last_n, + plot_hist_n=plot_hist_n, + report_filepath=report_filepath, + show_stats=show_stats, + ), + ] - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) - :param int window: size of rolling window and for trend detection. default is 10 - :param int shift: shift in rolling window. 
default is 1 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) - :param int last_n: plot statistic data for last 'n' periods (optional) - :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) - :param str report_filepath: the file path where to output the report (optional) - :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) - :param kwargs: residual keyword arguments - :return: assembled rolling reference pipeline - """ - modules = [ - metrics_rolling_reference( - hists_key, - time_axis, - window, - shift, - monitoring_rules, - pull_rules, - features, - **kwargs, - ), - ReportPipe( - sections_key="report_sections", - store_key="html_report", - skip_empty_plots=skip_empty_plots, - last_n=last_n, - plot_hist_n=plot_hist_n, - report_filepath=report_filepath, - show_stats=show_stats, - ), - ] + super().__init__(modules) - pipeline = Pipeline(modules) - return pipeline +class RollingReference(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + shift=1, + monitoring_rules={}, + pull_rules={}, + features=None, + skip_empty_plots=True, + last_n=0, + plot_hist_n=6, + report_filepath=None, + show_stats=None, + **kwargs, + ): + """Example pipeline for comparing test data with itself (rolling test set) -def expanding_reference( - hists_key="test_hists", - time_axis="date", - window=10, - shift=1, - monitoring_rules={}, - pull_rules={}, - features=None, - skip_empty_plots=True, - last_n=0, - plot_hist_n=6, - report_filepath=None, - show_stats=None, - **kwargs, -): - """Example pipeline for comparing test data with itself (expanding test set) + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) + :param int window: size of rolling window and for trend detection. default is 10 + :param int shift: shift in rolling window. default is 1 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) + :param int last_n: plot statistic data for last 'n' periods (optional) + :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) + :param str report_filepath: the file path where to output the report (optional) + :param list show_stats: list of statistic name patterns to show in the report. 
If None, show all (optional) + :param kwargs: residual keyword arguments + :return: assembled rolling reference pipeline + """ + modules = [ + RollingReferenceMetricsPipeline( + hists_key, + time_axis, + window, + shift, + monitoring_rules, + pull_rules, + features, + **kwargs, + ), + ReportPipe( + sections_key="report_sections", + store_key="html_report", + skip_empty_plots=skip_empty_plots, + last_n=last_n, + plot_hist_n=plot_hist_n, + report_filepath=report_filepath, + show_stats=show_stats, + ), + ] + + super().__init__(modules) + + +class ExpandingReference(Pipeline): + def __init__( + self, + hists_key="test_hists", + time_axis="date", + window=10, + shift=1, + monitoring_rules={}, + pull_rules={}, + features=None, + skip_empty_plots=True, + last_n=0, + plot_hist_n=6, + report_filepath=None, + show_stats=None, + **kwargs, + ): + """Example pipeline for comparing test data with itself (expanding test set) - :param str hists_key: key to test histograms in datastore. default is 'test_hists' - :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) - :param int window: window size for trend detection. default is 10 - :param int shift: shift in expanding window. default is 1 - :param dict monitoring_rules: traffic light rules - :param dict pull_rules: pull rules to determine dynamic boundaries - :param list features: features of histograms to pick up from input data (optional) - :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) - :param int last_n: plot statistic data for last 'n' periods (optional) - :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) - :param str report_filepath: the file path where to output the report (optional) - :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) - :param kwargs: residual keyword arguments - :return: assembled expanding reference pipeline - """ - modules = [ - metrics_expanding_reference( - hists_key, - time_axis, - window, - shift, - monitoring_rules, - pull_rules, - features, - **kwargs, - ), - ReportPipe( - sections_key="report_sections", - store_key="html_report", - skip_empty_plots=skip_empty_plots, - last_n=last_n, - plot_hist_n=plot_hist_n, - report_filepath=report_filepath, - show_stats=show_stats, - ), - ] + :param str hists_key: key to test histograms in datastore. default is 'test_hists' + :param str time_axis: name of datetime feature. default is 'date' (column should be timestamp, date(time) or numeric batch id) + :param int window: window size for trend detection. default is 10 + :param int shift: shift in expanding window. default is 1 + :param dict monitoring_rules: traffic light rules + :param dict pull_rules: pull rules to determine dynamic boundaries + :param list features: features of histograms to pick up from input data (optional) + :param bool skip_empty_plots: if false, show empty plots in report with only nans or zeroes (optional) + :param int last_n: plot statistic data for last 'n' periods (optional) + :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) + :param str report_filepath: the file path where to output the report (optional) + :param list show_stats: list of statistic name patterns to show in the report. 
If None, show all (optional) + :param kwargs: residual keyword arguments + :return: assembled expanding reference pipeline + """ + modules = [ + ExpandingReferenceMetricsPipeline( + hists_key, + time_axis, + window, + shift, + monitoring_rules, + pull_rules, + features, + **kwargs, + ), + ReportPipe( + sections_key="report_sections", + store_key="html_report", + skip_empty_plots=skip_empty_plots, + last_n=last_n, + plot_hist_n=plot_hist_n, + report_filepath=report_filepath, + show_stats=show_stats, + ), + ] - pipeline = Pipeline(modules) - return pipeline + super().__init__(modules) class ReportPipe(Pipeline): @@ -306,32 +324,23 @@ def __init__( :param int plot_hist_n: plot histograms for last 'n' periods. default is 1 (optional) :param list show_stats: list of statistic name patterns to show in the report. If None, show all (optional) """ - super().__init__(modules=[]) self.store_key = store_key # dictionary of section descriptions descs = config["section_descriptions"] # default keyword arguments for each section - def sg_kws(read_key): - return { - "read_key": read_key, - "store_key": sections_key, - "skip_empty_plots": skip_empty_plots, - "last_n": last_n, - "skip_first_n": skip_first_n, - "skip_last_n": skip_last_n, - "show_stats": show_stats, - "description": descs.get(read_key, ""), - } + sg_kws = { + "store_key": sections_key, + "skip_empty_plots": skip_empty_plots, + "last_n": last_n, + "skip_first_n": skip_first_n, + "skip_last_n": skip_last_n, + "show_stats": show_stats, + } - self.modules = [ - # --- o generate sections - # - a section of profiled statistics with dynamic or static traffic light bounds - # - a section of histogram and pull comparison statistics - # - a section showing all traffic light alerts of monitored statistics - # - a section with a summary of traffic light alerts - # --- o generate report + modules = [ + # generate section with histogram HistogramSection( read_key="split_hists", store_key=sections_key, @@ -340,28 +349,47 @@ def sg_kws(read_key): last_n=plot_hist_n, description=descs.get("histograms", ""), ), + # section showing all traffic light alerts of monitored statistics TrafficLightSectionGenerator( - section_name=traffic_lights_section, **sg_kws("traffic_lights") + read_key="traffic_lights", + description=descs.get("traffic_lights", ""), + section_name=traffic_lights_section, + **sg_kws, ), - AlertSectionGenerator(section_name=alerts_section, **sg_kws("alerts")), + # section with a summary of traffic light alerts + AlertSectionGenerator( + read_key="alerts", + description=descs.get("alerts", ""), + section_name=alerts_section, + **sg_kws, + ), + # section of histogram and pull comparison statistics SectionGenerator( dynamic_bounds="dynamic_bounds_comparisons", static_bounds="static_bounds_comparisons", section_name=comparisons_section, ignore_stat_endswith=["_mean", "_std", "_pull"], - **sg_kws("comparisons"), + read_key="comparisons", + description=descs.get("comparisons", ""), + **sg_kws, ), + # section of profiled statistics with dynamic or static traffic light bounds SectionGenerator( dynamic_bounds="dynamic_bounds", section_name=profiles_section, static_bounds="static_bounds", ignore_stat_endswith=["_mean", "_std", "_pull"], - **sg_kws("profiles"), + read_key="profiles", + description=descs.get("profiles", ""), + **sg_kws, ), + # generate report ReportGenerator(read_key=sections_key, store_key=store_key), ] if isinstance(report_filepath, (str, Path)) and len(report_filepath) > 0: - self.modules.append(FileWriter(store_key, 
file_path=report_filepath)) + modules.append(FileWriter(store_key, file_path=report_filepath)) + + super().__init__(modules=modules) def transform(self, datastore): self.logger.info(f'Generating report "{self.store_key}".') diff --git a/tests/popmon/pipeline/test_report_pipelines.py b/tests/popmon/pipeline/test_report_pipelines.py index a22f7c9c..983a6212 100644 --- a/tests/popmon/pipeline/test_report_pipelines.py +++ b/tests/popmon/pipeline/test_report_pipelines.py @@ -2,10 +2,10 @@ from popmon.base import Pipeline from popmon.io import JsonReader from popmon.pipeline.report_pipelines import ( - expanding_reference, - external_reference, - rolling_reference, - self_reference, + ExpandingReference, + ExternalReference, + RollingReference, + SelfReference, ) @@ -17,7 +17,7 @@ def test_self_reference(): JsonReader( file_path=resources.data("example_histogram.json"), store_key="hists" ), - self_reference(hists_key="hists", features=hist_list), + SelfReference(hists_key="hists", features=hist_list), ] ) pipeline.transform(datastore={}) @@ -31,8 +31,10 @@ def test_external_reference(): JsonReader( file_path=resources.data("example_histogram.json"), store_key="hists" ), - external_reference( - hists_key="hists", ref_hists_key="hists", features=hist_list + ExternalReference( + hists_key="hists", + ref_hists_key="hists", + features=hist_list, ), ] ) @@ -47,7 +49,11 @@ def test_rolling_reference(): JsonReader( file_path=resources.data("example_histogram.json"), store_key="hists" ), - rolling_reference(hists_key="hists", window=5, features=hist_list), + RollingReference( + hists_key="hists", + window=5, + features=hist_list, + ), ] ) pipeline.transform(datastore={}) @@ -61,7 +67,7 @@ def test_expanding_reference(): JsonReader( file_path=resources.data("example_histogram.json"), store_key="hists" ), - expanding_reference(hists_key="hists", features=hist_list), + ExpandingReference(hists_key="hists", features=hist_list), ] ) pipeline.transform(datastore={}) From 21b1e96f65a7f366e1c465b711b63e74b8bd33eb Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 12:21:21 +0200 Subject: [PATCH 22/34] refactor: remove unused imports --- examples/flight_delays.py | 2 +- examples/synthetic_data.py | 2 +- popmon/__init__.py | 13 +++++++++++++ popmon/base/module.py | 4 ++-- popmon/pipeline/report.py | 2 +- 5 files changed, 18 insertions(+), 5 deletions(-) diff --git a/examples/flight_delays.py b/examples/flight_delays.py index 657cff06..103871bb 100644 --- a/examples/flight_delays.py +++ b/examples/flight_delays.py @@ -1,6 +1,6 @@ import pandas as pd -import popmon +import popmon # noqa from popmon import resources # open synthetic data diff --git a/examples/synthetic_data.py b/examples/synthetic_data.py index b219a40b..62fe981a 100644 --- a/examples/synthetic_data.py +++ b/examples/synthetic_data.py @@ -1,6 +1,6 @@ import pandas as pd -import popmon +import popmon # noqa from popmon import resources # open synthetic data diff --git a/popmon/__init__.py b/popmon/__init__.py index f8ee496d..ee6dbcb8 100644 --- a/popmon/__init__.py +++ b/popmon/__init__.py @@ -32,3 +32,16 @@ from .pipeline.report import df_stability_report, stability_report from .stitching import stitch_histograms from .version import version as __version__ + +__all__ = [ + "get_bin_specs", + "get_time_axes", + "make_histograms", + "decorators", + "df_stability_metrics", + "stability_metrics", + "df_stability_report", + "stability_report", + "stitch_histograms", + "__version__", +] diff --git a/popmon/base/module.py 
b/popmon/base/module.py index 13208c8d..de24b50d 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -19,11 +19,11 @@ import logging -from abc import ABC, abstractmethod +from abc import ABC class Module(ABC): - """Base class used for modules in a pipeline.""" + """Abstract base class used for modules in a pipeline.""" _input_keys = None _output_keys = None diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 0fbe9377..86e5e5e1 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -27,7 +27,7 @@ make_histograms, ) -from ..base import Module, Pipeline +from ..base import Module from ..config import config from ..pipeline.report_pipelines import ReportPipe, get_report_pipeline_class from ..resources import templates_env From fd84944468c9c783c32a7ee115328fc0408d98c1 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 12:21:28 +0200 Subject: [PATCH 23/34] ci: check for unused imports --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2918d023..60d700ff 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: - id: flake8 additional_dependencies: - flake8-comprehensions - args: [ "--select=E9,F63,F7,F82,C4"] + args: [ "--select=E9,F63,F7,F82,C4,F401"] - repo: https://github.com/asottile/pyupgrade rev: v2.29.1 hooks: From bb09d730d275e4a97d0d7174d8a325e8c98bea44 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 12:22:05 +0200 Subject: [PATCH 24/34] feat: improve pipeline visualization better handling of subgraphs standalone --- tools/pipeline_viz.py | 189 ++++++++++++++++++++++++------------------ 1 file changed, 110 insertions(+), 79 deletions(-) diff --git a/tools/pipeline_viz.py b/tools/pipeline_viz.py index 8e50ad43..d6f53765 100644 --- a/tools/pipeline_viz.py +++ b/tools/pipeline_viz.py @@ -1,12 +1,11 @@ import json +from itertools import cycle from pathlib import Path -import networkx as nx -import pygraphviz -from networkx.drawing.nx_agraph import to_agraph +import pygraphviz as pgv -def generate_pipeline_vizualisation( +def generate_pipeline_visualisation( input_file, output_file, include_subgraphs: bool = False, @@ -15,87 +14,119 @@ def generate_pipeline_vizualisation( data = Path(input_file).read_text() data = json.loads(data) - subgraphs = [] - modules = [] - - def populate(item): - if item["type"] == "subgraph": - mods = [] - for m in item["modules"]: - mods += populate(m) - - subgraphs.append({"modules": mods, "name": item["name"]}) - return mods - elif item["type"] == "module": - modules.append(item) - name = f"{item['name']}_{item['i']}" - return [name] + list(item["out"].values()) - else: - raise ValueError() - - populate(data) - - G = nx.DiGraph() - for module in modules: + tableau20 = [ + (31, 119, 180), + (174, 199, 232), + (255, 127, 14), + (255, 187, 120), + (44, 160, 44), + (152, 223, 138), + (214, 39, 40), + (255, 152, 150), + (148, 103, 189), + (197, 176, 213), + (140, 86, 75), + (196, 156, 148), + (227, 119, 194), + (247, 182, 210), + (127, 127, 127), + (199, 199, 199), + (188, 189, 34), + (219, 219, 141), + (23, 190, 207), + (158, 218, 229), + ] + + colors = [f"#{r:02x}{g:02x}{b:02x}" for r, g, b in tableau20] + subgraph_colors = cycle(colors) + module_style = {"shape": "rectangle", "fillcolor": "chartreuse", "style": "filled"} + dataset_style = {"shape": "oval", "fillcolor": "orange", "style": "filled"} + subgraph_style = {} + edge_style = 
{"fontcolor": "gray50"} + + def get_module_label(module): label = f"<{module['name']}" d = module.get("desc", "") if len(d) > 0: label += f"
{d}" label += ">" + return label + + def process(data, G): + if data["type"] == "subgraph": + if include_subgraphs: + c = G.add_subgraph( + name=f'cluster_{data["name"]}', + label=data["name"], + color=next(subgraph_colors), + **subgraph_style, + ) + else: + c = G + for m in data["modules"]: + process(m, c) + elif data["type"] == "module": + name = f"{data['name']}_{data['i']}" + G.add_node(name, label=get_module_label(data), **module_style) + + for k, v in data["in"].items(): + kwargs = {} + if include_labels: + kwargs["headlabel"] = k + G.add_edge(v, name, **edge_style, **kwargs) + for k, v in data["out"].items(): + kwargs = {} + if include_labels: + kwargs["taillabel"] = k + G.add_edge(name, v, **edge_style, **kwargs) + else: + raise ValueError("type should be 'subgraph' or 'module'") - # unique name - name = f"{module['name']}_{module['i']}" - - G.add_node( - name, shape="rectangle", fillcolor="chartreuse", style="filled", label=label - ) - - for k, v in module["in"].items(): - kwargs = {} - if include_labels: - kwargs["headlabel"] = k - G.add_edge(v, name, **kwargs) - for k, v in module["out"].items(): - kwargs = {} - if include_labels: - kwargs["taillabel"] = k - G.add_edge(name, v, **kwargs) - - # set defaults - G.graph["graph"] = {"rankdir": "TD"} - G.graph["node"] = {"shape": "oval", "fillcolor": "orange", "style": "filled"} - G.graph["edge"] = {"fontcolor": "gray50"} - - A = to_agraph(G) - if include_subgraphs: - for idx, subgraph in enumerate(subgraphs): - H = A.subgraph( - subgraph["modules"], - name=f'cluster_{idx}_{subgraph["name"].lower().replace(" ", "_")}', - ) - H.graph_attr["color"] = "blue" - H.graph_attr["label"] = subgraph["name"] - H.graph_attr["style"] = "dotted" - - A.layout("dot") - A.draw(output_file) - - -if __name__ == "__main__": - data_path = Path("<...>") - - input_file = data_path / "pipeline_self_reference_unversioned.json" - output_file = "popmon-report-pipeline-subgraphs-unversioned.pdf" - generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) + g = pgv.AGraph(name="popmon-pipeline", directed=True) + g.node_attr.update(**dataset_style) + process(data, g) - input_file = data_path / "pipeline_self_reference_unversioned.json" - output_file = "popmon-report-pipeline-unversioned.pdf" - generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False) + g.layout("dot") + g.draw(output_file) - input_file = data_path / "pipeline_self_reference_versioned.json" - output_file = "popmon-report-pipeline-subgraphs-versioned.pdf" - generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=True) - input_file = data_path / "pipeline_self_reference_versioned.json" - output_file = "popmon-report-pipeline-versioned.pdf" - generate_pipeline_vizualisation(input_file, output_file, include_subgraphs=False) +if __name__ == "__main__": + data_path = Path(".") + + # Example pipeline + from popmon import resources + from popmon.config import config + from popmon.pipeline.amazing_pipeline import AmazingPipeline + + cfg = { + **config, + "histograms_path": resources.data("synthetic_histograms.json"), + "hists_key": "hists", + "ref_hists_key": "hists", + "datetime_name": "date", + "window": 20, + "shift": 1, + "monitoring_rules": { + "*_pull": [7, 4, -4, -7], + "*_zscore": [7, 4, -4, -7], + }, + "pull_rules": {"*_pull": [7, 4, -4, -7]}, + "show_stats": config["limited_stats"], + } + + pipeline = AmazingPipeline(**cfg) + name = pipeline.__class__.__name__.lower() + + input_file = data_path / f"pipeline_{name}_unversioned.json" 
+ pipeline.to_json(input_file, versioned=False) + output_file = f"pipeline_{name}_subgraphs_unversioned.pdf" + generate_pipeline_visualisation(input_file, output_file, include_subgraphs=True) + output_file = f"pipeline_{name}_unversioned.pdf" + generate_pipeline_visualisation(input_file, output_file, include_subgraphs=False) + + input_file = data_path / f"pipeline_{name}_versioned.json" + pipeline.to_json(input_file, versioned=True) + output_file = f"pipeline_{name}_subgraphs_versioned.pdf" + generate_pipeline_visualisation(input_file, output_file, include_subgraphs=True) + output_file = f"pipeline_{name}_versioned.pdf" + generate_pipeline_visualisation(input_file, output_file, include_subgraphs=False) From 1f6c6488c64c8efe0c7c7561214340ed613025d6 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Tue, 26 Oct 2021 12:22:27 +0200 Subject: [PATCH 25/34] style: various code style improvements --- popmon/analysis/merge_statistics.py | 12 ++++++------ popmon/io/file_writer.py | 14 +++++++------- popmon/pipeline/report.py | 13 +++++++++---- 3 files changed, 22 insertions(+), 17 deletions(-) diff --git a/popmon/analysis/merge_statistics.py b/popmon/analysis/merge_statistics.py index 232f8b98..bcffa833 100644 --- a/popmon/analysis/merge_statistics.py +++ b/popmon/analysis/merge_statistics.py @@ -44,14 +44,14 @@ def __init__(self, read_keys: List[str], store_key: str): def transform(self, dicts: list): merged_stats = {} for dict_ in dicts: - for feature in dict_.keys(): - # we add statistics dataframe to the final output for specific feature however - # if the feature already exists - we concatenate its dataframe with the existing one - if isinstance(dict_[feature], pd.DataFrame): + for feature, values in dict_.items(): + if isinstance(values, pd.DataFrame): + # we add statistics dataframe to the final output for specific feature however + # if the feature already exists - we concatenate its dataframe with the existing one if feature in merged_stats: merged_stats[feature] = merged_stats[feature].combine_first( - dict_[feature] + values ) else: - merged_stats[feature] = dict_[feature] + merged_stats[feature] = values return merged_stats diff --git a/popmon/io/file_writer.py b/popmon/io/file_writer.py index 2bbe37c0..c7455bae 100644 --- a/popmon/io/file_writer.py +++ b/popmon/io/file_writer.py @@ -72,10 +72,10 @@ def transform(self, data): # if file path is provided, write data to a file. Otherwise, write data into the datastore if self.file_path is None: return data - else: - with open(self.file_path, "w+") as file: - file.write(data) - self.logger.info( - f'Object "{self.read_key}" written to file "{self.file_path}".' - ) - return None + + with open(self.file_path, "w+") as file: + file.write(data) + self.logger.info( + f'Object "{self.read_key}" written to file "{self.file_path}".' + ) + return None diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index 86e5e5e1..dc5259cf 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -418,12 +418,14 @@ def __init__(self, read_key="html_report"): """ super().__init__() self.read_key = read_key - self.html_report = "" self.datastore = {} + @property + def html_report(self): + return self.get_datastore_object(self.datastore, self.read_key, str) + def transform(self, datastore): self.datastore = datastore - self.html_report = self.get_datastore_object(datastore, self.read_key, str) def _repr_html_(self): """HTML representation of the class (report) embedded in an iframe. 
@@ -444,9 +446,12 @@ def to_html(self, escape=False):
         :param bool escape: escape characters which could conflict with other HTML code. default: False
         :return str: HTML code of the report
         """
-        import html
 
-        return html.escape(self.html_report) if escape else self.html_report
+        if escape:
+            import html
+
+            return html.escape(self.html_report)
+        return self.html_report
 
     def to_file(self, filename):
         """Store HTML report in the local file system.

From 913bfb0aec607ea68567ecc71e65cfae7c86ff75 Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Tue, 26 Oct 2021 12:22:54 +0200
Subject: [PATCH 26/34] docs: pipeline visualizations in docs and notebooks

---
 README.rst                                    | 14 +++
 docs/source/assets/pipeline.png               | Bin 0 -> 27222 bytes
 .../notebooks/popmon_tutorial_advanced.ipynb  | 82 ++++++++++++------
 3 files changed, 68 insertions(+), 28 deletions(-)
 create mode 100644 docs/source/assets/pipeline.png

diff --git a/README.rst b/README.rst
index 994a531b..620c4158 100644
--- a/README.rst
+++ b/README.rst
@@ -157,6 +157,17 @@ These examples also work with spark dataframes. You can see the output of such
 example notebook code `here `_. For all available examples, please see the `tutorials `_ at read-the-docs.
 
+Pipelines for monitoring dataset shift
+======================================
+Advanced users can leverage popmon's modular data pipeline to customize their workflow.
+Visualization of the pipeline can be useful when debugging, or for didactic purposes.
+There is a `script <https://github.com/ing-bank/popmon/tree/master/tools/>`_ included with the package that you can use.
+The plotting is configurable, and depending on the options you will obtain a result that can be used for understanding the data flow, the high-level components and the (re)use of datasets.
+
+|pipeline|
+
+*Example pipeline visualization (click to enlarge)*
+
 Resources
 =========
 
@@ -214,6 +225,9 @@ Copyright ING WBAA. `popmon` is completely free, open-source and licensed under
 .. |example| image:: https://raw.githubusercontent.com/ing-bank/popmon/master/docs/source/assets/traffic_light_overview.png
     :alt: Traffic Light Overview
+.. |pipeline| image:: https://raw.githubusercontent.com/ing-bank/popmon/master/docs/source/assets/pipeline.png
+    :alt: Pipeline Visualization
+    :target: https://github.com/ing-bank/popmon/files/7417124/pipeline_amazingpipeline_subgraphs_unversioned.pdf
 .. |build| image:: https://github.com/ing-bank/popmon/workflows/build/badge.svg
     :alt: Build status
 .. |docs| image:: https://readthedocs.org/projects/popmon/badge/?version=latest
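
A minimal sketch of how the tools script might be invoked on a serialized
pipeline, assuming tools/ is on the import path; the file names below are
illustrative, while the function and its keyword arguments are the ones
introduced in PATCH 24:

    from pipeline_viz import generate_pipeline_visualisation

    # render a serialized pipeline with subgraph clusters and edge labels
    generate_pipeline_visualisation(
        "pipeline_selfreference_unversioned.json",
        "pipeline.pdf",
        include_subgraphs=True,
        include_labels=True,
    )
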
diff --git a/docs/source/assets/pipeline.png b/docs/source/assets/pipeline.png
new file mode 100644
index 0000000000000000000000000000000000000000..784c2cdf7b0a13d002cc1e522258cb407c380fe9
GIT binary patch
literal 27222
[27222 bytes of base85-encoded PNG data omitted]

diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb
[earlier hunks of this notebook diff were lost in extraction; the recoverable tail follows]
+    "There is a `script <https://github.com/ing-bank/popmon/tree/master/tools/>`_ included with the package that you can use.\n",
+    "The plotting is configurable, and depending on the options you will obtain a result that can be used for understanding the data flow, the high-level components and the (re)use of datasets.\n",
+    "The parameters are: subgraph (yes/no), version datasets (yes/no) and display edge labels (yes/no)."
+   ]
  }
 ],
 "metadata": {
@@ -470,7 +496,7 @@
    "name": "python3"
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -484,7 +510,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.6"
+   "version": "3.8.8"
   },
   "nteract": {
    "version": "0.15.0"

From b141965fcf71e90215ab73c89945acb70bcce738 Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Wed, 27 Oct 2021 23:32:15 +0200
Subject: [PATCH 27/34] refactor: the StabilityReport is not a module

It does not transform. Rather it allows rendering to notebook/html.
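
A minimal sketch of the new usage, mirroring the updated unit tests (the
output filename is illustrative):

    from popmon import resources
    from popmon.base import Pipeline
    from popmon.io import JsonReader
    from popmon.pipeline.report import StabilityReport
    from popmon.pipeline.report_pipelines import SelfReference

    # run a report pipeline to fill the datastore, including "html_report"
    pipeline = Pipeline(
        modules=[
            JsonReader(
                file_path=resources.data("example_histogram.json"),
                store_key="hists",
            ),
            SelfReference(hists_key="hists"),
        ]
    )
    datastore = pipeline.transform(datastore={})

    # StabilityReport no longer transforms the datastore; it only renders it
    report = StabilityReport(datastore)
    report.to_file("report.html")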
--- .../notebooks/popmon_tutorial_advanced.ipynb | 30 +++++++++---------- popmon/pipeline/report.py | 27 +++++++---------- 2 files changed, 25 insertions(+), 32 deletions(-) diff --git a/popmon/notebooks/popmon_tutorial_advanced.ipynb b/popmon/notebooks/popmon_tutorial_advanced.ipynb index fcfb6922..65c3d6ba 100644 --- a/popmon/notebooks/popmon_tutorial_advanced.ipynb +++ b/popmon/notebooks/popmon_tutorial_advanced.ipynb @@ -4,7 +4,6 @@ "cell_type": "code", "execution_count": null, "metadata": { - "collapsed": false, "jupyter": { "outputs_hidden": false }, @@ -390,10 +389,9 @@ " \"*_zscore\": [7, 4, -4, -7],\n", " \"[!p]*_unknown_labels\": [0.5, 0.5, 0, 0],\n", "}\n", - "datastore = {}\n", - "datastore[\"hists\"] = df.pm_make_histograms(\n", - " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n", - ")\n", + "datastore = {\n", + " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n", + "}\n", "\n", "\n", "class CustomPipeline(Pipeline):\n", @@ -410,9 +408,9 @@ "\n", "\n", "pipeline = CustomPipeline()\n", + "datastore = pipeline.transform(datastore)\n", "\n", - "stability_report = StabilityReport()\n", - "stability_report.transform(pipeline.transform(datastore))\n", + "stability_report = StabilityReport(datastore)\n", "stability_report" ] }, @@ -431,10 +429,10 @@ "source": [ "from popmon.analysis.comparison.hist_comparer import ReferenceHistComparer\n", "\n", - "datastore = {}\n", - "datastore[\"hists\"] = df.pm_make_histograms(\n", - " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n", - ")\n", + "\n", + "datastore = {\n", + " \"hists\": df.pm_make_histograms(time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\")\n", + "}\n", "\n", "\n", "class CustomComparisonsPipeline(Pipeline):\n", @@ -459,9 +457,9 @@ "\n", " \n", "pipeline = CustomComparisonsPipeline()\n", + "datastore = pipeline.transform(datastore)\n", "\n", - "stability_report = StabilityReport()\n", - "stability_report.transform(pipeline.transform(datastore))\n", + "stability_report = StabilityReport(datastore)\n", "stability_report" ] }, @@ -485,7 +483,7 @@ "(Click to enlarge)\n", "\n", "Visualization of the pipeline can be useful when debugging, or for didactic purposes.\n", - "There is a `script `_ included with the package that you can use.\n", + "There is a [script](https://github.com/ing-bank/popmon/tree/master/tools/) included with the package that you can use.\n", "The plotting is configurable, and depending on the options you will obtain a result that can be used for understanding the data flow, the high-level components and the (re)use of datasets.\n", "The parameters are: subgraph (yes/no), version datasets (yes/no) and display edge labels (yes/no)." 
] @@ -496,7 +494,7 @@ "name": "python3" }, "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "Python 3", "language": "python", "name": "python3" }, @@ -510,7 +508,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.9.7" }, "nteract": { "version": "0.15.0" diff --git a/popmon/pipeline/report.py b/popmon/pipeline/report.py index dc5259cf..c00746fd 100644 --- a/popmon/pipeline/report.py +++ b/popmon/pipeline/report.py @@ -27,7 +27,6 @@ make_histograms, ) -from ..base import Module from ..config import config from ..pipeline.report_pipelines import ReportPipe, get_report_pipeline_class from ..resources import templates_env @@ -160,8 +159,9 @@ def stability_report( # execute reporting pipeline pipeline = get_report_pipeline_class(reference_type, reference)(**cfg) - stability_report = StabilityReport() - stability_report.transform(pipeline.transform(datastore)) + result = pipeline.transform(datastore) + + stability_report = StabilityReport(datastore=result) return stability_report @@ -400,7 +400,7 @@ def df_stability_report( ) -class StabilityReport(Module): +class StabilityReport: """Representation layer of the report. Stability report module wraps the representation functionality of the report @@ -408,24 +408,18 @@ class StabilityReport(Module): as a HTML string, HTML file or Jupyter notebook's cell output. """ - _input_keys = ("read_key",) - _output_keys = () - - def __init__(self, read_key="html_report"): + def __init__(self, datastore, read_key="html_report"): """Initialize an instance of StabilityReport. :param str read_key: key of HTML report data to read from data store. default is html_report. """ - super().__init__() self.read_key = read_key - self.datastore = {} + self.datastore = datastore + self.logger = logging.getLogger() @property def html_report(self): - return self.get_datastore_object(self.datastore, self.read_key, str) - - def transform(self, datastore): - self.datastore = datastore + return self.datastore[self.read_key] def _repr_html_(self): """HTML representation of the class (report) embedded in an iframe. 
@@ -531,6 +525,7 @@ def regenerate( report_filepath=report_filepath, show_stats=show_stats, ) - stability_report = StabilityReport() - stability_report.transform(pipeline.transform(self.datastore)) + result = pipeline.transform(self.datastore) + + stability_report = StabilityReport(datastore=result) return stability_report From 678145909fef37fdd9bbf72f3cb7ccbe91ae494b Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 27 Oct 2021 23:34:04 +0200 Subject: [PATCH 28/34] refactor: remove dead code --- popmon/analysis/apply_func.py | 1 - popmon/analysis/profiling/hist_profiler.py | 1 - popmon/stats/numpy.py | 4 ---- popmon/visualization/backend.py | 5 ----- popmon/visualization/histogram_section.py | 1 - tests/popmon/stats/test_numpy.py | 1 - 6 files changed, 13 deletions(-) diff --git a/popmon/analysis/apply_func.py b/popmon/analysis/apply_func.py index 37a4296c..00664b77 100644 --- a/popmon/analysis/apply_func.py +++ b/popmon/analysis/apply_func.py @@ -288,7 +288,6 @@ def apply_func(feature, selected_metrics, df, arr): else selected_metrics ) metrics = [m for m in metrics if m in df.columns] - # assert all(m in df.columns for m in metrics) if len(metrics) == 0: return {} df = df[metrics] if len(metrics) >= 2 else df[metrics[0]] diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py index 86c63ff3..3f1f762e 100644 --- a/popmon/analysis/profiling/hist_profiler.py +++ b/popmon/analysis/profiling/hist_profiler.py @@ -171,7 +171,6 @@ def _profile_2d_histogram(self, name, hist): # calculate phik correlation try: phi_k = phik.phik_from_hist2d(observed=grid) - # p, Z = significance.significance_from_hist2d(values=grid, significance_method='asymptotic') except ValueError: self.logger.debug( f"Not enough values in the 2d `{name}` time-split histogram to apply the phik test." diff --git a/popmon/stats/numpy.py b/popmon/stats/numpy.py index a3718b74..e5a7cbd7 100644 --- a/popmon/stats/numpy.py +++ b/popmon/stats/numpy.py @@ -458,8 +458,4 @@ def mad(a, c=0.6745, axis=0): center = a.median(axis=axis) rel_abs_diff = (a - center).abs() / c mad = rel_abs_diff.median(axis=axis) - - # mad = np.median((np.abs(a-center)) / c, axis=axis) - # if isinstance(a, pd.DataFrame): - # mad = pd.Series(data=mad, index=a.columns) return mad diff --git a/popmon/visualization/backend.py b/popmon/visualization/backend.py index f209da95..48ce5ab5 100644 --- a/popmon/visualization/backend.py +++ b/popmon/visualization/backend.py @@ -107,11 +107,6 @@ def set_matplotlib_backend(backend=None, batch=None, silent=True): raise RuntimeError( "Cannot set Matplotlib backend: pyplot module already loaded." ) - # Warning is too verbose - # else: - # logger.warning( - # "Cannot set Matplotlib backend: pyplot module already loaded." 
- # ) return # set matplotlib backend diff --git a/popmon/visualization/histogram_section.py b/popmon/visualization/histogram_section.py index 2f685ac2..4611aa4d 100644 --- a/popmon/visualization/histogram_section.py +++ b/popmon/visualization/histogram_section.py @@ -184,7 +184,6 @@ def _plot_histograms(feature, date, hc_list, hist_names): hists, feature, hist_names, y_label, is_num, is_ts ) elif hc_list[0].n_dim == 2: - # grid2d_list, xkeys, ykeys = get_consistent_numpy_2dgrids(hc_list, get_bin_labels=True) plot = "" else: plot = "" diff --git a/tests/popmon/stats/test_numpy.py b/tests/popmon/stats/test_numpy.py index 3cf15ff7..0a382cda 100644 --- a/tests/popmon/stats/test_numpy.py +++ b/tests/popmon/stats/test_numpy.py @@ -255,7 +255,6 @@ def test_probability_distribution_mean_covariance(): n_histos = 5000 max_hist_entries = 10000 rel_error = 0.1 - # basic = np.random.uniform(0, 1, size=n_bins) bin_entries = [] for k in range(n_histos): bin_probs = np.random.normal(1.0, rel_error, size=n_bins) # + basic From 8efc072468be407eb2fa7dab6a94be3da6e8d782 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 27 Oct 2021 23:35:09 +0200 Subject: [PATCH 29/34] refactor: move visualization code --- popmon/base/pipeline.py | 63 -------------------------------- tools/pipeline_viz.py | 81 ++++++++++++++++++++++++++++++++++++----- 2 files changed, 72 insertions(+), 72 deletions(-) diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py index ae18013c..9afef85e 100644 --- a/popmon/base/pipeline.py +++ b/popmon/base/pipeline.py @@ -17,10 +17,8 @@ # IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -import json import logging from abc import ABC -from pathlib import Path class Pipeline(ABC): @@ -73,64 +71,3 @@ def transform(self, datastore): else: datastore = module._transform(datastore) return datastore - - def visualize(self, versioned=True, funcs=None, dsets=None): - if dsets is None: - dsets = {} - if funcs is None: - funcs = {} - - modules = [] - for module in self.modules: - name = module.__class__.__name__ - if isinstance(module, Pipeline): - modules.append(module.visualize(versioned, funcs, dsets)) - else: - in_keys = module.get_inputs() - - if versioned: - new_ins = {} - for k, in_key in in_keys.items(): - if in_key not in dsets: - dsets[in_key] = 1 - in_key += f" (v{dsets[in_key]})" - new_ins[k] = in_key - in_keys = new_ins - - out_keys = module.get_outputs() - if versioned: - new_outs = {} - for k, out_key in out_keys.items(): - if out_key in dsets: - dsets[out_key] += 1 - else: - dsets[out_key] = 1 - out_key += f" (v{dsets[out_key]})" - new_outs[k] = out_key - out_keys = new_outs - - self.logger.debug(f"{name}(inputs={in_keys}, outputs={out_keys})") - - # add unique id - if name not in funcs: - funcs[name] = {} - if id(module) not in funcs[name]: - funcs[name][id(module)] = len(funcs[name]) + 1 - - modules.append( - { - "type": "module", - "name": f"{name}", - "i": f"{funcs[name][id(module)]}", - "desc": module.get_description(), - "in": in_keys, - "out": out_keys, - } - ) - data = {"type": "subgraph", "name": self.__class__.__name__, "modules": modules} - return data - - def to_json(self, file_name, versioned=True): - d = self.visualize(versioned=versioned) - data = json.dumps(d, indent=4, sort_keys=True) - Path(file_name).write_text(data) diff --git a/tools/pipeline_viz.py b/tools/pipeline_viz.py index d6f53765..64919586 100644 --- a/tools/pipeline_viz.py +++ b/tools/pipeline_viz.py 
@@ -4,6 +4,70 @@ import pygraphviz as pgv +from popmon.base import Pipeline + + +def serialize_module(module, versioned, funcs, dsets): + in_keys = module.get_inputs() + name = module.__class__.__name__ + + if versioned: + new_ins = {} + for k, in_key in in_keys.items(): + if in_key not in dsets: + dsets[in_key] = 1 + in_key += f" (v{dsets[in_key]})" + new_ins[k] = in_key + in_keys = new_ins + + out_keys = module.get_outputs() + if versioned: + new_outs = {} + for k, out_key in out_keys.items(): + if out_key in dsets: + dsets[out_key] += 1 + else: + dsets[out_key] = 1 + out_key += f" (v{dsets[out_key]})" + new_outs[k] = out_key + out_keys = new_outs + + # add unique id + if name not in funcs: + funcs[name] = {} + if id(module) not in funcs[name]: + funcs[name][id(module)] = len(funcs[name]) + 1 + + return { + "type": "module", + "name": f"{name}", + "i": f"{funcs[name][id(module)]}", + "desc": module.get_description(), + "in": in_keys, + "out": out_keys, + } + + +def serialize_pipeline(pipeline, versioned=True, funcs=None, dsets=None): + if dsets is None: + dsets = {} + if funcs is None: + funcs = {} + + modules = [] + for module in pipeline.modules: + if isinstance(module, Pipeline): + modules.append(serialize_pipeline(module, versioned, funcs, dsets)) + else: + modules.append(serialize_module(module, versioned, funcs, dsets)) + return {"type": "pipeline", "name": pipeline.__class__.__name__, "modules": modules} + + +def pipeline_to_json(pipeline, file_name, versioned=True): + d = serialize_pipeline(pipeline, versioned=versioned) + data = json.dumps(d, indent=4, sort_keys=True) + Path(file_name).write_text(data) + def generate_pipeline_visualisation( input_file, @@ -38,10 +102,10 @@ def generate_pipeline_visualisation( ] colors = [f"#{r:02x}{g:02x}{b:02x}" for r, g, b in tableau20] - subgraph_colors = cycle(colors) + pipeline_colors = cycle(colors) + pipeline_style = {} module_style = {"shape": "rectangle", "fillcolor": "chartreuse", "style": "filled"} dataset_style = {"shape": "oval", "fillcolor": "orange", "style": "filled"} - subgraph_style = {} edge_style = {"fontcolor": "gray50"} def get_module_label(module): @@ -53,13 +117,13 @@ def get_module_label(module): return label def process(data, G): - if data["type"] == "subgraph": + if data["type"] == "pipeline": if include_subgraphs: c = G.add_subgraph( name=f'cluster_{data["name"]}', label=data["name"], - color=next(subgraph_colors), - **subgraph_style, + color=next(pipeline_colors), + **pipeline_style, ) else: c = G @@ -80,12 +144,11 @@ def process(data, G): kwargs["taillabel"] = k G.add_edge(name, v, **edge_style, **kwargs) else: - raise ValueError("type should be 'subgraph' or 'module'") + raise ValueError("type should be 'pipeline' or 'module'") g = pgv.AGraph(name="popmon-pipeline", directed=True) g.node_attr.update(**dataset_style) process(data, g) - g.layout("dot") g.draw(output_file) @@ -118,14 +181,14 @@ def process(data, G): name = pipeline.__class__.__name__.lower() input_file = data_path / f"pipeline_{name}_unversioned.json" - pipeline.to_json(input_file, versioned=False) + pipeline_to_json(pipeline, input_file, versioned=False) output_file = f"pipeline_{name}_subgraphs_unversioned.pdf" generate_pipeline_visualisation(input_file, output_file, include_subgraphs=True) output_file = f"pipeline_{name}_unversioned.pdf" generate_pipeline_visualisation(input_file, output_file, include_subgraphs=False) input_file = data_path / f"pipeline_{name}_versioned.json" - pipeline.to_json(input_file, versioned=True) + 
pipeline_to_json(pipeline, input_file, versioned=True) output_file = f"pipeline_{name}_subgraphs_versioned.pdf" generate_pipeline_visualisation(input_file, output_file, include_subgraphs=True) output_file = f"pipeline_{name}_versioned.pdf" From 871d2c6bd986532be35fb4180a3215ced660fb59 Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 27 Oct 2021 23:37:38 +0200 Subject: [PATCH 30/34] test: remove boilerplate code --- tests/popmon/base/test_module.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/popmon/base/test_module.py b/tests/popmon/base/test_module.py index 3c25d080..c5322b73 100644 --- a/tests/popmon/base/test_module.py +++ b/tests/popmon/base/test_module.py @@ -5,6 +5,9 @@ def test_popmon_module(): class Scaler(Module): + _input_keys = ("input_key",) + _output_keys = ("output_key",) + def __init__(self, input_key, output_key, mean, std): super().__init__() self.input_key = input_key @@ -12,16 +15,12 @@ def __init__(self, input_key, output_key, mean, std): self.mean = mean self.std = std - def transform(self, datastore): - input_array = self.get_datastore_object( - datastore, self.input_key, dtype=np.ndarray - ) + def transform(self, input_array: np.ndarray): res = input_array - np.mean(input_array) res = res / np.std(res) res = res * self.std res = res + self.mean - datastore[self.output_key] = res - return datastore + return res test_module = Scaler(input_key="x", output_key="scaled_x", mean=2.0, std=0.3) From c3bffb0e97a199d37d00f21d0233a51049389c4d Mon Sep 17 00:00:00 2001 From: Simon Brugman Date: Wed, 27 Oct 2021 23:41:58 +0200 Subject: [PATCH 31/34] refactor: module transform rather than _transform Refactor using metaclass --- popmon/base/module.py | 133 ++++++++++-------- popmon/base/pipeline.py | 5 +- popmon/io/json_reader.py | 2 +- .../popmon/alerting/test_compute_tl_bounds.py | 4 +- .../analysis/profiling/test_apply_func.py | 2 +- .../popmon/analysis/test_merge_statistics.py | 2 +- tests/popmon/io/test_file_reader.py | 2 +- tests/popmon/io/test_file_writer.py | 12 +- tests/popmon/io/test_json_reader.py | 2 +- 9 files changed, 91 insertions(+), 73 deletions(-) diff --git a/popmon/base/module.py b/popmon/base/module.py index de24b50d..a56e02dc 100644 --- a/popmon/base/module.py +++ b/popmon/base/module.py @@ -19,10 +19,73 @@ import logging -from abc import ABC +from abc import ABCMeta +from functools import wraps -class Module(ABC): +def datastore_helper(func): + """Decorator for passing and storing only the relevant keys in the datastore to + the transform() method.""" + + @wraps(func) + def _transform(self, datastore): + """Transformation helper function""" + inputs = {} + self.logger.debug(f"load from: {type(self)}") + for key in self._input_keys: + key_value = self.__dict__[key] + if key_value and len(key_value) > 0: + if isinstance(key_value, list): + inputs[key] = [datastore.get(k) for k in key_value] + else: + inputs[key] = datastore.get(key_value) + else: + inputs[key] = None + + self.logger.debug( + f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})" + ) + + # transformation + outputs = func(self, *list(inputs.values())) + + # transform returns None if no update needs to be made + if outputs is not None: + if len(self._output_keys) == 1: + outputs = (outputs,) + + for k, v in zip(self._output_keys, outputs): + key_value = self.__dict__[k] + self.logger.debug( + f"store(key={k}, key_value={key_value}, value={str(v):.100s})" + ) + if key_value and len(key_value) > 0: + datastore[key_value] = v + + return 
From c3bffb0e97a199d37d00f21d0233a51049389c4d Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Wed, 27 Oct 2021 23:41:58 +0200
Subject: [PATCH 31/34] refactor: module transform rather than _transform

Refactor using metaclass
---
 popmon/base/module.py                         | 133 ++++++++++--------
 popmon/base/pipeline.py                       |   5 +-
 popmon/io/json_reader.py                      |   2 +-
 .../popmon/alerting/test_compute_tl_bounds.py |   4 +-
 .../analysis/profiling/test_apply_func.py     |   2 +-
 .../popmon/analysis/test_merge_statistics.py  |   2 +-
 tests/popmon/io/test_file_reader.py           |   2 +-
 tests/popmon/io/test_file_writer.py           |  12 +-
 tests/popmon/io/test_json_reader.py           |   2 +-
 9 files changed, 91 insertions(+), 73 deletions(-)

diff --git a/popmon/base/module.py b/popmon/base/module.py
index de24b50d..a56e02dc 100644
--- a/popmon/base/module.py
+++ b/popmon/base/module.py
@@ -19,10 +19,73 @@
 
 
 import logging
-from abc import ABC
+from abc import ABCMeta
+from functools import wraps
 
 
-class Module(ABC):
+def datastore_helper(func):
+    """Decorator for passing and storing only the relevant keys in the datastore to
+    the transform() method."""
+
+    @wraps(func)
+    def _transform(self, datastore):
+        """Transformation helper function"""
+        inputs = {}
+        self.logger.debug(f"load from: {type(self)}")
+        for key in self._input_keys:
+            key_value = self.__dict__[key]
+            if key_value and len(key_value) > 0:
+                if isinstance(key_value, list):
+                    inputs[key] = [datastore.get(k) for k in key_value]
+                else:
+                    inputs[key] = datastore.get(key_value)
+            else:
+                inputs[key] = None
+
+            self.logger.debug(
+                f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})"
+            )
+
+        # transformation
+        outputs = func(self, *list(inputs.values()))
+
+        # transform returns None if no update needs to be made
+        if outputs is not None:
+            if len(self._output_keys) == 1:
+                outputs = (outputs,)
+
+            for k, v in zip(self._output_keys, outputs):
+                key_value = self.__dict__[k]
+                self.logger.debug(
+                    f"store(key={k}, key_value={key_value}, value={str(v):.100s})"
+                )
+                if key_value and len(key_value) > 0:
+                    datastore[key_value] = v
+
+        return datastore
+
+    return _transform
+
+
+class ModuleMetaClass(type):
+    """Metaclass that wraps all transform() methods using the datastore_helper.
+    This obviates the need to decorate all methods in subclasses."""
+
+    def __new__(cls, name, bases, local):
+        if "transform" in local:
+            value = local["transform"]
+            if callable(value):
+                local["transform"] = datastore_helper(value)
+        return type.__new__(cls, name, bases, local)
+
+
+def combine_classes(*args):
+    """Combine multiple metaclasses"""
+    name = "".join(a.__name__ for a in args)
+    return type(name, args, {})
+
+
+class Module(metaclass=combine_classes(ABCMeta, ModuleMetaClass)):
     """Abstract base class used for modules in a pipeline."""
 
     _input_keys = None
@@ -35,23 +98,21 @@ def __init__(self):
         self.feature_begins_with = []
         self.ignore_features = []
 
+    def _get_values(self, keys):
+        """Get the class attribute values for certain keys."""
+        values = {}
+        for x in keys:
+            value = self.__dict__[x]
+            if value != "" and value is not None and value not in values:
+                values[x] = value
+        return values
+
     def get_inputs(self):
-        in_keys = {}
-        for x in self._input_keys:
-            in_key = self.__dict__[x]
-            if in_key != "" and in_key is not None and in_key not in in_keys:
-                in_keys[x] = in_key
-        return in_keys
+        return self._get_values(self._input_keys)
 
     def get_outputs(self):
-        out_keys = {}
-        for x in self._output_keys:
-            out_key = self.__dict__[x]
-            if out_key != "" and out_key is not None and out_key not in out_keys:
-                out_keys[x] = out_key
-        return out_keys
-
-    # @abstractmethod
+        return self._get_values(self._output_keys)
+
     def get_description(self):
         return ""
 
@@ -110,46 +171,6 @@ def get_features(self, all_features: list) -> list:
         features = [feature for feature in features if feature in all_features]
         return features
 
-    def _transform(self, datastore):
-        """Transformation helper function"""
-
-        inputs = {}
-        self.logger.debug(f"load from: {type(self)}")
-        for key in self._input_keys:
-            key_value = self.__dict__[key]
-            if key_value and len(key_value) > 0:
-                if isinstance(key_value, list):
-                    inputs[key] = [datastore.get(k) for k in key_value]
-                else:
-                    inputs[key] = datastore.get(key_value)
-            else:
-                inputs[key] = None
-
-            self.logger.debug(
-                f"load(key={key}, key_value={key_value}, value={str(inputs[key]):.100s})"
-            )
-
-        # cache datastore
-        self._datastore = datastore
-
-        # transformation
-        outputs = self.transform(*list(inputs.values()))
-
-        # transform returns None if no update needs to be made
-        if outputs is not None:
-            if len(self._output_keys) == 1:
-                outputs = (outputs,)
-
-            for k, v in zip(self._output_keys, outputs):
-                key_value = self.__dict__[k]
-                self.logger.debug(
-                    f"store(key={k}, key_value={key_value}, value={str(v):.100s})"
-                )
-                if key_value and len(key_value) > 0:  # and v is not None:
-                    datastore[key_value] = v
-
-        return datastore
-
     def transform(self, *args):
         """Central function of the module.
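The mechanism above, in isolation: the metaclass rewrites the class namespace at class-creation time, so any class that defines `transform()` gets the wrapper applied automatically, without decorating each subclass by hand. A self-contained sketch of the same pattern (independent of popmon):

```python
from functools import wraps


def log_calls(func):
    """Wrap a method so each call is announced."""
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        print(f"calling {type(self).__name__}.{func.__name__}")
        return func(self, *args, **kwargs)
    return wrapper


class WrapTransform(type):
    """Wrap 'transform' in every class created with this metaclass."""
    def __new__(cls, name, bases, local):
        if "transform" in local and callable(local["transform"]):
            local["transform"] = log_calls(local["transform"])
        return type.__new__(cls, name, bases, local)


class Doubler(metaclass=WrapTransform):
    def transform(self, x):
        return 2 * x


print(Doubler().transform(21))  # prints "calling Doubler.transform", then 42
```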
diff --git a/popmon/base/pipeline.py b/popmon/base/pipeline.py
index 9afef85e..bf34f253 100644
--- a/popmon/base/pipeline.py
+++ b/popmon/base/pipeline.py
@@ -66,8 +66,5 @@ def transform(self, datastore):
 
         for module in self.modules:
             self.logger.debug(f"transform {module.__class__.__name__}")
-            if isinstance(module, Pipeline):
-                datastore = module.transform(datastore)
-            else:
-                datastore = module._transform(datastore)
+            datastore = module.transform(datastore)
         return datastore
diff --git a/popmon/io/json_reader.py b/popmon/io/json_reader.py
index aaf0c492..80f5ba16 100644
--- a/popmon/io/json_reader.py
+++ b/popmon/io/json_reader.py
@@ -37,4 +37,4 @@ def __init__(self, file_path: Union[str, Path], store_key: str):
         super().__init__(store_key, file_path, apply_func=json.loads)
 
     def transform(self, *args):
-        return super().transform(*args)
+        return super().transform.__wrapped__(self, *args)
diff --git a/tests/popmon/alerting/test_compute_tl_bounds.py b/tests/popmon/alerting/test_compute_tl_bounds.py
index b2211866..9e97ded3 100644
--- a/tests/popmon/alerting/test_compute_tl_bounds.py
+++ b/tests/popmon/alerting/test_compute_tl_bounds.py
@@ -35,7 +35,7 @@ def test_compute_traffic_light_bounds():
         monitoring_rules=conf["monitoring_rules"],
     )
 
-    output = module._transform(datastore)["output_data"]
+    output = module.transform(datastore)["output_data"]
     assert "dummy_feature:mae" not in output.keys()
     assert output["the_feature:mae"] == [8, 4, 2, 2]
     assert output["the_feature:mse"] == [0.2, 0.11, 0.09, 0]
@@ -60,7 +60,7 @@ def test_compute_traffic_light_funcs():
         monitoring_rules=conf["monitoring_rules"],
    )
 
-    output = module._transform(datastore)["output_data"]
+    output = module.transform(datastore)["output_data"]
 
     assert len(output) == 3
     assert output[0]["features"] == ["dummy_feature"]
diff --git a/tests/popmon/analysis/profiling/test_apply_func.py b/tests/popmon/analysis/profiling/test_apply_func.py
index 4adff82e..8a53e87e 100644
--- a/tests/popmon/analysis/profiling/test_apply_func.py
+++ b/tests/popmon/analysis/profiling/test_apply_func.py
@@ -60,7 +60,7 @@ def func(x):
     module.add_apply_func(np.mean, entire=True)
     module.add_apply_func(func)
 
-    datastore = module._transform(datastore)
+    datastore = module.transform(datastore)
 
     p = datastore["profiled"]["asc_numbers"]
diff --git a/tests/popmon/analysis/test_merge_statistics.py b/tests/popmon/analysis/test_merge_statistics.py
index ff474311..cc7c1a54 100644
--- a/tests/popmon/analysis/test_merge_statistics.py
+++ b/tests/popmon/analysis/test_merge_statistics.py
@@ -40,7 +40,7 @@ def test_merge_statistics():
     }
     datastore = MergeStatistics(
         read_keys=["first_df", "second_df"], store_key="output_df"
-    )._transform(datastore)
+    ).transform(datastore)
 
     pd.testing.assert_frame_equal(df1.combine_first(df2), out)
     pd.testing.assert_frame_equal(datastore["output_df"]["feature_1"], out)
diff --git a/tests/popmon/io/test_file_reader.py b/tests/popmon/io/test_file_reader.py
index d953d3d2..9ad91703 100644
--- a/tests/popmon/io/test_file_reader.py
+++ b/tests/popmon/io/test_file_reader.py
@@ -10,7 +10,7 @@ def test_file_reader_json():
         store_key="example",
         apply_func=json.loads,
     )
-    datastore = fr._transform(datastore={})
+    datastore = fr.transform(datastore={})
 
     assert datastore["example"]["boolean"]
     assert len(datastore["example"]["array"]) == 3
diff --git a/tests/popmon/io/test_file_writer.py b/tests/popmon/io/test_file_writer.py
index 7471a067..c00fa308 100644
--- a/tests/popmon/io/test_file_writer.py
+++ b/tests/popmon/io/test_file_writer.py
@@ -23,26 +23,26 @@ def to_pandas(data):
 
 def test_file_writer_json():
     datastore = get_ready_ds()
-    FileWriter("my_data", apply_func=to_json)._transform(datastore)
+    FileWriter("my_data", apply_func=to_json).transform(datastore)
     assert datastore["my_data"] == to_json(DATA)
 
 
 def test_file_writer_json_with_kwargument():
     datastore = get_ready_ds()
-    FileWriter("my_data", apply_func=to_json, indent=4)._transform(datastore)
+    FileWriter("my_data", apply_func=to_json, indent=4).transform(datastore)
     assert datastore["my_data"] == to_json(DATA, indent=4)
 
 
 def test_file_writer_not_a_func():
     datastore = get_ready_ds()
     with pytest.raises(TypeError):
-        FileWriter("my_data", apply_func={})._transform(datastore)
+        FileWriter("my_data", apply_func={}).transform(datastore)
 
 
 def test_file_writer_df():
     datastore = get_ready_ds()
-    FileWriter(
-        "my_data", store_key="transformed_data", apply_func=to_pandas
-    )._transform(datastore)
+    FileWriter("my_data", store_key="transformed_data", apply_func=to_pandas).transform(
+        datastore
+    )
     assert datastore["my_data"] == DATA
     assert datastore["transformed_data"].to_dict() == to_pandas(DATA).to_dict()
diff --git a/tests/popmon/io/test_json_reader.py b/tests/popmon/io/test_json_reader.py
index d47e155b..4a46651b 100644
--- a/tests/popmon/io/test_json_reader.py
+++ b/tests/popmon/io/test_json_reader.py
@@ -4,7 +4,7 @@
 
 def test_json_reader():
     jr = JsonReader(file_path=resources.data("example.json"), store_key="example")
-    datastore = jr._transform(datastore={})
+    datastore = jr.transform(datastore={})
 
     assert datastore["example"]["boolean"]
     assert len(datastore["example"]["array"]) == 3
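One subtlety used by `JsonReader.transform` above: because `datastore_helper` applies `functools.wraps`, the undecorated function stays reachable through the `__wrapped__` attribute, which is how the parent implementation is called without running the datastore plumbing twice. The mechanism in isolation:

```python
from functools import wraps


def add_plumbing(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        print("plumbing runs")
        return func(*args, **kwargs)
    return wrapper


@add_plumbing
def greet(name):
    return f"hello {name}"


print(greet("popmon"))              # "plumbing runs", then "hello popmon"
print(greet.__wrapped__("popmon"))  # bypasses the wrapper entirely
```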
From bbccba83ccf187c674236255935ac43a71a0b396 Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Wed, 27 Oct 2021 23:45:43 +0200
Subject: [PATCH 32/34] chore: clean up

---
 popmon/pipeline/metrics_pipelines.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/popmon/pipeline/metrics_pipelines.py b/popmon/pipeline/metrics_pipelines.py
index 170695af..8c1f26ba 100644
--- a/popmon/pipeline/metrics_pipelines.py
+++ b/popmon/pipeline/metrics_pipelines.py
@@ -70,13 +70,11 @@ def create_metrics_pipeline(
     reference_type="self",
     reference=None,
     hists_key="hists",
-    # ref_hists_key="ref_hists",
     time_axis="",
     window=10,
     monitoring_rules={},
     pull_rules={},
     features=None,
-    # shift=1,
     **kwargs,
 ):
     # configuration and datastore for report pipeline
@@ -87,8 +85,6 @@ def create_metrics_pipeline(
         "monitoring_rules": monitoring_rules,
         "pull_rules": pull_rules,
         "features": features,
-        # "ref_hists_key": ref_hists_key,
-        # "shift": shift,
         **kwargs,
     }
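With the dead parameters gone, the remaining signature is easier to read. A usage sketch under stated assumptions: that `create_metrics_pipeline` returns a regular popmon `Pipeline` (as its name and `Pipeline.transform` above suggest), that `hists` holds precomputed histograms, and that the wildcard rule values are merely illustrative:

```python
from popmon.pipeline.metrics_pipelines import create_metrics_pipeline

# build a self-reference metrics pipeline; the datastore is a plain dict
pipeline = create_metrics_pipeline(
    reference_type="self",
    hists_key="hists",
    time_axis="date",
    window=10,
    monitoring_rules={"*_pull": [7, 4, -4, -7]},  # illustrative traffic-light bounds
    pull_rules={"*_pull": [7, 4, -4, -7]},
)
datastore = pipeline.transform({"hists": hists})  # 'hists' assumed precomputed
```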
From 4ee8c5208b2e8970784b1a9226aaa8be8339829a Mon Sep 17 00:00:00 2001
From: Simon Brugman
Date: Wed, 24 Nov 2021 15:53:21 +0100
Subject: [PATCH 33/34] lint: try-except best practices

---
 .pre-commit-config.yaml                    | 3 ++-
 popmon/analysis/profiling/hist_profiler.py | 4 ++--
 popmon/base/module.py                      | 4 ++--
 popmon/visualization/utils.py              | 8 +++++---
 4 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 60d700ff..e8589e1d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,7 +15,8 @@ repos:
   - id: flake8
     additional_dependencies:
     - flake8-comprehensions
+    - tryceratops
     args: [ "--select=E9,F63,F7,F82,C4,F401,TR004,TC200,TC201,TC202"]
 - repo: https://github.com/asottile/pyupgrade
   rev: v2.29.1
diff --git a/popmon/analysis/profiling/hist_profiler.py b/popmon/analysis/profiling/hist_profiler.py
index 3f1f762e..7bce2396 100644
--- a/popmon/analysis/profiling/hist_profiler.py
+++ b/popmon/analysis/profiling/hist_profiler.py
@@ -162,8 +162,8 @@ def _profile_2d_histogram(self, name, hist):
         return []
     try:
         grid = get_2dgrid(hist)
-    except Exception as e:
-        raise e
+    except Exception:
+        raise
 
     # calc some basic 2d-histogram statistics
     sume = int(sum_entries(hist))
diff --git a/popmon/base/module.py b/popmon/base/module.py
index a56e02dc..5bb49e62 100644
--- a/popmon/base/module.py
+++ b/popmon/base/module.py
@@ -140,8 +140,8 @@ def get_datastore_object(datastore, feature, dtype, default=None):
         else:
             try:
                 obj = datastore[feature]
-            except KeyError:
-                raise ValueError(f"`{feature}` not found in the datastore!")
+            except KeyError as e:
+                raise ValueError(f"`{feature}` not found in the datastore!") from e
 
         if not isinstance(obj, dtype):
             raise TypeError(f"obj `{feature}` is not an instance of `{dtype}`!")
diff --git a/popmon/visualization/utils.py b/popmon/visualization/utils.py
index 9cf3e7e4..924014c7 100644
--- a/popmon/visualization/utils.py
+++ b/popmon/visualization/utils.py
@@ -147,7 +147,7 @@ def plot_bars_b64(data, labels=None, bounds=None, ylim=False, skip_empty=True):
             if y_max > y_min:
                 ax.set_ylim(y_min, y_max)
         except Exception:
-            pass
+            logger.debug("unable to plot boundaries")
 
     ax.grid(True, linestyle=":")
 
@@ -368,8 +368,10 @@ def plot_overlay_1d_histogram_b64(
     try:
         hist_values = hist[0]
         hist_bins = hist[1]
-    except BaseException:
-        raise ValueError("Cannot extract binning and values from input histogram")
+    except BaseException as e:
+        raise ValueError(
+            "Cannot extract binning and values from input histogram"
+        ) from e
 
     assert hist_values is not None and len(
         hist_values
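The tryceratops rules applied above come down to three idioms: re-raise with a bare `raise` rather than `raise e`, chain translated exceptions with `from`, and log instead of silently passing. A minimal sketch (the `lookup` example is illustrative):

```python
import logging

logger = logging.getLogger(__name__)
lookup = {"a": 1}


def fetch(key):
    try:
        return lookup[key]
    except KeyError as e:
        # chain the original exception so the traceback keeps the root cause
        raise ValueError(f"`{key}` not found") from e


def fetch_or_none(key):
    try:
        return lookup[key]
    except KeyError:
        # prefer a log message over a silent `pass`
        logger.debug("lookup failed for %s", key)
        return None
```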
"markdown", "metadata": {}, "source": [ - "# Plotting the individual histograms\n", + "# Accessing the datastore\n", + "When you need programmtic access to popmon's results, then you can access the datastore directly.\n", + "For instanfce, you would like the exact maximum value of a histogram." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Plotting the individual histograms\n", "Sometimes, when you're diving into alerts from the report, you may want to plot some individual histograms. \n", "Fortunately, you can! Let's first have a look at how these histograms are stored." ] @@ -276,7 +291,24 @@ "source": [ "report = df.pm_stability_report(\n", " time_axis=\"DATE\", time_width=\"1w\", time_offset=\"2015-07-02\"\n", - ")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "list(report.datastore.keys())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ "split_hists = report.datastore[\"split_hists\"][\"DEPARTURE_DELAY\"]\n", "split_hists" ] @@ -295,7 +327,7 @@ "outputs": [], "source": [ "split_hist = split_hists.query(\"date == '2015-07-05 12:00:00'\")\n", - "split_hist.histogram[0].plot.matplotlib()" + "split_hist.histogram[0].plot.matplotlib();" ] }, { @@ -311,7 +343,23 @@ "metadata": {}, "outputs": [], "source": [ - "split_hist.histogram_ref[0].plot.matplotlib()" + "split_hist.histogram_ref[0].plot.matplotlib();" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Integrations" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Access to the datastore means that its possible to integrate popmon in almost any workflow. To give an example, one could store the histogram data in a [PostgreSQL](https://www.psycopg.org/docs/) database and load that from [Grafana](https://github.com/grafana/grafana) and benefit from their visualisation and alert handling features (e.g. send an email or slack message upon alert) [[#158]](https://github.com/ing-bank/popmon/issues/158). Similar flows are possible when popmon is integrated in a workflow scheduler framework, such as [Airflow](https://airflow.apache.org/). \n", + "\n", + "If you have set up such a workflow, please consider contributing this as a feature. In order to do so, [open an issue](https://github.com/ing-bank/popmon) in the repository." ] }, { @@ -361,7 +409,7 @@ " report_filepath=None,\n", " store_key=\"html_report\",\n", " sections_key=\"report_sections\",\n", - ")\n" + ")" ] }, { @@ -494,7 +542,7 @@ "name": "python3" }, "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, diff --git a/popmon/notebooks/popmon_tutorial_basic.ipynb b/popmon/notebooks/popmon_tutorial_basic.ipynb index 13ac619c..3c086ea0 100644 --- a/popmon/notebooks/popmon_tutorial_basic.ipynb +++ b/popmon/notebooks/popmon_tutorial_basic.ipynb @@ -1,9 +1,24 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# `popmon` introductory notebook" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook contains examples of how to generate `popmon` reports from a pandas DataFrame." 
diff --git a/popmon/notebooks/popmon_tutorial_basic.ipynb b/popmon/notebooks/popmon_tutorial_basic.ipynb
index 13ac619c..3c086ea0 100644
--- a/popmon/notebooks/popmon_tutorial_basic.ipynb
+++ b/popmon/notebooks/popmon_tutorial_basic.ipynb
@@ -1,9 +1,24 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# `popmon` introductory notebook"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "This notebook contains examples of how to generate `popmon` reports from a pandas DataFrame."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
+    "collapsed": false,
    "jupyter": {
      "outputs_hidden": false
     },
@@ -26,7 +41,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Reporting given a histograms object (dict)"
+    "## Set up `popmon` and load our dataset"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install popmon (if not installed yet) in the current environment."
    ]
   },
@@ -35,11 +57,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%capture\n",
-    "# install popmon (if not installed yet)\n",
     "import sys\n",
-    "\n",
-    "!\"{sys.executable}\" -m pip install popmon"
+    "!\"{sys.executable}\" -m pip install -q popmon"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Import pandas and popmon, load an example dataset provided by popmon, and show the first few results."
    ]
   },
@@ -71,40 +97,48 @@
     "df.head()"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Reporting given a pandas.DataFrame"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {
-    "scrolled": true
-   },
+   "metadata": {},
    "outputs": [],
    "source": [
-    "# first we generate histograms,\n",
-    "# but we could load pre-generated histograms from a pickle or json file as well.\n",
-    "hists = df.pm_make_histograms(\n",
+    "report = df.pm_stability_report(\n",
+    "    # Use the 'date' column as our time axis\n",
     "    time_axis=\"date\",\n",
+    "    # Create batches for every two weeks of data\n",
     "    time_width=\"2w\",\n",
-    "    features=[\"date:age\", \"date:gender\", \"date:isActive\"],\n",
+    "    # Select a subset of features\n",
+    "    features=[\"date:age\", \"date:isActive\", \"date:eyeColor\"],\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "print(hists.keys())"
+    "report"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "# generate report based on histograms\n",
-    "report = popmon.stability_report(hists)"
+    "### Regenerate the report\n",
+    "Using the `regenerate` method, you can change the report parameters without having to rerun the computational part of the pipeline. For example, a short (limited) report is generated when the `extended_report` flag is set to `False`. If a user wants to configure which statistics to show, the `show_stats` argument has to be set accordingly.\n",
+    "\n",
+    "Another option is to change the `plot_hist_n` parameter to control the number of histograms being displayed per feature."
    ]
   },
   {
@@ -115,16 +149,22 @@
    },
    "outputs": [],
    "source": [
-    "report # or report_.to_notebook_iframe()"
+    "report.regenerate(extended_report=False, plot_hist_n=3)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Regenerate the report\n",
-    "A short (limited) report will be generated since `extended_report` flag is set to `False`. \n",
-    "If a user wants to configure which statistics she/he wants to see, `show_stats` argument has to be set accordingly."
+    "## Reporting given histograms"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If the user would like to generate the report directly from histograms, popmon also supports that.\n",
+    "First, we generate histograms (but we could load pre-generated histograms from a pickle or json file as well)."
    ]
   },
@@ -135,14 +175,27 @@
    },
    "outputs": [],
    "source": [
-    "report.regenerate(extended_report=False)"
+    "hists = df.pm_make_histograms(\n",
+    "    time_axis=\"date\",\n",
+    "    time_width=\"2w\",\n",
+    "    features=[\"date:age\", \"date:gender\", \"date:isActive\"],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "list(hists.keys())"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Reporting given a pandas.DataFrame"
+    "And then generate the report based on histograms:"
    ]
   },
   {
@@ -151,11 +204,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "report_ = df.pm_stability_report(\n",
-    "    time_axis=\"date\",\n",
-    "    time_width=\"2w\",\n",
-    "    features=[\"date:age\", \"date:isActive\", \"date:eyeColor\"],\n",
-    ")"
+    "report = popmon.stability_report(hists)"
    ]
   },
   {
@@ -166,13 +215,13 @@
    },
    "outputs": [],
    "source": [
-    "report_"
+    "report"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -186,7 +235,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.8"
   },
   "pycharm": {
    "stem_cell": {
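The two entry points shown in the basic tutorial produce the same kind of report: `pm_stability_report` builds the histograms and the report in one call, while `pm_make_histograms` plus `popmon.stability_report` separates the expensive histogramming from the cheap, repeatable report generation. Schematically (assuming `df` as loaded in the tutorial):

```python
import popmon  # registers the pm_* DataFrame accessors

# one-step: histogramming + report in a single call
report = df.pm_stability_report(time_axis="date", time_width="2w")

# two-step: reuse the histograms for several differently-configured reports
hists = df.pm_make_histograms(time_axis="date", time_width="2w")
report_full = popmon.stability_report(hists)
report_full.regenerate(extended_report=False, plot_hist_n=3)  # re-render, no recompute
```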
diff --git a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
index 719571ff..ed30289e 100644
--- a/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
+++ b/popmon/notebooks/popmon_tutorial_incremental_data.ipynb
@@ -4,15 +4,16 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# This notebook shows how to generate reports on incremental datasets\n",
+    "# Working with incremental data\n",
+    "\n",
+    "This notebook shows how to generate reports on incremental datasets.\n",
     "\n",
     "The incremental data will either have a proper time-axis, or will be batches of data without \n",
     "a specific time-axis. \n",
     "\n",
     "The histograms of these datasets will be stitched together, and we generate a (consistent) report on the stitched dataset.\n",
     "\n",
-    "Note that we always generate the report on the full stitched histograms, b/c algorithms like trend detection\n",
-    "and comparison with reference histograms rely on having the historical histograms in place."
+    "Note that we always generate the report on the full stitched histograms, because algorithms like trend detection and comparison with reference histograms rely on having the historical histograms in place."
    ]
   },
   {
@@ -22,17 +23,21 @@
     "## Reporting given a histograms object (dict)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Install popmon (if not installed yet)"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "%%capture\n",
-    "# install popmon (if not installed yet)\n",
     "import sys\n",
-    "\n",
-    "!\"{sys.executable}\" -m pip install popmon"
+    "!\"{sys.executable}\" -m pip install -q popmon"
    ]
   },
   {
@@ -57,27 +62,26 @@
     "df = pd.read_csv(resources.data(\"test.csv.gz\"), parse_dates=[\"date\"])"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Add a month column, so we can make data batches per month."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
-    "# add month and week, so we can make data batches per month and week\n",
     "def to_month(x):\n",
     "    date = pd.to_datetime(x)\n",
     "    return str(12 * date.year + date.month)\n",
     "\n",
     "\n",
-    "def to_week(x):\n",
-    "    date = pd.to_datetime(x)\n",
-    "    return 52 * date.year + date.week\n",
-    "\n",
-    "\n",
     "df[\"month\"] = df[\"date\"].apply(to_month)\n",
-    "df[\"week\"] = df[\"date\"].apply(to_week)\n",
-    "months = df.month.unique()\n",
-    "weeks = df.week.unique().tolist()"
+    "months = df.month.unique()"
    ]
   },
   {
@@ -202,7 +206,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Adding to existing histograms"
+    "### Adding to existing histograms"
    ]
   },
   {
@@ -277,6 +281,21 @@
     "bin_specs = popmon.get_bin_specs(hists, skip_first_axis=True)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def to_week(x):\n",
+    "    date = pd.to_datetime(x)\n",
+    "    return 52 * date.year + date.week\n",
+    "\n",
+    "\n",
+    "df[\"week\"] = df[\"date\"].apply(to_week)\n",
+    "weeks = df.week.unique().tolist()"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -340,7 +359,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "#### Adding to an existing stitched histograms"
+    "### Adding to existing stitched histograms"
    ]
   },
   {
@@ -403,13 +422,6 @@
     ")"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
@@ -441,7 +453,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -455,7 +467,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.7"
+   "version": "3.8.8"
   },
   "pycharm": {
    "stem_cell": {
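The refreshed incremental tutorial boils down to a histogram-stitch-report loop. A sketch under stated assumptions: `batches` is a hypothetical iterable of pandas DataFrames, and `popmon.stitch_histograms` accepts `hists_basis`/`hists_delta`/`mode` parameters as used in this notebook; check the current popmon API for the exact signature:

```python
import popmon  # registers the pm_* DataFrame accessors

stitched = None
for batch in batches:  # 'batches' assumed: an iterable of pandas DataFrames
    # histogram only the new batch, on the same features and binning
    hists_delta = batch.pm_make_histograms(time_axis="date", time_width="1w")
    stitched = (
        hists_delta
        if stitched is None
        else popmon.stitch_histograms(
            hists_basis=stitched, hists_delta=hists_delta, mode="add"
        )
    )

# one consistent report over the full history, so trend detection and
# reference comparisons see all historical time slots
report = popmon.stability_report(stitched)
```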