diff --git a/.github/workflows/build-and-deploy-docs.yml b/.github/workflows/build-and-deploy-docs.yml index 2bf9d2b..03487bf 100644 --- a/.github/workflows/build-and-deploy-docs.yml +++ b/.github/workflows/build-and-deploy-docs.yml @@ -12,10 +12,17 @@ jobs: build-and-deploy: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: 3.x + - run: echo "cache_id=$(date --utc '+%V')" >> $GITHUB_ENV + - uses: actions/cache@v4 + with: + key: mkdocs-material-${{ env.cache_id }} + path: .cache + restore-keys: | + mkdocs-material- - run: pip install \ mkdocs-material \ mkdocs-glightbox diff --git a/.startup/env_var_defaults.json b/.startup/env_var_defaults.json index 2ad1769..3f42038 100644 --- a/.startup/env_var_defaults.json +++ b/.startup/env_var_defaults.json @@ -395,7 +395,6 @@ "DWH_POSTGRES_USER": ".env.om_db::POSTGRES_DB" } }, - ".env.om_server::SERVER_PORT": { "file": ".env.om_server", "name": "SERVER_PORT", @@ -482,6 +481,27 @@ "user_input": false, "dependant_on_other_env_vars": false }, + ".env.om_server::AUTHORIZER_ADMIN_PRINCIPALS": { + "file": ".env.om_server", + "name": "AUTHORIZER_ADMIN_PRINCIPALS", + "group": "Open Metadata Server", + "default_value": "admin", + "user_input": true, + "valid_pattern": null, + "invalid_substrings": "[\\s]", + "is_list": true, + "dependant_on_other_env_vars": false + }, + ".env.om_server::AUTHORIZER_PRINCIPAL_DOMAIN": { + "file": ".env.om_server", + "name": "AUTHORIZER_PRINCIPAL_DOMAIN", + "group": "Open Metadata Server", + "default_value": "open-metadata.org", + "user_input": true, + "valid_pattern": null, + "invalid_substrings": "[\\s]", + "dependant_on_other_env_vars": false + }, ".env.om_server::AUTHORIZER_CLASS_NAME": { "file": ".env.om_server", "name": "AUTHORIZER_CLASS_NAME", diff --git a/.startup/make_env_docker.py b/.startup/make_env_docker.py index 87a176a..8983238 100644 --- 
a/.startup/make_env_docker.py +++ b/.startup/make_env_docker.py @@ -3,7 +3,7 @@ from pathlib import Path import re import subprocess -from typing import Dict, List, Optional +from typing import Optional import uuid import urllib @@ -36,7 +36,7 @@ def dot_env_file_already_exists(startup_dir: Path, file_name: str = ".env") -> b return False -def load_env_var_defaults_file(startup_dir: Path) -> Dict: +def load_env_var_defaults_file(startup_dir: Path) -> dict: default_env_vars_file_path = Path(startup_dir).joinpath("env_var_defaults.json") with open(default_env_vars_file_path, "r") as jf: default_env_vars_json = json.load(jf) @@ -47,7 +47,8 @@ def get_and_validate_user_input( env_var: str, default_value: str, valid_input_pattern: Optional[str] = None, - invalid_substrings: List[str] = [" ", "\n", "\t"], + invalid_substrings: list[str] = [" ", "\n", "\t"], + is_list: bool = False, max_tries: int = MAX_TRIES, ) -> str: msg = f"{env_var} [leave blank for default value: '{default_value}']: " @@ -56,8 +57,8 @@ def get_and_validate_user_input( while tries_remaining > 0: input_val = input(msg) if input_val == "": - return default_value - + output_value = default_value + break if isinstance(invalid_substrings, str) and (invalid_substrings in input_val): print(f"Invalid value entered, can't contain this substring: {invalid_substrings}") tries_remaining = tries_remaining - 1 @@ -75,17 +76,20 @@ def get_and_validate_user_input( continue elif valid_input_pattern is not None: if re.match(valid_input_pattern, input_val): - return input_val + output_value = input_val + break else: print(f"Invalid value entered, must match pattern {valid_input_pattern}") tries_remaining = tries_remaining - 1 continue - return input_val + if is_list: + output_value = f"[{','.join([el.strip() for el in output_value.split()])}]" + return output_value except KeyboardInterrupt: print("Keyboard interrupted") -def orchestrate_user_input_prompts(env_var_dict: Dict) -> Dict: +def 
orchestrate_user_input_prompts(env_var_dict: dict) -> dict: for env_var_id, env_var_payload in env_var_dict.items(): if env_var_payload["user_input"] == True: env_var_dict[env_var_id]["set_value"] = get_and_validate_user_input( @@ -93,6 +97,7 @@ def orchestrate_user_input_prompts(env_var_dict: Dict) -> Dict: default_value=env_var_payload["default_value"], valid_input_pattern=env_var_payload["valid_pattern"], invalid_substrings=env_var_payload["invalid_substrings"], + is_list=env_var_payload.get("is_list", False), ) elif env_var_payload["dependant_on_other_env_vars"] == True: env_var_mapper = env_var_payload["env_var_mappings"] @@ -108,23 +113,23 @@ def orchestrate_user_input_prompts(env_var_dict: Dict) -> Dict: return env_var_dict -def get_env_var_payloads(env_var_dict: Dict) -> List: +def get_env_var_payloads(env_var_dict: dict) -> list: env_var_payloads = [v for k, v in env_var_dict.items()] return env_var_payloads -def get_distinct_dot_env_file_names(env_var_payloads: List) -> List: +def get_distinct_dot_env_file_names(env_var_payloads: list) -> list: return list(set([p["file"] for p in env_var_payloads])) -def create_dot_env_files(output_dir: Path, env_var_dict: Dict) -> None: +def create_dot_env_files(output_dir: Path, env_var_dict: dict) -> None: all_lines_all_files = prepare_dot_env_file_lines(output_dir, env_var_dict) for file_name, lines in all_lines_all_files.items(): with open(file_name, "x") as f: f.write(lines) -def prepare_dot_env_file_lines(output_dir: Path, env_var_dict: Dict) -> None: +def prepare_dot_env_file_lines(output_dir: Path, env_var_dict: dict) -> None: env_var_payloads = get_env_var_payloads(env_var_dict=env_var_dict) dot_env_file_names = get_distinct_dot_env_file_names(env_var_payloads=env_var_payloads) all_lines_all_files = {} @@ -147,28 +152,13 @@ def prepare_dot_env_file_lines(output_dir: Path, env_var_dict: Dict) -> None: return all_lines_all_files -if __name__ == "__main__": - parser = argparse.ArgumentParser() - 
parser.add_argument("--startup_dir", default=".", help="The project's top-level directory") - parser.add_argument( - "--mode", - default="interactive", - help="Credential-defining process: options: ['interactive', 'dev']", - ) - args = parser.parse_args() - - startup_dir = Path(args.startup_dir) - if args.mode == "dev": - output_dir = startup_dir.joinpath(".dev") - output_dir.mkdir(exist_ok=True) - else: - output_dir = startup_dir - dot_env_exists = dot_env_file_already_exists(startup_dir=output_dir, file_name=".env") - dwh_dot_env_exists = dot_env_file_already_exists(startup_dir=output_dir, file_name=".env.dwh") - superset_dot_env_exists = dot_env_file_already_exists( - startup_dir=output_dir, file_name=".env.superset" - ) - if dot_env_exists or dwh_dot_env_exists or superset_dot_env_exists: +def main(output_dir: Path) -> None: + env_exists = dot_env_file_already_exists(output_dir, file_name=".env") + dwh_exists = dot_env_file_already_exists(output_dir, file_name=".env.dwh") + ss_exists = dot_env_file_already_exists(output_dir, file_name=".env.superset") + om_db_exists = dot_env_file_already_exists(output_dir, file_name=".env.om_db") + om_server_exists = dot_env_file_already_exists(output_dir, file_name=".env.om_server") + if env_exists or dwh_exists or ss_exists or om_db_exists or om_server_exists: raise Exception( f"One or more dot-env file(s) would be overwritten. 
Backup and move .env files and " + "try again" @@ -225,6 +215,23 @@ def prepare_dot_env_file_lines(output_dir: Path, env_var_dict: Dict) -> None: "group": "Open Metadata Server", "set_value": str(uuid.uuid4()), } - # file_lines = prepare_dot_env_file_lines(output_dir=output_dir, env_var_dict=env_var_dict) - # print(file_lines) create_dot_env_files(output_dir=output_dir, env_var_dict=env_var_dict) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--startup_dir", default=".", help="The project's top-level directory") + parser.add_argument( + "--mode", + default="interactive", + help="Credential-defining process: options: ['interactive', 'dev']", + ) + args = parser.parse_args() + + startup_dir = Path(args.startup_dir) + if args.mode == "dev": + output_dir = startup_dir.joinpath(".dev") + output_dir.mkdir(exist_ok=True) + else: + output_dir = startup_dir + main(output_dir) diff --git a/docs/assets/imgs/openmetadata/om_setup__initial_login.png b/docs/assets/imgs/openmetadata/om_setup__initial_login.png new file mode 100644 index 0000000..f787e02 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__initial_login.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_1.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_1.png new file mode 100644 index 0000000..a18206f Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_1.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_2.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_2.png new file mode 100644 index 0000000..184a7c8 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_2.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_3.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_3.png new file mode 100644 index 0000000..5e2498c Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_3.png differ diff --git 
a/docs/assets/imgs/openmetadata/om_setup__make_conn_4.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_4.png new file mode 100644 index 0000000..c0e06a0 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_4.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_5.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_5.png new file mode 100644 index 0000000..8d032db Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_5.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_6.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_6.png new file mode 100644 index 0000000..bd43e4d Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_6.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_7.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_7.png new file mode 100644 index 0000000..cf52e1c Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_7.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_8.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_8.png new file mode 100644 index 0000000..70178ff Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_8.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_conn_9.png b/docs/assets/imgs/openmetadata/om_setup__make_conn_9.png new file mode 100644 index 0000000..14767d3 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_conn_9.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_1.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_1.png new file mode 100644 index 0000000..a7383bf Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_1.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_2.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_2.png new file mode 
100644 index 0000000..5300b3c Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_2.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_3.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_3.png new file mode 100644 index 0000000..ac85441 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_3.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_4.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_4.png new file mode 100644 index 0000000..0f18f30 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_4.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_5.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_5.png new file mode 100644 index 0000000..b763191 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_5.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_6.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_6.png new file mode 100644 index 0000000..6738d80 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_6.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__make_ingestion_7.png b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_7.png new file mode 100644 index 0000000..e2ef19c Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__make_ingestion_7.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__pw_change_1.png b/docs/assets/imgs/openmetadata/om_setup__pw_change_1.png new file mode 100644 index 0000000..ae5c4c1 Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__pw_change_1.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__pw_change_2.png b/docs/assets/imgs/openmetadata/om_setup__pw_change_2.png new file mode 100644 index 0000000..5bc47f6 Binary files /dev/null and 
b/docs/assets/imgs/openmetadata/om_setup__pw_change_2.png differ diff --git a/docs/assets/imgs/openmetadata/om_setup__pw_change_3.png b/docs/assets/imgs/openmetadata/om_setup__pw_change_3.png new file mode 100644 index 0000000..e286e9b Binary files /dev/null and b/docs/assets/imgs/openmetadata/om_setup__pw_change_3.png differ diff --git a/docs/index.md b/docs/index.md index 0ece619..fd6c2a1 100644 --- a/docs/index.md +++ b/docs/index.md @@ -19,7 +19,7 @@ At present, it uses docker to provision and run: * Dashboarding and Reporting ![Geospatial Data Analysis](/assets/imgs/superset/deckgl_polygon_chart_demo.png) - ![Time Series Analysis](/assets/imgs/superset/median_sale_price_by_property_class.png) + ![Time Series Analysis](/assets/imgs/superset/median_sale_price_by_property_class.png) ![Dashboarding](/assets/imgs/superset/dashboard_demo.png) * a **pgAdmin4** database administration interface, @@ -33,11 +33,11 @@ At present, it uses docker to provision and run: * **dbt** to: * manage sequential data transformation + cleaning tasks, * serve data documentation and data lineage graphs, and - * facilitate search of the data dictionary and data catalog - + * facilitate search of the data dictionary and data catalog + ![dbt Data Lineage Graph](/assets/imgs/systems/dbt_lineage_graph_of_parcel_sales.png) ![All Data Tables' Lineage Graphs](/assets/imgs/dbt/lineage_graph_of_all_nodes.png) - ![One Data Set's Lineage Graph](/assets/imgs/systems/dbt_data_docs_interface_showing_parcel_sales.png) + ![One Data Set's Lineage Graph](/assets/imgs/systems/dbt_data_docs_interface_showing_parcel_sales.png) * great_expectations for anomaly detection and data monitoring, and @@ -47,6 +47,6 @@ At present, it uses docker to provision and run: ![data-loading TaskGroups in load_data_tg TaskGroup](/assets/imgs/Socrata_ELT_DAG/Full_view_data_loaders_in_load_data_tg.PNG) - ![load_data_tg TaskGroup High Level](/assets/imgs/Socrata_ELT_DAG/load_data_task_group_w_checkpoints.png) + ![load_data_tg 
TaskGroup High Level](/assets/imgs/Socrata_ELT_DAG/load_data_task_group_w_checkpoints.png) ![automate as much pipeline development as possible](/assets/imgs/Socrata_ELT_DAG/generate_and_run_dbt_models.png) diff --git a/docs/setup/index.md b/docs/setup/index.md index d95ae80..c06bbd5 100644 --- a/docs/setup/index.md +++ b/docs/setup/index.md @@ -16,4 +16,5 @@ Work through these steps to set up all ADWH credentials and services. It should 1. [Set up your credentials and build the images](/setup/getting_started) 2. [Configure database connections for Superset](/setup/superset_setup) -3. [Configure database connections for pgAdmin4](/setup/pgAdmin4) \ No newline at end of file +3. [Configure database connections for pgAdmin4](/setup/pgAdmin4) +4. [Configure database connections and ingestions for OpenMetadata](/setup/openMetadata) \ No newline at end of file diff --git a/docs/setup/openMetadata.md b/docs/setup/openMetadata.md new file mode 100644 index 0000000..d565a1b --- /dev/null +++ b/docs/setup/openMetadata.md @@ -0,0 +1,88 @@ +# Setting up OpenMetadata + +## Logging into the OpenMetadata UI +To access the OpenMetadata UI, go to [http://localhost:8585](http://localhost:8585) and log in using credentials defined in [the initial setup step](/setup/getting_started). + +* **Email:** This will consist of two environment variables from `.env.om_server` in the form below. The default email is `admin@open-metadata.org`. + + `AUTHORIZER_ADMIN_PRINCIPALS@AUTHORIZER_PRINCIPAL_DOMAIN` + +* **Password:** the default password will be `admin`. You should probably [change that password](#changing-your-password) on your first login. +
![](/assets/imgs/openmetadata/om_setup__initial_login.png)
+ +!!! note + + If you're hosting the ADWH system on another machine, replace `localhost` with the domain name or IP address of that remote machine. + + +## Define a Connection to the Data Warehouse Database + +1. Go to **Settings** > **Services** > **Databases** and **Add a New Service** +
![](/assets/imgs/openmetadata/om_setup__make_conn_1.png)
+
![](/assets/imgs/openmetadata/om_setup__make_conn_2.png)
+
![](/assets/imgs/openmetadata/om_setup__make_conn_3.png)
+
![](/assets/imgs/openmetadata/om_setup__make_conn_4.png)
+ +2. Make a Postgres Database Service + +
![](/assets/imgs/openmetadata/om_setup__make_conn_5.png)
+ + 2.a. Enter a name and a brief description of the database. + +
![](/assets/imgs/openmetadata/om_setup__make_conn_6.png)
+ + 2.b. Enter credentials for the DWH database role you want the service to use along with the other connection info. After entering credentials and other info, **Test your Connection**. + +
![](/assets/imgs/openmetadata/om_setup__make_conn_7.png)
+
![](/assets/imgs/openmetadata/om_setup__make_conn_8.png)
+ + If all connection checks pass, click **OK** and **Save** the connection. + +If everything was successful, you should now see your `ADWH Data Warehouse` **Database Service**. + +
![](/assets/imgs/openmetadata/om_setup__make_conn_9.png)
+ +## Configure Metadata Ingestion + +Continuing from the last image, go to the page for the `ADWH Data Warehouse` **Database Service**. You will see the different postgres databases in the `dwh_db` postgres instance. + +To scan those databases for **Data Assets** to catalog, you have to configure an **Ingestion**. + +1. Click **Ingestions** > **Add Ingestion** > **Add Metadata Ingestion** +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_1.png)
+ +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_2.png)
+ +2. Specify the assets to include in this **Metadata Ingestion** configuration + + Enter regex patterns to specify which database(s), schema(s), and table(s) should be included. + +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_3.png)
+ +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_4.png)
+ +3. Set the schedule for running this **Ingestion** then **Add & Deploy** it + +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_5.png)
+
![](/assets/imgs/openmetadata/om_setup__make_ingestion_6.png)
+ +4. Trigger an initial **Run** to immediately run the **Metadata Ingestion** + +
![](/assets/imgs/openmetadata/om_setup__make_ingestion_7.png)
+ +Now you should have a catalog of metadata for all **Data Assets** in the main DWH schemas. + +### Changing your password + +1. From the user profile dropdown in the upper right corner, click your username to access your user page + + ![](/assets/imgs/openmetadata/om_setup__pw_change_1.png) + +2. Change your password + + ![](/assets/imgs/openmetadata/om_setup__pw_change_2.png) + +
![](/assets/imgs/openmetadata/om_setup__pw_change_3.png)
\ No newline at end of file diff --git a/docs/setup/pgAdmin4.md b/docs/setup/pgAdmin4.md index 11aeb05..7a6a699 100644 --- a/docs/setup/pgAdmin4.md +++ b/docs/setup/pgAdmin4.md @@ -40,7 +40,7 @@ Don't worry about the **Server group** field, the default is fine. * **Host name/address:** `airflow_db` * This is defined [here](https://github.com/MattTriano/analytics_data_where_house/blob/c75869ba6fae5c033e6601b9203fd178148f2777/docker-compose.yml#L34) in the `docker-compose.yml` file -* **Port:** 5432 +* **Port:** 5432 * This is the database's port number inside the container, as defined to the right of the colon [here](https://github.com/MattTriano/analytics_data_where_house/blob/c75869ba6fae5c033e6601b9203fd178148f2777/docker-compose.yml#L44). * **Username:** the `POSTGRES_USER` value in your `.env` file. * **Password:** the `POSTGRES_PASSWORD` value in your `.env` file. @@ -66,7 +66,7 @@ Repeat the process to connect to the data warehouse database. * **Host name/address:** `dwh_db` * This is defined [here](https://github.com/MattTriano/analytics_data_where_house/blob/c75869ba6fae5c033e6601b9203fd178148f2777/docker-compose.yml#L61) in the `docker-compose.yml` file -* **Port:** 5432 +* **Port:** 5432 * This is the database's port number inside the container, as defined to the right of the colon [here](https://github.com/MattTriano/analytics_data_where_house/blob/c75869ba6fae5c033e6601b9203fd178148f2777/docker-compose.yml#L71) * **Username:** the `DWH_POSTGRES_USER` value in your `.env` file * **Password:** the `DWH_POSTGRES_PASSWORD` value in your `.env` file diff --git a/mkdocs.yml b/mkdocs.yml index e0328e7..e3424eb 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -92,8 +92,8 @@ markdown_extensions: - pymdownx.inlinehilite - pymdownx.snippets - pymdownx.emoji: - emoji_index: !!python/name:materialx.emoji.twemoji - emoji_generator: !!python/name:materialx.emoji.to_svg + emoji_index: !!python/name:material.extensions.emoji.twemoji + emoji_generator: 
!!python/name:material.extensions.emoji.to_svg - pymdownx.superfences: custom_fences: - name: mermaid