From b2cb0e15b34202e290b8b86d5107f0148f4dc563 Mon Sep 17 00:00:00 2001 From: Bruno Antonellini Date: Tue, 2 Jul 2024 14:34:56 -0300 Subject: [PATCH] Dcv 2515 dbt coves setup (#472) * DCV-2515 dbt-coves setup --- dbt_coves/config/config.py | 16 +- dbt_coves/tasks/setup/base.py | 22 -- dbt_coves/tasks/setup/dbt.py | 126 -------- dbt_coves/tasks/setup/git.py | 171 ----------- dbt_coves/tasks/setup/main.py | 129 +++++++- dbt_coves/tasks/setup/pre_commit.py | 59 ---- dbt_coves/tasks/setup/ssh.py | 280 ------------------ .../setup/templates/pre_commit/copier.yml | 45 --- ... %}pre-commit-config.yaml{% endif %}.jinja | 51 ---- ... use_sqlfluff %}.sqlfluffignore{% endif %} | 7 - ... use_sqlfluff %}.sqlfluff{% endif %}.jinja | 89 ------ .../{% if use_yamllint %}.yamllint{% endif %} | 35 --- ..._conf.answers_file }}-precommit.yaml.jinja | 2 - dbt_coves/tasks/setup/utils.py | 15 +- dbt_coves/utils/flags.py | 16 +- docs/2 - Commands/dbt/README.md | 31 ++ docs/2 - Commands/extract/airbyte/README.md | 32 ++ 17 files changed, 196 insertions(+), 930 deletions(-) delete mode 100644 dbt_coves/tasks/setup/base.py delete mode 100644 dbt_coves/tasks/setup/dbt.py delete mode 100644 dbt_coves/tasks/setup/git.py delete mode 100644 dbt_coves/tasks/setup/pre_commit.py delete mode 100644 dbt_coves/tasks/setup/ssh.py delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/copier.yml delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/{% if use_sqlfluff or use_yamllint or use_dbt_checkpoint %}pre-commit-config.yaml{% endif %}.jinja delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluffignore{% endif %} delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluff{% endif %}.jinja delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_yamllint %}.yamllint{% endif %} delete mode 100644 dbt_coves/tasks/setup/templates/pre_commit/{{ 
dbt_project_dir }}/{{ _copier_conf.answers_file }}-precommit.yaml.jinja create mode 100644 docs/2 - Commands/dbt/README.md create mode 100644 docs/2 - Commands/extract/airbyte/README.md diff --git a/dbt_coves/config/config.py b/dbt_coves/config/config.py index 67683072..63beb9ea 100644 --- a/dbt_coves/config/config.py +++ b/dbt_coves/config/config.py @@ -129,17 +129,9 @@ class LoadModel(BaseModel): fivetran: Optional[LoadFivetranModel] = LoadFivetranModel() -class SetupSshModel(BaseModel): - open_ssl_public_key: Optional[bool] = False - - -class SetupGitModel(BaseModel): - no_prompt: Optional[bool] = False - - class SetupModel(BaseModel): - ssh: Optional[SetupSshModel] = SetupSshModel() - git: Optional[SetupGitModel] = SetupGitModel() + no_prompt: Optional[bool] = False + quiet: Optional[bool] = False class RunDbtModel(BaseModel): @@ -234,8 +226,8 @@ class DbtCovesConfig: "load.airbyte.secrets_project", "load.airbyte.secrets_tags", "load.airbyte.secrets_key", - "setup.ssh.open_ssl_public_key", - "setup.git.no_prompt", + "setup.no_prompt", + "setup.quiet", "dbt.command", "dbt.project_dir", "dbt.virtualenv", diff --git a/dbt_coves/tasks/setup/base.py b/dbt_coves/tasks/setup/base.py deleted file mode 100644 index d8b697af..00000000 --- a/dbt_coves/tasks/setup/base.py +++ /dev/null @@ -1,22 +0,0 @@ -from dbt_coves.tasks.base import NonDbtBaseTask - - -class BaseSetupException(Exception): - pass - - -class BaseSetupTask(NonDbtBaseTask): - """ - Provides common functionality for all "Setup" sub tasks. 
- """ - - arg_parser = None - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def run(self) -> int: - raise NotImplementedError() - - def get_config_value(self, key): - return self.coves_config.integrated["setup"][self.args.task][key] diff --git a/dbt_coves/tasks/setup/dbt.py b/dbt_coves/tasks/setup/dbt.py deleted file mode 100644 index a2081a55..00000000 --- a/dbt_coves/tasks/setup/dbt.py +++ /dev/null @@ -1,126 +0,0 @@ -import os -from pathlib import Path - -from rich.console import Console - -from dbt_coves.config.config import DbtCovesConfig -from dbt_coves.tasks.setup.base import BaseSetupTask -from dbt_coves.utils.shell import run_and_capture_cwd -from dbt_coves.utils.tracking import trackable - -from .utils import file_exists, print_row - -console = Console() - - -class SetupDbtTask(BaseSetupTask): - """ - Task that runs ssh key generation, git repo clone and db connection setup - """ - - @classmethod - def register_parser(cls, sub_parsers, base_subparser): - subparser = sub_parsers.add_parser( - "dbt", - parents=[base_subparser], - help="Set up dbt for dbt-coves project", - ) - subparser.set_defaults(cls=cls, which="dbt") - return subparser - - @trackable - def run(self) -> int: - config_folder = self.get_config_folder(mandatory=False) - self.dbt_init(config_folder) - self.dbt_debug(config_folder) - self.dbt_deps(config_folder) - return 0 - - def get_config_folder(self, mandatory=True): - workspace_path = os.environ.get("WORKSPACE_PATH", Path.cwd()) - return DbtCovesConfig.get_config_folder(workspace_path=workspace_path, mandatory=mandatory) - - def dbt_debug(self, config_folder=None): - if not config_folder: - config_folder = self.get_config_folder(mandatory=False) - - if config_folder: - dbt_project_yaml_path = Path(config_folder.parent) / "dbt_project.yml" - else: - dbt_project_yaml_path = file_exists(Path(os.getcwd()), "dbt_project.yml") - - debug_status = "[red]FAIL[/red]" - console.print("\n") - - output = 
run_and_capture_cwd(["dbt", "debug"], dbt_project_yaml_path.parent) - - if output.returncode == 0: - debug_status = "[green]SUCCESS :heavy_check_mark:[/green]" - print_row( - "dbt debug", - debug_status, - new_section=True, - ) - if output.returncode > 0: - raise Exception("dbt debug error. Check logs.") - - def dbt_init(self, config_folder=None): - if not config_folder: - config_folder = self.get_config_folder(mandatory=False) - - if config_folder: - dbt_project_yaml_path = Path(config_folder.parent) / "dbt_project.yml" - else: - dbt_project_yaml_path = file_exists(Path.cwd(), "dbt_project.yml") - - if not dbt_project_yaml_path: - output = run_and_capture_cwd(["dbt", "init"], Path.cwd()) - - else: - init_status = "[green]FOUND :heavy_check_mark:[/green] project already exists" - print_row( - "dbt init", - init_status, - new_section=True, - ) - output = run_and_capture_cwd(["dbt", "init"], dbt_project_yaml_path.parent) - if output.returncode == 0: - init_status = "[green]SUCCESS :heavy_check_mark:[/green]" - print_row( - "dbt init", - init_status, - new_section=True, - ) - else: - raise Exception("dbt init error. Check logs.") - - def dbt_deps(self, config_folder=None): - if not config_folder: - config_folder = self.get_config_folder(mandatory=False) - - if config_folder: - dbt_project_yaml_path = Path(config_folder.parent) / "dbt_project.yml" - else: - dbt_project_yaml_path = file_exists(Path(os.getcwd()), "dbt_project.yml") - - if dbt_project_yaml_path.exists(): - output = run_and_capture_cwd(["dbt", "deps"], dbt_project_yaml_path.parent) - - if output.returncode == 0: - deps_status = "[green]SUCCESS :heavy_check_mark:[/green]" - else: - deps_status = "[red]FAIL :cross_mark:[/red]" - print_row( - "dbt deps", - deps_status, - new_section=True, - ) - if output.returncode > 0: - raise Exception("dbt deps error. 
Check logs.") - else: - deps_status = "[green]FOUND :heavy_check_mark:[/green] dbt project not found" - print_row( - "dbt deps", - deps_status, - new_section=True, - ) diff --git a/dbt_coves/tasks/setup/git.py b/dbt_coves/tasks/setup/git.py deleted file mode 100644 index c38eda46..00000000 --- a/dbt_coves/tasks/setup/git.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -from pathlib import Path -from urllib.parse import urlparse - -import questionary -from rich.console import Console - -from dbt_coves.tasks.setup.base import BaseSetupTask -from dbt_coves.utils.shell import run, run_and_capture -from dbt_coves.utils.tracking import trackable - -from .utils import print_row - -console = Console() - - -class SetupGitException(Exception): - pass - - -class SetupGitTask(BaseSetupTask): - """ - Task that runs ssh key generation, git repo clone and db connection setup - """ - - key_column_with = 50 - value_column_with = 30 - - @classmethod - def register_parser(cls, sub_parsers, base_subparser): - subparser = sub_parsers.add_parser( - "git", - parents=[base_subparser], - help="Set up Git repository of dbt project", - ) - subparser.add_argument( - "--no-prompt", - help="Configure Git without user intervention", - action="store_true", - default=False, - ) - subparser.set_defaults(cls=cls, which="git") - return subparser - - @trackable - def run(self, workspace_path=Path.cwd()) -> int: - self.run_git_config() - self.run_git_clone(workspace_path) - return 0 - - def run_git_config(self): - config_status = "[red]MISSING[/red]" - - email_output = run_and_capture(["git", "config", "--global", "--get", "user.email"]) - email_exists = email_output.returncode == 0 and email_output.stdout - email = email_output.stdout.replace("\n", "") - - name_output = run_and_capture(["git", "config", "--global", "--get", "user.name"]) - name_exists = name_output.returncode == 0 and name_output.stdout - name = name_output.stdout.replace("\n", "") - if email_exists and name_exists: - config_status = 
"[green]FOUND :heavy_check_mark:[/green]" - print_row("Checking git config", config_status, new_section=True) - if name: - print_row(" - user.name ", name) - if email: - print_row(" - user.email ", email) - - if not email_exists or not name_exists: - name = "" - email = "" - no_prompt = self.get_config_value("no_prompt") - if no_prompt: - name = os.environ.get("USER_FULLNAME", "") - email = os.environ.get("USER_EMAIL", "") - if not (name and email): - raise SetupGitException( - f"[yellow]USER_FULLNAME ({name or 'missing'})[/yellow] and" - f"[yellow]USER_EMAIL ({email or 'missing'})[/yellow] environment" - "variables must be set in order to setup Git with [i]--no-prompt[/i]" - ) - else: - default_name = os.environ.get("USER_FULLNAME", "") - name = questionary.text("Please type your full name:", default=default_name).ask() - if name: - default_email = os.environ.get("USER_EMAIL", "") - email = questionary.text( - "Please type your email address:", default=default_email - ).ask() - if name and email: - name_output = run_and_capture(["git", "config", "--global", "user.name", name]) - if name_output.returncode != 0: - console.print("Could not set user.name") - return 1 - email_output = run_and_capture(["git", "config", "--global", "user.email", email]) - if email_output.returncode != 0: - console.print("Could not set user.email") - return 1 - console.print("[green]:heavy_check_mark: Git user configured successfully.") - - def run_git_clone(self, workspace_path): - repo_url = "" - cloned_status = "[red]MISSING[/red]" - cloned_exists = Path(workspace_path, ".git").exists() - if cloned_exists: - cloned_status = "[green]FOUND :heavy_check_mark:[/green]" - print_row("Checking for git repo", cloned_status, new_section=True) - - if cloned_exists: - return - - if any(os.scandir(workspace_path)): - console.print(f"Folder '{workspace_path}' is not empty.") - return - - no_prompt = self.get_config_value("no_prompt") - if no_prompt: - repo_url = os.environ.get("GIT_REPO_URL", "") - 
if not repo_url: - raise SetupGitException( - "[yellow]GIT_REPO_URL[/yellow] environment variable must be set" - "in order to clone Git repository with [i]--no-prompt[/i]" - ) - else: - default_repo_url = os.environ.get("GIT_REPO_URL", "") - repo_url = questionary.text( - "Please type the git repo SSH url:", default=default_repo_url - ).ask() - - if repo_url: - ssh_repo_url = f"ssh://{repo_url}" if "ssh://" not in repo_url else repo_url - url_parsed = urlparse(ssh_repo_url) - domain = url_parsed.hostname - port = None - try: - port = url_parsed.port - except ValueError: - pass - if port: - output = run_and_capture(["ssh-keyscan", "-t", "rsa", "-p", str(port), domain]) - else: - output = run_and_capture(["ssh-keyscan", "-t", "rsa", domain]) - - if output.returncode != 0: - raise Exception(f"Failed to run ssh-keyscan. {output.stderr}") - - new_host = output.stdout - known_hosts_path = Path("~/.ssh/known_hosts").expanduser() - if not known_hosts_path.exists(): - known_hosts_path.parent.mkdir(parents=True, exist_ok=True) - open(known_hosts_path, "w") - - hosts = open(known_hosts_path, "r").read() - if domain not in hosts: - with open(known_hosts_path, "a") as file: - file.write(new_host) - console.print(f"[green]:heavy_check_mark: {domain} registared as a SSH known host.") - - if output.returncode == 0: - output = run(["git", "clone", repo_url, workspace_path]) - if output.returncode == 0: - console.print( - f"[green]:heavy_check_mark: Repo cloned successfully on '{workspace_path}'" - ) - else: - raise Exception(f"Failed to clone git repo '{repo_url}'") - else: - raise Exception(f"Failed to clone git repo '{repo_url}': {output.stderr}") - - def get_config_value(self, key): - return self.coves_config.integrated["setup"]["git"][key] diff --git a/dbt_coves/tasks/setup/main.py b/dbt_coves/tasks/setup/main.py index c623bdb4..646e2911 100644 --- a/dbt_coves/tasks/setup/main.py +++ b/dbt_coves/tasks/setup/main.py @@ -1,11 +1,22 @@ +import os +from pathlib import Path + 
+import copier +import questionary from rich.console import Console from dbt_coves.tasks.base import NonDbtBaseTask +from dbt_coves.utils.tracking import trackable + +from .utils import get_dbt_projects -from .dbt import SetupDbtTask -from .git import SetupGitTask -from .pre_commit import SetupPrecommitTask -from .ssh import SetupSSHTask +AVAILABLE_SERVICES = { + "Base dbt project": "setup_dbt_project", + "dbt profile for automated runs": "setup_dbt_profile", + "Initial CI/CD scripts": "setup_ci_cd", + "Linting with SQLFluff, dbt-checkpoint and/or YMLLint": "setup_precommit", + "Sample Airflow DAGs": "setup_airflow_dag", +} console = Console() @@ -19,13 +30,6 @@ class SetupTask(NonDbtBaseTask): Task that code-gen dbt resources """ - tasks = [ - SetupGitTask, - SetupDbtTask, - SetupSSHTask, - SetupPrecommitTask, - ] - key_column_with = 20 value_column_with = 50 arg_parser = None @@ -35,11 +39,106 @@ def register_parser(cls, sub_parsers, base_subparser): ext_subparser = sub_parsers.add_parser( "setup", parents=[base_subparser], - help="Set up project components (git, dbt, vscode, sqlfluff, pre-commit, etc)", + help="Set up dbt project components (dbt project, CI, pre-commit, Airflow DAGs)", + ) + ext_subparser.add_argument( + "--no-prompt", + action="store_true", + help="Generate all Datacoves components without prompting for confirmation", + default=False, + ) + ext_subparser.add_argument( + "--quiet", + action="store_true", + help="Skip rendering results", + default=False, ) ext_subparser.set_defaults(cls=cls, which="setup") - sub_parsers = ext_subparser.add_subparsers(title="dbt-coves setup commands", dest="task") - # Register a separate sub parser for each sub task. 
- [x.register_parser(sub_parsers, base_subparser) for x in cls.tasks] cls.arg_parser = ext_subparser return ext_subparser + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def get_config_value(self, key): + return self.coves_config.integrated["setup"][key] + + @trackable + def run(self) -> int: + self.repo_path = os.environ.get("DATACOVES__REPO_PATH", Path().resolve()) + self.copier_context = {"no_prompt": self.get_config_value("no_prompt")} + return self.setup_datacoves() + + def setup_datacoves(self): + choices = questionary.checkbox( + "What services would you like to set up?", + choices=list(AVAILABLE_SERVICES.keys()), + ).ask() + services = [AVAILABLE_SERVICES[service] for service in choices] + # dbt project + dbt_projects = get_dbt_projects(self.repo_path) + if not dbt_projects: + if "setup_dbt_project" in services: + project_dir = questionary.select( + "Where should the dbt project be created?", + choices=["current directory", "transform"], + ).ask() + if "current directory" in project_dir: + project_dir = "." + self.copier_context["dbt_project_dir"] = project_dir + self.copier_context["dbt_project_name"] = questionary.text( + "What is the name of the dbt project?" + ).ask() + elif "setup_precommit" in services: + raise DbtCovesSetupException( + "No dbt project found in the current directory." + "Please create one before setting up dbt components." 
+ ) + self.copier_context["is_new_project"] = True + elif len(dbt_projects) == 1: + self.copier_context["dbt_project_dir"] = dbt_projects[0].get("path") + self.copier_context["dbt_project_name"] = dbt_projects[0].get("name") + else: + project_dir = questionary.select( + "In which dbt project would you like to perform setup?", + choices=[prj.get("path") for prj in dbt_projects], + ).ask() + self.copier_context["dbt_project_dir"] = project_dir + self.copier_context["dbt_project_name"] = [ + prj.get("name") for prj in dbt_projects if prj.get("path") == project_dir + ][0] + + # dbt profile data gathering + airflow_profile_path = os.environ.get("DATACOVES__AIRFLOW_DBT_PROFILE_PATH", "automate/dbt") + if not airflow_profile_path: + airflow_profile_path = "automate/dbt" + self.copier_context["airflow_profile_path"] = airflow_profile_path + + dbt_adapter = os.environ.get("DATACOVES__DBT_ADAPTER") + if dbt_adapter: + self.copier_context["dbt_adapter"] = dbt_adapter + + # sample DAG data + if "setup_airflow_dag" in services: + dags_path = os.environ.get("DATACOVES__AIRFLOW_DAGS_PATH") + if not dags_path: + self.copier_context["airflow_dags_confirm_path"] = True + self.copier_context["tentative_dags_path"] = "orchestrate/dags" + else: + self.copier_context["dags_path"] = dags_path + + yml_dags_path = os.environ.get("DATACOVES__AIRFLOW_DAGS_YML_PATH") + if not yml_dags_path: + self.copier_context["yml_dags_confirm_path"] = True + self.copier_context["tentative_yml_dags_path"] = "orchestrate/dag_yml_definitions" + else: + self.copier_context["yml_dags_path"] = yml_dags_path + for service in services: + self.copier_context[service] = True + copier.run_auto( + src_path="git@github.com:datacoves/setup_template.git", + dst_path=self.repo_path, + data=self.copier_context, + quiet=self.get_config_value("quiet"), + ) + return 0 diff --git a/dbt_coves/tasks/setup/pre_commit.py b/dbt_coves/tasks/setup/pre_commit.py deleted file mode 100644 index d2ca4c83..00000000 --- 
a/dbt_coves/tasks/setup/pre_commit.py +++ /dev/null @@ -1,59 +0,0 @@ -from pathlib import Path - -import copier -import questionary -from rich.console import Console - -from dbt_coves.utils.tracking import trackable - -from .main import NonDbtBaseTask -from .utils import get_dbt_projects, get_git_root - -console = Console() - - -class SetupPrecommitException(Exception): - pass - - -class SetupPrecommitTask(NonDbtBaseTask): - """ - Task that runs pre-commit setup - """ - - @classmethod - def register_parser(cls, sub_parsers, base_subparser): - subparser = sub_parsers.add_parser( - "precommit", - parents=[base_subparser], - help="Set up pre-commit for dbt-coves project", - ) - subparser.set_defaults(cls=cls, which="precommit") - return subparser - - @trackable - def run(self) -> int: - self.setup_precommit() - return 0 - - @classmethod - def setup_precommit(self): - repo_root = get_git_root() - dbt_project_paths = get_dbt_projects(repo_root) - if not dbt_project_paths: - raise SetupPrecommitException( - "Your repository doesn't contain any dbt project where to install pre-commit into" - ) - data = {} - if len(dbt_project_paths) == 1: - data["dbt_project_dir"] = dbt_project_paths[0] - else: - data["dbt_project_dir"] = questionary.select( - "In which dbt project would you like to install pre-commit?", - choices=dbt_project_paths, - ).ask() - copier.run_auto( - src_path=str(Path(__file__).parent.joinpath("templates", "pre_commit").resolve()), - dst_path=repo_root, - data=data, - ) diff --git a/dbt_coves/tasks/setup/ssh.py b/dbt_coves/tasks/setup/ssh.py deleted file mode 100644 index ef45e131..00000000 --- a/dbt_coves/tasks/setup/ssh.py +++ /dev/null @@ -1,280 +0,0 @@ -import os -import subprocess -from pathlib import Path -from subprocess import CalledProcessError - -import questionary -from rich.console import Console - -from dbt_coves.tasks.base import NonDbtBaseTask -from dbt_coves.utils.shell import run_and_capture, shell_run -from dbt_coves.utils.tracking import 
trackable - -from .utils import print_row - -console = Console() - - -class SetupSSHException(Exception): - pass - - -class SetupSSHTask(NonDbtBaseTask): - """ - Task that runs ssh key generation, git repo clone and db connection setup - """ - - key_column_with = 50 - value_column_with = 30 - - @classmethod - def register_parser(cls, sub_parsers, base_subparser): - subparser = sub_parsers.add_parser( - "ssh", - parents=[base_subparser], - help="Set up SSH Key for dbt-coves project", - ) - subparser.add_argument( - "--open-ssl-public-key", - help="Determines whether an Open SSL key will also be generated", - action="store_true", - default=False, - ) - subparser.set_defaults(cls=cls, which="ssh") - cls.arg_parser = base_subparser - return subparser - - def setup_ssh(self): - ssh_status = "[red]MISSING[/red]" - ssh_configured = False - ssh_keys_dir = "~/.ssh/" - self.ssh_keys_dir_abs = os.path.abspath(Path(ssh_keys_dir).expanduser()) - - provided_key_path = f"{self.ssh_keys_dir_abs}/id_datacoves" - - key_path_abs = f"{self.ssh_keys_dir_abs}/id_ecdsa" - Path(self.ssh_keys_dir_abs).mkdir(parents=True, exist_ok=True) - - public_key_path_abs = f"{key_path_abs}.pub" - - found_keys = [ - file - for file in os.listdir(self.ssh_keys_dir_abs) - if "id_" in file.lower() and ".p" not in file.lower() - ] - - if found_keys: - ssh_status = "[green]FOUND :heavy_check_mark:[/green]" - print_row( - f"Checking for SSH keys in '{ssh_keys_dir}'", - ssh_status, - new_section=False, - ) - if len(found_keys) == 1: - selected_ssh_key = found_keys[0] - else: - selected_ssh_key = questionary.select( - "Which of these SSH Keys would you like to" - "associate to your dbt-coves project?:", - choices=found_keys, - ).ask() - - key_path_abs = f"{self.ssh_keys_dir_abs}/{selected_ssh_key}" - public_key_path_abs = f"{key_path_abs}.pub" - - ssh_configured = self.output_public_key_for_private(key_path_abs, public_key_path_abs) - else: - print_row(f"Checking for key in '{ssh_keys_dir}'", ssh_status, 
new_section=False) - action = ( - questionary.select( - "Would you like to provide your existent" - "private SSH key or generate a new one?", - choices=["Provide", "Generate"], - ) - .ask() - .lower() - ) - if action == "provide": - ssh_key = questionary.text("Please paste your private SSH key:").ask() - ssh_key += "\n" - with open(provided_key_path, "w") as file: - file.write(ssh_key) - - os.chmod(provided_key_path, 0o600) - - ssh_configured = self.transform_default_private(provided_key_path) - if action == "generate": - output = self.generate_ecdsa_keys(key_path_abs) - if output.returncode == 0: - console.print( - f"[green]:heavy_check_mark: SSH key generated on '{key_path_abs}'[/green]" - ) - ssh_configured = self.output_public_keys(public_key_path_abs) - if ssh_configured: - return 0 - else: - raise Exception( - "You must first configure you SSH key in your Git server" - "then rerun 'dbt-coves setup'" - ) - - @trackable - def run(self) -> int: - return self.setup_ssh() - - def generate_ecdsa_keys(self, key_path_abs): - try: - return shell_run(args=["ssh-keygen", "-q", "-t", "ecdsa", "-f", key_path_abs]) - except CalledProcessError as e: - raise SetupSSHException(e.output) - - def generate_ecdsa_public_key(self, private_path_abs): - keygen_args = [ - "ssh-keygen", - "-y", - "-f", - private_path_abs, - ">>", - f"{private_path_abs}.pub", - ] - try: - return shell_run(args=keygen_args) - except CalledProcessError as e: - raise SetupSSHException(e.output) - - def transform_default_private(self, provided_key_path): - types_filename_dict = { - "ssh-dss": "id_dsa", - "ecdsa-sha2-nistp256": "id_ecdsa", - "ssh-ed25519": "id_ed25519", - "ssh-rsa": "id_rsa", - } - # Get public key from private - public_output, public_type = self.ssh_keygen_get_public_key(provided_key_path) - - ssh_file_name = types_filename_dict.get(public_type) - - if not ssh_file_name: - os.remove(provided_key_path) - raise SetupSSHException( - f"Provided ssh key type {public_type} is not supported" - 
"(must provide dsa/ecdsa/ed25519/rsa). Please try again" - ) - - private_key_path = provided_key_path.replace("id_datacoves", ssh_file_name) - - os.rename(provided_key_path, private_key_path) - public_key_path = f"{private_key_path}.pub" - - with open(public_key_path, "w") as file: - file.write(public_output) - - openssl_private_path = private_key_path if public_type == "ssh-rsa" else None - - # Return public key to configure - return self.output_public_keys(public_key_path, openssl_private_path) - - def gen_openssl_private_key(self, openssl_private_key_path): - # openssl genrsa 2048 | openssl pkcs8 -topk8 -inform PEM -out rsa_key.p8 -nocrypt - keygen_args_genrsa = [ - "openssl", - "genrsa", - "2048", - ] - keygen_args_openssl = [ - "openssl", - "pkcs8", - "-topk8", - "-inform", - "PEM", - "-out", - openssl_private_key_path, - "-nocrypt", - ] - try: - ps = subprocess.Popen(keygen_args_genrsa, stdout=subprocess.PIPE) - subprocess.check_output(keygen_args_openssl, stdin=ps.stdout) - except CalledProcessError as e: - raise SetupSSHException(e.output) - - def gen_print_openssl_public_key( - self, openssl_private_key_path, openssl_public_key_path, private_generated - ): - keygen_args = [ - "openssl", - "rsa", - "-in", - openssl_private_key_path, - "-pubout", - "-out", - openssl_public_key_path, - ] - - openssl_public_output = run_and_capture(keygen_args) - if openssl_public_output.returncode != 0: - if private_generated: - raise SetupSSHException(openssl_public_output.stderr) - else: - raise ValueError( - "The private key provided can't be used to generate public RSA openssl keys." 
- ) - - console.print(f"\nOpenSSL public key saved at {openssl_public_key_path}") - console.print( - "Please configure the following key (yellow text) in services that require" - "OpenSSL public keys to authenticate you (snowflake, etc.)\n" - ) - openssl_pub_key = open(openssl_public_key_path, "r").read() - openssl_pub_key = openssl_pub_key.replace("-----BEGIN PUBLIC KEY-----\n", "").replace( - "-----END PUBLIC KEY-----\n", "" - ) - console.print(f"[yellow]{openssl_pub_key}[/yellow]") - - def gen_print_openssl_key( - self, generate_private, openssl_private_key_path, openssl_public_key_path - ): - if generate_private: - self.gen_openssl_private_key(openssl_private_key_path) - self.gen_print_openssl_public_key( - openssl_private_key_path, openssl_public_key_path, generate_private - ) - - def ssh_keygen_get_public_key(self, private_key_path): - keygen_args = ["ssh-keygen", "-y", "-f", private_key_path] - public_output = run_and_capture(keygen_args) - - public_type = public_output.stdout.split()[0] - if public_output.stderr: - raise SetupSSHException(public_output.stderr) - return public_output.stdout, public_type - - def output_public_key_for_private(self, private_path_abs, public_key_path_abs): - public_ssh_key, public_type = self.ssh_keygen_get_public_key(private_path_abs) - with open(public_key_path_abs, "w") as file: - file.write(public_ssh_key) - openssl_private_path = private_path_abs if public_type == "ssh-rsa" else None - return self.output_public_keys(public_key_path_abs, openssl_private_path) - - def output_public_keys(self, public_key_path_abs, openssl_priv_path=None): - openssl = self.get_config_value("open_ssl_public_key") - if openssl: - openssl_private_key_path = openssl_priv_path or f"{self.ssh_keys_dir_abs}/rsa_key.p8" - openssl_public_key_path = f"{self.ssh_keys_dir_abs}/rsa_key.pub" - self.gen_print_openssl_key( - openssl_priv_path is None, - openssl_private_key_path, - openssl_public_key_path, - ) - console.print( - "Please configure the following 
key (yellow text) in your" - "Git server (Gitlab, Github, Bitbucket, etc):\n" - ) - console.print(f"[yellow]{open(public_key_path_abs, 'r').read()}[/yellow]") - return questionary.confirm( - "Have you configured your services and Git server with the keys above?" - if openssl - else "Have you configured your Git server with the key above?" - ).ask() - - def get_config_value(self, key): - return self.coves_config.integrated["setup"][self.args.task][key] diff --git a/dbt_coves/tasks/setup/templates/pre_commit/copier.yml b/dbt_coves/tasks/setup/templates/pre_commit/copier.yml deleted file mode 100644 index 7ddd6d5f..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/copier.yml +++ /dev/null @@ -1,45 +0,0 @@ -# This is a minimum working example of how -# copier can be used to better manage dbt-coves -# files. It can be extended well beyond this. -dbt_project_dir: - type: str - help: Enter your dbt project directory - -use_dbt_checkpoint: - type: bool - default: false - help: Do you want to use dbt-checkpoint? - -use_sqlfluff: - type: bool - default: false - help: Do you want to use Sqlfluff? - -use_yamllint: - type: bool - default: false - help: Do you want to use Yamllint? - -adapter: - type: str - choices: - - snowflake - - bigquery - - redshift - - postgres - - spark - - sqlite - - duckdb - help: What dbt adapter are you using? - when: "{{use_sqlfluff}}" - -comma_style: - type: str - choices: - - leading - - trailing - help: Where should commas be placed? 
- default: trailing - when: "{{use_sqlfluff}}" - -_answers_file: ".dbt-coves" diff --git a/dbt_coves/tasks/setup/templates/pre_commit/{% if use_sqlfluff or use_yamllint or use_dbt_checkpoint %}pre-commit-config.yaml{% endif %}.jinja b/dbt_coves/tasks/setup/templates/pre_commit/{% if use_sqlfluff or use_yamllint or use_dbt_checkpoint %}pre-commit-config.yaml{% endif %}.jinja deleted file mode 100644 index 6c2926d9..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/{% if use_sqlfluff or use_yamllint or use_dbt_checkpoint %}pre-commit-config.yaml{% endif %}.jinja +++ /dev/null @@ -1,51 +0,0 @@ -files: ^{{ dbt_project_dir }}/models/ - -repos: -{% if use_dbt_checkpoint %} - repo: https://github.com/dbt-checkpoint/dbt-checkpoint - rev: v1.0.0 - - hooks: - - id: dbt-docs-generate - args: ["--cmd-flags", "++project+dir", "{{ dbt_project_dir }}", "++no+compile"] - - id: check-source-table-has-description - files: ^{{ dbt_project_dir }}/models/ - - - id: check-script-semicolon - - id: check-script-has-no-table-name - - id: check-script-ref-and-source - args: ["--manifest", "{{ dbt_project_dir }}/target/manifest.json"] - - id: check-model-has-description - args: ["--manifest", "{{ dbt_project_dir }}/target/manifest.json"] - - id: check-model-has-properties-file - args: ["--manifest", "{{ dbt_project_dir }}/target/manifest.json"] - - # This does not work with deferral because dbt docs generate does not include models that dont exist in current db - - id: check-model-has-all-columns - args: - [ - "--manifest", - "{{ dbt_project_dir }}/target/manifest.json", - "--catalog", - "{{ dbt_project_dir }}/target/catalog.json", - ]{% endif %} - -{% if use_sqlfluff %} - repo: https://github.com/sqlfluff/sqlfluff - # this is the version of sqlfluff, needs to be updated when using a new sqlfluff version (pip show sqlfluff) - rev: 2.0.3 - hooks: - - id: sqlfluff-lint - language: python - # Need these two dependencies. 
- # sqlfluff-templater-dbt should match the version of sqlfluff above in rev (pip show sqlfluff-templater-dbt) - # dbt-snowflake needs to match the version in {{ dbt_project_dir }} tab of Datacoves (pip show dbt-snowflake) - additional_dependencies: - ["sqlfluff-templater-dbt==2.0.3", "dbt-snowflake==1.3.1"] - args: [--config, {{ dbt_project_dir }}/.sqlfluff]{% endif %} - -{% if use_yamllint %} - repo: https://github.com/adrienverge/yamllint.git - rev: v1.17.0 - hooks: - - id: yamllint - args: [-c=.yamllint] - exclude: ^{{ dbt_project_dir }}/.dbt_coves/templates -{% endif %} diff --git a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluffignore{% endif %} b/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluffignore{% endif %} deleted file mode 100644 index d8c6307f..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluffignore{% endif %} +++ /dev/null @@ -1,7 +0,0 @@ -target/ -dbt_modules/ -dbt_packages/ -templates/ -macros/ -models_bq/ -models_rs diff --git a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluff{% endif %}.jinja b/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluff{% endif %}.jinja deleted file mode 100644 index 04819301..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_sqlfluff %}.sqlfluff{% endif %}.jinja +++ /dev/null @@ -1,89 +0,0 @@ -[sqlfluff] - -# Supported dialects https://docs.sqlfluff.com/en/stable/dialects.html -# Or run 'sqlfluff dialects' -dialect = {{adapter}} - -# One of [raw|jinja|python|placeholder] -templater = dbt - -# Comma separated list of rules to exclude, or None -exclude_rules = AM05, RF05, ST06 - -# Ignore linting errors found within sections of code coming directly from -# templated code (e.g. from within Jinja curly braces. 
Note that it does not -# ignore errors from literal code found within template loops. -ignore_templated_areas = True - -# CPU processes to use while linting. -# If positive, just implies number of processes. -# If negative or zero, implies number_of_cpus - specified_number. -# e.g. -1 means use all processors but one. 0 means all cpus. -processes = -1 - -# Max line length is set by default to be in line with the dbt style guide. -# https://github.com/dbt-labs/corp/blob/main/dbt_style_guide.md -# Set to zero or negative to disable checks. -max_line_length = 140 - -[sqlfluff:templater:dbt] -project_dir = ./ - -[sqlfluff:rules:aliasing.table] -# Aliasing preference for tables -aliasing = explicit - -[sqlfluff:rules:aliasing.column] -# Aliasing preference for columns -aliasing = explicit - -[sqlfluff:rules:aliasing.length] -min_alias_length = 3 - -[sqlfluff:rules:aliasing.forbid] -# Avoid table aliases in from clauses and join conditions. -# Disabled by default for all dialects unless explicitly enabled. -# We suggest instead using aliasing.length (AL06) in most cases. -force_enable = False - -[sqlfluff:rules:ambiguous.join] -# Fully qualify JOIN clause -fully_qualify_join_types = outer - -[sqlfluff:rules:ambiguous.column_references] -# GROUP BY/ORDER BY column references -group_by_and_order_by_style = consistent - -[sqlfluff:rules:capitalisation.keywords] -# Keywords -capitalisation_policy = lower - -[sqlfluff:rules:capitalisation.identifiers] -# Unquoted identifiers -extended_capitalisation_policy = lower - -[sqlfluff:rules:capitalisation.functions] -# Function names -extended_capitalisation_policy = lower - -[sqlfluff:rules:capitalisation.literals] -# Null & Boolean Literals -capitalisation_policy = lower - -[sqlfluff:rules:capitalisation.types] -# Data Types -extended_capitalisation_policy = lower - -[sqlfluff:rules:references.consistent] -# References must be consistently used -# Disabled for some dialects (e.g. 
bigquery) -single_table_references = unqualified - -[sqlfluff:rules:references.keywords] -# Comma separated list of words to ignore for this rule -# cases seems like a strange one to exclude, bug with "case"? -ignore_words = cases, date, desc, queries, role, storage, url, value, warehouse_name, warehouse_size, warehouse_type - -[sqlfluff:rules:structure.subquery] -# By default, allow subqueries in from clauses, but not join clauses -forbid_subquery_in = both diff --git a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_yamllint %}.yamllint{% endif %} b/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_yamllint %}.yamllint{% endif %} deleted file mode 100644 index 6c6bf8fa..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{% if use_yamllint %}.yamllint{% endif %} +++ /dev/null @@ -1,35 +0,0 @@ ---- - -yaml-files: - - '*.yaml' - - '*.yml' - - '.yamllint' - -rules: - braces: enable - brackets: enable - colons: enable - commas: enable - comments: - level: warning - comments-indentation: - level: warning - document-end: disable - document-start: disable - empty-lines: enable - empty-values: disable - hyphens: enable - indentation: enable - key-duplicates: enable - key-ordering: disable - new-line-at-end-of-file: enable - new-lines: enable - octal-values: disable - quoted-strings: disable - trailing-spaces: enable - truthy: - level: warning - # 120 chars should be enough, but don't fail if a line is longer - line-length: - max: 150 - level: warning diff --git a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{{ _copier_conf.answers_file }}-precommit.yaml.jinja b/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{{ _copier_conf.answers_file }}-precommit.yaml.jinja deleted file mode 100644 index 69141bbf..00000000 --- a/dbt_coves/tasks/setup/templates/pre_commit/{{ dbt_project_dir }}/{{ _copier_conf.answers_file }}-precommit.yaml.jinja +++ /dev/null @@ -1,2 
+0,0 @@ -# Changes here will be overwritten by Copier; NEVER EDIT MANUALLY -{{ _copier_answers|to_nice_yaml -}} diff --git a/dbt_coves/tasks/setup/utils.py b/dbt_coves/tasks/setup/utils.py index cf2f36e4..3f7c5839 100644 --- a/dbt_coves/tasks/setup/utils.py +++ b/dbt_coves/tasks/setup/utils.py @@ -6,6 +6,8 @@ from rich.console import Console from rich.table import Table +from dbt_coves.utils.yaml import open_yaml + console = Console() KEY_COLUMN_WIDTH = 50 @@ -43,11 +45,12 @@ def get_git_root(path=None): raise Exception(f"{path or 'current path'} doesn't belong to a git repository") -def get_dbt_projects(path=None): - if not path: - path = os.getcwd() - dbt_project_dirs = [] +def get_dbt_projects(path=os.getcwd()): + dbt_projects = [] for file in Path(path).rglob("dbt_project.yml"): if "dbt_packages" not in str(file): - dbt_project_dirs.append(str(file.relative_to(path).parent)) - return dbt_project_dirs + project_name = open_yaml(file)["name"] + project_path = str(file.relative_to(path).parent) + dbt_projects.append({"path": project_path, "name": project_name}) + + return dbt_projects diff --git a/dbt_coves/utils/flags.py b/dbt_coves/utils/flags.py index f7e2c438..02aba32b 100644 --- a/dbt_coves/utils/flags.py +++ b/dbt_coves/utils/flags.py @@ -134,8 +134,7 @@ def __init__(self, cli_parser: ArgumentParser) -> None: "current-dir": False, } self.setup = { - "ssh": {"open_ssl_public_key": False}, - "git": {"no_prompt": False}, + "no_prompt": False, } self.dbt = {"command": None, "project_dir": None, "virtualenv": None, "cleanup": False} self.data_sync = {"redshift": {"tables": []}, "snowflake": {"tables": []}} @@ -379,15 +378,12 @@ def parse_args(self, cli_args: List[str] = list()) -> None: if self.args.credentials: self.extract["fivetran"]["credentials"] = self.args.credentials - # setup ssh - if self.args.cls.__name__ == "SetupSSHTask": - if self.args.open_ssl_public_key: - self.setup["ssh"]["open_ssl_public_key"] = self.args.open_ssl_public_key - - # setup git - 
if self.args.cls.__name__ == "SetupGitTask": + # setup + if self.args.cls.__name__ == "SetupTask": if self.args.no_prompt: - self.setup["git"]["no_prompt"] = self.args.no_prompt + self.setup["no_prompt"] = self.args.no_prompt + if self.args.quiet: + self.setup["quiet"] = self.args.quiet # run dbt if self.args.cls.__name__ == "RunDbtTask": diff --git a/docs/2 - Commands/dbt/README.md b/docs/2 - Commands/dbt/README.md new file mode 100644 index 00000000..34614136 --- /dev/null +++ b/docs/2 - Commands/dbt/README.md @@ -0,0 +1,31 @@ +## dbt-coves dbt + +This dbt-coves command allows us to run dbt commands on special environments such as Airflow, or CI workers, with the possibility of changing dbt project location and activating a specific virtual environment in which to run the desired command. + +If the project directory is read-only (widely seen in Airflow projects), it is copied to a temporary folder to perform the desired execution. + +```shell +dbt-coves dbt <arguments> -- <command> +``` + +### Arguments + +`dbt-coves dbt` supports the following arguments + +```shell +--project-dir +# Path of the dbt project where command will be executed, e.g.: /opt/user/dbt_project +``` + +```shell +--virtualenv +# Virtual environment path, e.g.: /opt/user/virtualenvs/airflow +``` + +### Sample usage + +```shell +dbt-coves dbt --project-dir /opt/user/dbt_project --virtualenv /opt/user/virtualenvs/airflow -- run -s model --vars \"{key: value}\" +# Make sure to escape special characters such as quotation marks +# Double dash (--) between <arguments> and <command> is mandatory +``` diff --git a/docs/2 - Commands/extract/airbyte/README.md b/docs/2 - Commands/extract/airbyte/README.md new file mode 100644 index 00000000..cea3ebba --- /dev/null +++ b/docs/2 - Commands/extract/airbyte/README.md @@ -0,0 +1,32 @@ +## dbt-coves extract airbyte + +Extracts the configuration from your Airbyte sources, connections and destinations (excluding credentials) and stores it in the specified folder.
The main goal of this feature is to keep track of the configuration changes in your git repo, and roll back to a specific version when needed. + +```shell +dbt-coves extract airbyte +``` + +### Arguments + +`dbt-coves extract airbyte` supports the following arguments + +```shell +--path +# Path where configuration JSON files will be created, e.g. '/var/data/airbyte_extract/' +``` + +```shell +--host +# Airbyte's API hostname, e.g. 'http://airbyte-server' +``` + +```shell +--port +# Airbyte's API port, e.g. '8001' +``` + +### Sample usage + +```shell +dbt-coves extract airbyte --host http://airbyte-server --port 8001 --path /config/workspace/load/airbyte +```