Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: project governance llm checks #45

Merged
merged 13 commits into from
Dec 30, 2024
13 changes: 13 additions & 0 deletions src/datapilot/clients/altimate/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,3 +91,16 @@ def validate_upload_to_integration(self):
def start_dbt_ingestion(self, params=None):
endpoint = "/dbt/v1/start_dbt_ingestion"
return self.post(endpoint, data=params)

def get_project_governance_llm_checks(self, params=None):
    """Fetch the project governance LLM checks.

    Issues a GET via ``self.get`` against ``/project_governance/checks``.

    :param params: Optional value forwarded unchanged as the ``params``
        keyword argument of ``self.get``.
    :return: Whatever ``self.get`` returns for this endpoint.
    """
    return self.get("/project_governance/checks", params=params)

def run_project_governance_llm_checks(self, manifest, catalog, check_names):
    """Request a run of the named project governance LLM checks.

    Bundles the three arguments into a single payload and sends it via
    ``self.post`` to ``/project_governance/run_checks``.

    :param manifest: Value sent under the ``"manifest"`` key.
    :param catalog: Value sent under the ``"catalog"`` key.
    :param check_names: Value sent under the ``"check_names"`` key.
    :return: Whatever ``self.post`` returns for this endpoint.
    """
    payload = {}
    payload["manifest"] = manifest
    payload["catalog"] = catalog
    payload["check_names"] = check_names
    return self.post("/project_governance/run_checks", data=payload)
21 changes: 21 additions & 0 deletions src/datapilot/clients/altimate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,24 @@ def start_dbt_ingestion(api_token, tenant, dbt_core_integration_id, dbt_core_int
"ok": False,
"message": "Error starting dbt ingestion worker. ",
}


def get_project_governance_llm_checks(api_token, tenant, backend_url):
    """Fetch the project governance LLM checks through a fresh ``APIClient``.

    :param api_token: Passed to ``APIClient`` as ``api_token``.
    :param tenant: Passed to ``APIClient`` as ``tenant``.
    :param backend_url: Passed to ``APIClient`` as ``base_url``.
    :return: The result of ``APIClient.get_project_governance_llm_checks()``.
    """
    return APIClient(
        api_token=api_token,
        base_url=backend_url,
        tenant=tenant,
    ).get_project_governance_llm_checks()


def run_project_governance_llm_checks(api_token, tenant, backend_url, manifest, catalog, check_names):
    """Run the named project governance LLM checks through a fresh ``APIClient``.

    :param api_token: Passed to ``APIClient`` as ``api_token``.
    :param tenant: Passed to ``APIClient`` as ``tenant``.
    :param backend_url: Passed to ``APIClient`` as ``base_url``.
    :param manifest: Forwarded as the first argument of the client call.
    :param catalog: Forwarded as the second argument of the client call.
    :param check_names: Forwarded as the third argument of the client call.
    :return: The result of ``APIClient.run_project_governance_llm_checks(...)``.
    """
    client = APIClient(api_token=api_token, base_url=backend_url, tenant=tenant)
    response = client.run_project_governance_llm_checks(manifest, catalog, check_names)
    return response
27 changes: 26 additions & 1 deletion src/datapilot/core/platforms/dbt/cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import click

from datapilot.clients.altimate.utils import check_token_and_instance
from datapilot.clients.altimate.utils import get_project_governance_llm_checks
from datapilot.clients.altimate.utils import onboard_file
from datapilot.clients.altimate.utils import run_project_governance_llm_checks
from datapilot.clients.altimate.utils import start_dbt_ingestion
from datapilot.clients.altimate.utils import validate_credentials
from datapilot.clients.altimate.utils import validate_permissions
Expand All @@ -28,6 +30,8 @@ def dbt():


@dbt.command("project-health")
@click.option("--token", prompt="API Token", help="Your API token for authentication.")
@click.option("--instance-name", prompt="Instance Name", help="Your tenant ID.")
@click.option(
"--manifest-path",
required=True,
Expand All @@ -49,7 +53,10 @@ def dbt():
default=None,
help="Selective model testing. Specify one or more models to run tests on.",
)
def project_health(manifest_path, catalog_path, config_path=None, select=None):
@click.option("--backend-url", required=False, help="Altimate's Backend URL", default="https://api.myaltimate.com")
def project_health(
token, instance_name, manifest_path, catalog_path, config_path=None, select=None, backend_url="https://api.myaltimate.com"
):
"""
Validate the DBT project's configuration and structure.
:param manifest_path: Path to the DBT manifest file.
Expand All @@ -62,6 +69,11 @@ def project_health(manifest_path, catalog_path, config_path=None, select=None):
selected_models = select.split(" ")
manifest = load_manifest(manifest_path)
catalog = load_catalog(catalog_path) if catalog_path else None

llm_checks = get_project_governance_llm_checks(token, instance_name, backend_url)
check_names = [check["name"] for check in llm_checks if check["alias"] not in config.get("disabled_insights", [])]
llm_check_results = run_project_governance_llm_checks(token, instance_name, backend_url, manifest, catalog, check_names)

insight_generator = DBTInsightGenerator(manifest=manifest, catalog=catalog, config=config, selected_models=selected_models)
reports = insight_generator.run()

Expand All @@ -85,6 +97,19 @@ def project_health(manifest_path, catalog_path, config_path=None, select=None):
click.echo("--" * 50)
click.echo(tabulate_data(project_report, headers="keys"))

if llm_check_results:
click.echo("--" * 50)
click.echo("Project Governance LLM Insights")
click.echo("--" * 50)
for check in llm_check_results["results"]:
click.echo(f"Check: {check['name']}")
for answer in check["answer"]:
click.echo(f"Rule: {answer['Rule']}")
click.echo(f"Location: {answer['Location']}")
click.echo(f"Issue: {answer['Issue']}")
click.echo(f"Fix: {answer['Fix']}")
click.echo("\n")


@dbt.command("onboard")
@click.option("--token", prompt="API Token", help="Your API token for authentication.")
Expand Down
Loading