Skip to content

Commit

Permalink
[BACKPORT 1.7] Exclude password-like fields for considering reparse (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
ChenyuLInx authored Apr 8, 2024
1 parent b8681a3 commit 947f397
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 33 deletions.
6 changes: 6 additions & 0 deletions .changes/unreleased/Fixes-20240402-135556.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
kind: Fixes
body: Exclude password-like fields for considering reparse
time: 2024-04-02T13:55:56.169953-07:00
custom:
Author: ChenyuLInx
Issue: "9795"
30 changes: 11 additions & 19 deletions core/dbt/parser/manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -820,13 +820,6 @@ def is_partial_parsable(self, manifest: Manifest) -> Tuple[bool, Optional[str]]:
)
valid = False
reparse_reason = ReparseReason.proj_env_vars_changed
if (
self.manifest.state_check.profile_env_vars_hash
!= manifest.state_check.profile_env_vars_hash
):
fire_event(UnableToPartialParse(reason="env vars used in profiles.yml have changed"))
valid = False
reparse_reason = ReparseReason.prof_env_vars_changed

missing_keys = {
k
Expand Down Expand Up @@ -965,18 +958,18 @@ def build_manifest_state_check(self):
env_var_str += f"{key}:{config.project_env_vars[key]}|"
project_env_vars_hash = FileHash.from_contents(env_var_str)

# Create a FileHash of the env_vars in the project
key_list = list(config.profile_env_vars.keys())
key_list.sort()
env_var_str = ""
for key in key_list:
env_var_str += f"{key}:{config.profile_env_vars[key]}|"
profile_env_vars_hash = FileHash.from_contents(env_var_str)
# Create a hash of the connection_info, which user has access to in
# jinja context. Thus attributes here may affect the parsing result.
# Ideally we should not expose all of the connection info to the jinja.

# Create a FileHash of the profile file
profile_path = os.path.join(get_flags().PROFILES_DIR, "profiles.yml")
with open(profile_path) as fp:
profile_hash = FileHash.from_contents(fp.read())
# Renaming this variable mean that we will have to do a whole lot more
# change to make sure the previous manifest can be loaded correctly.
# This is an example of naming should be chosen based on the functionality
# rather than the implementation details.
connection_keys = list(config.credentials.connection_info())
# avoid reparsing because of ordering issues
connection_keys.sort()
profile_hash = FileHash.from_contents(pprint.pformat(connection_keys))

# Create a FileHashes for dbt_project for all dependencies
project_hashes = {}
Expand All @@ -988,7 +981,6 @@ def build_manifest_state_check(self):
# Create the ManifestStateCheck object
state_check = ManifestStateCheck(
project_env_vars_hash=project_env_vars_hash,
profile_env_vars_hash=profile_env_vars_hash,
vars_hash=vars_hash,
profile_hash=profile_hash,
project_hashes=project_hashes,
Expand Down
29 changes: 29 additions & 0 deletions tests/functional/partial_parsing/test_partial_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
run_dbt_and_capture,
rename_dir,
)
import yaml
from tests.functional.utils import up_one
from dbt.tests.fixtures.project import write_project_files
from tests.functional.partial_parsing.fixtures import (
Expand Down Expand Up @@ -824,3 +825,31 @@ def test_pp_renamed_project_dir_changed_project_contents(self, project):
run_dbt(["deps"])
len(run_dbt(["--partial-parse", "seed"])) == 1
len(run_dbt(["--partial-parse", "run"])) == 3


class TestProfileChanges:
@pytest.fixture(scope="class")
def models(self):
return {
"model.sql": "select 1 as id",
}

def test_profile_change(self, project, dbt_profile_data):
# Fist run not partial parsing
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because saved manifest not found" in stdout

_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout

# change dbname which is included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["dbname"] = "dbt2"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing because profile has changed" in stdout

# Change the password which is not included in the connection_info
dbt_profile_data["test"]["outputs"]["default"]["pass"] = "another_password"
write_file(yaml.safe_dump(dbt_profile_data), project.profiles_dir, "profiles.yml")
_, stdout = run_dbt_and_capture(["parse"])
assert "Unable to do partial parsing" not in stdout
21 changes: 7 additions & 14 deletions tests/functional/partial_parsing/test_pp_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,40 +313,33 @@ def dbt_profile_target(self):
# calls 'load_config' before the tests are run.
# Note: only the specified profile is rendered, so there's no
# point it setting env_vars in non-used profiles.
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"
return {
"type": "postgres",
"threads": 4,
"host": "localhost",
"host": "{{ env_var('ENV_VAR_HOST') }}",
"port": 5432,
"user": "{{ env_var('ENV_VAR_USER') }}",
"pass": "{{ env_var('ENV_VAR_PASS') }}",
"user": "root",
"pass": "password",
"dbname": "dbt",
}

def test_profile_env_vars(self, project, logs_dir):

# Initial run
os.environ["ENV_VAR_USER"] = "root"
os.environ["ENV_VAR_PASS"] = "password"
os.environ["ENV_VAR_HOST"] = "localhost"

run_dbt(["run"])
manifest = get_manifest(project.project_root)
env_vars_checksum = manifest.state_check.profile_env_vars_hash.checksum

# Change env_vars, the user doesn't exist, this should fail
os.environ["ENV_VAR_USER"] = "fake_user"
os.environ["ENV_VAR_HOST"] = "wrong_host"

# N.B. run_dbt_and_capture won't work here because FailedToConnectError ends the test entirely
with pytest.raises(FailedToConnectError):
run_dbt(["run"], expect_pass=False)

log_output = Path(logs_dir, "dbt.log").read_text()
assert "env vars used in profiles.yml have changed" in log_output

manifest = get_manifest(project.project_root)
assert env_vars_checksum != manifest.state_check.profile_env_vars_hash.checksum
assert "Unable to do partial parsing because profile has changed" in log_output


class TestProfileSecretEnvVars:
Expand Down
23 changes: 23 additions & 0 deletions tests/unit/test_parse_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,18 @@ def _new_file(self, searched, name, match):


class TestPartialParse(unittest.TestCase):
def setUp(self) -> None:
mock_project = MagicMock(RuntimeConfig)
mock_project.cli_vars = ""
mock_project.args = MagicMock()
mock_project.args.profile = "test"
mock_project.args.target = "test"
mock_project.project_env_vars = {}
mock_project.profile_env_vars = {}
mock_project.project_target_path = "mock_target_path"
mock_project.credentials = MagicMock()
self.mock_project = mock_project

@patch("dbt.parser.manifest.ManifestLoader.build_manifest_state_check")
@patch("dbt.parser.manifest.os.path.exists")
@patch("dbt.parser.manifest.open")
Expand All @@ -121,3 +133,14 @@ def test_partial_parse_file_path(self, patched_open, patched_os_exist, patched_s
ManifestLoader(mock_project, {})
# if specified in flags, we use the specified path
patched_open.assert_called_with("specified_partial_parse_path", "rb")

def test_profile_hash_change(self):
# This test validate that the profile_hash is updated when the connection keys change
profile_hash = "750bc99c1d64ca518536ead26b28465a224be5ffc918bf2a490102faa5a1bcf5"
self.mock_project.credentials.connection_info.return_value = "test"
set_from_args(Namespace(), {})
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum == profile_hash
self.mock_project.credentials.connection_info.return_value = "test1"
manifest = ManifestLoader(self.mock_project, {})
assert manifest.manifest.state_check.profile_hash.checksum != profile_hash

0 comments on commit 947f397

Please sign in to comment.