From f743d0e51f76a64c3c14c32e4faf8f50ec705be3 Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Tue, 14 Nov 2023 14:44:39 -0500 Subject: [PATCH 1/2] Preserve custom loader with harden-pyyaml --- src/core_codemods/harden_pyyaml.py | 14 ++++++++++---- tests/codemods/base_codemod_test.py | 6 +++--- tests/codemods/test_harden_pyyaml.py | 20 ++++++++++++++++++++ 3 files changed, 33 insertions(+), 7 deletions(-) diff --git a/src/core_codemods/harden_pyyaml.py b/src/core_codemods/harden_pyyaml.py index 5638eccd..2bec5395 100644 --- a/src/core_codemods/harden_pyyaml.py +++ b/src/core_codemods/harden_pyyaml.py @@ -31,8 +31,11 @@ def rule(cls): - metavariable-pattern: metavariable: $ARG patterns: - - pattern-not: - pattern: yaml.SafeLoader + - pattern-either: + - pattern: yaml.Loader + - pattern: yaml.BaseLoader + - pattern: yaml.FullLoader + - pattern: yaml.UnsafeLoader - patterns: - pattern: yaml.load(...) - pattern-inside: | @@ -42,8 +45,11 @@ def rule(cls): - metavariable-pattern: metavariable: $ARG patterns: - - pattern-not: - pattern: yaml.SafeLoader + - pattern-either: + - pattern: yaml.Loader + - pattern: yaml.BaseLoader + - pattern: yaml.FullLoader + - pattern: yaml.UnsafeLoader """ diff --git a/tests/codemods/base_codemod_test.py b/tests/codemods/base_codemod_test.py index 78d7fbf0..1a80b099 100644 --- a/tests/codemods/base_codemod_test.py +++ b/tests/codemods/base_codemod_test.py @@ -68,7 +68,7 @@ def setup_class(cls): def results_by_id_filepath(self, input_code, file_path): with open(file_path, "w", encoding="utf-8") as tmp_file: - tmp_file.write(input_code) + tmp_file.write(dedent(input_code)) name = self.codemod.name() results = self.registry.match_codemods(codemod_include=[name]) @@ -82,7 +82,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected): registry=mock.MagicMock(), repo_manager=mock.MagicMock(), ) - input_tree = cst.parse_module(input_code) + input_tree = cst.parse_module(dedent(input_code)) all_results = self.results_by_id_filepath(input_code, file_path) results = all_results.results_for_rule_and_file(self.codemod.name(), file_path) self.file_context = FileContext( @@ -99,7 +99,7 @@ def run_and_assert_filepath(self, root, file_path, input_code, expected): ) output_tree = command_instance.transform_module(input_tree) - assert output_tree.code == expected + assert output_tree.code == dedent(expected) class BaseDjangoCodemodTest(BaseSemgrepCodemodTest): diff --git a/tests/codemods/test_harden_pyyaml.py b/tests/codemods/test_harden_pyyaml.py index 0e3e47f3..d783e2f1 100644 --- a/tests/codemods/test_harden_pyyaml.py +++ b/tests/codemods/test_harden_pyyaml.py @@ -60,3 +60,23 @@ def test_import_alias(self, tmpdir): deserialized_data = yam.load(data, yam.SafeLoader) """ self.run_and_assert(tmpdir, input_code, expected) + + def test_preserve_custom_loader(self, tmpdir): + expected = input_code = """ + import yaml + from custom import CustomLoader + + yaml.load(data, CustomLoader) + """ + + self.run_and_assert(tmpdir, input_code, expected) + + def test_preserve_custom_loader_kwarg(self, tmpdir): + expected = input_code = """ + import yaml + from custom import CustomLoader + + yaml.load(data, Loader=CustomLoader) + """ + + self.run_and_assert(tmpdir, input_code, expected) From 13063c8834a5818274ff553e3efeda065028174a Mon Sep 17 00:00:00 2001 From: Daniel D'Avella Date: Tue, 14 Nov 2023 14:54:15 -0500 Subject: [PATCH 2/2] Preserve kwarg --- integration_tests/test_harden_pyyaml.py | 5 +++-- src/core_codemods/harden_pyyaml.py | 4 +++- tests/codemods/test_harden_pyyaml.py | 4 ++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/integration_tests/test_harden_pyyaml.py b/integration_tests/test_harden_pyyaml.py index 8240b623..1a7caca4 100644 --- a/integration_tests/test_harden_pyyaml.py +++ b/integration_tests/test_harden_pyyaml.py @@ -10,9 +10,10 @@ class TestHardenPyyaml(BaseIntegrationTest): codemod = HardenPyyaml code_path = "tests/samples/unsafe_yaml.py" original_code, expected_new_code = original_and_expected_from_code_path( - code_path, [(3, "deserialized_data = yaml.load(data, yaml.SafeLoader)\n")] + code_path, + [(3, "deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)\n")], ) - expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n import yaml\n \n data = b"!!python/object/apply:subprocess.Popen \\\\n- ls"\n-deserialized_data = yaml.load(data, Loader=yaml.Loader)\n+deserialized_data = yaml.load(data, yaml.SafeLoader)\n' + expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n import yaml\n \n data = b"!!python/object/apply:subprocess.Popen \\\\n- ls"\n-deserialized_data = yaml.load(data, Loader=yaml.Loader)\n+deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)\n' expected_line_change = "4" change_description = HardenPyyaml.CHANGE_DESCRIPTION # expected exception because the yaml.SafeLoader protects against unsafe code diff --git a/src/core_codemods/harden_pyyaml.py b/src/core_codemods/harden_pyyaml.py index 2bec5395..d3523610 100644 --- a/src/core_codemods/harden_pyyaml.py +++ b/src/core_codemods/harden_pyyaml.py @@ -60,6 +60,8 @@ def on_result_found(self, original_node, updated_node): self.add_needed_import(self._module_name) new_args = [ *updated_node.args[:1], - self.parse_expression(f"{maybe_name}.SafeLoader"), + updated_node.args[1].with_changes( + value=self.parse_expression(f"{maybe_name}.SafeLoader") + ), ] return self.update_arg_target(updated_node, new_args) diff --git a/tests/codemods/test_harden_pyyaml.py b/tests/codemods/test_harden_pyyaml.py index d783e2f1..d5619b46 100644 --- a/tests/codemods/test_harden_pyyaml.py +++ b/tests/codemods/test_harden_pyyaml.py @@ -42,7 +42,7 @@ def test_all_unsafe_loaders_kwarg(self, tmpdir, loader): expected = """import yaml data = b'!!python/object/apply:subprocess.Popen \\n- ls' -deserialized_data = yaml.load(data, yaml.SafeLoader) +deserialized_data = yaml.load(data, Loader=yaml.SafeLoader) """ self.run_and_assert(tmpdir, input_code, expected) @@ -57,7 +57,7 @@ def test_import_alias(self, tmpdir): from yaml import Loader data = b'!!python/object/apply:subprocess.Popen \\n- ls' -deserialized_data = yam.load(data, yam.SafeLoader) +deserialized_data = yam.load(data, Loader=yam.SafeLoader) """ self.run_and_assert(tmpdir, input_code, expected)