def decode(to_decode: bytes, encoding: Optional[str] = None) -> str:
    """
    Perform a "safe" decoding of a series of bytes. If the decoding works, returns the decoded bytes
    with leading and trailing whitespace stripped. If the decoding fails, returns an empty string
    instead of throwing an exception.

    Parameters
    ----------
    to_decode: bytes
        Series of bytes to be decoded
    encoding: Optional[str]
        Encoding type. If None (or empty), the value of ``locale.getpreferredencoding()`` is used.

    Returns
    -------
    str
        Decoded, whitespace-stripped string if decoding succeeds, empty string if decoding fails
    """
    encoding = encoding or locale.getpreferredencoding()
    try:
        return to_decode.decode(encoding).strip()
    except (UnicodeDecodeError, LookupError):
        # UnicodeDecodeError: the bytes are not valid in `encoding`.
        # LookupError: `encoding` names a codec this interpreter does not know.
        # Both are decoding failures, so both honour the "never raises" contract above.
        # Lazy %-style arguments: the (possibly very large) bytes repr is only rendered
        # when DEBUG logging is actually enabled.
        LOG.debug("Unable to decode bytes: %s with encoding: %s", to_decode, encoding)
        return ""
""" -import locale import logging -from .utils import OSUtils +from aws_lambda_builders.utils import decode +from aws_lambda_builders.workflows.dotnet_clipackage.utils import OSUtils LOG = logging.getLogger(__name__) @@ -52,7 +52,6 @@ def run(self, args, cwd=None): # DotNet output is in system locale dependent encoding # https://learn.microsoft.com/en-us/dotnet/api/system.console.outputencoding?view=net-6.0#remarks # "The default code page that the console uses is determined by the system locale." - encoding = locale.getpreferredencoding() p = self.os_utils.popen(invoke_dotnet, stdout=self.os_utils.pipe, stderr=self.os_utils.pipe, cwd=cwd) out, err = p.communicate() @@ -60,7 +59,7 @@ def run(self, args, cwd=None): # The package command contains lots of useful information on how the package was created and # information when the package command was not successful. For that reason the output is # always written to the output to help developers diagnose issues. - LOG.info(out.decode(encoding).strip()) + LOG.info(decode(out)) if p.returncode != 0: - raise DotnetCLIExecutionError(message=err.decode(encoding).strip()) + raise DotnetCLIExecutionError(message=decode(err)) diff --git a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py index b04e8dcb5..a1c0608f3 100644 --- a/aws_lambda_builders/workflows/dotnet_clipackage/utils.py +++ b/aws_lambda_builders/workflows/dotnet_clipackage/utils.py @@ -7,7 +7,7 @@ import subprocess import zipfile -from aws_lambda_builders.utils import which +from aws_lambda_builders.utils import decode, which LOG = logging.getLogger(__name__) @@ -96,7 +96,7 @@ def _extract(self, file_info, output_dir, zip_ref): if not self._is_symlink(file_info): return zip_ref.extract(file_info, output_dir) - source = zip_ref.read(file_info.filename).decode("utf8") + source = decode(zip_ref.read(file_info.filename)) link_name = os.path.normpath(os.path.join(output_dir, file_info.filename)) # make 
leading dirs if needed diff --git a/aws_lambda_builders/workflows/java_gradle/gradle_validator.py b/aws_lambda_builders/workflows/java_gradle/gradle_validator.py index 5a190c183..2972e75b2 100644 --- a/aws_lambda_builders/workflows/java_gradle/gradle_validator.py +++ b/aws_lambda_builders/workflows/java_gradle/gradle_validator.py @@ -5,6 +5,7 @@ import logging import re +from aws_lambda_builders.utils import decode from aws_lambda_builders.validator import RuntimeValidator from aws_lambda_builders.workflows.java.utils import OSUtils @@ -81,6 +82,6 @@ def _get_jvm_string(self, gradle_path): return None for line in stdout.splitlines(): - l_dec = line.decode() + l_dec = decode(line) if l_dec.startswith("JVM"): return l_dec diff --git a/aws_lambda_builders/workflows/java_maven/maven.py b/aws_lambda_builders/workflows/java_maven/maven.py index 5f4dc614a..dbede6a09 100644 --- a/aws_lambda_builders/workflows/java_maven/maven.py +++ b/aws_lambda_builders/workflows/java_maven/maven.py @@ -5,6 +5,8 @@ import logging import subprocess +from aws_lambda_builders.utils import decode + LOG = logging.getLogger(__name__) @@ -28,10 +30,10 @@ def build(self, scratch_dir): args = ["clean", "install"] ret_code, stdout, _ = self._run(args, scratch_dir) - LOG.debug("Maven logs: %s", stdout.decode("utf8").strip()) + LOG.debug("Maven logs: %s", decode(stdout)) if ret_code != 0: - raise MavenExecutionError(message=stdout.decode("utf8").strip()) + raise MavenExecutionError(message=decode(stdout)) def copy_dependency(self, scratch_dir): include_scope = "runtime" @@ -40,7 +42,7 @@ def copy_dependency(self, scratch_dir): ret_code, stdout, _ = self._run(args, scratch_dir) if ret_code != 0: - raise MavenExecutionError(message=stdout.decode("utf8").strip()) + raise MavenExecutionError(message=decode(stdout)) def _run(self, args, cwd=None): p = self.os_utils.popen( diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index 97d2ec16f..ab703765c 100644 --- a/tests/unit/test_utils.py +++ 
class TestDecode(TestCase):
    """Unit tests for ``decode``: success, failure and locale-fallback paths."""

    @patch("aws_lambda_builders.utils.locale")
    def test_does_not_crash_non_utf8_encoding(self, mock_locale):
        # Pin the preferred encoding instead of guessing it from the operating system:
        # the host locale is not determined by the platform name, so an OS-based
        # expectation makes this test environment-dependent.
        mock_locale.getpreferredencoding.return_value = "utf-8"
        message = "hello\n\n ß".encode("iso-8859-1")
        # 0xDF is not a valid UTF-8 sequence on its own, so decoding must fail and
        # the helper must return an empty string rather than raise.
        response = decode(message)
        self.assertEqual(response, "")

    def test_is_able_to_decode_non_utf8_encoding(self):
        # An explicitly supplied encoding is used as-is.
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message, "iso-8859-1")
        self.assertEqual(response, "hello\n\n ß")

    @patch("aws_lambda_builders.utils.locale")
    def test_isa_able_to_decode_non_utf8_locale(self, mock_locale):
        # With no explicit encoding, the locale's preferred encoding is used.
        mock_locale.getpreferredencoding.return_value = "iso-8859-1"
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message)
        self.assertEqual(response, "hello\n\n ß")

    def test_succeeds_with_utf8_encoding(self):
        # Pure-ASCII payload decoded with the real (unmocked) locale encoding.
        message = "hello".encode("utf-8")
        response = decode(message)
        self.assertEqual(response, "hello")