Skip to content

Commit

Permalink
fix: Decoding non-unicode characters raises unhandled exception (aws#554
Browse files Browse the repository at this point in the history
)

* fix: Don't crash on decoding failure

* Format files

* Fix Windows test failure

* Add comment for test case

* Update paths to be absolute
  • Loading branch information
mildaniel authored Oct 10, 2023
1 parent 9656295 commit 2dcdd2a
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 12 deletions.
27 changes: 26 additions & 1 deletion aws_lambda_builders/utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Common utilities for the library
"""

import locale
import logging
import os
import shutil
Expand Down Expand Up @@ -231,3 +231,28 @@ def extract_tarfile(tarfile_path: Union[str, os.PathLike], unpack_dir: Union[str
raise tarfile.ExtractError("Attempted Path Traversal in Tar File")

tar.extractall(unpack_dir)


def decode(to_decode: bytes, encoding: Optional[str] = None) -> str:
    """
    Perform a "safe" decoding of a series of bytes. If the decoding works, returns the decoded
    text with leading and trailing whitespace stripped. If the bytes are not valid in the chosen
    encoding, returns an empty string instead of throwing an exception.

    Parameters
    ----------
    to_decode: bytes
        Series of bytes to be decoded
    encoding: Optional[str]
        Encoding type. If None (or empty), the locale's preferred encoding is used.

    Returns
    -------
    str
        Decoded, whitespace-stripped string if decoding succeeds, empty string if decoding fails
    """
    encoding = encoding or locale.getpreferredencoding()
    try:
        return to_decode.decode(encoding).strip()
    except UnicodeDecodeError:
        # Best effort by design: undecodable output must not abort the caller.
        # Lazy %-style arguments avoid building the message when DEBUG logging is disabled.
        LOG.debug("Unable to decode bytes: %s with encoding: %s", to_decode, encoding)
        return ""
9 changes: 4 additions & 5 deletions aws_lambda_builders/workflows/dotnet_clipackage/dotnetcli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@
Wrapper around calls to dotnet CLI through a subprocess.
"""

import locale
import logging

from .utils import OSUtils
from aws_lambda_builders.utils import decode
from aws_lambda_builders.workflows.dotnet_clipackage.utils import OSUtils

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -52,15 +52,14 @@ def run(self, args, cwd=None):
# DotNet output is in system locale dependent encoding
# https://learn.microsoft.com/en-us/dotnet/api/system.console.outputencoding?view=net-6.0#remarks
# "The default code page that the console uses is determined by the system locale."
encoding = locale.getpreferredencoding()
p = self.os_utils.popen(invoke_dotnet, stdout=self.os_utils.pipe, stderr=self.os_utils.pipe, cwd=cwd)

out, err = p.communicate()

# The package command contains lots of useful information on how the package was created and
# information when the package command was not successful. For that reason the output is
# always written to the output to help developers diagnose issues.
LOG.info(out.decode(encoding).strip())
LOG.info(decode(out))

if p.returncode != 0:
raise DotnetCLIExecutionError(message=err.decode(encoding).strip())
raise DotnetCLIExecutionError(message=decode(err))
4 changes: 2 additions & 2 deletions aws_lambda_builders/workflows/dotnet_clipackage/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import subprocess
import zipfile

from aws_lambda_builders.utils import which
from aws_lambda_builders.utils import decode, which

LOG = logging.getLogger(__name__)

Expand Down Expand Up @@ -96,7 +96,7 @@ def _extract(self, file_info, output_dir, zip_ref):
if not self._is_symlink(file_info):
return zip_ref.extract(file_info, output_dir)

source = zip_ref.read(file_info.filename).decode("utf8")
source = decode(zip_ref.read(file_info.filename))
link_name = os.path.normpath(os.path.join(output_dir, file_info.filename))

# make leading dirs if needed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import re

from aws_lambda_builders.utils import decode
from aws_lambda_builders.validator import RuntimeValidator
from aws_lambda_builders.workflows.java.utils import OSUtils

Expand Down Expand Up @@ -81,6 +82,6 @@ def _get_jvm_string(self, gradle_path):
return None

for line in stdout.splitlines():
l_dec = line.decode()
l_dec = decode(line)
if l_dec.startswith("JVM"):
return l_dec
8 changes: 5 additions & 3 deletions aws_lambda_builders/workflows/java_maven/maven.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import logging
import subprocess

from aws_lambda_builders.utils import decode

LOG = logging.getLogger(__name__)


Expand All @@ -28,10 +30,10 @@ def build(self, scratch_dir):
args = ["clean", "install"]
ret_code, stdout, _ = self._run(args, scratch_dir)

LOG.debug("Maven logs: %s", stdout.decode("utf8").strip())
LOG.debug("Maven logs: %s", decode(stdout))

if ret_code != 0:
raise MavenExecutionError(message=stdout.decode("utf8").strip())
raise MavenExecutionError(message=decode(stdout))

def copy_dependency(self, scratch_dir):
include_scope = "runtime"
Expand All @@ -40,7 +42,7 @@ def copy_dependency(self, scratch_dir):
ret_code, stdout, _ = self._run(args, scratch_dir)

if ret_code != 0:
raise MavenExecutionError(message=stdout.decode("utf8").strip())
raise MavenExecutionError(message=decode(stdout))

def _run(self, args, cwd=None):
p = self.os_utils.popen(
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import platform

from unittest import TestCase
from unittest.mock import patch

from aws_lambda_builders import utils
from aws_lambda_builders.utils import decode


class Test_create_symlink_or_copy(TestCase):
Expand Down Expand Up @@ -30,3 +33,29 @@ def test_must_copy_if_symlink_fails(self, patched_copy_tree, pathced_os, patched

pathced_os.symlink.assert_called_once()
patched_copy_tree.assert_called_with(source_path, destination_path)


class TestDecode(TestCase):
    """Unit tests for the ``decode`` helper in ``aws_lambda_builders.utils``."""

    @patch("aws_lambda_builders.utils.locale")
    def test_does_not_crash_non_utf8_encoding(self, mock_locale):
        # Pin the preferred encoding to UTF-8 so the outcome does not depend on the host
        # platform or locale: a lone 0xDF byte (ISO-8859-1 "ß") is invalid UTF-8, so decoding
        # must fail and the helper must return an empty string instead of raising.
        mock_locale.getpreferredencoding.return_value = "utf-8"
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message)
        self.assertEqual(response, "")

    def test_is_able_to_decode_non_utf8_encoding(self):
        # An explicitly supplied encoding is used for decoding.
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message, "iso-8859-1")
        self.assertEqual(response, "hello\n\n ß")

    @patch("aws_lambda_builders.utils.locale")
    def test_is_able_to_decode_non_utf8_locale(self, mock_locale):
        # With no explicit encoding, the locale's preferred encoding is used.
        mock_locale.getpreferredencoding.return_value = "iso-8859-1"
        message = "hello\n\n ß".encode("iso-8859-1")
        response = decode(message)
        self.assertEqual(response, "hello\n\n ß")

    def test_succeeds_with_utf8_encoding(self):
        # ASCII-only payload decoded with the host's real preferred encoding.
        message = "hello".encode("utf-8")
        response = decode(message)
        self.assertEqual(response, "hello")

0 comments on commit 2dcdd2a

Please sign in to comment.