From cf43d08ed99955d00d8cc3909ad62e80eaf00ec6 Mon Sep 17 00:00:00 2001 From: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> Date: Tue, 29 Oct 2024 05:54:03 -0300 Subject: [PATCH] #990:Implemented escape sequence for jar option with trailing whitespaces (#472) related to exasol/script-languages-release#990 --- .../javacontainer/script_options/converter.cc | 10 +-- .../script_options/parser_ctpg.cc | 13 +++- .../script_options/string_ops.cc | 77 +++++++++++++++++++ .../javacontainer/script_options/string_ops.h | 2 + .../script_options/test/converter_test.cc | 1 - .../script_options/test/string_ops_tests.cc | 32 ++++++++ .../cpp/javacontainer_extractor_v2_test.cc | 16 ++++ .../test/cpp/javacontainer_test.cc | 14 ---- 8 files changed, 143 insertions(+), 22 deletions(-) diff --git a/exaudfclient/base/javacontainer/script_options/converter.cc b/exaudfclient/base/javacontainer/script_options/converter.cc index 7139f2de..e937a5a5 100644 --- a/exaudfclient/base/javacontainer/script_options/converter.cc +++ b/exaudfclient/base/javacontainer/script_options/converter.cc @@ -1,5 +1,5 @@ #include "base/javacontainer/script_options/converter.h" -#include "base/javacontainer/script_options/string_ops.h" + #include namespace SWIGVMContainers { @@ -11,11 +11,11 @@ Converter::Converter() , m_whitespace(" \t\f\v") {} void Converter::convertScriptClassName(const std::string & value) { - std::string trimmedValue(value); - StringOps::trim(trimmedValue); - if (value != "") { - m_jvmOptions.push_back("-Dexasol.scriptclass=" + trimmedValue); + + if (value.empty()) { + return; } + m_jvmOptions.push_back("-Dexasol.scriptclass=" + value); } void Converter::convertJvmOption(const std::string & value) { diff --git a/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc b/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc index 3f576242..5e5c3650 100644 --- a/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc +++ b/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc @@ -3,6 +3,7 @@ #include "base/utils/exceptions.h" #include "base/script_options_parser/exception.h" #include "base/swig_factory/swig_factory.h" +#include "base/javacontainer/script_options/string_ops.h" #include @@ -24,7 +25,11 @@ void ScriptOptionLinesParserCTPG::prepareScriptCode(const std::string & scriptCo void ScriptOptionLinesParserCTPG::parseForScriptClass(std::function callback) { try { - parseForSingleOption(m_keywords.scriptClassKeyword(), callback); + parseForSingleOption(m_keywords.scriptClassKeyword(), [&](const std::string &option) { + std::string trimmedValue(option); + StringOps::trim(trimmedValue); + callback(trimmedValue); + }); } catch(const ExecutionGraph::OptionParserException& ex) { Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1623"); } @@ -40,7 +45,11 @@ void ScriptOptionLinesParserCTPG::parseForJvmOptions(std::function callback) { try { - parseForMultipleOption(m_keywords.jarKeyword(), callback); + parseForMultipleOption(m_keywords.jarKeyword(), [callback] (const std::string &option) { + std::string formattedValue(option); + StringOps::replaceTrailingEscapeWhitespaces(formattedValue); + callback(formattedValue); + }); } catch(const ExecutionGraph::OptionParserException& ex) { Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1625"); } diff --git a/exaudfclient/base/javacontainer/script_options/string_ops.cc b/exaudfclient/base/javacontainer/script_options/string_ops.cc index df1b6081..4f37fb97 100644 --- a/exaudfclient/base/javacontainer/script_options/string_ops.cc +++ b/exaudfclient/base/javacontainer/script_options/string_ops.cc @@ -1 +1,78 @@ #include "base/javacontainer/script_options/string_ops.h" +#include +#include + +namespace SWIGVMContainers { + +namespace JavaScriptOptions { + +namespace StringOps { + +inline uint32_t countBackslashesBackwards(const std::string & s, size_t pos) { + uint32_t retVal(0); + while (pos >= 0 && s.at(pos--) == '\\') retVal++; + return retVal; +} + +inline size_t replaceOnlyBackslashSequencesButKeepChar(std::string & s, size_t backslashStartIdx, size_t nBackslashes) { + const uint32_t nHalfBackslashes = (nBackslashes>>1); + if (nHalfBackslashes > 0) { + s = s.erase(backslashStartIdx, nHalfBackslashes ); + } + const size_t newBackslashEndIdx = backslashStartIdx + nHalfBackslashes; + return newBackslashEndIdx + 1; //+1 because of the last None-Whitespace character we need to keep +} + +inline size_t replaceBackslashSequencesAndWhitespaceSequence(std::string & s, size_t backslashStartIdx, + size_t nBackslashes, const char* replacement) { + const uint32_t nHalfBackslashes = (nBackslashes>>1); + s = s.erase(backslashStartIdx, nHalfBackslashes+1 ); //Delete also backslash of whitespace escape sequence + const size_t newBackslashEndIdx = backslashStartIdx + nHalfBackslashes; + s = s.replace(newBackslashEndIdx, 1, replacement); + return newBackslashEndIdx + 1; //+1 because of the replaced whitespace character +} + +inline size_t replaceCharAtPositionAndBackslashes(std::string & s, size_t pos, const char* replacement) { + const size_t nBackslashes = (pos > 0) ? countBackslashesBackwards(s, pos-1) : 0; + + const size_t backslashStartIdx = pos-nBackslashes; + if(nBackslashes % 2 == 0) { + return replaceOnlyBackslashSequencesButKeepChar(s, backslashStartIdx, nBackslashes); + } + else { + return replaceBackslashSequencesAndWhitespaceSequence(s, backslashStartIdx, nBackslashes, replacement); + } +} + +void replaceTrailingEscapeWhitespaces(std::string & s) { + if (s.size() > 0) { + const size_t lastNoneWhitespaceIdx = s.find_last_not_of(" \t\v\f"); + if (lastNoneWhitespaceIdx != std::string::npos) { + size_t firstWhitespaceAfterNoneWhitespaceIdx = lastNoneWhitespaceIdx + 1; + if (s.size() > 1) { + if(s[lastNoneWhitespaceIdx] == 't') { + firstWhitespaceAfterNoneWhitespaceIdx = replaceCharAtPositionAndBackslashes(s, lastNoneWhitespaceIdx, "\t"); + } else if (s[lastNoneWhitespaceIdx] == '\\' && lastNoneWhitespaceIdx < (s.size() - 1) && s.at(lastNoneWhitespaceIdx+1) == ' ') { + firstWhitespaceAfterNoneWhitespaceIdx = replaceCharAtPositionAndBackslashes(s, lastNoneWhitespaceIdx+1, " "); + } else if (s[lastNoneWhitespaceIdx] == 'f') { + firstWhitespaceAfterNoneWhitespaceIdx = replaceCharAtPositionAndBackslashes(s, lastNoneWhitespaceIdx, "\f"); + } else if (s[lastNoneWhitespaceIdx] == 'v') { + firstWhitespaceAfterNoneWhitespaceIdx = replaceCharAtPositionAndBackslashes(s, lastNoneWhitespaceIdx, "\v"); + } + } + if (firstWhitespaceAfterNoneWhitespaceIdx != std::string::npos && + firstWhitespaceAfterNoneWhitespaceIdx < s.size()) { + s = s.substr(0, firstWhitespaceAfterNoneWhitespaceIdx); //Right Trim the string + } + } else { + s = ""; + } + } +} + +} //namespace StringOps + + +} //namespace JavaScriptOptions + +} //namespace SWIGVMContainers diff --git a/exaudfclient/base/javacontainer/script_options/string_ops.h b/exaudfclient/base/javacontainer/script_options/string_ops.h index 8e0ff72c..7da1e53f 100644 --- a/exaudfclient/base/javacontainer/script_options/string_ops.h +++ b/exaudfclient/base/javacontainer/script_options/string_ops.h @@ -31,6 +31,8 @@ inline void trim(std::string &s) { rtrim(s); } +void replaceTrailingEscapeWhitespaces(std::string & s); + } //namespace StringOps diff --git a/exaudfclient/base/javacontainer/script_options/test/converter_test.cc b/exaudfclient/base/javacontainer/script_options/test/converter_test.cc index dc2725bc..8a6da96c 100644 --- a/exaudfclient/base/javacontainer/script_options/test/converter_test.cc +++ b/exaudfclient/base/javacontainer/script_options/test/converter_test.cc @@ -40,7 +40,6 @@ class ConverterV2JarTest : public ::testing::TestWithParam> option_value = GetParam(); const std::string jar_option_value = option_value.first; - std::cerr << "DEBUG: " << jar_option_value << std::endl; ConverterV2 converter; converter.convertExternalJar(option_value.first); diff --git a/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc b/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc index 8eaf722b..dd647569 100644 --- a/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc +++ b/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc @@ -25,3 +25,35 @@ TEST(StringOpsTest, trimWithNoneASCII) { EXPECT_EQ(sample, "\xa0Hello World\xa0"); } +class ReplaceTrailingEscapeWhitespacesTest : public ::testing::TestWithParam> {}; + +TEST_P(ReplaceTrailingEscapeWhitespacesTest, s) { + const std::pair underTest = GetParam(); + + std::string str = underTest.first; + StringOps::replaceTrailingEscapeWhitespaces(str); + ASSERT_EQ(str, underTest.second); +} + +const std::vector> replace_trailing_escape_whitespaces_strings = + { + std::make_pair("hello world", std::string("hello world")), + std::make_pair("hello world ", std::string("hello world")), + std::make_pair("hello world\\t", std::string("hello world\t")), + std::make_pair("hello world\\f", std::string("hello world\f")), + std::make_pair("hello world\\v", std::string("hello world\v")), + std::make_pair("hello world\\\\t", std::string("hello world\\t")), + std::make_pair("hello world\\\\t\t", std::string("hello world\\t")), + std::make_pair("hello world\\\\\\t\t", std::string("hello world\\\t")), + std::make_pair("hello world\\\\\\\\t\t", std::string("hello world\\\\t")), + std::make_pair("hello worl\td\\\\\\\\t\t", std::string("hello worl\td\\\\t")), + std::make_pair("t\t ", std::string("t")), + std::make_pair("hello worl\td\\\\\\\\", std::string("hello worl\td\\\\\\\\")), //If no whitespace escape sequence, backslashes are not interpreted as backslash escape sequence + }; + + +INSTANTIATE_TEST_SUITE_P( + StringOpsTest, + ReplaceTrailingEscapeWhitespacesTest, + ::testing::ValuesIn(replace_trailing_escape_whitespaces_strings) +); \ No newline at end of file diff --git a/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc b/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc index ec615b63..0fe8c9f5 100644 --- a/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc +++ b/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc @@ -110,3 +110,19 @@ TEST(JavaContainer, import_script_with_escaped_options) { "-XX:+UseSerialGC" }; EXPECT_EQ(vm.getJavaVMInternalStatus().m_jvmOptions, expectedJVMOptions); } + +TEST(JavaContainer, basic_jar_with_trailing_escape) { + const std::string script_code = "%scriptclass com.exasol.udf_profiling.UdfProfiler;\n" + "%jar base/javacontainer/test/test.jar\\t\t;"; + EXPECT_THROW({ + try + { + JavaVMTest vm(script_code); + } + catch( const SWIGVMContainers::JavaVMach::exception& e ) + { + EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar\t': No such file or directory$")); + throw; + } + }, SWIGVMContainers::JavaVMach::exception ); +} diff --git a/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc b/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc index 88de7e0d..ed916c8f 100644 --- a/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc +++ b/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc @@ -49,22 +49,8 @@ TEST(JavaContainer, basic_jar_script_class_with_white_spaces) { TEST(JavaContainer, basic_jar_with_white_spaces) { const std::string script_code = "%jar base/javacontainer/test/test.jar \t ;"; -#ifndef USE_EXTRACTOR_V2 //The parsers behave differently: The legacy parser removes trailing white spaces. JavaVMTest vm(script_code); EXPECT_EQ(vm.getJavaVMInternalStatus().m_classpath, "/exaudf/base/javacontainer/exaudf_deploy.jar:base/javacontainer/test/test.jar"); -#else - EXPECT_THROW({ - try - { - JavaVMTest vm(script_code); - } - catch( const SWIGVMContainers::JavaVMach::exception& e ) - { - EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar \t ': No such file or directory$")); - throw; - } - }, SWIGVMContainers::JavaVMach::exception ); -#endif } TEST(JavaContainer, basic_jars_ordering) {