From 56202bf0da4bcea52345fba899535d442a5ba343 Mon Sep 17 00:00:00 2001 From: Thomas Ubensee <34603111+tomuben@users.noreply.github.com> Date: Fri, 25 Oct 2024 16:51:51 -0300 Subject: [PATCH] #990:Implemented escape sequence for jar option with trailing whitespaces --- .../javacontainer/script_options/converter.cc | 6 +- .../script_options/parser_ctpg.cc | 13 +++- .../script_options/string_ops.cc | 69 +++++++++++++++++++ .../javacontainer/script_options/string_ops.h | 4 +- .../script_options/test/string_ops_tests.cc | 30 ++++++++ .../cpp/javacontainer_extractor_v2_test.cc | 16 +++++ .../test/cpp/javacontainer_test.cc | 14 ---- 7 files changed, 131 insertions(+), 21 deletions(-) diff --git a/exaudfclient/base/javacontainer/script_options/converter.cc b/exaudfclient/base/javacontainer/script_options/converter.cc index 7139f2de..5498f143 100644 --- a/exaudfclient/base/javacontainer/script_options/converter.cc +++ b/exaudfclient/base/javacontainer/script_options/converter.cc @@ -1,5 +1,5 @@ #include "base/javacontainer/script_options/converter.h" -#include "base/javacontainer/script_options/string_ops.h" + #include namespace SWIGVMContainers { @@ -11,10 +11,8 @@ Converter::Converter() , m_whitespace(" \t\f\v") {} void Converter::convertScriptClassName(const std::string & value) { - std::string trimmedValue(value); - StringOps::trim(trimmedValue); if (value != "") { - m_jvmOptions.push_back("-Dexasol.scriptclass=" + trimmedValue); + m_jvmOptions.push_back("-Dexasol.scriptclass=" + value); } } diff --git a/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc b/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc index 3f576242..5e5c3650 100644 --- a/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc +++ b/exaudfclient/base/javacontainer/script_options/parser_ctpg.cc @@ -3,6 +3,7 @@ #include "base/utils/exceptions.h" #include "base/script_options_parser/exception.h" #include "base/swig_factory/swig_factory.h" +#include "base/javacontainer/script_options/string_ops.h" #include @@ -24,7 +25,11 @@ void ScriptOptionLinesParserCTPG::prepareScriptCode(const std::string & scriptCo void ScriptOptionLinesParserCTPG::parseForScriptClass(std::function callback) { try { - parseForSingleOption(m_keywords.scriptClassKeyword(), callback); + parseForSingleOption(m_keywords.scriptClassKeyword(), [&](const std::string &option) { + std::string trimmedValue(option); + StringOps::trim(trimmedValue); + callback(trimmedValue); + }); } catch(const ExecutionGraph::OptionParserException& ex) { Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1623"); } @@ -40,7 +45,11 @@ void ScriptOptionLinesParserCTPG::parseForJvmOptions(std::function callback) { try { - parseForMultipleOption(m_keywords.jarKeyword(), callback); + parseForMultipleOption(m_keywords.jarKeyword(), [callback] (const std::string &option) { + std::string formattedValue(option); + StringOps::replaceTrailingEscapeWhitespaces(formattedValue); + callback(formattedValue); + }); } catch(const ExecutionGraph::OptionParserException& ex) { Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1625"); } diff --git a/exaudfclient/base/javacontainer/script_options/string_ops.cc b/exaudfclient/base/javacontainer/script_options/string_ops.cc index df1b6081..bf420ed0 100644 --- a/exaudfclient/base/javacontainer/script_options/string_ops.cc +++ b/exaudfclient/base/javacontainer/script_options/string_ops.cc @@ -1 +1,70 @@ #include "base/javacontainer/script_options/string_ops.h" +#include +#include + +namespace SWIGVMContainers { + +namespace JavaScriptOptions { + +namespace StringOps { + +inline uint32_t countBackslashesBackwards(const std::string & s, size_t pos) { + uint32_t retVal(0); + while (pos >= 0 && s[pos--] == '\\') retVal++; + return retVal; +} + +inline size_t replaceCharAtPositionAndBackslashes(std::string & s, size_t pos, const char* replacement) { + std::cerr << "Called replaceCharAtPositionAndBackslashes with pos=" << pos << " replacement='" << replacement << "'" << std::endl; + const uint32_t nBackslashes = countBackslashesBackwards(s, pos-1); + std::cerr << "nBackslashes=" << nBackslashes << std::endl; + size_t rtrimIdx = std::string::npos; + if(nBackslashes % 2 == 0) { + // does not belong to an escape sequence + //Delete half of the backslashes because they belong to the escape sequences + if (nBackslashes > 0) { + s = s.erase(pos-nBackslashes, (nBackslashes>>1) ); + } + rtrimIdx = pos + 1 - (nBackslashes>>1); + } + else { + // does belong to an escape sequence + //Delete half of the backslashes because they belong to the escape sequences + 1 of the + s = s.erase(pos-nBackslashes, (nBackslashes>>1)+1 ); + s = s.replace(pos - (nBackslashes>>1) - 1, 1, replacement); + rtrimIdx = pos - (nBackslashes>>1); + } + return rtrimIdx; +} + +void replaceTrailingEscapeWhitespaces(std::string & s) { + if (s.size() > 0) { + const size_t lastIdx = s.find_last_not_of(" \t\v\f"); + if (lastIdx != std::string::npos) { + size_t rtrimIdx = lastIdx + 1; + if (s.size() > 1) { + if(s[lastIdx] == 't') { + rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\t"); + } else if (s[lastIdx] == '\\' && s[lastIdx+1] == ' ') { + rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx+1, " "); + } else if (s[lastIdx] == 'f') { + rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\f"); + } else if (s[lastIdx] == 'v') { + rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\v"); + } + } + if (rtrimIdx != std::string::npos && rtrimIdx < s.size()) { + s = s.substr(0, rtrimIdx); + } + } else { + s = ""; + } + } +} + +} //namespace StringOps + + +} //namespace JavaScriptOptions + +} //namespace SWIGVMContainers diff --git a/exaudfclient/base/javacontainer/script_options/string_ops.h b/exaudfclient/base/javacontainer/script_options/string_ops.h index 8e0ff72c..eb23a636 100644 --- a/exaudfclient/base/javacontainer/script_options/string_ops.h +++ b/exaudfclient/base/javacontainer/script_options/string_ops.h @@ -21,7 +21,7 @@ inline void ltrim(std::string &s) { } inline void rtrim(std::string &s) { - s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { + s.erase(std::find_if(s.rbegin(), s.rend(), [&](unsigned char ch) { return !std::isspace(ch); }).base(), s.end()); } @@ -31,6 +31,8 @@ inline void trim(std::string &s) { rtrim(s); } +void replaceTrailingEscapeWhitespaces(std::string & s); + } //namespace StringOps diff --git a/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc b/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc index 8eaf722b..48d7eedc 100644 --- a/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc +++ b/exaudfclient/base/javacontainer/script_options/test/string_ops_tests.cc @@ -25,3 +25,33 @@ TEST(StringOpsTest, trimWithNoneASCII) { EXPECT_EQ(sample, "\xa0Hello World\xa0"); } +class ReplaceTrailingEscapeWhitespacesTest : public ::testing::TestWithParam> {}; + +TEST_P(ReplaceTrailingEscapeWhitespacesTest, s) { + const std::pair underTest = GetParam(); + + std::string str = underTest.first; + StringOps::replaceTrailingEscapeWhitespaces(str); + std::cerr << "str='" << str << "' underTest.second='" << underTest.second << "'" << std::endl; + ASSERT_EQ(str, underTest.second); +} + +const std::vector> replace_trailing_escape_whitespaces_strings = + { + std::make_pair("hello world", std::string("hello world")), + std::make_pair("hello world ", std::string("hello world")), + std::make_pair("hello world\\t", std::string("hello world\t")), + std::make_pair("hello world\\f", std::string("hello world\f")), + std::make_pair("hello world\\v", std::string("hello world\v")), + std::make_pair("hello world\\\\t", std::string("hello world\\t")), + std::make_pair("hello world\\\\t\t", std::string("hello world\\t")), + std::make_pair("hello world\\\\\\t\t", std::string("hello world\\\t")), + std::make_pair("hello world\\\\\\\\t\t", std::string("hello world\\\\t")), + std::make_pair("hello worl\td\\\\\\\\t\t", std::string("hello worl\td\\\\t")), + }; + +INSTANTIATE_TEST_SUITE_P( + StringOpsTest, + ReplaceTrailingEscapeWhitespacesTest, + ::testing::ValuesIn(replace_trailing_escape_whitespaces_strings) +); \ No newline at end of file diff --git a/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc b/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc index ec615b63..0fe8c9f5 100644 --- a/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc +++ b/exaudfclient/base/javacontainer/test/cpp/javacontainer_extractor_v2_test.cc @@ -110,3 +110,19 @@ TEST(JavaContainer, import_script_with_escaped_options) { "-XX:+UseSerialGC" }; EXPECT_EQ(vm.getJavaVMInternalStatus().m_jvmOptions, expectedJVMOptions); } + +TEST(JavaContainer, basic_jar_with_trailing_escape) { + const std::string script_code = "%scriptclass com.exasol.udf_profiling.UdfProfiler;\n" + "%jar base/javacontainer/test/test.jar\\t\t;"; + EXPECT_THROW({ + try + { + JavaVMTest vm(script_code); + } + catch( const SWIGVMContainers::JavaVMach::exception& e ) + { + EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar\t': No such file or directory$")); + throw; + } + }, SWIGVMContainers::JavaVMach::exception ); +} diff --git a/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc b/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc index 88de7e0d..ed916c8f 100644 --- a/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc +++ b/exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc @@ -49,22 +49,8 @@ TEST(JavaContainer, basic_jar_script_class_with_white_spaces) { TEST(JavaContainer, basic_jar_with_white_spaces) { const std::string script_code = "%jar base/javacontainer/test/test.jar \t ;"; -#ifndef USE_EXTRACTOR_V2 //The parsers behave differently: The legacy parser removes trailing white spaces. JavaVMTest vm(script_code); EXPECT_EQ(vm.getJavaVMInternalStatus().m_classpath, "/exaudf/base/javacontainer/exaudf_deploy.jar:base/javacontainer/test/test.jar"); -#else - EXPECT_THROW({ - try - { - JavaVMTest vm(script_code); - } - catch( const SWIGVMContainers::JavaVMach::exception& e ) - { - EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar \t ': No such file or directory$")); - throw; - } - }, SWIGVMContainers::JavaVMach::exception ); -#endif } TEST(JavaContainer, basic_jars_ordering) {