Skip to content

Commit

Permalink
#990:Implemented escape sequence for jar option with trailing whitesp…
Browse files Browse the repository at this point in the history
…aces
  • Loading branch information
tomuben committed Oct 28, 2024
1 parent 41b656a commit 56202bf
Show file tree
Hide file tree
Showing 7 changed files with 131 additions and 21 deletions.
6 changes: 2 additions & 4 deletions exaudfclient/base/javacontainer/script_options/converter.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "base/javacontainer/script_options/converter.h"
#include "base/javacontainer/script_options/string_ops.h"

#include <iostream>

namespace SWIGVMContainers {
Expand All @@ -11,10 +11,8 @@ Converter::Converter()
, m_whitespace(" \t\f\v") {}

void Converter::convertScriptClassName(const std::string & value) {
std::string trimmedValue(value);
StringOps::trim(trimmedValue);
if (value != "") {
m_jvmOptions.push_back("-Dexasol.scriptclass=" + trimmedValue);
m_jvmOptions.push_back("-Dexasol.scriptclass=" + value);
}
}

Expand Down
13 changes: 11 additions & 2 deletions exaudfclient/base/javacontainer/script_options/parser_ctpg.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include "base/utils/exceptions.h"
#include "base/script_options_parser/exception.h"
#include "base/swig_factory/swig_factory.h"
#include "base/javacontainer/script_options/string_ops.h"
#include <sstream>


Expand All @@ -24,7 +25,11 @@ void ScriptOptionLinesParserCTPG::prepareScriptCode(const std::string & scriptCo

void ScriptOptionLinesParserCTPG::parseForScriptClass(std::function<void(const std::string &option)> callback) {
try {
parseForSingleOption(m_keywords.scriptClassKeyword(), callback);
parseForSingleOption(m_keywords.scriptClassKeyword(), [&](const std::string &option) {
std::string trimmedValue(option);
StringOps::trim(trimmedValue);
callback(trimmedValue);
});
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1623");
}
Expand All @@ -40,7 +45,11 @@ void ScriptOptionLinesParserCTPG::parseForJvmOptions(std::function<void(const st

void ScriptOptionLinesParserCTPG::parseForExternalJars(std::function<void(const std::string &option)> callback) {
try {
parseForMultipleOption(m_keywords.jarKeyword(), callback);
parseForMultipleOption(m_keywords.jarKeyword(), [callback] (const std::string &option) {
std::string formattedValue(option);
StringOps::replaceTrailingEscapeWhitespaces(formattedValue);
callback(formattedValue);
});
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1625");
}
Expand Down
69 changes: 69 additions & 0 deletions exaudfclient/base/javacontainer/script_options/string_ops.cc
Original file line number Diff line number Diff line change
@@ -1 +1,70 @@
#include "base/javacontainer/script_options/string_ops.h"
#include <regex>
#include <iostream>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

namespace StringOps {

inline uint32_t countBackslashesBackwards(const std::string & s, size_t pos) {
uint32_t retVal(0);
while (pos >= 0 && s[pos--] == '\\') retVal++;
return retVal;
}

inline size_t replaceCharAtPositionAndBackslashes(std::string & s, size_t pos, const char* replacement) {
std::cerr << "Called replaceCharAtPositionAndBackslashes with pos=" << pos << " replacement='" << replacement << "'" << std::endl;
const uint32_t nBackslashes = countBackslashesBackwards(s, pos-1);
std::cerr << "nBackslashes=" << nBackslashes << std::endl;
size_t rtrimIdx = std::string::npos;
if(nBackslashes % 2 == 0) {
//<replacement> does not belong to an escape sequence
//Delete half of the backslashes because they belong to the escape sequences
if (nBackslashes > 0) {
s = s.erase(pos-nBackslashes, (nBackslashes>>1) );
}
rtrimIdx = pos + 1 - (nBackslashes>>1);
}
else {
//<replacement> does belong to an escape sequence
//Delete half of the backslashes because they belong to the escape sequences + 1 of the <replacement>
s = s.erase(pos-nBackslashes, (nBackslashes>>1)+1 );
s = s.replace(pos - (nBackslashes>>1) - 1, 1, replacement);
rtrimIdx = pos - (nBackslashes>>1);
}
return rtrimIdx;
}

void replaceTrailingEscapeWhitespaces(std::string & s) {
if (s.size() > 0) {
const size_t lastIdx = s.find_last_not_of(" \t\v\f");
if (lastIdx != std::string::npos) {
size_t rtrimIdx = lastIdx + 1;
if (s.size() > 1) {
if(s[lastIdx] == 't') {
rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\t");
} else if (s[lastIdx] == '\\' && s[lastIdx+1] == ' ') {
rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx+1, " ");
} else if (s[lastIdx] == 'f') {
rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\f");
} else if (s[lastIdx] == 'v') {
rtrimIdx = replaceCharAtPositionAndBackslashes(s, lastIdx, "\v");
}
}
if (rtrimIdx != std::string::npos && rtrimIdx < s.size()) {
s = s.substr(0, rtrimIdx);
}
} else {
s = "";
}
}
}

} //namespace StringOps


} //namespace JavaScriptOptions

} //namespace SWIGVMContainers
4 changes: 3 additions & 1 deletion exaudfclient/base/javacontainer/script_options/string_ops.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ inline void ltrim(std::string &s) {
}

inline void rtrim(std::string &s) {
s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) {
s.erase(std::find_if(s.rbegin(), s.rend(), [&](unsigned char ch) {
return !std::isspace(ch);
}).base(), s.end());
}
Expand All @@ -31,6 +31,8 @@ inline void trim(std::string &s) {
rtrim(s);
}

void replaceTrailingEscapeWhitespaces(std::string & s);

} //namespace StringOps


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,33 @@ TEST(StringOpsTest, trimWithNoneASCII) {
EXPECT_EQ(sample, "\xa0Hello World\xa0");
}

class ReplaceTrailingEscapeWhitespacesTest : public ::testing::TestWithParam<std::pair<std::string, std::string>> {};

TEST_P(ReplaceTrailingEscapeWhitespacesTest, s) {
const std::pair<std::string, std::string> underTest = GetParam();

std::string str = underTest.first;
StringOps::replaceTrailingEscapeWhitespaces(str);
std::cerr << "str='" << str << "' underTest.second='" << underTest.second << "'" << std::endl;
ASSERT_EQ(str, underTest.second);
}

const std::vector<std::pair<std::string, std::string>> replace_trailing_escape_whitespaces_strings =
{
std::make_pair("hello world", std::string("hello world")),
std::make_pair("hello world ", std::string("hello world")),
std::make_pair("hello world\\t", std::string("hello world\t")),
std::make_pair("hello world\\f", std::string("hello world\f")),
std::make_pair("hello world\\v", std::string("hello world\v")),
std::make_pair("hello world\\\\t", std::string("hello world\\t")),
std::make_pair("hello world\\\\t\t", std::string("hello world\\t")),
std::make_pair("hello world\\\\\\t\t", std::string("hello world\\\t")),
std::make_pair("hello world\\\\\\\\t\t", std::string("hello world\\\\t")),
std::make_pair("hello worl\td\\\\\\\\t\t", std::string("hello worl\td\\\\t")),
};

INSTANTIATE_TEST_SUITE_P(
StringOpsTest,
ReplaceTrailingEscapeWhitespacesTest,
::testing::ValuesIn(replace_trailing_escape_whitespaces_strings)
);
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,19 @@ TEST(JavaContainer, import_script_with_escaped_options) {
"-XX:+UseSerialGC" };
EXPECT_EQ(vm.getJavaVMInternalStatus().m_jvmOptions, expectedJVMOptions);
}

TEST(JavaContainer, basic_jar_with_trailing_escape) {
const std::string script_code = "%scriptclass com.exasol.udf_profiling.UdfProfiler;\n"
"%jar base/javacontainer/test/test.jar\\t\t;";
EXPECT_THROW({
try
{
JavaVMTest vm(script_code);
}
catch( const SWIGVMContainers::JavaVMach::exception& e )
{
EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar\t': No such file or directory$"));
throw;
}
}, SWIGVMContainers::JavaVMach::exception );
}
14 changes: 0 additions & 14 deletions exaudfclient/base/javacontainer/test/cpp/javacontainer_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -49,22 +49,8 @@ TEST(JavaContainer, basic_jar_script_class_with_white_spaces) {
TEST(JavaContainer, basic_jar_with_white_spaces) {
const std::string script_code = "%jar base/javacontainer/test/test.jar \t ;";

#ifndef USE_EXTRACTOR_V2 //The parsers behave differently: The legacy parser removes trailing white spaces.
JavaVMTest vm(script_code);
EXPECT_EQ(vm.getJavaVMInternalStatus().m_classpath, "/exaudf/base/javacontainer/exaudf_deploy.jar:base/javacontainer/test/test.jar");
#else
EXPECT_THROW({
try
{
JavaVMTest vm(script_code);
}
catch( const SWIGVMContainers::JavaVMach::exception& e )
{
EXPECT_THAT( e.what(), MatchesRegex("^.*Java VM cannot find 'base/javacontainer/test/test\\.jar \t ': No such file or directory$"));
throw;
}
}, SWIGVMContainers::JavaVMach::exception );
#endif
}

TEST(JavaContainer, basic_jars_ordering) {
Expand Down

0 comments on commit 56202bf

Please sign in to comment.