From c52fc446ce62195a474dc9b2742f95dfeba8314f Mon Sep 17 00:00:00 2001
From: Thomas Ubensee <34603111+tomuben@users.noreply.github.com>
Date: Thu, 10 Oct 2024 10:25:16 -0300
Subject: [PATCH] #973: Support new lines and white spaces at the beginning in script options values (#457)

related to exasol/script-languages-release#973
---
 .../ctpg/script_option_lines_ctpg.cc          | 63 +++++++++++--
 .../ctpg/test/script_option_lines_test.cpp    | 91 +++++++++++++++++++
 2 files changed, 145 insertions(+), 9 deletions(-)

diff --git a/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc b/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc
index 9baa6c67..7c9229b8 100644
--- a/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc
+++ b/exaudfclient/base/script_options_parser/ctpg/script_option_lines_ctpg.cc
@@ -53,11 +53,47 @@ auto&& add_option(Option&& e, options_type&& ob)
     return std::move(ob);
 }
 
+auto convert_escape_seq(std::string_view escape_seq) { // maps the escapes \; \n \r to ';', new line, carriage return; throws OptionParserException otherwise
+    std::string retVal;
+    if (escape_seq == R"_(\;)_") {
+        retVal = ";";
+    } else if (escape_seq == R"_(\n)_") {
+        retVal = "\n";
+    } else if (escape_seq == R"_(\r)_") {
+        retVal = "\r";
+    } else {
+        throw OptionParserException(std::string("Internal parser error: Unexpected escape sequence " + std::string(escape_seq)));
+    }
+
+    return retVal;
+}
+
+
+auto convert_whitespace_escape_seq(std::string_view escape_seq) { // maps escaped space, \t, \f, \v to the matching white space character; throws OptionParserException otherwise
+    std::string retVal;
+    if (escape_seq == R"_(\ )_") {
+        retVal = " ";
+    } else if (escape_seq == R"_(\t)_") {
+        retVal = "\t";
+    } else if (escape_seq == R"_(\f)_") {
+        retVal = "\f";
+    } else if (escape_seq == R"_(\v)_") {
+        retVal = "\v";
+    } else {
+        throw OptionParserException(std::string("Internal parser error: Unexpected white space escape sequence " + std::string(escape_seq)));
+    }
+
+    return retVal;
+}
+
 constexpr char alpha_numeric_pattern[] = R"_([0-9a-zA-Z_]+)_";
 constexpr char not_semicolon_pattern[] = R"_([^;])_";
 constexpr char whitespaces_pattern[] = R"_([ \x09\x0c\x0b]+)_";
 
 
+constexpr char escape_pattern[] = R"_(\\;|\\n|\\r)_";
+constexpr char whitespace_escape_pattern[] = R"_(\\ |\\t|\\f|\\v)_";
+
 
 
 constexpr char_term start_option_token('%');
@@ -65,7 +101,8 @@ constexpr char_term end_option_token(';');
 constexpr regex_term alpha_numeric("alpha_numeric");
 constexpr regex_term not_semicolon("not_semicolon");
 constexpr regex_term whitespaces("whitespace");
-constexpr string_term semicolon_escape(R"_(\;)_");
+constexpr regex_term escape_seq("escape_seq");
+constexpr regex_term whitespace_escape_seq("whitespace_escape_seq"); // own id, so it is not confused with escape_seq
 
 constexpr nterm text("text");
 constexpr nterm options("options");
@@ -76,7 +113,7 @@ constexpr nterm option_value("option_value");
 
 constexpr parser option_parser(
     text,
-    terms(start_option_token, semicolon_escape, whitespaces, end_option_token, alpha_numeric, not_semicolon),
+    terms(start_option_token, escape_seq, whitespace_escape_seq, whitespaces, end_option_token, alpha_numeric, not_semicolon),
     nterms(text, option_value, options, option_element, rest),
     rules(
         text(rest)
@@ -97,14 +134,16 @@ constexpr parser option_parser(
             >= [](auto o) { return std::string(o.get_value()); },
         option_value(not_semicolon)
             >= [](auto o) { return std::string(o.get_value()); },
-        option_value(whitespaces)
-            >= [](auto o) { return std::string(o.get_value()); },
-        option_value(semicolon_escape)
-            >= [](auto o) { return std::string(";"); },
+        option_value(whitespace_escape_seq)
+            >= [](auto o) { return convert_whitespace_escape_seq(o.get_value()); }, // translated only as first element of a value
+        option_value(escape_seq)
+            >= [](auto es) { return convert_escape_seq(es.get_value()); },
         option_value(option_value, not_semicolon)
             >= [](auto&& ov, auto v) { return std::move(ov.append(v.get_value())); },
-        option_value(option_value, semicolon_escape)
-            >= [](auto&& ov, auto v) { return std::move(ov.append(";")); },
+        option_value(option_value, whitespace_escape_seq)
+            >= [](auto&& ov, auto es) { return std::move(ov.append(es.get_value())); }, // appended verbatim: not translated after the first element
+        option_value(option_value, escape_seq)
+            >= [](auto&& ov, auto es) { return std::move(ov.append(convert_escape_seq(es.get_value()))); },
         option_value(option_value, start_option_token)
             >= [](auto&& ov, auto v) { return std::move(ov.append("%")); },
         option_value(option_value, alpha_numeric)
@@ -115,7 +154,9 @@ constexpr parser option_parser(
             >= [](auto r) { return 0;},
         rest(whitespaces)
             >= [](auto r) { return 0;},
-        rest(semicolon_escape)
+        rest(escape_seq)
+            >= [](auto r) { return 0;},
+        rest(whitespace_escape_seq)
             >= [](auto r) { return 0;},
         rest(end_option_token)
             >= [](auto r) { return 0;},
@@ -130,6 +171,10 @@ constexpr parser option_parser(
         rest(rest, end_option_token)
             >= [](auto r, skip) { return 0;},
         rest(rest, start_option_token)
+            >= [](auto r, skip) { return 0;},
+        rest(rest, escape_seq)
+            >= [](auto r, skip) { return 0;},
+        rest(rest, whitespace_escape_seq)
             >= [](auto r, skip) { return 0;}
     )
 );
diff --git a/exaudfclient/base/script_options_parser/ctpg/test/script_option_lines_test.cpp b/exaudfclient/base/script_options_parser/ctpg/test/script_option_lines_test.cpp
index 11f04b6c..f7cd3540 100644
--- a/exaudfclient/base/script_options_parser/ctpg/test/script_option_lines_test.cpp
+++ b/exaudfclient/base/script_options_parser/ctpg/test/script_option_lines_test.cpp
@@ -215,3 +215,94 @@ TEST(ScriptOptionLinesTest, test_multiple_lines_with_code) {
     ASSERT_EQ(jar_option_result->second.size(), 1);
     ASSERT_EQ(jar_option_result->second[0], buildOption("/buckets/bucketfs1/jars/exajdbc.jar", 49, 41));
 }
+
+
+class ScriptOptionLinesEscapeSequenceTest : public ::testing::TestWithParam> {};
+
+TEST_P(ScriptOptionLinesEscapeSequenceTest, test_escape_seq_in_option_value) {
+    const std::pair option_value = GetParam();
+    /**
+    Verify that the parser replaces escape sequences correctly.
+    */
+    const std::string code =
+        "%jvmoption " + option_value.first + "; class Abc{};\n"
+        "%jar /buckets/bucketfs1/jars/exajdbc.jar; class DEF{};\n";
+
+    options_map_t result;
+    parseOptions(code, result);
+    ASSERT_EQ(result.size(), 2);
+
+    const auto jvm_option_result = result.find("jvmoption");
+    ASSERT_NE(jvm_option_result, result.end());
+    ASSERT_EQ(jvm_option_result->second.size(), 1);
+    EXPECT_EQ(jvm_option_result->second[0].value, option_value.second);
+
+    const auto jar_option_result = result.find("jar");
+    ASSERT_NE(jar_option_result, result.end());
+    ASSERT_EQ(jar_option_result->second.size(), 1);
+    ASSERT_EQ(jar_option_result->second[0].value, "/buckets/bucketfs1/jars/exajdbc.jar");
+}
+
+/*
+ '\n' -> new line character
+ '\r' -> return character
+ '\;' -> semicolon
+ '\ ' or '\t' or '\f' or '\v' at start of option value -> replaced by the respective white space character
+ '\ ' or '\t' or '\f' or '\v' in the middle of option value -> should not be replaced
+ '\a' -> anything else should not be replaced.
+ */
+const std::vector> escape_sequences =
+    {
+        std::make_pair("-Dhttp.agent=ABC\\nDEF", "-Dhttp.agent=ABC\nDEF"),
+        std::make_pair("-Dhttp.agent=ABC\\rDEF", "-Dhttp.agent=ABC\rDEF"),
+        std::make_pair("-Dhttp.agent=ABC\\;DEF", "-Dhttp.agent=ABC;DEF"),
+        std::make_pair("-Dhttp.agent=ABC\\aDEF", "-Dhttp.agent=ABC\\aDEF"), //any other escape sequence must stay as is
+        std::make_pair("\\n-Dhttp.agent=ABCDEF", "\n-Dhttp.agent=ABCDEF"),
+        std::make_pair("\\r-Dhttp.agent=ABCDEF", "\r-Dhttp.agent=ABCDEF"),
+        std::make_pair("\\;-Dhttp.agent=ABCDEF", ";-Dhttp.agent=ABCDEF"),
+        std::make_pair("-Dhttp.agent=ABCDEF\\n", "-Dhttp.agent=ABCDEF\n"),
+        std::make_pair("-Dhttp.agent=ABCDEF\\r", "-Dhttp.agent=ABCDEF\r"),
+        std::make_pair("-Dhttp.agent=ABCDEF\\;", "-Dhttp.agent=ABCDEF;"),
+        std::make_pair("-Dhttp.agent=ABC\\ DEF", "-Dhttp.agent=ABC\\ DEF"), //escaped white space in middle of string must stay as is
+        std::make_pair("\\ -Dhttp.agent=ABCDEF", " -Dhttp.agent=ABCDEF"),
+        std::make_pair("\\ \t -Dhttp.agent=ABCDEF", " \t -Dhttp.agent=ABCDEF"),
+        std::make_pair("\\t-Dhttp.agent=ABCDEF", "\t-Dhttp.agent=ABCDEF"),
+        std::make_pair("\\f-Dhttp.agent=ABCDEF", "\f-Dhttp.agent=ABCDEF"),
+        std::make_pair("\\v-Dhttp.agent=ABCDEF", "\v-Dhttp.agent=ABCDEF")
+    };
+
+INSTANTIATE_TEST_SUITE_P(
+    ScriptOptionLines,
+    ScriptOptionLinesEscapeSequenceTest,
+    ::testing::ValuesIn(escape_sequences)
+);
+
+class ScriptOptionLinesRestTest : public ::testing::TestWithParam {};
+
+TEST_P(ScriptOptionLinesRestTest, test_rest_with_tokens) {
+    const std::string rest = GetParam();
+    /**
+    Verify that the parser correctly ignores character sequences containing special parser tokens
+    after the options in a line.
+    */
+    const std::string code =
+        "%jvmoption -Dhttp.agent=abc; class Abc{};" + rest;
+
+    options_map_t result;
+    parseOptions(code, result);
+    ASSERT_EQ(result.size(), 1);
+
+    const auto jvm_option_result = result.find("jvmoption");
+    ASSERT_NE(jvm_option_result, result.end());
+    ASSERT_EQ(jvm_option_result->second.size(), 1);
+    ASSERT_EQ(jvm_option_result->second[0], buildOption("-Dhttp.agent=abc", 0, 28));
+}
+
+const std::vector rest_strings =
+    {"\\n", "\\r", "something %blabla;", ";", "\\;", "\\;blabla", "\\ blabla", "\\t blabla"};
+
+INSTANTIATE_TEST_SUITE_P(
+    ScriptOptionLines,
+    ScriptOptionLinesRestTest,
+    ::testing::ValuesIn(rest_strings)
+);
\ No newline at end of file