Skip to content

Commit

Permalink
#973: Support new lines and white spaces at the beginning in script o…
Browse files Browse the repository at this point in the history
…ptions values (#457)

related to exasol/script-languages-release#973
  • Loading branch information
tomuben authored Oct 10, 2024
1 parent 7756307 commit c52fc44
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,56 @@ auto&& add_option(Option&& e, options_type&& ob)
return std::move(ob);
}

const auto convert_escape_seq(std::string_view escape_seq) {
std::string retVal;
if (escape_seq == R"_(\;)_") {
retVal = ";";
} else if (escape_seq == R"_(\n)_") {
retVal = "\n";
} else if (escape_seq == R"_(\r)_") {
retVal = "\r";
} else {
throw OptionParserException(std::string("Internal parser error: Unexpected escape sequence " + std::string(escape_seq)));
}

return retVal;
}


const auto convert_whitespace_escape_seq(std::string_view escape_seq) {
std::string retVal;
if (escape_seq == R"_(\ )_") {
retVal = " ";
} else if (escape_seq == R"_(\t)_") {
retVal = "\t";
} else if (escape_seq == R"_(\f)_") {
retVal = "\f";
} else if (escape_seq == R"_(\v)_") {
retVal = "\v";
} else {
throw OptionParserException(std::string("Internal parser error: Unexpected white space escape sequence " + std::string(escape_seq)));
}

return retVal;
}



// Regular expressions used as template arguments of the regex_term terminals.

// One or more ASCII letters, digits or underscores.
constexpr char alpha_numeric_pattern[] = R"_([0-9a-zA-Z_]+)_";
// Exactly one character that is not a semicolon.
constexpr char not_semicolon_pattern[] = R"_([^;])_";
// One or more of: space, horizontal tab (0x09), form feed (0x0c), vertical tab (0x0b).
// Line feed and carriage return are not part of this set.
constexpr char whitespaces_pattern[] = R"_([ \x09\x0c\x0b]+)_";
// A backslash followed by ';', 'n' or 'r'.
constexpr char escape_pattern[] = R"_(\\;|\\n|\\r)_";
// A backslash followed by a space, 't', 'f' or 'v'.
constexpr char whitespace_escape_pattern[] = R"_(\\ |\\t|\\f|\\v)_";



// Terminal symbols of the option grammar.
// NOTE(review): the string passed to each regex_term is presumably the name the
// parser library shows in diagnostics - confirm against the library documentation.

// '%' introduces an option, ';' terminates it.
constexpr char_term start_option_token('%');
constexpr char_term end_option_token(';');
constexpr regex_term<alpha_numeric_pattern> alpha_numeric("alpha_numeric");
constexpr regex_term<not_semicolon_pattern> not_semicolon("not_semicolon");
constexpr regex_term<whitespaces_pattern> whitespaces("whitespace");
constexpr string_term semicolon_escape(R"_(\;)_");
constexpr regex_term<escape_pattern> escape_seq("escape_seq");
// Uses its own name instead of repeating "escape_seq", so that the two escape
// terminals can be told apart by name.
constexpr regex_term<whitespace_escape_pattern> whitespace_escape_seq("whitespace_escape_seq");

constexpr nterm<options_type> text("text");
constexpr nterm<options_type> options("options");
Expand All @@ -76,7 +113,7 @@ constexpr nterm<std::string> option_value("option_value");

constexpr parser option_parser(
text,
terms(start_option_token, semicolon_escape, whitespaces, end_option_token, alpha_numeric, not_semicolon),
terms(start_option_token, escape_seq, whitespace_escape_seq, whitespaces, end_option_token, alpha_numeric, not_semicolon),
nterms(text, option_value, options, option_element, rest),
rules(
text(rest)
Expand All @@ -97,14 +134,16 @@ constexpr parser option_parser(
>= [](auto o) { return std::string(o.get_value()); },
option_value(not_semicolon)
>= [](auto o) { return std::string(o.get_value()); },
option_value(whitespaces)
>= [](auto o) { return std::string(o.get_value()); },
option_value(semicolon_escape)
>= [](auto o) { return std::string(";"); },
option_value(whitespace_escape_seq)
>= [](auto o) { return std::string(convert_whitespace_escape_seq(o.get_value())); },
option_value(escape_seq)
>= [](auto es) { return convert_escape_seq(es.get_value()); },
option_value(option_value, not_semicolon)
>= [](auto&& ov, auto v) { return std::move(ov.append(v.get_value())); },
option_value(option_value, semicolon_escape)
>= [](auto&& ov, auto v) { return std::move(ov.append(";")); },
option_value(option_value, whitespace_escape_seq)
>= [](auto&& ov, auto es) { return std::move(ov.append(es.get_value())); },
option_value(option_value, escape_seq)
>= [](auto&& ov, auto es) { return std::move(ov.append(convert_escape_seq(es.get_value()))); },
option_value(option_value, start_option_token)
>= [](auto&& ov, auto v) { return std::move(ov.append("%")); },
option_value(option_value, alpha_numeric)
Expand All @@ -115,7 +154,9 @@ constexpr parser option_parser(
>= [](auto r) { return 0;},
rest(whitespaces)
>= [](auto r) { return 0;},
rest(semicolon_escape)
rest(escape_seq)
>= [](auto r) { return 0;},
rest(whitespace_escape_seq)
>= [](auto r) { return 0;},
rest(end_option_token)
>= [](auto r) { return 0;},
Expand All @@ -130,6 +171,10 @@ constexpr parser option_parser(
rest(rest, end_option_token)
>= [](auto r, skip) { return 0;},
rest(rest, start_option_token)
>= [](auto r, skip) { return 0;},
rest(rest, escape_seq)
>= [](auto r, skip) { return 0;},
rest(rest, whitespace_escape_seq)
>= [](auto r, skip) { return 0;}
)
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,3 +215,94 @@ TEST(ScriptOptionLinesTest, test_multiple_lines_with_code) {
ASSERT_EQ(jar_option_result->second.size(), 1);
ASSERT_EQ(jar_option_result->second[0], buildOption("/buckets/bucketfs1/jars/exajdbc.jar", 49, 41));
}


/**
 Parameterized fixture. Each parameter is a pair:
 first  = option value as written in the script code,
 second = option value expected in the parse result.
*/
class ScriptOptionLinesEscapeSequenceTest
    : public ::testing::TestWithParam<std::pair<std::string, std::string>> {};

TEST_P(ScriptOptionLinesEscapeSequenceTest, test_escape_seq_in_option_value) {
    /**
    Verify that the parser replaces escape sequences correctly.
    */
    const auto [raw_value, expected_value] = GetParam();

    // Line 1 carries the value under test, line 2 is a plain option that must
    // still be parsed correctly afterwards.
    const std::string first_line = "%jvmoption " + raw_value + "; class Abc{};\n";
    const std::string second_line = "%jar /buckets/bucketfs1/jars/exajdbc.jar; class DEF{};\n";
    const std::string code = first_line + second_line;

    options_map_t result;
    parseOptions(code, result);
    ASSERT_EQ(result.size(), 2);

    const auto jvm_it = result.find("jvmoption");
    ASSERT_NE(jvm_it, result.end());
    ASSERT_EQ(jvm_it->second.size(), 1);
    EXPECT_EQ(jvm_it->second[0].value, expected_value);

    const auto jar_it = result.find("jar");
    ASSERT_NE(jar_it, result.end());
    ASSERT_EQ(jar_it->second.size(), 1);
    ASSERT_EQ(jar_it->second[0].value, "/buckets/bucketfs1/jars/exajdbc.jar");
}

/*
 Test data: {option value in the script, expected option value after parsing}.

 Rules covered:
  '\n' -> new line character
  '\r' -> return character
  '\;' -> semicolon
  '\ ', '\t', '\f', '\v' at the start of the option value -> replaced by the respective white space character
  '\ ', '\t', '\f', '\v' in the middle of the option value -> not replaced
  '\a' (any other escape sequence) -> not replaced
*/
const std::vector<std::pair<std::string, std::string>> escape_sequences = {
    // escape sequence in the middle of the value
    {"-Dhttp.agent=ABC\\nDEF", "-Dhttp.agent=ABC\nDEF"},
    {"-Dhttp.agent=ABC\\rDEF", "-Dhttp.agent=ABC\rDEF"},
    {"-Dhttp.agent=ABC\\;DEF", "-Dhttp.agent=ABC;DEF"},
    {"-Dhttp.agent=ABC\\aDEF", "-Dhttp.agent=ABC\\aDEF"}, //any other escape sequence must stay as is
    // escape sequence at the start of the value
    {"\\n-Dhttp.agent=ABCDEF", "\n-Dhttp.agent=ABCDEF"},
    {"\\r-Dhttp.agent=ABCDEF", "\r-Dhttp.agent=ABCDEF"},
    {"\\;-Dhttp.agent=ABCDEF", ";-Dhttp.agent=ABCDEF"},
    // escape sequence at the end of the value
    {"-Dhttp.agent=ABCDEF\\n", "-Dhttp.agent=ABCDEF\n"},
    {"-Dhttp.agent=ABCDEF\\r", "-Dhttp.agent=ABCDEF\r"},
    {"-Dhttp.agent=ABCDEF\\;", "-Dhttp.agent=ABCDEF;"},
    // escaped white space
    {"-Dhttp.agent=ABC\\ DEF", "-Dhttp.agent=ABC\\ DEF"}, //escaped white space in middle of string must stay as is
    {"\\ -Dhttp.agent=ABCDEF", " -Dhttp.agent=ABCDEF"},
    {"\\ \t -Dhttp.agent=ABCDEF", " \t -Dhttp.agent=ABCDEF"},
    {"\\t-Dhttp.agent=ABCDEF", "\t-Dhttp.agent=ABCDEF"},
    {"\\f-Dhttp.agent=ABCDEF", "\f-Dhttp.agent=ABCDEF"},
    {"\\v-Dhttp.agent=ABCDEF", "\v-Dhttp.agent=ABCDEF"},
};

// Runs every test of ScriptOptionLinesEscapeSequenceTest once per entry of
// escape_sequences; "ScriptOptionLines" is the name of this instantiation.
INSTANTIATE_TEST_SUITE_P(
ScriptOptionLines,
ScriptOptionLinesEscapeSequenceTest,
::testing::ValuesIn(escape_sequences)
);

class ScriptOptionLinesRestTest : public ::testing::TestWithParam<std::string> {};

TEST_P(ScriptOptionLinesRestTest, test_rest_with_tokens) {
const std::string rest = GetParam();
/**
Verify that the parser correctly ignores character sequences containing special parser tokens
after the options in a line.
*/
const std::string code =
"%jvmoption -Dhttp.agent=abc; class Abc{};" + rest;

options_map_t result;
parseOptions(code, result);
ASSERT_EQ(result.size(), 1);

const auto jvm_option_result = result.find("jvmoption");
ASSERT_NE(jvm_option_result, result.end());
ASSERT_EQ(jvm_option_result->second.size(), 1);
ASSERT_EQ(jvm_option_result->second[0], buildOption("-Dhttp.agent=abc", 0, 28));
}

// Texts appended after the option line; each contains characters or escape
// sequences that are parser tokens.
const std::vector<std::string> rest_strings = {
    "\\n",
    "\\r",
    "something %blabla;",
    ";",
    "\\;",
    "\\;blabla",
    "\\ blabla",
    "\\t blabla",
};

// Runs every test of ScriptOptionLinesRestTest once per entry of rest_strings;
// "ScriptOptionLines" is the name of this instantiation.
INSTANTIATE_TEST_SUITE_P(
ScriptOptionLines,
ScriptOptionLinesRestTest,
::testing::ValuesIn(rest_strings)
);

0 comments on commit c52fc44

Please sign in to comment.