Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#969: Use new CTPG parser in java vm #455

Merged
merged 43 commits into from
Oct 15, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
77e9955
#969: Use new CTPG parser in java vm
tomuben Oct 2, 2024
780ff35
Added CTPG specific tests
tomuben Oct 10, 2024
6739678
Findings from review
tomuben Oct 11, 2024
3c66a0b
Limit max recursion for script importer and added a test.
tomuben Oct 11, 2024
ba28b85
Added valgrind/asan tests
tomuben Oct 11, 2024
fa1ccb9
Fixed GH Workflow
tomuben Oct 11, 2024
dfdda46
Fixed GH Workflow
tomuben Oct 11, 2024
79553b2
Fixed GH Workflow
tomuben Oct 11, 2024
b3c1c7c
Fixed GH Workflow
tomuben Oct 11, 2024
25e21e9
Fixed GH Workflow
tomuben Oct 11, 2024
0ef2b94
Fixed GH Workflow
tomuben Oct 11, 2024
9be6303
Fixed GH Workflow
tomuben Oct 11, 2024
ee31d02
Fixed memory allocation/deallocation issue reported by ASAN in tests
tomuben Oct 11, 2024
f9663d1
Upload test logs only in error case
tomuben Oct 11, 2024
c53ce9e
Deactivate slow tests under valgrind
tomuben Oct 11, 2024
9dba9d2
Inject a memory problem....let's see which checks find the problem....
tomuben Oct 11, 2024
cd61bc0
Removed memory corruption
tomuben Oct 13, 2024
a145b98
Added bazel config for valgrind
tomuben Oct 13, 2024
a01b4fd
Removed (unnecessary) timeout for asan test
tomuben Oct 13, 2024
4607321
Added builder for JavaVM
tomuben Oct 13, 2024
3be1631
Added description to ctpg_script_importer_test.cc
tomuben Oct 14, 2024
60915e5
Optimization in script_option_lines_ctpg.cc
tomuben Oct 14, 2024
9dd5a31
Added performance tests
tomuben Oct 14, 2024
85ca346
Activated valgrind memory leak check
tomuben Oct 14, 2024
56c94fc
Install R for test_package_management_scripts.yaml
tomuben Oct 14, 2024
967c71f
Added comment in parser_ctpg_script_importer.cc
tomuben Oct 14, 2024
8dc5834
Move SwigFactory creation from udf client into JavaContainerBuilder c…
tomuben Oct 14, 2024
f1b1940
Fixed JavaContainerBuilder::useCtpgParser
tomuben Oct 14, 2024
a846279
Added virtual destructor to SwigFactory.
tomuben Oct 14, 2024
ef099fd
Fixed compiler errors
tomuben Oct 15, 2024
e63355c
Added a bazl build check
tomuben Oct 15, 2024
5bd6219
Fixed formatting in check_bazel_tests.yml
tomuben Oct 15, 2024
66afcb0
Fixed formatting in check_bazel_tests.yml
tomuben Oct 15, 2024
f6ebc7e
Removed Java from bazel build (dependencies not installed)
tomuben Oct 15, 2024
8f570d6
Added Python env variables in check_bazel_tests.yml
tomuben Oct 15, 2024
89f29f2
fixed Python env variables in check_bazel_tests.yml
tomuben Oct 15, 2024
c6ca998
Added protobuf compiler to build job in check_bazel_tests.yml
tomuben Oct 15, 2024
53c9606
Created installUdfClientDeps.sh
tomuben Oct 15, 2024
d69208b
Run installUdfClientDeps.sh as sudo
tomuben Oct 15, 2024
f09a77d
Update exaudfclient/base/javacontainer/script_options/parser_ctpg_scr…
tomuben Oct 15, 2024
4287b0e
Don't use installUdfClientDeps.sh for bazel test
tomuben Oct 15, 2024
e39d4c7
Fixed installation of bazel in check_bazel_tests.yml
tomuben Oct 15, 2024
c812821
Fixed execution of bazel tests in check_bazel_tests.yml
tomuben Oct 15, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions exaudfclient/base/javacontainer/javacontainer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@
using namespace SWIGVMContainers;
using namespace std;

JavaVMach::JavaVMach(bool checkOnly, SwigFactory& swigFactory) {
JavaVMach::JavaVMach(bool checkOnly, SwigFactory& swigFactory, bool useCTPGParser) {
try {
m_impl = new JavaVMImpl(checkOnly, false, swigFactory);
m_impl = new JavaVMImpl(checkOnly, false, swigFactory, useCTPGParser);
} catch (std::exception& err) {
lock_guard<mutex> lock(exception_msg_mtx);
exception_msg = "F-UDF-CL-SL-JAVA-1000: "+std::string(err.what());
Expand Down
2 changes: 1 addition & 1 deletion exaudfclient/base/javacontainer/javacontainer.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class SwigFactory;

class JavaVMach: public SWIGVM {
public:
JavaVMach(bool checkOnly, SwigFactory& swigFactory);
JavaVMach(bool checkOnly, SwigFactory& swigFactory, bool useCTPGParser);
virtual ~JavaVMach() {}
virtual void shutdown();
virtual bool run();
Expand Down
37 changes: 23 additions & 14 deletions exaudfclient/base/javacontainer/javacontainer_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
#include "base/javacontainer/javacontainer_impl.h"
#include "base/javacontainer/script_options/extractor.h"
#include "base/javacontainer/script_options/parser_legacy.h"
#include "base/javacontainer/script_options/parser_ctpg.h"
#include "base/swig_factory/swig_factory.h"


using namespace SWIGVMContainers;
using namespace std;

JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory)
JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory, bool useCTPGParser)
: m_checkOnly(checkOnly)
, m_exaJavaPath("")
, m_localClasspath("/tmp") // **IMPORTANT**: /tmp needs to be in the classpath, otherwise ExaCompiler crashe with com.exasol.ExaCompilationException: /DATE_STRING.java:3: error: error while writing DATE_STRING: could not create parent directories
Expand All @@ -34,19 +35,12 @@ JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory)
stringstream ss;
m_exaJavaPath = "/exaudf/base/javacontainer"; // TODO hardcoded path

JavaScriptOptions::ScriptOptionLinesParserLegacy scriptOptionsParser;

JavaScriptOptions::Extractor extractor(scriptOptionsParser, swigFactory);

DBG_FUNC_CALL(cerr,extractor.extract(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used

DBG_FUNC_CALL(cerr,setClasspath());

m_jvmOptions = std::move(extractor.moveJvmOptions());

for (set<string>::iterator it = extractor.getJarPaths().begin(); it != extractor.getJarPaths().end();
++it) {
addJarToClasspath(*it);
if (useCTPGParser) {
JavaScriptOptions::ScriptOptionLinesParserCTPG parser;
parseScriptOptions(parser, swigFactory);
} else {
JavaScriptOptions::ScriptOptionLinesParserLegacy parser;
parseScriptOptions(parser, swigFactory);
}

m_needsCompilation = checkNeedsCompilation();
Expand All @@ -64,6 +58,21 @@ JavaVMImpl::JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory)
}
}

void JavaVMImpl::parseScriptOptions(JavaScriptOptions::ScriptOptionsParser & scriptOptionsParser, SwigFactory& swigFactory) {
JavaScriptOptions::Extractor extractor(scriptOptionsParser, swigFactory);

DBG_FUNC_CALL(cerr,extractor.extract(m_scriptCode)); // To be called before scripts are imported. Otherwise, the script classname from an imported script could be used
tkilias marked this conversation as resolved.
Show resolved Hide resolved

DBG_FUNC_CALL(cerr,setClasspath());

m_jvmOptions = std::move(extractor.moveJvmOptions());

for (set<string>::iterator it = extractor.getJarPaths().begin(); it != extractor.getJarPaths().end();
++it) {
addJarToClasspath(*it);
}
}

void JavaVMImpl::shutdown() {
if (m_checkOnly)
throwException("F-UDF.CL.SL.JAVA-1159: Java VM in check only mode");
Expand Down
7 changes: 6 additions & 1 deletion exaudfclient/base/javacontainer/javacontainer_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@ class JavaVMTest;

namespace SWIGVMContainers {

namespace JavaScriptOptions {
struct ScriptOptionsParser;
}

class JavaVMImpl {
public:
friend class ::JavaVMTest;
JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory);
JavaVMImpl(bool checkOnly, bool noJNI, SwigFactory& swigFactory, bool useCTPGParser);
~JavaVMImpl() {}
void shutdown();
bool run();
Expand All @@ -37,6 +41,7 @@ class JavaVMImpl {
void throwException(const std::string& ex);
void setJvmOptions();
void addJarToClasspath(const std::string& path);
void parseScriptOptions(JavaScriptOptions::ScriptOptionsParser & scriptOptionsParser, SwigFactory& swigFactory);
bool m_checkOnly;
std::string m_exaJavaPath;
std::string m_localClasspath;
Expand Down
8 changes: 5 additions & 3 deletions exaudfclient/base/javacontainer/script_options/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ package(default_visibility = ["//visibility:public"])

cc_library(
name = "java_script_option_lines",
hdrs = [":extractor.h", ":parser_legacy.h"],
hdrs = [":extractor.h", ":parser_legacy.h", ":parser_ctpg.h"],
srcs = [":parser.h", ":converter.h", ":converter.cc", ":parser_legacy.cc", ":extractor.cc",
":keywords.h", ":checksum.h", ":checksum.cc"],
deps = ["//base/script_options_parser/legacy:script_option_lines_parser_legacy", "//base/utils:utils",
":keywords.h", ":keywords.cc", ":checksum.h", ":checksum.cc", ":parser_ctpg.cc",
":parser_ctpg_script_importer.cc", ":parser_ctpg_script_importer.h"],
deps = ["//base/script_options_parser/legacy:script_option_lines_parser_legacy",
"//base/script_options_parser/ctpg:script_option_lines_parser_ctpg", "//base/utils:utils",
"//base/exaudflib:header", "//base/exaudflib:exaudflib-deps", "//base/swig_factory:swig_factory_if",
"//base/script_options_parser:exception"],
)
33 changes: 33 additions & 0 deletions exaudfclient/base/javacontainer/script_options/keywords.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#include "base/javacontainer/script_options/keywords.h"
#include <string_view>

namespace SWIGVMContainers {

namespace JavaScriptOptions {

Keywords::Keywords(bool withScriptOptionsPrefix)
: m_jarKeyword()
, m_scriptClassKeyword()
, m_importKeyword()
, m_jvmKeyword() {
const std::string_view jar{"%jar"};
const std::string_view scriptClass{"%scriptclass"};
const std::string_view import{"%import"};
const std::string_view jvm{"%jvmoption"};
if (withScriptOptionsPrefix) {
m_jarKeyword = jar;
m_scriptClassKeyword = scriptClass;
m_importKeyword = import;
m_jvmKeyword = jvm;
} else {
m_jarKeyword.assign(jar.substr(1));
m_scriptClassKeyword.assign(scriptClass.substr(1));
m_importKeyword.assign(import.substr(1));
m_jvmKeyword.assign(jvm.substr(1));
}
}

} //namespace JavaScriptOptions

} //namespace SWIGVMContainers

12 changes: 4 additions & 8 deletions exaudfclient/base/javacontainer/script_options/keywords.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,16 @@ namespace JavaScriptOptions {

class Keywords {
public:
Keywords()
: m_jarKeyword("%jar")
, m_scriptClassKeyword("%scriptclass")
, m_importKeyword("%import")
, m_jvmOptionKeyword("%jvmoption") {}
const std::string & jarKeyword() { return m_jarKeyword; }
Keywords(bool withScriptOptionsPrefix);
const std::string & scriptClassKeyword() { return m_scriptClassKeyword; }
const std::string & importKeyword() { return m_importKeyword; }
const std::string & jvmOptionKeyword() { return m_jvmOptionKeyword; }
const std::string & jvmKeyword() { return m_jvmKeyword; }
const std::string & jarKeyword() { return m_jarKeyword; }
private:
std::string m_jarKeyword;
std::string m_scriptClassKeyword;
std::string m_importKeyword;
std::string m_jvmOptionKeyword;
std::string m_jvmKeyword;
};

} //namespace JavaScriptOptions
Expand Down
138 changes: 138 additions & 0 deletions exaudfclient/base/javacontainer/script_options/parser_ctpg.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,147 @@
#include "base/javacontainer/script_options/parser_ctpg.h"
#include "base/javacontainer/script_options/parser_ctpg_script_importer.h"
#include "base/utils/exceptions.h"
#include "base/script_options_parser/exception.h"
#include <sstream>


namespace ctpg_parser = ExecutionGraph::OptionsLineParser::CTPG;

namespace SWIGVMContainers {

namespace JavaScriptOptions {

ScriptOptionLinesParserCTPG::ScriptOptionLinesParserCTPG()
: m_scriptCode()
, m_keywords(false)
, m_needParsing(true) {}

void ScriptOptionLinesParserCTPG::prepareScriptCode(const std::string & scriptCode) {
m_scriptCode = scriptCode;
}

void ScriptOptionLinesParserCTPG::parseForScriptClass(std::function<void(const std::string &option)> callback) {
try {
parseForSingleOption(m_keywords.scriptClassKeyword(), callback);
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1623");
}
}

void ScriptOptionLinesParserCTPG::parseForJvmOptions(std::function<void(const std::string &option)> callback) {
try {
parseForMultipleOption(m_keywords.jvmKeyword(), callback);
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1624");
}
}

void ScriptOptionLinesParserCTPG::parseForExternalJars(std::function<void(const std::string &option)> callback) {
try {
parseForMultipleOption(m_keywords.jarKeyword(), callback);
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1625");
}
}

void ScriptOptionLinesParserCTPG::extractImportScripts(SwigFactory & swigFactory) {

try {
parse();
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1626");
}

const auto optionIt = m_foundOptions.find(m_keywords.importKeyword());
if (optionIt != m_foundOptions.end()) {
CTPG::ScriptImporter scriptImporter(swigFactory, m_keywords);
scriptImporter.importScript(m_scriptCode, m_foundOptions);
//The imported scripts will change the location of the other options in m_foundOptions
//Also there might be new JVM / External Jar options
//=> We need to clear the option map and reset the parser.
m_foundOptions.clear();
m_needParsing = true;
}
}

std::string && ScriptOptionLinesParserCTPG::getScriptCode() {
try {
parse();
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1627");
}
//Remove all options from script code in reverse order
struct option_location {
size_t pos;
size_t len;
};
struct comp {
bool operator()(option_location a, option_location b) const {
return a.pos > b.pos;
}
};
std::set<option_location, comp> option_locations;
for (const auto & option: m_foundOptions) {
for (const auto & option_loc: option.second) {
option_location loc = { .pos = option_loc.idx_in_source, .len = option_loc.size};
option_locations.insert(loc);
}
}
for (const auto option_loc: option_locations) {
m_scriptCode.erase(option_loc.pos, option_loc.len);
}
return std::move(m_scriptCode);
}

void ScriptOptionLinesParserCTPG::parse() {
if (m_needParsing) {
if(!m_foundOptions.empty()) {
throw std::logic_error("F-UDF-CL-SL-JAVA-1620 Internal error. Parser result is not empty.");
tkilias marked this conversation as resolved.
Show resolved Hide resolved
}
try {
ExecutionGraph::OptionsLineParser::CTPG::parseOptions(m_scriptCode, m_foundOptions);
} catch(const ExecutionGraph::OptionParserException& ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1621");
}

m_needParsing = false;

//Check for unknown options
for (const auto & option: m_foundOptions) {
if (m_keywords.jarKeyword() != option.first &&
m_keywords.scriptClassKeyword() != option.first &&
m_keywords.importKeyword() != option.first &&
m_keywords.jvmKeyword() != option.first) {
std::stringstream ss;
ss << "F-UDF-CL-SL-JAVA-1622 " << "Unexpected option: " << option.first;
throw std::invalid_argument(ss.str());
tkilias marked this conversation as resolved.
Show resolved Hide resolved
}
}
}
}

void ScriptOptionLinesParserCTPG::parseForSingleOption(const std::string key, std::function<void(const std::string &option)> callback) {
parse();
const auto optionIt = m_foundOptions.find(key);
if (optionIt != m_foundOptions.end()) {
if (optionIt->second.size() != 1) {
std::stringstream ss;
ss << "F-UDF-CL-SL-JAVA-1628 found " << optionIt->second.size() << key << " options" << std::endl;
tomuben marked this conversation as resolved.
Show resolved Hide resolved
throw std::invalid_argument(ss.str());
}
callback(optionIt->second[0].value);
}
}

void ScriptOptionLinesParserCTPG::parseForMultipleOption(const std::string key, std::function<void(const std::string &option)> callback) {
parse();
const auto optionIt = m_foundOptions.find(key);
if (optionIt != m_foundOptions.end()) {
for (const auto & option : optionIt->second) {
callback(option.value);
}
}
}

} //namespace JavaScriptOptions

Expand Down
36 changes: 36 additions & 0 deletions exaudfclient/base/javacontainer/script_options/parser_ctpg.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,46 @@
#ifndef SCRIPTOPTIONLINEPARSERCTPGY_H
#define SCRIPTOPTIONLINEPARSERCTPGY_H 1

#include "base/javacontainer/script_options/parser.h"
#include "base/javacontainer/script_options/keywords.h"
#include "base/script_options_parser/ctpg/script_option_lines_ctpg.h"


namespace SWIGVMContainers {

namespace JavaScriptOptions {

class ScriptOptionLinesParserCTPG : public ScriptOptionsParser {

public:
ScriptOptionLinesParserCTPG();

void prepareScriptCode(const std::string & scriptCode) override;

void parseForScriptClass(std::function<void(const std::string &option)> callback) override;

void parseForJvmOptions(std::function<void(const std::string &option)> callback) override;

void parseForExternalJars(std::function<void(const std::string &option)> callback) override;

void extractImportScripts(SwigFactory & swigFactory) override;

std::string && getScriptCode() override;

private:
void parse();

void parseForSingleOption(const std::string key, std::function<void(const std::string &option)> callback);
void parseForMultipleOption(const std::string key, std::function<void(const std::string &option)> callback);

void importScripts(SwigFactory & swigFactory);

private:
std::string m_scriptCode;
Keywords m_keywords;
ExecutionGraph::OptionsLineParser::CTPG::options_map_t m_foundOptions;
bool m_needParsing;
};

} //namespace JavaScriptOptions

Expand Down
Loading