Skip to content

Commit

Permalink
#983: Refactor CTPG script options Java parser code (#462)
Browse files Browse the repository at this point in the history
  • Loading branch information
tomuben authored Oct 21, 2024
1 parent c289794 commit 684d0ef
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 64 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,40 @@ void ScriptImporter::importScript(std::string & scriptCode,
importScript(scriptCode, options, 0);
}

/**
 * Resolves every import option in `option_values` and appends one
 * CollectedScript per option to `result`, in the order the options are given.
 *
 * For each option the referenced script is looked up via findImportScript().
 * If the checksum registry reports it as new, its own options are parsed and
 * importScript() is invoked on it with `recursionDepth + 1`, so nested imports
 * are expanded before the text is stored. If the script was registered before,
 * the stored text stays empty.
 *
 * @param option_values   import options to resolve
 * @param recursionDepth  depth of the current importScript() call
 * @param result          receives the replacement text together with the
 *                        position/length of the originating option
 */
void ScriptImporter::collectImportScripts(const ScriptImporter::OptionValues_t & option_values,
                                          const size_t recursionDepth,
                                          std::vector<CollectedScript> &result) {
    for (const auto & importOption: option_values) {
        const char *rawScript = findImportScript(importOption.value);
        // Remains empty for a script that was already imported earlier.
        std::string expandedScript;
        if (m_importedScriptChecksums.addScript(rawScript)) {
            // First time this script is seen: parse its own options so that
            // nested %import statements get resolved by the recursive call.
            ctpg_parser::options_map_t nestedOptions;
            try {
                ExecutionGraph::OptionsLineParser::CTPG::parseOptions(rawScript, nestedOptions);
            } catch(const ExecutionGraph::OptionParserException & ex) {
                Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1630");
            }
            expandedScript.assign(rawScript);
            importScript(expandedScript, nestedOptions, recursionDepth + 1);
        }
        result.push_back(CollectedScript{.script = std::move(expandedScript),
                                         .origPos = importOption.idx_in_source,
                                         .origLen = importOption.size});
    }
}

void ScriptImporter::replaceImportScripts(std::string & scriptCode,
const std::vector<CollectedScript> &collectedImportScripts) {
//Replace the imported script bodies from end to start.
//Doing it in forward order would invalidate the offsets of later import scripts.
for (auto optionIt = collectedImportScripts.crbegin(); optionIt != collectedImportScripts.crend(); optionIt++) {
scriptCode.replace(optionIt->origPos, optionIt->origLen, optionIt->script);
}
}


void ScriptImporter::importScript(std::string & scriptCode,
ctpg_parser::options_map_t & options,
const size_t recursionDepth) {
Expand All @@ -43,52 +77,27 @@ void ScriptImporter::importScript(std::string & scriptCode,
{
return first.idx_in_source < second.idx_in_source;
});
struct ReplacedScripts {
ReplacedScripts(ReplacedScripts&&) = default;
std::string script;
size_t origPos;
size_t origLen;
};
std::vector<ReplacedScripts> replacedScripts;
replacedScripts.reserve(optionIt->second.size());
//In order to continue compatibility with legacy implementation we must collect import scripts in forward direction
//but then replace in reverse direction (in order to keep consistency of positions)
for (const auto & option: optionIt->second) {
const char *importScriptCode = findImportScript(option.value);
std::string importScriptCodeStr;
if (m_importedScriptChecksums.addScript(importScriptCode) ) {
// Script has not been imported yet
// If this imported script contains %import statements
// they will be resolved in the next recursion.
ctpg_parser::options_map_t newOptions;
try {
ExecutionGraph::OptionsLineParser::CTPG::parseOptions(importScriptCode, newOptions);
} catch(const ExecutionGraph::OptionParserException & ex) {
Utils::rethrow(ex, "F-UDF-CL-SL-JAVA-1630");
}
importScriptCodeStr.assign(importScriptCode);
importScript(importScriptCodeStr, newOptions, recursionDepth + 1);
}
ReplacedScripts replacedScript = {.script = std::move(importScriptCodeStr), .origPos = option.idx_in_source, .origLen = option.size };
replacedScripts.push_back(std::move(replacedScript));
}
//Now replace the imported script bodies from end to start. Doing it in forward order would invalidate the offsets of later import scripts.
for (auto optionIt = replacedScripts.rbegin(); optionIt != replacedScripts.rend(); optionIt++) {
scriptCode.replace(optionIt->origPos, optionIt->origLen, optionIt->script);
}
std::vector<CollectedScript> collectedScripts;
collectedScripts.reserve(optionIt->second.size());
//In order to continue compatibility with legacy implementation
//we must collect import scripts in forward direction but then replace in reverse direction.
collectImportScripts(optionIt->second, recursionDepth, collectedScripts);
replaceImportScripts(scriptCode, collectedScripts);
}
}

/**
 * Looks up the source code of the script identified by `scriptKey`.
 *
 * The metadata object is created through the SWIG factory on first use and
 * kept in m_metaData for later calls.
 *
 * @param scriptKey  key passed to the metadata object's moduleContent()
 * @return pointer returned by moduleContent()
 *         NOTE(review): ownership, lifetime and whether this can be null are
 *         not visible here - confirm against SWIGMetadataIf.
 * @throws std::runtime_error  F-UDF-CL-SL-JAVA-1631 if the metadata object
 *                             cannot be created, F-UDF-CL-SL-JAVA-1632 if the
 *                             metadata object reports an exception
 */
const char* ScriptImporter::findImportScript(const std::string & scriptKey) {
    if (!m_metaData) {
        m_metaData.reset(m_swigFactory.makeSwigMetadata());
        if (!m_metaData) {
            throw std::runtime_error("F-UDF-CL-SL-JAVA-1631: Failure while importing scripts");
        }
    }
    const char *importScriptCode = m_metaData->moduleContent(scriptKey.c_str());
    // The lookup reports failures out of band, so check before returning.
    const char *exception = m_metaData->checkException();
    if (exception) {
        throw std::runtime_error("F-UDF-CL-SL-JAVA-1632: " + std::string(exception));
    }
    return importScriptCode;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ namespace JavaScriptOptions {

namespace CTPG {



class ScriptImporter {

public:
Expand All @@ -24,17 +26,33 @@ class ScriptImporter {
void importScript(std::string & scriptCode, ExecutionGraph::OptionsLineParser::CTPG::options_map_t & options);

private:
/**
 * One resolved import: the text to insert plus the span it replaces.
 *
 * Deliberately declares no special member functions (Rule of Zero). A
 * user-declared constructor would stop this being an aggregate under C++20,
 * which would break designated-initializer construction, and would also
 * suppress default construction and assignment.
 */
struct CollectedScript {
    std::string script;   // replacement text; empty if nothing is to be inserted
    size_t origPos = 0;   // start offset of the replaced span in the enclosing script
    size_t origLen = 0;   // length of the replaced span
};

typedef ExecutionGraph::OptionsLineParser::CTPG::options_map_t::mapped_type OptionValues_t;

void importScript(std::string & scriptCode,
ExecutionGraph::OptionsLineParser::CTPG::options_map_t & options,
const size_t recursionDepth);
const char* findImportScript(const std::string & scriptKey);
private:

void collectImportScripts(const OptionValues_t & option_values,
const size_t recursionDepth,
std::vector<CollectedScript> &result);

void replaceImportScripts(std::string & scriptCode,
const std::vector<CollectedScript> &collectedImportScripts);

Checksum m_importedScriptChecksums;
SwigFactory & m_swigFactory;
std::unique_ptr<SWIGMetadataIf> m_metaData;
Keywords & m_keywords;
//The empirical maximal value for recursion depth is ~26000. So we choose 20000 to have a certain buffer.
const size_t cMaxRecursionDepth = 20000;
//The empirical maximal value for recursion depth is ~18000, so we use a clearly lower limit to keep a safety buffer.
const size_t cMaxRecursionDepth = 10000U;
};

} //namespace CTPG
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,10 @@ void parse(std::string&& code, options_type& result) {
parse_options{}.set_skip_whitespace(false),
buffers::string_buffer(std::move(code)),
error_buffer);
if (res.has_value())
{
if (res.has_value()) {
result = res.value();
}
else
{
else {
std::stringstream ss;
ss << "Error parsing script options: " << error_buffer.str();
throw OptionParserException(ss.str());
Expand All @@ -200,48 +198,72 @@ void parse(std::string&& code, options_type& result) {

} //namespace ParserInternals

/**
 * Boundaries of a single line inside a script.
 */
struct LinePositions {
    size_t mStartPos;  // index of the first character of the line
    size_t mEndPos;    // index of the terminating '\r' or '\n'; std::string::npos if the line is the last one
};

/**
 * Locates the line that contains the next '%' at or after `current_pos`.
 *
 * The line starts right behind the closest '\r' or '\n' preceding the '%'
 * (or at index 0 if there is none) and ends at the first '\r' or '\n' at or
 * after that start.
 *
 * @param current_pos  index in `scriptCode` where the search for '%' begins
 * @param scriptCode   text to search
 * @return the boundaries of the line, or an empty optional if no '%' is found
 */
inline std::optional<LinePositions> getNextLine(const size_t current_pos, const std::string & scriptCode) {
    const size_t percentPos = scriptCode.find('%', current_pos);
    if (percentPos == std::string::npos) {
        return std::nullopt;
    }

    // Walk back to the line break in front of the '%'; none means first line.
    const size_t previousBreak = scriptCode.find_last_of("\r\n", percentPos);
    const size_t lineStart = (previousBreak == std::string::npos) ? 0 : previousBreak + 1;

    // npos here simply means "the line runs to the end of the text".
    const size_t lineEnd = scriptCode.find_first_of("\r\n", lineStart);
    return LinePositions{lineStart, lineEnd};
}

void parseOptions(const std::string& code, options_map_t & result) {

size_t current_pos = 0;
std::optional<LinePositions> currentLinePositions = getNextLine(current_pos, code);
while (currentLinePositions) {

do {
const size_t new_option_start_pos = code.find_first_of("%", current_pos);
if (new_option_start_pos == std::string::npos)
break;
current_pos = code.find_last_of("\r\n", new_option_start_pos);
if (std::string::npos == current_pos)
current_pos = 0;
else
current_pos++;

const size_t new_pos = code.find_first_of("\r\n", current_pos);
std::string line = code.substr(current_pos, new_pos);
std::string line = code.substr(currentLinePositions->mStartPos, currentLinePositions->mEndPos);
options_type parser_result;
ParserInternals::parse(std::move(line), parser_result);
for (const auto & option: parser_result)
{
for (const auto & option: parser_result) {
ScriptOption entry = {
.value = option.value,
.idx_in_source = current_pos + option.start.column - 1,
.idx_in_source = currentLinePositions->mStartPos + option.start.column - 1,
.size = option.end.column - option.start.column + 1
};
auto it_in_result = result.find(option.key);
if (it_in_result == result.end())
{
if (it_in_result == result.end()) {
options_t new_options;
new_options.push_back(entry);
result.insert(std::make_pair(option.key, new_options));
}
else
{
else {
it_in_result->second.push_back(entry);
}
}
if (new_pos == std::string::npos) {
if (currentLinePositions->mEndPos == std::string::npos) {
break;
}
current_pos = new_pos + 1;
} while(true);
current_pos = currentLinePositions->mEndPos + 1;

currentLinePositions = getNextLine(current_pos, code);
}
}


Expand Down

0 comments on commit 684d0ef

Please sign in to comment.