Skip to content

Commit

Permalink
Merge branch 'tickets/DM-43497'
Browse files Browse the repository at this point in the history
  • Loading branch information
iagaponenko committed Mar 27, 2024
2 parents a4ccb92 + faf7d8a commit fd9c7dc
Show file tree
Hide file tree
Showing 8 changed files with 186 additions and 14 deletions.
2 changes: 1 addition & 1 deletion src/admin/python/lsst/qserv/admin/replicationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ def __init__(
self.repl_ctrl = urlparse(repl_ctrl_uri)
self.auth_key = auth_key
self.admin_auth_key = admin_auth_key
self.repl_api_version = 32
self.repl_api_version = 33
_log.debug(f"ReplicationInterface %s", self.repl_ctrl)

def version(self) -> str:
Expand Down
88 changes: 80 additions & 8 deletions src/czar/HttpCzarQueryModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,12 @@
using namespace std;
using json = nlohmann::json;

namespace {
// Names of the MySQL column types whose values are delivered as binary strings.
// NOTE: values of the MySQL type BIT(N) are also reported as binary strings where
// the number of characters is equal to CEIL(N/8).
std::vector<std::string> const binTypes{
        "BIT", "BINARY", "VARBINARY", "TINYBLOB", "BLOB", "MEDIUMBLOB", "LONGBLOB",
};
}  // namespace

namespace lsst::qserv::czar {

void HttpCzarQueryModule::process(string const& context, shared_ptr<qhttp::Request> const& req,
Expand Down Expand Up @@ -73,11 +79,24 @@ json HttpCzarQueryModule::executeImpl(string const& subModuleName) {
throw invalid_argument(context() + func + " unsupported sub-module");
}

// Translate the textual name of a binary encoding ("hex" or "array") into the
// corresponding BinaryEncodingMode. Any other input is reported by throwing
// std::invalid_argument.
HttpCzarQueryModule::BinaryEncodingMode HttpCzarQueryModule::_parseBinaryEncoding(string const& str) {
    if (str == "hex") {
        return BinaryEncodingMode::BINARY_ENCODE_HEX;
    }
    if (str == "array") {
        return BinaryEncodingMode::BINARY_ENCODE_ARRAY;
    }
    // Unknown names are rejected explicitly rather than mapped onto a default mode.
    string const message = context() + string(__func__) + " unsupported binary encoding '" + str + "'";
    throw invalid_argument(message);
}

// Handle a request that submits a query and waits for its result.
// The optional "binary_encoding" attribute of the request body (default: "hex")
// selects how values of the binary columns are encoded in the returned rows.
// An unsupported value makes _parseBinaryEncoding throw std::invalid_argument
// before the query is submitted.
json HttpCzarQueryModule::_submit() {
debug(__func__);
checkApiVersion(__func__, 32);
checkApiVersion(__func__, 33);

// Validate the requested encoding up front, before any work on the query is started.
string const binaryEncodingStr = body().optional<string>("binary_encoding", "hex");
BinaryEncodingMode const binaryEncoding = _parseBinaryEncoding(binaryEncodingStr);
debug(__func__, "binary_encoding=" + binaryEncodingStr);

SubmitResult const submitResult = _getRequestParamsAndSubmit(__func__, false);
return _waitAndExtractResult(submitResult);
return _waitAndExtractResult(submitResult, binaryEncoding);
}

json HttpCzarQueryModule::_submitAsync() {
Expand Down Expand Up @@ -127,8 +146,11 @@ json HttpCzarQueryModule::_status() {

// Handle a request for the result of a previously submitted query.
// The optional "binary_encoding" parameter obtained via query() (default: "hex")
// selects how values of the binary columns are encoded in the returned rows.
// An unsupported value makes _parseBinaryEncoding throw std::invalid_argument.
json HttpCzarQueryModule::_result() {
debug(__func__);
checkApiVersion(__func__, 30);
return _waitAndExtractResult(_getQueryInfo());
checkApiVersion(__func__, 33);
string const binaryEncodingStr = query().optionalString("binary_encoding", "hex");
BinaryEncodingMode const binaryEncoding = _parseBinaryEncoding(binaryEncodingStr);
debug(__func__, "binary_encoding=" + binaryEncodingStr);
return _waitAndExtractResult(_getQueryInfo(), binaryEncoding);
}

QueryId HttpCzarQueryModule::_getQueryId() const {
Expand Down Expand Up @@ -156,7 +178,8 @@ SubmitResult HttpCzarQueryModule::_getQueryInfo() const {
return submitResult;
}

json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult) const {
json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult,
BinaryEncodingMode binaryEncoding) const {
// Block the current thread before the query will finish or fail.
string const messageSelectQuery =
"SELECT chunkId, code, message, severity+0, timeStamp FROM " + submitResult.messageTable;
Expand Down Expand Up @@ -226,7 +249,7 @@ json HttpCzarQueryModule::_waitAndExtractResult(SubmitResult const& submitResult
error(__func__, msg);
throw http::Error(context() + __func__, msg);
}
json rowsJson = _rowsToJson(resultQueryResults);
json rowsJson = _rowsToJson(resultQueryResults, schemaJson, binaryEncoding);
resultQueryResults.freeResults();
_dropTable(submitResult.resultTable);
return json::object({{"schema", schemaJson}, {"rows", rowsJson}});
Expand All @@ -251,18 +274,67 @@ json HttpCzarQueryModule::_schemaToJson(sql::Schema const& schema) const {
columnJson["table"] = colDef.table;
columnJson["column"] = colDef.name;
columnJson["type"] = colDef.colType.sqlType;
int isBinary = 0;
for (size_t binTypeIdx = 0; binTypeIdx < ::binTypes.size(); ++binTypeIdx) {
string const& binType = ::binTypes[binTypeIdx];
if (colDef.colType.sqlType.substr(0, binType.size()) == binType) {
isBinary = 1;
break;
}
}
columnJson["is_binary"] = isBinary;
schemaJson.push_back(columnJson);
}
return schemaJson;
}

json HttpCzarQueryModule::_rowsToJson(sql::SqlResults& results) const {
json HttpCzarQueryModule::_rowsToJson(sql::SqlResults& results, json const& schemaJson,
BinaryEncodingMode binaryEncoding) const {
// Extract the binary attribute of each column into a vector. Checking the column
// type in the vector should be significantly faster than looking it up in JSON.
size_t const numColumns = schemaJson.size();
vector<int> isBinary(numColumns, false);
for (size_t colIdx = 0; colIdx < numColumns; ++colIdx) {
isBinary[colIdx] = schemaJson[colIdx].at("is_binary");
}
json rowsJson = json::array();
for (sql::SqlResults::iterator itr = results.begin(); itr != results.end(); ++itr) {
sql::SqlResults::value_type const& row = *itr;
json rowJson = json::array();
for (size_t i = 0; i < row.size(); ++i) {
rowJson.push_back(string(row[i].first ? row[i].first : "NULL"));
if (row[i].first == nullptr) {
rowJson.push_back("NULL");
} else {
if (isBinary[i]) {
switch (binaryEncoding) {
case BinaryEncodingMode::BINARY_ENCODE_HEX:
rowJson.push_back(util::String::toHex(row[i].first, row[i].second));
break;
case BinaryEncodingMode::BINARY_ENCODE_ARRAY:
// Notes on the std::u8string type and constructor:
// 1. This string type is required for encoding binary data which is only possible
// with the 8-bit encoding and not possible with the 7-bit ASCII
// representation.
// 2. This form of string construction (pointer plus explicit length) allows
// the termination symbol \0 to appear within the binary data.
//
// ATTENTION: formally this way of type casting is wrong as it breaks strict
// aliasing.
// However, for all practical purposes, char8_t is basically an unsigned char
// which makes such operation possible. The problem could be addressed either by
// redesigning Qserv's SQL library to report data as char8_t, or by explicitly
// copying and translating each byte from char to char8_t representation (which
// would not be terribly efficient for the large result sets).
rowJson.push_back(
u8string(reinterpret_cast<char8_t const*>(row[i].first), row[i].second));
break;
default:
throw http::Error(context() + __func__, "unsupported binary encoding");
}
} else {
rowJson.push_back(string(row[i].first, row[i].second));
}
}
}
rowsJson.push_back(rowJson);
}
Expand Down
20 changes: 17 additions & 3 deletions src/czar/HttpCzarQueryModule.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,19 @@ class HttpCzarQueryModule : public czar::HttpModule {
HttpCzarQueryModule(std::string const& context, std::shared_ptr<qhttp::Request> const& req,
std::shared_ptr<qhttp::Response> const& resp);

/// Selects how data of the binary columns are encoded in the JSON result.
enum BinaryEncodingMode {
    /// The hexadecimal representation stored as a string
    BINARY_ENCODE_HEX,
    /// JSON array of 8-bit unsigned integers in a range of 0 .. 255.
    BINARY_ENCODE_ARRAY
};

/**
* Translate the name of a binary encoding into the corresponding mode.
* The recognized names are "hex" and "array".
* @param str The string to parse.
* @return The parsed and validated representation of the encoding.
* @throw std::invalid_argument If the input can't be translated into a valid mode.
*/
BinaryEncodingMode _parseBinaryEncoding(std::string const& str);

nlohmann::json _submit();
nlohmann::json _submitAsync();
nlohmann::json _cancel();
Expand All @@ -93,11 +106,12 @@ class HttpCzarQueryModule : public czar::HttpModule {
SubmitResult _getRequestParamsAndSubmit(std::string const& func, bool async);
SubmitResult _getQueryInfo() const;
QueryId _getQueryId() const;
nlohmann::json _waitAndExtractResult(SubmitResult const& submitResult) const;

nlohmann::json _waitAndExtractResult(SubmitResult const& submitResult,
BinaryEncodingMode binaryEncoding) const;
void _dropTable(std::string const& tableName) const;
nlohmann::json _schemaToJson(sql::Schema const& schema) const;
nlohmann::json _rowsToJson(sql::SqlResults& results) const;
nlohmann::json _rowsToJson(sql::SqlResults& results, nlohmann::json const& schemaJson,
BinaryEncodingMode binaryEncoding) const;
};

} // namespace lsst::qserv::czar
Expand Down
2 changes: 1 addition & 1 deletion src/http/MetaModule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ string const adminAuthKey;

namespace lsst::qserv::http {

unsigned int const MetaModule::version = 32;
unsigned int const MetaModule::version = 33;

void MetaModule::process(string const& context, nlohmann::json const& info, qhttp::Request::Ptr const& req,
qhttp::Response::Ptr const& resp, string const& subModuleName) {
Expand Down
19 changes: 19 additions & 0 deletions src/util/String.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,9 @@ vector<T> getNumericVectFromStr(string const& func, vector<string> const& string
}
return result;
}

// Upper-case hexadecimal digits. The index is the value of a 4-bit nibble (0..15).
char const hexChars[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
                           '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};

} // namespace

namespace lsst::qserv::util {
Expand Down Expand Up @@ -107,4 +110,20 @@ vector<uint64_t> String::parseToVectUInt64(string const& str, string const& deli
throwOnError, defaultVal);
}

// Encode a sequence of bytes as a string of upper-case hexadecimal digits,
// two digits per input byte (the high nibble first).
//
// @param ptr A pointer to the byte sequence (must not be nullptr, even if length is 0).
// @param length The number of bytes to translate.
// @return The encoded string of 2*length characters (empty if length is 0).
// @throw std::invalid_argument If the pointer is nullptr.
string String::toHex(char const* ptr, size_t length) {
    if (ptr == nullptr) {
        throw invalid_argument(CONTEXT_(__func__) + "sequence pointer is nullptr");
    }
    string out;
    out.resize(2 * length);
    for (size_t i = 0; i < length; ++i) {
        // Going through unsigned char keeps both nibble values in the range 0..15
        // regardless of whether plain char is signed on the platform.
        auto const byte = static_cast<unsigned char>(ptr[i]);
        out[2 * i] = ::hexChars[byte >> 4];
        out[2 * i + 1] = ::hexChars[byte & 0x0F];
    }
    return out;
}

} // namespace lsst::qserv::util
14 changes: 14 additions & 0 deletions src/util/String.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,20 @@ class String {
}
return ss.str();
}

/**
* Encode the input sequence of bytes into the hexadecimal representation packaged
* into a string. Each byte is translated into two upper-case hexadecimal digits.
* For example, the method will convert a sequence of bytes as shown below:
* @code
* {10,17,255,210} -> "0A11FFD2"
* @endcode
* @param ptr A pointer to the byte sequence.
* @param length The number of bytes to translate.
* @return The encoded sequence of bytes or the empty string if the length=0.
* @throw std::invalid_argument If the pointer is nullptr.
*/
static std::string toHex(char const* ptr, std::size_t length);
};

} // namespace lsst::qserv::util
Expand Down
53 changes: 53 additions & 0 deletions src/util/testString.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

// System headers
#include <limits>
#include <stdexcept>
#include <string>
#include <vector>

Expand Down Expand Up @@ -262,4 +263,56 @@ BOOST_AUTO_TEST_CASE(ToStringTest) {
BOOST_CHECK_EQUAL(util::String::toString(strings, " ", "[", "]"), "[a] [b] [c] [d] [e]");
}

// Verifies util::String::toHex: the empty input, the rejection of the null pointer,
// the translation of every possible single byte value, and the translation of
// a multi-byte sequence.
BOOST_AUTO_TEST_CASE(ToHexTest) {
    LOGS_INFO("ToHexTest test begins");

    // The empty string is always returned for the zero-length input regardless
    // of what the (non-null) pointer refers to.
    char const empty[] = "";
    BOOST_CHECK_EQUAL(util::String::toHex(empty, 0), std::string());

    // Null pointer is treated as an illegal input (even when the length is 0).
    BOOST_CHECK_THROW(util::String::toHex(nullptr, 0), std::invalid_argument);

    // The translation map from unsigned character into the two character strings
    // corresponding to the hexadecimal representation of the characters used as
    // vector indexes.
    std::vector<std::string> const char2hex = {
            "00", "01", "02", "03", "04", "05", "06", "07", "08", "09", "0A", "0B", "0C", "0D", "0E", "0F",
            "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "1A", "1B", "1C", "1D", "1E", "1F",
            "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "2A", "2B", "2C", "2D", "2E", "2F",
            "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "3A", "3B", "3C", "3D", "3E", "3F",
            "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "4A", "4B", "4C", "4D", "4E", "4F",
            "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "5A", "5B", "5C", "5D", "5E", "5F",
            "60", "61", "62", "63", "64", "65", "66", "67", "68", "69", "6A", "6B", "6C", "6D", "6E", "6F",
            "70", "71", "72", "73", "74", "75", "76", "77", "78", "79", "7A", "7B", "7C", "7D", "7E", "7F",
            "80", "81", "82", "83", "84", "85", "86", "87", "88", "89", "8A", "8B", "8C", "8D", "8E", "8F",
            "90", "91", "92", "93", "94", "95", "96", "97", "98", "99", "9A", "9B", "9C", "9D", "9E", "9F",
            "A0", "A1", "A2", "A3", "A4", "A5", "A6", "A7", "A8", "A9", "AA", "AB", "AC", "AD", "AE", "AF",
            "B0", "B1", "B2", "B3", "B4", "B5", "B6", "B7", "B8", "B9", "BA", "BB", "BC", "BD", "BE", "BF",
            "C0", "C1", "C2", "C3", "C4", "C5", "C6", "C7", "C8", "C9", "CA", "CB", "CC", "CD", "CE", "CF",
            "D0", "D1", "D2", "D3", "D4", "D5", "D6", "D7", "D8", "D9", "DA", "DB", "DC", "DD", "DE", "DF",
            "E0", "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "EA", "EB", "EC", "ED", "EE", "EF",
            "F0", "F1", "F2", "F3", "F4", "F5", "F6", "F7", "F8", "F9", "FA", "FB", "FC", "FD", "FE", "FF"};

    // Every possible value of a single byte, one at a time.
    for (int i = 0; i < 256; ++i) {
        char const buf[1] = {static_cast<char>(i)};
        BOOST_CHECK_EQUAL(util::String::toHex(buf, 1), char2hex[i]);
    }

    // Translate the long string that is made of a monotonic sequence of all 256 characters.
    // The input and the expected output are built side by side; appending in place
    // avoids re-copying the accumulated output on each step.
    std::string in;
    std::string out;
    in.reserve(256);
    out.reserve(2 * 256);
    for (size_t i = 0; i < 256; ++i) {
        in.push_back(static_cast<char>(i));
        out += char2hex[i];
    }
    BOOST_CHECK_EQUAL(util::String::toHex(in.data(), in.size()), out);
}

BOOST_AUTO_TEST_SUITE_END()
2 changes: 1 addition & 1 deletion src/www/qserv/js/Common.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ function(sqlFormatter,
_) {

class Common {
static RestAPIVersion = 32;
static RestAPIVersion = 33;
static query2text(query, expanded) {
if (expanded) {
return sqlFormatter.format(query, Common._sqlFormatterConfig);
Expand Down

0 comments on commit fd9c7dc

Please sign in to comment.