From 04be20ea58e5c2245b601e3805d7bba81e46ba24 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Sat, 11 May 2024 18:27:29 -0700 Subject: [PATCH 1/3] Added utilities for Base64 encoding/decoding --- src/util/String.cc | 46 +++++++++++++++++++++++++++++++++++++++++ src/util/String.h | 30 +++++++++++++++++++++++++++ src/util/testString.cc | 47 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 123 insertions(+) diff --git a/src/util/String.cc b/src/util/String.cc index e22b7c329..ebd575b3d 100644 --- a/src/util/String.cc +++ b/src/util/String.cc @@ -30,10 +30,18 @@ #include #include +// Third party headers +#include +#include +#include +#include + // LSST headers #include "lsst/log/Log.h" using namespace std; +using namespace boost::algorithm; +using namespace boost::archive::iterators; #define CONTEXT_(func) ("String::" + string(func) + " ") @@ -185,4 +193,42 @@ string String::toUpper(string const& str) { return result; } +string String::toBase64(char const* ptr, size_t length) { + if (ptr == nullptr) { + throw invalid_argument(CONTEXT_(__func__) + "sequnce pointer is nullptr"); + } + if (length == 0) return string(); + + size_t const padding = (3 - length % 3) % 3; // calculate padding size + size_t const encodedLength = (length + padding) * 4 / 3; // calculate encoded length + + string encoded; + encoded.reserve(encodedLength); + + // Append base64 characters to result string. + typedef base64_from_binary> base64_iterator; + for (base64_iterator itr(ptr), end(ptr + length); itr != end; ++itr) { + encoded.push_back(*itr); + } + + // Add padding characters if necessary. 
+ for (size_t i = 0; i < padding; ++i) { + encoded.push_back('='); + } + return encoded; +} + +string String::fromBase64(string const& str) { + if (str.empty()) return string(); + string decoded; + try { + typedef transform_width, 8, 6> base64_decoder; + decoded = trim_right_copy_if(string(base64_decoder(str.begin()), base64_decoder(str.end())), + [](char c) { return c == '\0'; }); + } catch (exception const& ex) { + throw range_error(CONTEXT_(__func__) + "failed to decode base64 string: " + ex.what()); + } + return decoded; +} + } // namespace lsst::qserv::util diff --git a/src/util/String.h b/src/util/String.h index 19bb2b59f..5e61a80c1 100644 --- a/src/util/String.h +++ b/src/util/String.h @@ -158,6 +158,36 @@ class String { /// @param str A string to be translated /// @return The string with all characters converted to upper case. static std::string toUpper(std::string const& str); + + /** + * Encode the input sequence of bytes into the Base64 representation packaged + * into a string with ('=') padding as needed. + * + * For example, the method will convert a sequence of characters as shown below: + * @code + * "0123456789" -> "MDEyMzQ1Njc4OQ==" + * @endcode + * @param ptr A pointer to the byte sequence. + * @param length The number of bytes to translate. + * @return The encoded sequence of bytes or the empty string if the length=0. + * @throw std::invalid_argument If the pointer is nullptr. + */ + static std::string toBase64(char const* ptr, std::size_t length); + static std::string toBase64(std::string const& str) { return toBase64(str.data(), str.size()); } + + /** + * Decode the Base64-encoded (padded with '=' as needed) string into the binary string. + * + * For example, the method will decode the encoded Base64 string as shown below: + * @code + * "MDEyMzQ1Njc4OQ==" -> "0123456789" + * @endcode + * + * @param str The string to be decoded. + * @return The decoded sequence of bytes or the empty string if the input is empty. 
+ * @throw std::range_error For non-base64 characters in the input. + */ + static std::string fromBase64(std::string const& str); }; } // namespace lsst::qserv::util diff --git a/src/util/testString.cc b/src/util/testString.cc index 6ac5208fb..2d2443ced 100644 --- a/src/util/testString.cc +++ b/src/util/testString.cc @@ -25,6 +25,7 @@ #include #include #include +#include // LSST headers #include "lsst/log/Log.h" @@ -79,7 +80,22 @@ std::vector const char2hex_lower = { "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "da", "db", "dc", "dd", "de", "df", "e0", "e1", "e2", "e3", "e4", "e5", "e6", "e7", "e8", "e9", "ea", "eb", "ec", "ed", "ee", "ef", "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", "f8", "f9", "fa", "fb", "fc", "fd", "fe", "ff"}; + +std::unordered_map const str2base64 = { + {"0", "MA=="}, + {"01", "MDE="}, + {"012", "MDEy"}, + {"0123", "MDEyMw=="}, + {"01234", "MDEyMzQ="}, + {"012345", "MDEyMzQ1"}, + {"0123456", "MDEyMzQ1Ng=="}, + {"01234567", "MDEyMzQ1Njc="}, + {"012345678", "MDEyMzQ1Njc4"}, + {"0123456789", "MDEyMzQ1Njc4OQ=="}, + {"!@#$$\%\%^^&&**(())_)(**&&&", "IUAjJCQlJV5eJiYqKigoKSlfKSgqKiYmJg=="}}; + } // namespace + BOOST_AUTO_TEST_SUITE(Suite) BOOST_AUTO_TEST_CASE(SplitStringTest) { @@ -425,4 +441,35 @@ BOOST_AUTO_TEST_CASE(StringCaseTranslationTest) { BOOST_CHECK_EQUAL(util::String::toUpper("Mixed_Case"), "MIXED_CASE"); } +BOOST_AUTO_TEST_CASE(ToBase64Test) { + LOGS_INFO("ToBase64Test test begins"); + + // Null pointer is treated as an illegal input. + BOOST_CHECK_THROW(util::String::toBase64(nullptr, 0), std::invalid_argument); + + // This test ensures that the empty string is always returned for the empty + // input regardless of the (non-null) pointer passed. 
+ char const empty[] = ""; + BOOST_CHECK_EQUAL(util::String::toBase64(empty, 0), std::string()); + + for (auto const& [str, b64] : ::str2base64) { + BOOST_CHECK_EQUAL(util::String::toBase64(str), b64); + } +} + +BOOST_AUTO_TEST_CASE(FromBase64Test) { + LOGS_INFO("FromBase64Test test begins"); + + // Make sure the empty input is always decoded into the empty + // string. + std::string const empty; + BOOST_CHECK_EQUAL(util::String::fromBase64(empty), std::string()); + + for (auto const& [str, b64] : ::str2base64) { + std::string const decoded = util::String::fromBase64(b64); + BOOST_CHECK_EQUAL(decoded.size(), str.size()); + BOOST_CHECK_EQUAL(decoded, str); + } +} + BOOST_AUTO_TEST_SUITE_END() From ade3f06b9887a0386310737da71891ec65fa35fa Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Sat, 11 May 2024 18:34:12 -0700 Subject: [PATCH 2/3] Incremented the version number of the REST API to be 35 --- src/admin/python/lsst/qserv/admin/replicationInterface.py | 2 +- src/http/MetaModule.cc | 2 +- src/www/qserv/js/Common.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/admin/python/lsst/qserv/admin/replicationInterface.py b/src/admin/python/lsst/qserv/admin/replicationInterface.py index fd82828e8..88f8ebfbc 100644 --- a/src/admin/python/lsst/qserv/admin/replicationInterface.py +++ b/src/admin/python/lsst/qserv/admin/replicationInterface.py @@ -201,7 +201,7 @@ def __init__( self.repl_ctrl = urlparse(repl_ctrl_uri) self.auth_key = auth_key self.admin_auth_key = admin_auth_key - self.repl_api_version = 34 + self.repl_api_version = 35 _log.debug(f"ReplicationInterface %s", self.repl_ctrl) def version(self) -> str: diff --git a/src/http/MetaModule.cc b/src/http/MetaModule.cc index 94b7f9df2..2965eefaa 100644 --- a/src/http/MetaModule.cc +++ b/src/http/MetaModule.cc @@ -37,7 +37,7 @@ string const adminAuthKey; namespace lsst::qserv::http { -unsigned int const MetaModule::version = 34; +unsigned int const MetaModule::version = 35; void 
MetaModule::process(string const& context, nlohmann::json const& info, qhttp::Request::Ptr const& req, qhttp::Response::Ptr const& resp, string const& subModuleName) { diff --git a/src/www/qserv/js/Common.js b/src/www/qserv/js/Common.js index 26ef6fdf4..d83bdbd18 100644 --- a/src/www/qserv/js/Common.js +++ b/src/www/qserv/js/Common.js @@ -6,7 +6,7 @@ function(sqlFormatter, _) { class Common { - static RestAPIVersion = 34; + static RestAPIVersion = 35; static query2text(query, expanded) { if (expanded) { return sqlFormatter.format(query, Common._sqlFormatterConfig); From 5a6b6bab5aec9b9f4ea37c842306321696ca99c1 Mon Sep 17 00:00:00 2001 From: Igor Gaponenko Date: Sat, 11 May 2024 18:49:51 -0700 Subject: [PATCH 3/3] Added support for Base64-encoding/decoding in the HTTP frontend The new feature applies to both query API and the table ingest API of the frontend's REST service. --- src/czar/HttpCzarIngestModule.cc | 2 +- src/czar/HttpCzarQueryModule.cc | 7 ++++-- src/http/BinaryEncoding.cc | 4 ++++ src/http/BinaryEncoding.h | 3 ++- src/replica/ingest/IngestDataHttpSvcMod.cc | 27 +++++++++++++++++++++- src/replica/ingest/IngestDataHttpSvcMod.h | 2 ++ 6 files changed, 40 insertions(+), 5 deletions(-) diff --git a/src/czar/HttpCzarIngestModule.cc b/src/czar/HttpCzarIngestModule.cc index 814e7118a..8159d2692 100644 --- a/src/czar/HttpCzarIngestModule.cc +++ b/src/czar/HttpCzarIngestModule.cc @@ -139,7 +139,7 @@ json HttpCzarIngestModule::executeImpl(string const& subModuleName) { json HttpCzarIngestModule::_ingestData() { debug(__func__); - checkApiVersion(__func__, 34); + checkApiVersion(__func__, 35); auto const databaseName = body().required("database"); auto const tableName = body().required("table"); diff --git a/src/czar/HttpCzarQueryModule.cc b/src/czar/HttpCzarQueryModule.cc index dbd972e73..5d45e77d0 100644 --- a/src/czar/HttpCzarQueryModule.cc +++ b/src/czar/HttpCzarQueryModule.cc @@ -81,7 +81,7 @@ json HttpCzarQueryModule::executeImpl(string const& subModuleName) 
{ json HttpCzarQueryModule::_submit() { debug(__func__); - checkApiVersion(__func__, 33); + checkApiVersion(__func__, 35); string const binaryEncodingStr = body().optional("binary_encoding", "hex"); http::BinaryEncodingMode const binaryEncoding = http::parseBinaryEncoding(binaryEncodingStr); @@ -138,7 +138,7 @@ json HttpCzarQueryModule::_status() { json HttpCzarQueryModule::_result() { debug(__func__); - checkApiVersion(__func__, 33); + checkApiVersion(__func__, 35); string const binaryEncodingStr = query().optionalString("binary_encoding", "hex"); http::BinaryEncodingMode const binaryEncoding = http::parseBinaryEncoding(binaryEncodingStr); debug(__func__, "binary_encoding=" + http::binaryEncoding2string(binaryEncoding)); @@ -302,6 +302,9 @@ json HttpCzarQueryModule::_rowsToJson(sql::SqlResults& results, json const& sche case http::BinaryEncodingMode::HEX: rowJson.push_back(util::String::toHex(row[i].first, row[i].second)); break; + case http::BinaryEncodingMode::B64: + rowJson.push_back(util::String::toBase64(row[i].first, row[i].second)); + break; case http::BinaryEncodingMode::ARRAY: // Notes on the std::u8string type and constructor: // 1. 
This string type is required for encoding binary data which is only possible diff --git a/src/http/BinaryEncoding.cc b/src/http/BinaryEncoding.cc index 089c4149f..364c1c1c6 100644 --- a/src/http/BinaryEncoding.cc +++ b/src/http/BinaryEncoding.cc @@ -32,6 +32,8 @@ namespace lsst::qserv::http { BinaryEncodingMode parseBinaryEncoding(string const& str) { if (str == "hex") return BinaryEncodingMode::HEX; + else if (str == "b64") + return BinaryEncodingMode::B64; else if (str == "array") return BinaryEncodingMode::ARRAY; throw invalid_argument("http::" + string(__func__) + " unsupported mode '" + str + "'"); @@ -41,6 +43,8 @@ string binaryEncoding2string(BinaryEncodingMode mode) { switch (mode) { case BinaryEncodingMode::HEX: return "hex"; + case BinaryEncodingMode::B64: + return "b64"; case BinaryEncodingMode::ARRAY: return "array"; } diff --git a/src/http/BinaryEncoding.h b/src/http/BinaryEncoding.h index c800e2df2..f45f8b87a 100644 --- a/src/http/BinaryEncoding.h +++ b/src/http/BinaryEncoding.h @@ -29,11 +29,12 @@ namespace lsst::qserv::http { /// The names of the allowed modes. -static std::vector const allowedBinaryEncodingModes = {"hex", "array"}; +static std::vector const allowedBinaryEncodingModes = {"hex", "b64", "array"}; /// Options for encoding data of the binary columns in the JSON result. enum class BinaryEncodingMode : int { HEX, ///< The hexadecimal representation stored as a string + B64, ///< Data encoded using Base64 algorithm (with padding as needed) ARRAY ///< JSON array of 8-bit unsigned integers in a range of 0 .. 255. 
}; diff --git a/src/replica/ingest/IngestDataHttpSvcMod.cc b/src/replica/ingest/IngestDataHttpSvcMod.cc index c3382b011..bc66a6b23 100644 --- a/src/replica/ingest/IngestDataHttpSvcMod.cc +++ b/src/replica/ingest/IngestDataHttpSvcMod.cc @@ -92,7 +92,7 @@ json IngestDataHttpSvcMod::executeImpl(string const& subModuleName) { json IngestDataHttpSvcMod::_syncProcessData() { debug(__func__); - checkApiVersion(__func__, 34); + checkApiVersion(__func__, 35); auto const context_ = context() + __func__; auto const config = serviceProvider()->config(); @@ -258,6 +258,9 @@ json IngestDataHttpSvcMod::_syncProcessData() { case http::BinaryEncodingMode::HEX: row.append(_translateHexString(context_, jsonColumn, rowIdx, colIdx)); break; + case http::BinaryEncodingMode::B64: + row.append(_translateBase64String(context_, jsonColumn, rowIdx, colIdx)); + break; case http::BinaryEncodingMode::ARRAY: { u8string const str = _translateByteArray(context_, jsonColumn, rowIdx, colIdx); row.append(reinterpret_cast(str.data()), str.size()); @@ -333,6 +336,28 @@ string IngestDataHttpSvcMod::_translateHexString(string const& context_, json co throw http::Error(context_, _contrib.error); } +string IngestDataHttpSvcMod::_translateBase64String(string const& context_, json const& jsonColumn, + size_t rowIdx, size_t colIdx) { + if (jsonColumn.is_string()) { + try { + return util::String::fromBase64(jsonColumn.get()); + } catch (exception const& ex) { + _contrib.error = "failed to decode a value of the '" + + http::binaryEncoding2string(http::BinaryEncodingMode::B64) + + "' binary encoded column at row " + to_string(rowIdx) + " and column " + + to_string(colIdx) + ", ex: " + string(ex.what()); + } + } else { + _contrib.error = "unsupported type name '" + string(jsonColumn.type_name()) + "' found at row " + + to_string(rowIdx) + " and column " + to_string(colIdx) + + " where the string type was expected"; + } + bool const failed = true; + _contrib = 
serviceProvider()->databaseServices()->startedTransactionContrib(_contrib, failed); + _failed(context_); + throw http::Error(context_, _contrib.error); +} + u8string IngestDataHttpSvcMod::_translateByteArray(string const& context_, json const& jsonColumn, size_t rowIdx, size_t colIdx) { if (jsonColumn.is_array()) { diff --git a/src/replica/ingest/IngestDataHttpSvcMod.h b/src/replica/ingest/IngestDataHttpSvcMod.h index 5d8d0f60d..bfe6ff84d 100644 --- a/src/replica/ingest/IngestDataHttpSvcMod.h +++ b/src/replica/ingest/IngestDataHttpSvcMod.h @@ -101,6 +101,8 @@ class IngestDataHttpSvcMod : public http::ModuleBase, public IngestFileSvc { std::string _translateHexString(std::string const& context_, nlohmann::json const& jsonColumn, size_t rowIdx, size_t colIdx); + std::string _translateBase64String(std::string const& context_, nlohmann::json const& jsonColumn, + size_t rowIdx, size_t colIdx); std::u8string _translateByteArray(std::string const& context_, nlohmann::json const& jsonColumn, size_t rowIdx, size_t colIdx); std::string _translatePrimitiveType(std::string const& context_, nlohmann::json const& jsonColumn,