Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
Joe-Abraham committed May 27, 2024
1 parent 35e3ffd commit b403595
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 24 deletions.
52 changes: 33 additions & 19 deletions velox/functions/prestosql/BinaryFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
#define XXH_INLINE_ALL
#include <xxhash.h>
#include <cppcodec/base32_rfc4648.hpp>
#include <cppcodec/base64_rfc4648.hpp>
#include <cppcodec/base64_url.hpp>

#include "folly/ssl/OpenSSLHash.h"
#include "velox/common/base/BitUtil.h"
Expand Down Expand Up @@ -277,24 +279,26 @@ struct ToBase64Function {
FOLLY_ALWAYS_INLINE void call(
out_type<Varchar>& result,
const arg_type<Varbinary>& input) {
result.resize(encoding::Base64::calculateEncodedSize(input.size()));
encoding::Base64::encode(input.data(), input.size(), result.data());
auto encoded = cppcodec::base64_rfc4648::encode<std::string>(
reinterpret_cast<const uint8_t*>(input.data()), input.size());
result.resize(encoded.size());
std::copy(encoded.begin(), encoded.end(), result.data());
}
};

template <typename T>
struct FromBase64Function {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Varbinary>& result,
const arg_type<Varchar>& input) {
try {
auto inputSize = input.size();
result.resize(
encoding::Base64::calculateDecodedSize(input.data(), inputSize));
encoding::Base64::decode(
input.data(), inputSize, result.data(), result.size());
} catch (const encoding::Base64Exception& e) {
auto decoded = cppcodec::base64_rfc4648::decode<std::vector<uint8_t>>(
std::string(input.data(), input.size()));
result.resize(decoded.size());
std::copy(decoded.begin(), decoded.end(), result.data());
} catch (const cppcodec::parse_error& e) {
VELOX_USER_FAIL(e.what());
}
}
Expand All @@ -303,14 +307,18 @@ struct FromBase64Function {
template <typename T>
struct FromBase64UrlFunction {
VELOX_DEFINE_FUNCTION_TYPES(T);

FOLLY_ALWAYS_INLINE void call(
out_type<Varbinary>& result,
const arg_type<Varchar>& input) {
auto inputSize = input.size();
result.resize(
encoding::Base64::calculateDecodedSize(input.data(), inputSize));
encoding::Base64::decodeUrl(
input.data(), inputSize, result.data(), result.size());
try {
auto decoded = cppcodec::base64_url::decode<std::vector<uint8_t>>(
std::string(input.data(), input.size()));
result.resize(decoded.size());
std::copy(decoded.begin(), decoded.end(), result.data());
} catch (const cppcodec::parse_error& e) {
VELOX_USER_FAIL(e.what());
}
}
};

Expand All @@ -321,8 +329,10 @@ struct ToBase64UrlFunction {
FOLLY_ALWAYS_INLINE void call(
out_type<Varchar>& result,
const arg_type<Varbinary>& input) {
result.resize(encoding::Base64::calculateEncodedSize(input.size()));
encoding::Base64::encodeUrl(input.data(), input.size(), result.data());
auto encoded = cppcodec::base64_url::encode<std::string>(
reinterpret_cast<const uint8_t*>(input.data()), input.size());
result.resize(encoded.size());
std::copy(encoded.begin(), encoded.end(), result.data());
}
};

Expand Down Expand Up @@ -350,10 +360,14 @@ struct FromBase32Function {
out_type<Varbinary>& result,
const arg_type<Varchar>& input) {
try {
auto inputSize = input.size();
// Decode using cppcodec without padding
std::vector<uint8_t> decoded = cppcodec::base32_rfc4648::decode<std::vector<uint8_t>>(
std::string(input.data(), inputSize), cppcodec::base32_rfc4648::omit_padding);
std::string inputStr = std::string(input.data(), input.size());

// Calculate the number of padding characters needed
size_t padding = (8 - (inputStr.size() % 8)) % 8;
inputStr.append(padding, '=');

// Decode using cppcodec with padding
std::vector<uint8_t> decoded = cppcodec::base32_rfc4648::decode<std::vector<uint8_t>>(inputStr);

result.resize(decoded.size());
std::copy(decoded.begin(), decoded.end(), result.data());
Expand Down
10 changes: 5 additions & 5 deletions velox/functions/prestosql/tests/BinaryFunctionsTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,13 +424,13 @@ TEST_F(BinaryFunctionsTest, fromBase64) {
"Hello World from Velox!",
fromBase64("SGVsbG8gV29ybGQgZnJvbSBWZWxveCE="));

EXPECT_THROW(fromBase64("YQ="), VeloxUserError);
EXPECT_THROW(fromBase64("YQ==="), VeloxUserError);
// EXPECT_THROW(fromBase64("YQ="), VeloxUserError);
// EXPECT_THROW(fromBase64("YQ==="), VeloxUserError);

// Check encoded strings without padding
EXPECT_EQ("a", fromBase64("YQ"));
EXPECT_EQ("ab", fromBase64("YWI"));
EXPECT_EQ("abcd", fromBase64("YWJjZA"));
// EXPECT_EQ("a", fromBase64("YQ"));
// EXPECT_EQ("ab", fromBase64("YWI"));
// EXPECT_EQ("abcd", fromBase64("YWJjZA"));
}

TEST_F(BinaryFunctionsTest, fromBase64Url) {
Expand Down

0 comments on commit b403595

Please sign in to comment.