Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add uuid5, uuidv6 functions #1018

Merged
merged 1 commit into from
Dec 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions include/faker-cxx/crypto.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,18 @@ namespace faker::crypto
*/
FAKER_CXX_EXPORT std::string sha256(std::optional<std::string> = std::nullopt);

/**
 * @brief Returns the SHA1 hash of the provided data, or of sampled data when none is provided.
 *
 * When the argument is absent or an empty string, the implementation hashes a value obtained from
 * word::sample() instead, so the result differs from call to call.
 *
 * @returns SHA1 hash string (40 lowercase hexadecimal characters).
 *
 * @code
 * faker::crypto::sha1("hello world") // "2aae6c35c94fcfb415dbe95f408b9ce91ee846ed"
 * faker::crypto::sha1() // hash of sampled data, e.g. "a9993e364706816aba3e25717850c26c9cd0d89d"
 * @endcode
 */
FAKER_CXX_EXPORT std::string sha1(std::optional<std::string> = std::nullopt);

/**
* @brief Returns a random MD5 hash or hash of provided data.
*
Expand Down
3 changes: 2 additions & 1 deletion include/faker-cxx/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,8 @@ FAKER_CXX_EXPORT std::string generateAtLeastString(const GuaranteeMap& guarantee
* faker::string::uuid(Uuid::V8) // "27666229-cedb-4a45-8018-98b1e1d921e2"
* @endcode
*/
FAKER_CXX_EXPORT std::string uuid(Uuid uuid = Uuid::V4);
FAKER_CXX_EXPORT std::string uuid(Uuid uuid = Uuid::V4, const std::string& namespace_uuid = "",
const std::string& name = "");

/**
* @brief Generates an Universally Unique Lexicographically Sortable Identifier.
Expand Down
155 changes: 155 additions & 0 deletions src/modules/crypto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
#include <array>
#include <cstdint>
#include <cstring>
#include <iomanip>
#include <optional>
#include <sstream>
#include <string>

#include "faker-cxx/word.h"
Expand Down Expand Up @@ -99,6 +101,139 @@ class Md5Hash
4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21};
};

/**
 * @brief Minimal incremental SHA-1 implementation.
 *
 * Feed bytes with update(), then call digest() to apply the final padding and obtain the 20-byte
 * hash. toString() renders a digest as 40 lowercase hexadecimal characters.
 */
class SHA1
{
public:
    SHA1()
    {
        reset();
    }

    /**
     * @brief Feeds every byte of @p data into the hash.
     *
     * @param data Bytes to hash; embedded NUL characters are hashed like any other byte.
     */
    void update(const std::string& data)
    {
        for (const char ch : data)
        {
            processByte(static_cast<unsigned char>(ch));
        }
    }

    /**
     * @brief Finalizes the hash and returns it.
     *
     * The five 32-bit state words are serialized most-significant byte first. The object is reset
     * to its initial state afterwards, so it can be reused for a new message.
     *
     * @returns The 20-byte SHA-1 digest of everything passed to update() since construction or the
     * previous digest() call.
     */
    std::array<unsigned char, 20> digest()
    {
        pad();

        std::array<unsigned char, 20> result{};
        for (size_t i = 0; i < state.size(); ++i)
        {
            result[i * 4] = static_cast<unsigned char>((state[i] >> 24) & 0xFF);
            result[i * 4 + 1] = static_cast<unsigned char>((state[i] >> 16) & 0xFF);
            result[i * 4 + 2] = static_cast<unsigned char>((state[i] >> 8) & 0xFF);
            result[i * 4 + 3] = static_cast<unsigned char>(state[i] & 0xFF);
        }

        reset();
        return result;
    }

    /**
     * @brief Converts a digest into its lowercase hexadecimal string representation.
     *
     * @param hash Digest as returned by digest().
     *
     * @returns 40-character hexadecimal string.
     */
    static std::string toString(const std::array<unsigned char, 20>& hash)
    {
        std::ostringstream ss;
        ss << std::hex << std::setfill('0');
        for (const auto byte : hash)
        {
            ss << std::setw(2) << static_cast<int>(byte);
        }
        return ss.str();
    }

private:
    static constexpr size_t blockSize = 64;
    // Offset inside the final block at which the 8-byte message length starts.
    static constexpr size_t lengthOffset = blockSize - 8;

    // Restores the initial chaining values and forgets any buffered input.
    void reset()
    {
        state = {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0};
        length = 0;
        bufferSize = 0;
    }

    // Buffers one message byte and compresses the buffer as soon as a full block is available.
    void processByte(unsigned char byte)
    {
        buffer[bufferSize++] = byte;
        if (bufferSize == blockSize)
        {
            processBlock();
            bufferSize = 0;
        }
        length += 8; // the message length is tracked in bits
    }

    // Runs the SHA-1 compression function over the 64 bytes currently held in the buffer.
    void processBlock()
    {
        std::array<uint32_t, 80> w{};
        for (size_t i = 0; i < 16; ++i)
        {
            w[i] = (static_cast<uint32_t>(buffer[i * 4]) << 24) | (static_cast<uint32_t>(buffer[i * 4 + 1]) << 16) |
                   (static_cast<uint32_t>(buffer[i * 4 + 2]) << 8) | (static_cast<uint32_t>(buffer[i * 4 + 3]));
        }

        for (size_t i = 16; i < 80; ++i)
        {
            w[i] = rotateLeft(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);
        }

        uint32_t a = state[0];
        uint32_t b = state[1];
        uint32_t c = state[2];
        uint32_t d = state[3];
        uint32_t e = state[4];

        for (size_t i = 0; i < 80; ++i)
        {
            uint32_t f = 0;
            uint32_t k = 0;
            if (i < 20)
            {
                f = (b & c) | (~b & d);
                k = 0x5A827999;
            }
            else if (i < 40)
            {
                f = b ^ c ^ d;
                k = 0x6ED9EBA1;
            }
            else if (i < 60)
            {
                f = (b & c) | (b & d) | (c & d);
                k = 0x8F1BBCDC;
            }
            else
            {
                f = b ^ c ^ d;
                k = 0xCA62C1D6;
            }

            const uint32_t temp = rotateLeft(a, 5) + f + e + k + w[i];

            e = d;
            d = c;
            c = rotateLeft(b, 30);
            b = a;
            a = temp;
        }

        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;
        state[4] += e;
    }

    // Appends the 0x80 marker, zero padding and the 64-bit big-endian bit length, then compresses
    // the final block(s).
    void pad()
    {
        // Capture the length first: the padding bytes below must not be counted.
        const uint64_t bitLength = length;

        // processByte() never leaves a full buffer behind, so there is always room for the marker.
        buffer[bufferSize++] = 0x80;

        // If the marker left fewer than 8 free bytes, the length does not fit in this block:
        // zero-fill it, compress it, and put the length into an additional block.
        if (bufferSize > lengthOffset)
        {
            while (bufferSize < blockSize)
            {
                buffer[bufferSize++] = 0;
            }
            processBlock();
            bufferSize = 0;
        }

        while (bufferSize < lengthOffset)
        {
            buffer[bufferSize++] = 0;
        }

        for (int i = 0; i < 8; ++i)
        {
            buffer[bufferSize++] = static_cast<unsigned char>((bitLength >> (56 - i * 8)) & 0xFF);
        }

        processBlock();
        bufferSize = 0;
    }

    // Rotates a 32-bit value left; callers only pass bit counts strictly between 0 and 32.
    static uint32_t rotateLeft(uint32_t value, uint32_t bits)
    {
        return (value << bits) | (value >> (32 - bits));
    }

private:
    std::array<uint32_t, 5> state;
    uint64_t length;
    std::array<unsigned char, blockSize> buffer;
    size_t bufferSize;
};
}

std::string sha256(std::optional<std::string> data)
Expand All @@ -121,6 +256,26 @@ std::string sha256(std::optional<std::string> data)
return result;
}

// SHA-1 hashing function wrapper
std::string sha1(std::optional<std::string> data)
{
std::string orgData;
if (!data.has_value() || data->empty())
{
orgData = word::sample(); // Fallback to default sample data
}
else
{
orgData = data.value(); // Use provided data
}

SHA1 sha;
sha.update(orgData); // Update the SHA-1 instance with the data
std::array<unsigned char, 20> result = sha.digest(); // Get the final SHA-1 hash

return SHA1::toString(result); // Convert the hash to a hex string
}

std::string md5(std::optional<std::string> data)
{
std::string orgData;
Expand Down
99 changes: 96 additions & 3 deletions src/modules/string.cpp
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
#include "faker-cxx/string.h"

#include <cassert>
#include <chrono>
#include <iomanip>
#include <map>
#include <random>
#include <set>
#include <stdexcept>
#include <string>

#include "common/algo_helper.h"
#include "faker-cxx/crypto.h"
#include "faker-cxx/helper.h"
#include "faker-cxx/number.h"
#include "string_data.h"
Expand Down Expand Up @@ -491,7 +494,97 @@ std::string uuidV4()
return result;
}

std::string uuid(Uuid uuid)
/**
 * @brief Builds a name-based (version 5) UUID from a namespace UUID and a name.
 *
 * The 16 bytes of the namespace followed by @p name are hashed with crypto::sha1. The first 16 bytes
 * of the digest, with the version and variant bits overwritten, form the result.
 *
 * @param namespace_uuid Namespace in canonical 8-4-4-4-12 hexadecimal form (36 characters, upper or
 * lower case digits).
 * @param name Name to hash inside the namespace.
 *
 * @returns Lowercase UUID string in 8-4-4-4-12 form.
 *
 * @throws std::invalid_argument if @p namespace_uuid is not a well-formed UUID string.
 */
std::string uuidV5(const std::string& namespace_uuid, const std::string& name)
{
    // Value of a single hexadecimal digit, or -1 when the character is not one.
    const auto hexDigitValue = [](char c) -> int
    {
        if (c >= '0' && c <= '9')
        {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f')
        {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F')
        {
            return c - 'A' + 10;
        }
        return -1;
    };

    // Value of the two-digit hexadecimal byte starting at text[pos], or -1 when either digit is invalid.
    const auto hexByteValue = [&hexDigitValue](const std::string& text, size_t pos) -> int
    {
        const int high = hexDigitValue(text[pos]);
        const int low = hexDigitValue(text[pos + 1]);
        return (high < 0 || low < 0) ? -1 : ((high << 4) | low);
    };

    constexpr size_t uuidTextLength = 36;
    constexpr size_t uuidByteCount = 16;

    if (namespace_uuid.length() != uuidTextLength)
    {
        throw std::invalid_argument("Invalid namespace UUID");
    }

    // Decode the namespace UUID into binary form. The layout is validated strictly: a length check
    // alone would let a string with missing dashes decode to more than 16 bytes.
    std::array<unsigned char, uuidByteCount> namespace_bytes{};
    size_t byteIndex = 0;
    size_t pos = 0;
    while (pos < uuidTextLength)
    {
        const bool dashExpected = pos == 8 || pos == 13 || pos == 18 || pos == 23;
        if (dashExpected)
        {
            if (namespace_uuid[pos] != '-')
            {
                throw std::invalid_argument("Invalid namespace UUID");
            }
            ++pos;
            continue;
        }

        // With the dash positions fixed, every hex pair starts at most at index 34, so pos + 1 is in range.
        const int value = hexByteValue(namespace_uuid, pos);
        if (value < 0)
        {
            throw std::invalid_argument("Invalid namespace UUID");
        }
        namespace_bytes[byteIndex++] = static_cast<unsigned char>(value);
        pos += 2;
    }

    // Append the name to the namespace
    std::string data(reinterpret_cast<const char*>(namespace_bytes.data()), namespace_bytes.size());
    data.append(name);

    // Compute SHA-1 hash of the data (returned as a hexadecimal string)
    const std::string hash_str = crypto::sha1(data);
    if (hash_str.size() < uuidByteCount * 2)
    {
        throw std::logic_error("SHA-1 digest is shorter than expected");
    }

    // Only the first 16 bytes of the 20-byte digest are used for the UUID
    std::array<unsigned char, uuidByteCount> hash{};
    for (size_t i = 0; i < uuidByteCount; ++i)
    {
        const int value = hexByteValue(hash_str, i * 2);
        if (value < 0)
        {
            throw std::logic_error("SHA-1 digest is not hexadecimal");
        }
        hash[i] = static_cast<unsigned char>(value);
    }

    hash[6] = static_cast<unsigned char>((hash[6] & 0x0F) | 0x50); // Set version to 5
    hash[8] = static_cast<unsigned char>((hash[8] & 0x3F) | 0x80); // Set variant to RFC 4122

    // Format the UUID as a string
    constexpr char hexDigits[] = "0123456789abcdef";
    std::string result;
    result.reserve(uuidTextLength);
    for (size_t i = 0; i < uuidByteCount; ++i)
    {
        if (i == 4 || i == 6 || i == 8 || i == 10)
        {
            result.push_back('-');
        }
        result.push_back(hexDigits[hash[i] >> 4]);
        result.push_back(hexDigits[hash[i] & 0x0F]);
    }

    return result;
}

/**
 * @brief Generates a version 6 UUID from the current system time and random clock-sequence/node bits.
 *
 * The timestamp counts 100-nanosecond ticks: the system clock's microseconds since its epoch times
 * ten, plus a fixed epoch offset. Its upper 48 bits fill the first two groups and its lowest 12 bits
 * follow the version nibble. The clock sequence (14 bits) and node (48 bits) are drawn at random.
 *
 * @returns Lowercase UUID string in 8-4-4-4-12 form.
 */
std::string uuidV6()
{
    auto generator = RandomGenerator<std::mt19937>{};

    // Offset, in 100-nanosecond ticks, between the UUID epoch (1582-10-15) and the Unix epoch.
    constexpr uint64_t epochOffsetTicks = 0x01B21DD213814000ULL;

    const auto sinceClockEpoch = std::chrono::system_clock::now().time_since_epoch();
    const auto elapsedMicros = std::chrono::duration_cast<std::chrono::microseconds>(sinceClockEpoch).count();
    const uint64_t ticks = epochOffsetTicks + static_cast<uint64_t>(elapsedMicros * 10);

    // Random draws happen in this order: clock sequence first, then node.
    std::uniform_int_distribution<uint16_t> clockSeqRange(0, 0x3FFF);
    const auto clockSeq = static_cast<uint16_t>(generator(clockSeqRange));

    std::uniform_int_distribution<uint64_t> nodeRange(0, 0xFFFFFFFFFFFFULL);
    const uint64_t nodeId = generator(nodeRange) & 0xFFFFFFFFFFFFULL;

    // Split the timestamp into the three time fields; the version nibble (6) tops the last one.
    const auto timeHigh = static_cast<uint32_t>((ticks >> 28) & 0xFFFFFFFFULL);
    const auto timeMid = static_cast<uint16_t>((ticks >> 12) & 0xFFFFULL);
    const auto timeLowWithVersion = static_cast<uint16_t>((ticks & 0x0FFFULL) | 0x6000ULL);

    // The top two bits of the clock-sequence byte carry the variant marker (binary 10).
    const int clockSeqHighWithVariant = ((clockSeq >> 8) & 0x3F) | 0x80;
    const int clockSeqLow = clockSeq & 0xFF;

    std::ostringstream out;
    out << std::hex << std::setfill('0');
    out << std::setw(8) << timeHigh << '-' << std::setw(4) << timeMid << '-' << std::setw(4) << timeLowWithVersion
        << '-' << std::setw(2) << clockSeqHighWithVariant << std::setw(2) << clockSeqLow << '-' << std::setw(12)
        << nodeId;

    return out.str();
}

std::string uuid(Uuid uuid, const std::string& namespace_uuid, const std::string& name)
{
switch (uuid)
{
Expand All @@ -503,10 +596,10 @@ std::string uuid(Uuid uuid)
return uuidV4();
case Uuid::V5:
// TODO: implement uuidV5
return uuidV4();
return uuidV5(namespace_uuid, name);
case Uuid::V6:
// TODO: implement uuidV6
return uuidV4();
return uuidV6();
case Uuid::V7:
// TODO: implement uuidV7
return uuidV4();
Expand Down
34 changes: 34 additions & 0 deletions tests/modules/crypto_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,15 @@ class CryptoTest : public Test

return std::regex_match(input, regexExp);
}
// Returns true when input is exactly 40 characters long and every character is a decimal digit
// or a lowercase hexadecimal letter (a-f).
static bool isSHA1Hash(const std::string& input)
{
    constexpr std::size_t sha1HexLength = 40;
    if (input.size() != sha1HexLength)
    {
        return false;
    }

    for (const char ch : input)
    {
        const bool isDigit = ch >= '0' && ch <= '9';
        const bool isLowerHexLetter = ch >= 'a' && ch <= 'f';
        if (!isDigit && !isLowerHexLetter)
        {
            return false;
        }
    }

    return true;
}
};

TEST_F(CryptoTest, ShouldGenerateSHA256Hash)
Expand Down Expand Up @@ -82,6 +91,31 @@ TEST_F(CryptoTest, ChecksSHA256HashWithDataCodecov)
ASSERT_TRUE(isSHA256Hash(generatedRandomHash));
}

// Calls sha1() without an argument; only the shape of the result (40 lowercase hex characters) is checked.
TEST_F(CryptoTest, ChecksSHA1Hash)
{
    const auto hashOfGeneratedData = sha1();

    ASSERT_TRUE(isSHA1Hash(hashOfGeneratedData));
}

// Hashes a short caller-supplied string and checks that the result has the shape of a SHA1 hash.
TEST_F(CryptoTest, ChecksSHA1HashWithData)
{
    const auto hashOfShortInput = sha1("SHA1 TEST");

    ASSERT_TRUE(isSHA1Hash(hashOfShortInput));
}

// Hashes a 66-character input, i.e. one longer than 64 characters, and checks the shape of the result.
TEST_F(CryptoTest, ChecksSHA1HashWithLongData)
{
    const auto hashOfLongInput = sha1("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde978");

    ASSERT_TRUE(isSHA1Hash(hashOfLongInput));
}

// Hashes a 63-character input, one character short of 64, and checks the shape of the result.
TEST_F(CryptoTest, ChecksSHA1HashWithDataCodecov)
{
    const auto hashOfNearBlockInput = sha1("b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde");

    ASSERT_TRUE(isSHA1Hash(hashOfNearBlockInput));
}


TEST_F(CryptoTest, ShouldGenerateMD5Hash)
{
const auto generatedRandomHash = md5();
Expand Down
Loading
Loading