Skip to content

Commit

Permalink
pos encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
madMAx43v3r committed Nov 20, 2023
1 parent f048ae4 commit ab3546b
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 0 deletions.
29 changes: 29 additions & 0 deletions include/mmx/pos/encoding.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
* encoding.h
*
* Created on: Nov 20, 2023
* Author: mad
*/

#ifndef INCLUDE_MMX_POS_ENCODING_H_
#define INCLUDE_MMX_POS_ENCODING_H_

#include <vector>
#include <cstdint>
#include <utility>
#include <stdexcept>


namespace mmx {
namespace pos {

/*
 * Packs a sequence of symbols into a stream of 64-bit words using a variable length code.
 *
 * A symbol `s` is written as (s / 3) escape pairs `11` followed by one 2-bit digit (s % 3),
 * i.e. it occupies 2 * (s / 3) + 2 bits. Codes are appended starting at the least significant
 * bit of the first word and may straddle two consecutive words.
 *
 * @param symbols     Symbols to encode, each must be in the range 0..47.
 * @param total_bits  Output: reset to zero, then incremented by the code length of every symbol written.
 * @return            The packed words; a partially filled last word is padded with zero bits.
 * @throws std::logic_error if a symbol is larger than 47.
 */
std::vector<uint64_t> encode(const std::vector<uint8_t>& symbols, uint64_t& total_bits);

/*
 * Unpacks `num_symbols` symbols from a stream of 64-bit words as produced by encode().
 *
 * The stream itself does not record how many symbols it holds, so the caller has to supply
 * the count. Zero bits (such as the padding of the last word) decode as symbol 0.
 *
 * @param bit_stream   Packed words, first symbol in the least significant bits of the first word.
 * @param num_symbols  Number of symbols to read.
 * @return             The decoded symbols, exactly `num_symbols` entries.
 * @throws std::logic_error ("bit stream underflow") if the stream is exhausted with no bits left,
 *         or ("symbol decode error") if a decoded code is longer than the bits that remain.
 */
std::vector<uint8_t> decode(const std::vector<uint64_t>& bit_stream, const uint64_t num_symbols);



} // pos
} // mmx

#endif /* INCLUDE_MMX_POS_ENCODING_H_ */
122 changes: 122 additions & 0 deletions src/pos/encoding.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
/*
* encoding.cpp
*
* Created on: Nov 20, 2023
* Author: mad
*/

#include <mmx/pos/encoding.h>


namespace mmx {
namespace pos {

/*
 * Computes the variable length code of a single symbol.
 *
 * The code consists of (sym / 3) escape pairs `11` in the low bits,
 * followed by one 2-bit digit holding (sym % 3).
 *
 * @param sym  Symbol to encode, valid range is 0..47.
 * @return     Pair of (code bits, code length in bits); the length is 2 * (sym / 3) + 2.
 * @throws std::logic_error if sym is larger than 47 (code would not fit into 32 bits).
 */
std::pair<uint32_t, uint32_t> encode_symbol(const uint8_t sym)
{
	const uint32_t num_escapes = sym / 3;
	const uint32_t digit = sym % 3;

	// Symbols 0..2 need no escape prefix, the code is just the digit itself.
	if(num_escapes == 0) {
		return std::pair<uint32_t, uint32_t>(digit, 2);
	}
	if(num_escapes > 15) {
		throw std::logic_error("symbol out of range");
	}
	// prefix_bits is within 2..30 here, so both shifts below stay inside 32 bits.
	const uint32_t prefix_bits = 2 * num_escapes;
	const uint32_t prefix = (uint32_t(1) << prefix_bits) - 1;

	return std::pair<uint32_t, uint32_t>(prefix | (digit << prefix_bits), prefix_bits + 2);
}

/*
 * Decodes the symbol whose code starts at the least significant bit of `bits`.
 *
 * The input is scanned in 2-bit steps: every `11` pair adds 3 to the symbol,
 * the first pair that is not `11` terminates the code and contributes its value.
 *
 * @param bits  Up to 32 bits of the stream; bits beyond the end of the code are ignored.
 * @return      Pair of (symbol, code length in bits).
 *              If all 16 pairs are `11` the result is (48, 32), a pattern encode_symbol() never produces.
 */
std::pair<uint32_t, uint32_t> decode_symbol(const uint32_t bits)
{
	uint32_t remaining = bits;

	for(uint32_t num_escapes = 0; num_escapes < 16; ++num_escapes, remaining >>= 2)
	{
		const uint32_t digit = remaining & 3;
		if(digit != 3) {
			return std::pair<uint32_t, uint32_t>(3 * num_escapes + digit, 2 * num_escapes + 2);
		}
	}
	// No terminating digit within 32 bits.
	return std::pair<uint32_t, uint32_t>(48, 32);
}

/*
 * Packs `symbols` into 64-bit words, see encode_symbol() for the per-symbol code.
 *
 * Codes are appended starting at the least significant bit of the current word.
 * A code that does not fit completely is split, its upper part continues at
 * bit 0 of the next word.
 *
 * @param symbols     Symbols to encode.
 * @param total_bits  Output: reset to zero, then incremented by the code length of every symbol written.
 * @return            The packed words; a partially filled last word is padded with zero bits.
 * @throws std::logic_error propagated from encode_symbol() for a symbol out of range.
 */
std::vector<uint64_t> encode(const std::vector<uint8_t>& symbols, uint64_t& total_bits)
{
	std::vector<uint64_t> words;

	total_bits = 0;
	uint64_t word = 0;		// word currently being filled
	uint32_t fill = 0;		// number of bits already used in `word`, always < 64

	for(const uint8_t symbol : symbols)
	{
		const auto code = encode_symbol(symbol);
		const uint32_t value = code.first;
		const uint32_t length = code.second;

		// Bits shifted beyond position 63 are dropped here and carried over below.
		word |= uint64_t(value) << fill;
		fill += length;

		if(fill >= 64) {
			words.push_back(word);
			fill -= 64;
			// `fill` is now the number of code bits which did not fit (0..31),
			// they are the top bits of `value` and start the next word.
			word = fill ? (value >> (length - fill)) : 0;
		}
		total_bits += length;
	}
	if(fill != 0) {
		words.push_back(word);
	}
	return words;
}

/*
 * Unpacks `num_symbols` symbols from `bit_stream`, see decode_symbol() for the per-symbol code.
 *
 * The stream is consumed in 32-bit halves of each word, which are appended to a 64-bit
 * look-ahead buffer. Since a code is at most 32 bits long, refilling whenever 32 or fewer
 * bits are buffered guarantees a complete code is available as long as the stream has data.
 *
 * Zero bits (such as the padding of the last word) decode as symbol 0, so a `num_symbols`
 * slightly larger than what was encoded is not detected.
 *
 * @param bit_stream   Packed words, first symbol in the least significant bits of the first word.
 * @param num_symbols  Number of symbols to read.
 * @return             The decoded symbols, exactly `num_symbols` entries.
 * @throws std::logic_error ("bit stream underflow") if the stream is exhausted with no bits left,
 *         or ("symbol decode error") if a decoded code is longer than the bits that remain.
 */
std::vector<uint8_t> decode(const std::vector<uint64_t>& bit_stream, const uint64_t num_symbols)
{
	std::vector<uint8_t> out;
	{
		// Every code is at least 2 bits, so a word holds at most 32 symbols.
		// Cap the reservation accordingly: a bogus `num_symbols` must not trigger a huge
		// allocation (or std::length_error / std::bad_alloc) before the stream is even looked at,
		// it will fail below with an underflow instead. The comparison is written as a
		// division to avoid overflowing `num_words * 32`.
		const uint64_t num_words = bit_stream.size();
		out.reserve(num_words > num_symbols / 32 ? num_symbols : num_words * 32);
	}

	uint32_t bits = 0;		// number of valid bits in `buffer`
	uint64_t offset = 0;	// read position in the stream in bits, always a multiple of 32
	uint64_t buffer = 0;	// look-ahead, next code starts at bit 0, unused upper bits are zero

	while(out.size() < num_symbols)
	{
		if(bits <= 32) {
			const auto index = offset / 64;
			if(index < bit_stream.size()) {
				// Append the next 32-bit half above the bits still buffered.
				buffer |= uint64_t((bit_stream[index] >> (offset % 64)) & 0xFFFFFFFF) << bits;
				offset += 32;
				bits += 32;
			} else if(bits == 0) {
				throw std::logic_error("bit stream underflow");
			}
		}
		// Only the low 32 bits of the buffer are inspected.
		const auto sym = decode_symbol(buffer);

		// Reject a code that extends past the end of the stream before storing anything.
		if(sym.second > bits) {
			throw std::logic_error("symbol decode error");
		}
		out.push_back(sym.first);

		buffer >>= sym.second;
		bits -= sym.second;
	}
	return out;
}





} // pos
} // mmx
2 changes: 2 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ add_executable(test_swap_algo test_swap_algo.cpp)
add_executable(test_database_reads test_database_reads.cpp)
add_executable(test_mem_hash test_mem_hash.cpp)
add_executable(test_pos_compute test_pos_compute.cpp)
add_executable(test_encoding test_encoding.cpp)
add_executable(database_fill database_fill.cpp)

add_executable(mmx_tests mmx_tests.cpp)
Expand All @@ -23,6 +24,7 @@ target_link_libraries(test_database_reads mmx_db mmx_iface)
target_link_libraries(database_fill mmx_db mmx_iface)
target_link_libraries(test_mem_hash vnx_base mmx_pos)
target_link_libraries(test_pos_compute mmx_iface mmx_pos)
target_link_libraries(test_encoding mmx_pos)

target_link_libraries(mmx_tests mmx_iface)
target_link_libraries(vm_engine_tests mmx_vm)
Expand Down
55 changes: 55 additions & 0 deletions test/test_encoding.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* test_encoding.cpp
*
* Created on: Nov 20, 2023
* Author: mad
*/

#include <mmx/pos/encoding.h>

#include <iostream>


/*
 * Round trip test for mmx::pos::encode() / mmx::pos::decode().
 *
 * Usage: test_encoding [num_symbols]   (default: 4096)
 *
 * Generates pseudo random symbols with a distribution skewed towards small values,
 * encodes and decodes them, prints the achieved size and throws if the result differs.
 */
int main(int argc, char** argv)
{
	int num_symbols = 4096;
	if(argc > 1) {
		num_symbols = ::atoi(argv[1]);
	}

	// Prints all symbols on the current line, separated (and followed) by a space.
	const auto print_symbols = [](const std::vector<uint8_t>& list) {
		for(const auto value : list) {
			std::cout << int(value) << " ";
		}
	};

	std::vector<uint8_t> symbols;

	for(int i = 0; i < num_symbols; ++i)
	{
		const auto ticket = ::rand() % 1000;

		// 90 % of symbols fall into 0..2, 9 % into 3..5 and 1 % into 6..8
		int base = 6;
		if(ticket < 900) {
			base = 0;
		} else if(ticket < 990) {
			base = 3;
		}
		symbols.push_back(uint8_t(base + ticket % 3));
	}

	print_symbols(symbols);
	std::cout << std::endl;

	uint64_t total_bits = 0;
	const auto bit_stream = mmx::pos::encode(symbols, total_bits);

	const auto num_bytes = (total_bits + 7) / 8;
	const auto bits_per_symbol = double(total_bits) / num_symbols;

	std::cout << "symbols = " << num_symbols << std::endl;
	std::cout << "bit_stream = " << num_bytes << " bytes, " << bits_per_symbol << " bits / symbol" << std::endl;

	const auto test = mmx::pos::decode(bit_stream, symbols.size());

	if(test == symbols) {
		return 0;
	}
	// Show what was decoded instead before failing.
	print_symbols(test);
	throw std::logic_error("test != symbols");
}


0 comments on commit ab3546b

Please sign in to comment.