Skip to content

Commit

Permalink
IID and non-IID tests reused the name 'B' as a constant / macro name.…
Browse files Browse the repository at this point in the history
… Changed non-IID to B_len.
  • Loading branch information
andrewmccaffreynist committed Jun 13, 2023
1 parent d72243d commit bf8f0ab
Showing 1 changed file with 27 additions and 27 deletions.
54 changes: 27 additions & 27 deletions cpp/non_iid/lz78y_test.h
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
#pragma once
#include "../shared/utils.h"

#define B 16
#define B_len 16
#define MAX_DICTIONARY_SIZE 65536

static double binaryLZ78YPredictionEstimate(const uint8_t *S, long L, const int verbose, const char *label)
{
long *binaryDict[B];
long *binaryDict[B_len];
long curRunOfCorrects=0;
long maxRunOfCorrects=0;
long correctCount=0;
long i, j;
uint32_t curPattern=0;
long dictElems=0;

assert(L>B);
assert(L-B > 2);
assert(B < 32); //B < 32 to make the bit shifts well defined
assert(L>B_len);
assert(L-B_len > 2);
assert(B_len < 32); //B_len < 32 to make the bit shifts well defined

//Initialize the data structure tables
for(j=0; j< B; j++) {
for(j=0; j< B_len; j++) {
//For a length m prefix, we need 2^m sets of length 2 arrays.
//Here, j+1 is the length of the prefix, so we need 2^(j+1) prefixes, or 2*2^(j+1) = 2^(j+2) storage total.
//Note: 2^(j+2) = 1<<(j+2).
Expand All @@ -29,28 +29,28 @@ static double binaryLZ78YPredictionEstimate(const uint8_t *S, long L, const int
}

// initialize B counts with {(S[15]), S[16]}, {(S[14], S[15]), S[16]}, ..., {(S[0], S[1], ..., S[15]), S[16]}
for(j=0; j<B; j++) {
curPattern = curPattern | (((uint32_t)(S[B - j - 1]&1)) << j);
for(j=0; j<B_len; j++) {
curPattern = curPattern | (((uint32_t)(S[B_len - j - 1]&1)) << j);

//This is necessarily the first symbol of this length
(BINARYDICTLOC(j+1, curPattern))[S[B]&0x1] = 1;
(BINARYDICTLOC(j+1, curPattern))[S[B_len]&0x1] = 1;
dictElems++;
}

//In C, arrays are 0 indexed.
//i is the index of the bit to be predicted.
for(i=B+1; i<L; i++) {
for(i=B_len+1; i<L; i++) {
bool found_x;
bool havePrediction = false;
uint8_t roundPrediction=2;
uint8_t curPrediction=2;
long maxCount = 0;

//Put the B_len bits preceding position i into curPattern
curPattern = compressedBitSymbols(S+i-B, B);
curPattern = compressedBitSymbols(S+i-B_len, B_len);

//j is the length of the prefix to be used
for(j=B; j>0; j--) {
for(j=B_len; j>0; j--) {
long curCount;
long *binaryDictEntry;

Expand Down Expand Up @@ -104,58 +104,58 @@ static double binaryLZ78YPredictionEstimate(const uint8_t *S, long L, const int
}
}

for(j=0; j<B; j++) {
for(j=0; j<B_len; j++) {
delete[](binaryDict[j]);
binaryDict[j] = NULL;
}

return(predictionEstimate(correctCount, L-B-1, maxRunOfCorrects, 2, "LZ78Y", verbose, label));
return(predictionEstimate(correctCount, L-B_len-1, maxRunOfCorrects, 2, "LZ78Y", verbose, label));
}

// Section 6.3.10 - LZ78Y Prediction Estimate
double LZ78Y_test(uint8_t *data, long len, int alph_size, const int verbose, const char *label) {
int dict_size;
long i, j, N, C, run_len, max_run_len;
array<uint8_t, B> x;
array<uint8_t, B_len> x;

if(alph_size==2) return binaryLZ78YPredictionEstimate(data, len, verbose, label);

array<map<array<uint8_t, B>, PostfixDictionary>, B> D;
array<map<array<uint8_t, B_len>, PostfixDictionary>, B_len> D;

if(len < B+2){
printf("\t*** Warning: not enough samples to run LZ78Y test (need more than %d) ***\n", B+2);
if(len < B_len+2){
printf("\t*** Warning: not enough samples to run LZ78Y test (need more than %d) ***\n", B_len+2);
return -1.0;
}

N = len-B-1;
N = len-B_len-1;
C = 0;
run_len = 0;
max_run_len = 0;

// initialize dictionary counts
dict_size = 0;
memset(x.data(), 0, B);
memset(x.data(), 0, B_len);
// initialize LZ78Y counts with {(S[15]), S[16]}, {(S[14], S[15]), S[16]}, ..., {(S[0], S[1], ..., S[15]), S[16]}
for(j = 1; j <= B; j++){
memcpy(x.data(), data+B-j, j);
D[j-1][x].incrementPostfix(data[B], true);
for(j = 1; j <= B_len; j++){
memcpy(x.data(), data+B_len-j, j);
D[j-1][x].incrementPostfix(data[B_len], true);
dict_size++;
}

// perform predictions
for(i = B+1; i < len; i++) {
for(i = B_len+1; i < len; i++) {
bool found_x;
bool have_prediction = false;
uint8_t prediction = 0;
long max_count = 0;

for(j = B; j > 0; j--) {
map<array<uint8_t, B>, PostfixDictionary>::iterator curp;
for(j = B_len; j > 0; j--) {
map<array<uint8_t, B_len>, PostfixDictionary>::iterator curp;

// check if x has been previously seen.
//For the prediction, roundPrediction is the max across all pairs
//The prefix string should contain the j-tuple (S[i-j] ... S[i-1])
memset(x.data(), 0, B);
memset(x.data(), 0, B_len);
memcpy(x.data(), data+i-j, j);
curp = D[j-1].find(x);

Expand Down

0 comments on commit bf8f0ab

Please sign in to comment.