diff --git a/QB3lib/QB3decode.h b/QB3lib/QB3decode.h index af3ff57..c0d99bd 100644 --- a/QB3lib/QB3decode.h +++ b/QB3lib/QB3decode.h @@ -15,12 +15,10 @@ limitations under the License. Contributors: Lucian Plesea */ -#pragma once #include "QB3common.h" namespace QB3 { -// Decoding tables, twice as large as the encoding ones -// 2k for 0-7 +// Decoding tables, twice as large as the encoding ones, 2k for 0-7 static const uint16_t drg0[] = { 0x1000, 0x1001, 0x1000, 0x1001 }; static const uint16_t drg1[] = { 0x1000, 0x2001, 0x1000, 0x3002, 0x1000, 0x2001, 0x1000, 0x3003 }; static const uint16_t drg2[] = { 0x2000, 0x3002, 0x2001, 0x4004, 0x2000, 0x3003, 0x2001, 0x4005, 0x2000, 0x3002, 0x2001, 0x4006, @@ -92,33 +90,8 @@ static const uint16_t drg7[] = { 0x7000, 0x8040, 0x7001, 0x9080, 0x7002, 0x8041, 0x702e, 0x8077, 0x702f, 0x90f7, 0x7030, 0x8078, 0x7031, 0x90f8, 0x7032, 0x8079, 0x7033, 0x90f9, 0x7034, 0x807a, 0x7035, 0x90fa, 0x7036, 0x807b, 0x7037, 0x90fb, 0x7038, 0x807c, 0x7039, 0x90fc, 0x703a, 0x807d, 0x703b, 0x90fd, 0x703c, 0x807e, 0x703d, 0x90fe, 0x703e, 0x807f, 0x703f, 0x90ff }; - static const uint16_t* DRG[] = { drg0, drg1, drg2, drg3, drg4, drg5, drg6, drg7 }; -// rung 1 and 2 double value decoding tables, can use 8 bits -static const uint8_t DDRG1[] = { 0x20, 0x31, 0x34, 0x42, 0x20, 0x45, 0x48, 0x43, 0x20, 0x31, 0x34, 0x56, 0x20, 0x59, 0x4c, -0x57, 0x20, 0x31, 0x34, 0x42, 0x20, 0x45, 0x48, 0x43, 0x20, 0x31, 0x34, 0x6a, 0x20, 0x5d, 0x4c, 0x6b, 0x20, 0x31, 0x34, 0x42, -0x20, 0x45, 0x48, 0x43, 0x20, 0x31, 0x34, 0x56, 0x20, 0x59, 0x4c, 0x57, 0x20, 0x31, 0x34, 0x42, 0x20, 0x45, 0x48, 0x43, 0x20, -0x31, 0x34, 0x6e, 0x20, 0x5d, 0x4c, 0x6f }; -static const uint16_t DDRG2[] = { 0x4000, 0x5002, 0x4001, 0x6004, 0x5010, 0x5003, 0x5011, 0x6005, 0x4008, 0x6012, 0x4009, -0x6006, 0x6020, 0x6013, 0x6021, 0x6007, 0x4000, 0x500a, 0x4001, 0x7014, 0x5018, 0x500b, 0x5019, 0x7015, 0x4008, 0x7022, 0x4009, -0x7016, 0x6028, 0x7023, 0x6029, 0x7017, 0x4000, 0x5002, 0x4001, 0x600c, 0x5010, 
0x5003, 0x5011, 0x600d, 0x4008, 0x601a, 0x4009, -0x600e, 0x6030, 0x601b, 0x6031, 0x600f, 0x4000, 0x500a, 0x4001, 0x8024, 0x5018, 0x500b, 0x5019, 0x8025, 0x4008, 0x702a, 0x4009, -0x8026, 0x6038, 0x702b, 0x6039, 0x8027, 0x4000, 0x5002, 0x4001, 0x6004, 0x5010, 0x5003, 0x5011, 0x6005, 0x4008, 0x6012, 0x4009, -0x6006, 0x6020, 0x6013, 0x6021, 0x6007, 0x4000, 0x500a, 0x4001, 0x701c, 0x5018, 0x500b, 0x5019, 0x701d, 0x4008, 0x7032, 0x4009, -0x701e, 0x6028, 0x7033, 0x6029, 0x701f, 0x4000, 0x5002, 0x4001, 0x600c, 0x5010, 0x5003, 0x5011, 0x600d, 0x4008, 0x601a, 0x4009, -0x600e, 0x6030, 0x601b, 0x6031, 0x600f, 0x4000, 0x500a, 0x4001, 0x802c, 0x5018, 0x500b, 0x5019, 0x802d, 0x4008, 0x703a, 0x4009, -0x802e, 0x6038, 0x703b, 0x6039, 0x802f, 0x4000, 0x5002, 0x4001, 0x6004, 0x5010, 0x5003, 0x5011, 0x6005, 0x4008, 0x6012, 0x4009, -0x6006, 0x6020, 0x6013, 0x6021, 0x6007, 0x4000, 0x500a, 0x4001, 0x7014, 0x5018, 0x500b, 0x5019, 0x7015, 0x4008, 0x7022, 0x4009, -0x7016, 0x6028, 0x7023, 0x6029, 0x7017, 0x4000, 0x5002, 0x4001, 0x600c, 0x5010, 0x5003, 0x5011, 0x600d, 0x4008, 0x601a, 0x4009, -0x600e, 0x6030, 0x601b, 0x6031, 0x600f, 0x4000, 0x500a, 0x4001, 0x8034, 0x5018, 0x500b, 0x5019, 0x8035, 0x4008, 0x702a, 0x4009, -0x8036, 0x6038, 0x702b, 0x6039, 0x8037, 0x4000, 0x5002, 0x4001, 0x6004, 0x5010, 0x5003, 0x5011, 0x6005, 0x4008, 0x6012, 0x4009, -0x6006, 0x6020, 0x6013, 0x6021, 0x6007, 0x4000, 0x500a, 0x4001, 0x701c, 0x5018, 0x500b, 0x5019, 0x701d, 0x4008, 0x7032, 0x4009, -0x701e, 0x6028, 0x7033, 0x6029, 0x701f, 0x4000, 0x5002, 0x4001, 0x600c, 0x5010, 0x5003, 0x5011, 0x600d, 0x4008, 0x601a, 0x4009, -0x600e, 0x6030, 0x601b, 0x6031, 0x600f, 0x4000, 0x500a, 0x4001, 0x803c, 0x5018, 0x500b, 0x5019, 0x803d, 0x4008, 0x703a, 0x4009, -0x803e, 0x6038, 0x703b, 0x6039, 0x803f }; -// rung 3 double value would be 2k by itself, the normal one is 64 bytes, and it gets worse from there - // Decoding tables for codeswitch static const uint16_t dsw3[] = { 0x3001, 0x4002, 0x3007, 0x5003, 0x3001, 0x4006, 0x3007, 
0x5005, 0x3001, 0x4002, 0x3007, 0x5000, 0x3001, 0x4006, 0x3007, 0x5004 }; @@ -188,34 +161,38 @@ static bool gdecode(iBits& s, size_t rung, T* group, uint64_t acc, size_t abits) } // Table decoding if (sizeof(T) == 1 || rung < (sizeof(DRG) / sizeof(*DRG))) { - if (1 == rung) { // double barrel - for (size_t i = 0; i < B2; i += 2) { - auto v = DDRG1[acc & 0x3f]; - group[i] = v & 0x3; - group[i + 1] = (v >> 2) & 0x3; - abits += v >> 4; - acc >>= v >> 4; + if (1 == rung) { + // Use inline constants as nibble tables + // The lower two bits of the accumulator determine the size + for (size_t i=0; i < B2; i++) { + auto size = (0x31213121u >> ((acc & 7) << 2)) & 0xf; + group[i] = T((0x30102010u >> ((acc & 7) << 2)) & 0xf); + abits += size; + acc >>= size; } s.advance(abits); } - else if (2 == rung) { // double barrel, max sym len is 4, there are at least 14 in the accumulator - for (size_t i = 0; i < 14; i += 2) { - auto v = DDRG2[acc & 0xff]; - group[i] = v & 0x7; - group[i + 1] = (v >> 3) & 0x7; - abits += v >> 12; - acc >>= v >> 12; + else if (2 == rung) { // max symbol len is 4, there are at least 14 in the accumulator + // Use inline constants as nibble tables + unsigned int size; + for (size_t i = 0; i < 14; i++) { + size = (0x4232423242324232ull >> ((acc & 0xf) << 2)) & 0xf; + group[i] = T((0x7130612051304120ull >> ((acc & 0xf) << 2)) & 0xf); + abits += size; + acc >>= size; } if (abits > 56) { // Rare s.advance(abits); acc = s.peek(); abits = 0; } - // last pair - auto v = DDRG2[acc & 0xff]; - group[14] = v & 0x7; - group[15] = (v >> 3) & 0x7; - s.advance(abits + (v >> 12)); + size = (0x4232423242324232ull >> ((acc & 0xf) << 2)) & 0xf; + group[14] = T((0x7130612051304120ull >> ((acc & 0xf) << 2)) & 0xf); + acc >>= size; + abits += size; + size = (0x4232423242324232ull >> ((acc & 0xf) << 2)) & 0xf; + group[15] = T((0x7130612051304120ull >> ((acc & 0xf) << 2)) & 0xf); + s.advance(abits + size); } else if (6 > rung) { // Table decode at 3,4 and 5, half of the values 
per accumulator auto drg = DRG[rung]; @@ -412,10 +389,9 @@ static bool decode(uint8_t *src, size_t len, T* image, const decs &info) acc >>= (cs >> 12) - 1; // No flag abits += (cs >> 12) - 1; failed |= rung == 63; // TODO: Deal with 64bit overflow - // 16 index values in group, max is 7 + // 16 index values in group, max is 7, use rung 2 T maxval(0); for (int i = 0; i < B2; i++) { - // Could use ddrg2 auto v = DRG[2][acc & 0xf]; group[i] = static_cast<T>(v); if (maxval < group[i])