Skip to content

Commit

Permalink
tweaks
Browse files Browse the repository at this point in the history
  • Loading branch information
radkesvat committed Nov 23, 2024
1 parent f9ad87f commit 17ff600
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 12 deletions.
2 changes: 1 addition & 1 deletion core/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ int main(void)
config_file_t *cfile = parseConfigFile(*k.ref);

/*
in case of error in config file, the details is already printed out and the
in case of error in config file, the details are already printed out and the
program will not reach this line.
*/

Expand Down
2 changes: 1 addition & 1 deletion ww/buffer_pool.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,7 @@ shift_buffer_t *duplicateBufferP(buffer_pool_t *pool, shift_buffer_t *b)
bnew = popSmallBuffer(pool);
}
setLen(bnew, bufLen(b));
memcpy(rawBufMut(bnew), rawBuf(b), bufLen(b));
memCopy128(rawBufMut(bnew), rawBuf(b), bufLen(b));
return bnew;
}

Expand Down
8 changes: 4 additions & 4 deletions ww/shiftbuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <stdint.h>
#include <string.h>

#define LEFTPADDING (RAM_PROFILE >= kRamProfileS2Memory ? (1U << 10) : (1U << 8))
#define LEFTPADDING ((RAM_PROFILE >= kRamProfileS2Memory ? (1U << 10) : (1U << 8)) - (sizeof(uint32_t) * 3))
#define RIGHTPADDING ((RAM_PROFILE >= kRamProfileS2Memory ? (1U << 9) : (1U << 7)))

#define TOTALPADDING ((uint32_t) (sizeof(shift_buffer_t) + (LEFTPADDING + RIGHTPADDING)))
Expand All @@ -24,7 +24,7 @@ shift_buffer_t *newShiftBuffer(uint32_t pre_cap) // NOLINT
}

uint32_t real_cap = pre_cap + TOTALPADDING;
shift_buffer_t *self = globalMalloc(real_cap);
shift_buffer_t *self = globalMalloc(real_cap + EXTRA_ALLOC);

self->len = 0;
self->curpos = LEFTPADDING;
Expand Down Expand Up @@ -61,7 +61,7 @@ shift_buffer_t *duplicateBuffer(shift_buffer_t *b)
uint32_t pre_cap = bufCap(b) - TOTALPADDING;
shift_buffer_t *newbuf = newShiftBuffer(pre_cap);
setLen(newbuf, bufLen(b));
memcpy(rawBufMut(newbuf), rawBuf(b), bufLen(b));
memCopy128(rawBufMut(newbuf), rawBuf(b), bufLen(b));
return newbuf;
}

Expand All @@ -87,7 +87,7 @@ shift_buffer_t *sliceBufferTo(shift_buffer_t *restrict dest, shift_buffer_t *res
dest = bigger_buf;
}
setLen(dest, bytes);
memcpy(rawBufMut(dest), rawBuf(source), bytes);
memCopy128(rawBufMut(dest), rawBuf(source), bytes);
shiftr(source, bytes);

return dest;
Expand Down
64 changes: 58 additions & 6 deletions ww/shiftbuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,68 @@
*/

#if defined(WW_AVX) && defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))

#define EXTRA_ALLOC 128
#define BUF_USES_AVX 1

#include <x86intrin.h>
static inline void memCopy128(void *dest, const void *src, long int n)
{
__m256i *d_vec = (__m256i *) (dest);
const __m256i *s_vec = (const __m256i *) (src);

if ((uintptr_t) dest % 128 != 0 || (uintptr_t) src % 128 != 0)
{

while (n > 0)
{
_mm256_storeu_si256(d_vec, _mm256_loadu_si256(s_vec));
_mm256_storeu_si256(d_vec + 1, _mm256_loadu_si256(s_vec + 1));
_mm256_storeu_si256(d_vec + 2, _mm256_loadu_si256(s_vec + 2));
_mm256_storeu_si256(d_vec + 3, _mm256_loadu_si256(s_vec + 3));

n -= 128;
d_vec += 4;
s_vec += 4;
}

return;
}

while (n > 0)
{
_mm256_store_si256(d_vec, _mm256_load_si256(s_vec));
_mm256_store_si256(d_vec + 1, _mm256_load_si256(s_vec + 1));
_mm256_store_si256(d_vec + 2, _mm256_load_si256(s_vec + 2));
_mm256_store_si256(d_vec + 3, _mm256_load_si256(s_vec + 3));

n -= 128;
d_vec += 4;
s_vec += 4;
}
}
#elif

#define EXTRA_ALLOC 0
#define BUF_USES_AVX 0

static inline void memCopy128(void *__restrict __dest, const void *__restrict __src, size_t __n)
{
memcpy(__dest, __src, __n);
}

#endif

struct shift_buffer_s
{
uint32_t len;
uint32_t curpos;
uint64_t capacity;
uint8_t buf[];
uint32_t capacity;
#if BUF_USES_AVX
uint8_t _pad_[EXTRA_ALLOC];
#endif
uint8_t buf[];
};

typedef struct shift_buffer_s shift_buffer_t;
Expand All @@ -37,8 +92,6 @@ shift_buffer_t *sliceBufferTo(shift_buffer_t *restrict dest, shift_buffer_t *res
shift_buffer_t *sliceBuffer(shift_buffer_t *self, uint32_t bytes);
shift_buffer_t *duplicateBuffer(shift_buffer_t *b);



static inline unsigned int bufCap(shift_buffer_t *const self)
{
return self->capacity;
Expand Down Expand Up @@ -167,7 +220,7 @@ static inline shift_buffer_t *reserveBufSpace(shift_buffer_t *const self, const
{
shift_buffer_t *bigger_buf = newShiftBuffer(bytes);
setLen(bigger_buf, bufLen(self));
memcpy(rawBufMut(bigger_buf), rawBuf(self), bufLen(self));
memCopy128(rawBufMut(bigger_buf), rawBuf(self), bufLen(self));
destroyShiftBuffer(self);
return bigger_buf;
}
Expand All @@ -182,7 +235,6 @@ static inline void concatBufferNoCheck(shift_buffer_t *restrict root, const shif
memcpy(rawBufMut(root) + root_length, rawBuf(buf), append_length);
}


#ifdef DEBUG
// free and re create the buffer so in case of use after free we catch it
static shift_buffer_t *debugBufferWontBeReused(shift_buffer_t *b)
Expand Down

0 comments on commit 17ff600

Please sign in to comment.