Skip to content

Commit

Permalink
Implement RC4 PRNG with AVX2 and SSE4.2 Optimizations
Browse files Browse the repository at this point in the history
This commit introduces a high-performance RC4-based pseudorandom number generator (PRNG) optimized for modern CPU architectures. Key changes and improvements over the traditional RC4 implementation include:

- **CTR Mode**: Added a counter-based mode to ensure unique pseudorandom streams and prevent repetition.
- **RC4-Drop**: Discarded the first 256 bytes of the stream to mitigate known biases in the initial output of RC4.
- **SIMD Optimizations**: Leveraged SSE4.2 and AVX2 instructions to process data in parallel, improving throughput by handling 16 bytes (SSE4.2) or 32 bytes (AVX2) per iteration.
- **Hardware Prefetching**: Implemented prefetching to optimize memory access to the S-Box, reducing cache misses and latency.
- **PRNG Purpose**: Designed specifically as a pseudorandom number generator (PRNG) for non-cryptographic purposes.

This RC4 PRNG is now faster and more suitable for generating large volumes of random data, taking full advantage of modern hardware capabilities. It is **not** intended for cryptographic security purposes.
  • Loading branch information
Knogle committed Sep 12, 2024
1 parent f250aee commit a61bf44
Show file tree
Hide file tree
Showing 8 changed files with 627 additions and 5 deletions.
4 changes: 2 additions & 2 deletions src/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# what flags you want to pass to the C compiler & linker
#CFLAGS = -lncurses -lparted
AM_CFLAGS =
AM_CFLAGS = -march=native -O2
AM_LDFLAGS =

# this lists the binaries to produce, the (non-PHONY, binary) targets in
# the previous manual Makefile
bin_PROGRAMS = nwipe
nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c
nwipe_SOURCES = context.h logging.h options.h prng.h version.h temperature.h nwipe.c gui.c method.h pass.c device.c gui.h rc4/rc4_prng.h rc4/rc4_prng.c isaac_rand/isaac_standard.h isaac_rand/isaac_rand.h isaac_rand/isaac_rand.c isaac_rand/isaac64.h isaac_rand/isaac64.c mt19937ar-cok/mt19937ar-cok.c nwipe.h mt19937ar-cok/mt19937ar-cok.h alfg/add_lagg_fibonacci_prng.h alfg/add_lagg_fibonacci_prng.c xor/xoroshiro256_prng.h xor/xoroshiro256_prng.c pass.h device.h logging.c method.c options.c prng.c version.c temperature.c PDFGen/pdfgen.h PDFGen/pdfgen.c create_pdf.c create_pdf.h embedded_images/shred_db.jpg.c embedded_images/shred_db.jpg.h embedded_images/tick_erased.jpg.c embedded_images/tick_erased.jpg.h embedded_images/redcross.c embedded_images/redcross.h hpa_dco.h hpa_dco.c miscellaneous.h miscellaneous.c embedded_images/nwipe_exclamation.jpg.h embedded_images/nwipe_exclamation.jpg.c conf.h conf.c customers.h customers.c hddtemp_scsi/hddtemp.h hddtemp_scsi/scsi.h hddtemp_scsi/scsicmds.h hddtemp_scsi/get_scsi_temp.c hddtemp_scsi/scsi.c hddtemp_scsi/scsicmds.c
nwipe_LDADD = $(PARTED_LIBS) $(LIBCONFIG)
2 changes: 1 addition & 1 deletion src/create_pdf.c
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ int create_pdf( nwipe_context_t* ptr )
{
if( nwipe_options.prng == &nwipe_xoroshiro256_prng )
{
snprintf( prng_type, sizeof( prng_type ), "XORshiro256" );
snprintf( prng_type, sizeof( prng_type ), "XORoshiro256" );
}
else
{
Expand Down
44 changes: 43 additions & 1 deletion src/gui.c
Original file line number Diff line number Diff line change
Expand Up @@ -1616,11 +1616,12 @@ void nwipe_gui_prng( void )
extern nwipe_prng_t nwipe_aes_ctr_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_rc4_prng;

extern int terminate_signal;

/* The number of implemented PRNGs. */
const int count = 5;
const int count = 6;

/* The first tabstop. */
const int tab1 = 2;
Expand Down Expand Up @@ -1662,6 +1663,10 @@ void nwipe_gui_prng( void )
{
focus = 4;
}
if( nwipe_options.prng == &nwipe_rc4_prng )
{
focus = 5;
}
do
{
/* Clear the main window. */
Expand All @@ -1678,6 +1683,7 @@ void nwipe_gui_prng( void )
mvwprintw( main_window, yy++, tab1, " %s", nwipe_isaac64.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_add_lagg_fibonacci_prng.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_xoroshiro256_prng.label );
mvwprintw( main_window, yy++, tab1, " %s", nwipe_rc4_prng.label );
yy++;

/* Print the cursor. */
Expand Down Expand Up @@ -1852,6 +1858,38 @@ void nwipe_gui_prng( void )
tab1,
"especially for legacy systems, due to its efficiency and minimal demands. " );
break;

case 5:

mvwprintw( main_window,
yy++,
tab1,
"RC4, originally designed by Ron Rivest, is a widely used symmetric stream cipher " );
mvwprintw( main_window,
yy++,
tab1,
"algorithm that can also function as a pseudo-random number generator (PRNG). " );
mvwprintw( main_window,
yy++,
tab1,
"Although it was primarily intended for encryption, RC4 has been adapted for various " );
mvwprintw( main_window,
yy++,
tab1,
"applications that require random number generation. The algorithm features a variable " );
mvwprintw( main_window,
yy++,
tab1,
"key length and generates numbers in a fast, byte-oriented manner. It is suitable for " );
mvwprintw( main_window,
yy++,
tab1,
"scenarios requiring simplicity and speed, but newer PRNGs may offer better randomness " );
mvwprintw( main_window,
yy++,
tab1,
"for cryptographic purposes. " );
break;
}

/* switch */
Expand Down Expand Up @@ -1922,6 +1960,10 @@ void nwipe_gui_prng( void )
{
nwipe_options.prng = &nwipe_xoroshiro256_prng;
}
if( focus == 5 )
{
nwipe_options.prng = &nwipe_rc4_prng;
}
return;

case KEY_BACKSPACE:
Expand Down
14 changes: 13 additions & 1 deletion src/options.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ int nwipe_options_parse( int argc, char** argv )
extern nwipe_prng_t nwipe_isaac64;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;
extern nwipe_prng_t nwipe_rc4_prng;

/* The getopt() result holder. */
int nwipe_opt;
Expand Down Expand Up @@ -503,6 +504,11 @@ int nwipe_options_parse( int argc, char** argv )
nwipe_options.prng = &nwipe_xoroshiro256_prng;
break;
}
if( strcmp( optarg, "rc4_prng" ) == 0 )
{
nwipe_options.prng = &nwipe_rc4_prng;
break;
}

/* Else we do not know this PRNG. */
fprintf( stderr, "Error: Unknown prng '%s'.\n", optarg );
Expand Down Expand Up @@ -554,6 +560,7 @@ void nwipe_options_log( void )
extern nwipe_prng_t nwipe_isaac64;
extern nwipe_prng_t nwipe_add_lagg_fibonacci_prng;
extern nwipe_prng_t nwipe_xoroshiro256_prng;
extern nwipe_prng_t nwipe_rc4_prng;

/**
* Prints a manifest of options to the log.
Expand Down Expand Up @@ -623,6 +630,11 @@ void nwipe_options_log( void )
{
nwipe_log( NWIPE_LOG_NOTICE, " prng = Isaac" );
}
if( nwipe_options.prng == &nwipe_rc4_prng )
{
nwipe_log( NWIPE_LOG_NOTICE, " prng = RC4" );
}

else
{
if( nwipe_options.prng == &nwipe_isaac64 )
Expand Down Expand Up @@ -714,7 +726,7 @@ void display_help()
puts( " -l, --logfile=FILE Filename to log to. Default is STDOUT\n" );
puts( " -P, --PDFreportpath=PATH Path to write PDF reports to. Default is \".\"" );
puts( " If set to \"noPDF\" no PDF reports are written.\n" );
puts( " -p, --prng=METHOD PRNG option (mersenne|twister|isaac|isaac64|add_lagg_fibonacci_prng)\n" );
puts( " -p, --prng=METHOD PRNG option (mersenne|twister|isaac|isaac64|add_lagg_fibonacci_prng|rc4_prng)\n" );
puts( " -q, --quiet Anonymize logs and the GUI by removing unique data, i.e." );
puts( " serial numbers, LU WWN Device ID, and SMBIOS/DMI data" );
puts( " XXXXXX = S/N exists, ????? = S/N not obtainable\n" );
Expand Down
160 changes: 160 additions & 0 deletions src/prng.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,14 @@
#include "prng.h"
#include "context.h"
#include "logging.h"
#include <stdio.h>

#include "mt19937ar-cok/mt19937ar-cok.h"
#include "isaac_rand/isaac_rand.h"
#include "isaac_rand/isaac64.h"
#include "alfg/add_lagg_fibonacci_prng.h" //Lagged Fibonacci generator prototype
#include "xor/xoroshiro256_prng.h" //XORoshiro-256 prototype
#include "rc4/rc4_prng.h" //RC4 protoype

nwipe_prng_t nwipe_twister = { "Mersenne Twister (mt19937ar-cok)", nwipe_twister_init, nwipe_twister_read };

Expand All @@ -40,6 +42,70 @@ nwipe_prng_t nwipe_add_lagg_fibonacci_prng = { "Lagged Fibonacci generator",
/* XOROSHIRO-256 PRNG Structure */
nwipe_prng_t nwipe_xoroshiro256_prng = { "XORoshiro-256", nwipe_xoroshiro256_prng_init, nwipe_xoroshiro256_prng_read };

/* RC4 PRNG Structure */
nwipe_prng_t nwipe_rc4_prng = { "RC4", nwipe_rc4_prng_init, nwipe_rc4_prng_read };

#if defined( __AVX2__ ) || defined( __SSE4_2__ )
#include <cpuid.h>
#if defined( __AVX2__ )
#include <immintrin.h> // For _xgetbv and AVX intrinsics
#endif

// Function to check if SSE4.2 is supported
int check_sse42_support()
{
uint32_t eax, ebx, ecx, edx;
__cpuid( 1, eax, ebx, ecx, edx );

// Check bit 20 of ECX register for SSE4.2 support
return ( ecx & ( 1 << 20 ) ) != 0;
}

// Function to check if AVX2 is supported
int check_avx2_support()
{
#if defined( __AVX2__ )
uint32_t eax, ebx, ecx, edx;

// First check if OS supports XGETBV and AVX
__cpuid( 1, eax, ebx, ecx, edx );

// Check if the OS uses XSAVE/XRSTOR to manage XMM and YMM state
if( ( ecx & ( 1 << 27 ) ) == 0 )
{
return 0; // AVX not supported
}

// Check if XGETBV indicates the OS supports XMM, YMM state
uint64_t xcr_feature_mask = _xgetbv( 0 );
if( ( xcr_feature_mask & 0x6 ) != 0x6 )
{
return 0; // AVX not enabled in the OS
}

// Check if AVX2 is supported (bit 5 of EBX from CPUID leaf 7)
__cpuid_count( 7, 0, eax, ebx, ecx, edx );
return ( ebx & ( 1 << 5 ) ) != 0;
#else
return 0; // AVX2 not supported by this compiler or platform
#endif
}

#else

// Fallback if neither AVX2 nor SSE4.2 is available or supported by the compiler/platform
int check_sse42_support()
{
return 0; // SSE4.2 is not supported
}

int check_avx2_support()
{
return 0; // AVX2 is not supported
}

#endif

/* Print given number of bytes from unsigned integer number to a byte stream buffer starting with low-endian. */
static inline void u32_to_buffer( u8* restrict buffer, u32 val, const int len )
{
Expand Down Expand Up @@ -340,3 +406,97 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE )

return 0; // Success
}

int nwipe_rc4_prng_init( NWIPE_PRNG_INIT_SIGNATURE )
{
nwipe_log( NWIPE_LOG_NOTICE, "Initialising RC4 PRNG" );

if( *state == NULL )
{
/* This is the first time that we have been called. */
*state = malloc( sizeof( rc4_state_t ) );
}
rc4_init( (rc4_state_t*) *state, (uint64_t*) ( seed->s ), seed->length / sizeof( uint64_t ) );

return 0;
}

// The main RC4 PRNG read function with AVX2 and SSE4.2 detection
int nwipe_rc4_prng_read( NWIPE_PRNG_READ_SIGNATURE )
{
u8* restrict bufpos = buffer; // Buffer position pointer
size_t words = count / SIZE_OF_RC4_PRNG; // Number of 4096-byte blocks

// Check if the CPU supports AVX2 or SSE4.2
int use_avx2 = check_avx2_support();
int use_sse4 = check_sse42_support();

/* Loop to fill the buffer with blocks directly from the RC4 algorithm */
for( size_t ii = 0; ii < words; ++ii )
{
if( use_avx2 )
{
#if defined( __AVX2__ )
// Use AVX2-optimized version
rc4_genrand_4096_to_buf_avx2( (rc4_state_t*) *state, bufpos );
#else
// Fallback to generic version if AVX2 is not compiled
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, bufpos );
#endif
}
else if( use_sse4 )
{
#if defined( __SSE4_2__ )
// Use SSE4.2-optimized version
rc4_genrand_4096_to_buf_sse42( (rc4_state_t*) *state, bufpos );
#else
// Fallback to generic version if SSE4.2 is not compiled
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, bufpos );
#endif
}
else
{
// Fallback to generic version
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, bufpos );
}
bufpos += SIZE_OF_RC4_PRNG; // Move to the next block
}

/* Handle remaining bytes if count is not a multiple of SIZE_OF_RC4_PRNG */
const size_t remain = count % SIZE_OF_RC4_PRNG;
if( remain > 0 )
{
unsigned char temp_output[SIZE_OF_RC4_PRNG]; // Temporary buffer for the last block

if( use_avx2 )
{
#if defined( __AVX2__ )
// Use AVX2-optimized version
rc4_genrand_4096_to_buf_avx2( (rc4_state_t*) *state, temp_output );
#else
// Fallback to generic version if AVX2 is not compiled
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, temp_output );
#endif
}
else if( use_sse4 )
{
#if defined( __SSE4_2__ )
// Use SSE4.2-optimized version
rc4_genrand_4096_to_buf_sse42( (rc4_state_t*) *state, temp_output );
#else
// Fallback to generic version if SSE4.2 is not compiled
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, temp_output );
#endif
}
else
{
// Fallback to generic version
rc4_genrand_4096_to_buf( (rc4_state_t*) *state, temp_output );
}

// Copy the remaining bytes to the buffer
memcpy( bufpos, temp_output, remain );
}

return 0; // Success
}
7 changes: 7 additions & 0 deletions src/prng.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ int nwipe_add_lagg_fibonacci_prng_read( NWIPE_PRNG_READ_SIGNATURE );
int nwipe_xoroshiro256_prng_init( NWIPE_PRNG_INIT_SIGNATURE );
int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE );

/* RC4 prototypes. */
int nwipe_rc4_prng_init( NWIPE_PRNG_INIT_SIGNATURE );
int nwipe_rc4_prng_read( NWIPE_PRNG_READ_SIGNATURE );

/* Size of the twister is not derived from the architecture, but it is strictly 4 bytes */
#define SIZE_OF_TWISTER 4

Expand All @@ -76,4 +80,7 @@ int nwipe_xoroshiro256_prng_read( NWIPE_PRNG_READ_SIGNATURE );
/* Size of the XOROSHIRO-256 is not derived from the architecture, but it is strictly 32 bytes */
#define SIZE_OF_XOROSHIRO256_PRNG 32

/* Size of the RC4 is not derived from the architecture, but it is strictly 4096 bytes */
#define SIZE_OF_RC4_PRNG 4096

#endif /* PRNG_H_ */
Loading

0 comments on commit a61bf44

Please sign in to comment.