Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Globally clear thread-local states #1919

Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 145 additions & 6 deletions crypto/fipsmodule/rand/new_rand.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "internal.h"
#include "../../internal.h"
#include "../../ube/internal.h"
#include "../delocate.h"

#include "new_rand_prefix.h"
#include "entropy/internal.h"
Expand All @@ -32,8 +33,126 @@ struct rand_thread_local_state {

// Entropy source. UBE volatile state.
const struct entropy_source *entropy_source;

// Backward and forward references to nodes in a doubly-linked list.
struct rand_thread_local_state *next;
struct rand_thread_local_state *previous;

// Lock used when globally clearing (zeroising) all thread-local states at
// process exit.
CRYPTO_MUTEX state_clear_lock;
};

DEFINE_BSS_GET(struct rand_thread_local_state *, thread_states_list_head)
DEFINE_STATIC_MUTEX(thread_local_states_list_lock)

#if defined(_MSC_VER)
#pragma section(".CRT$XCU", read)
static void rand_thread_local_state_clear_all(void);
static void windows_install_rand_thread_local_state_clear_all(void) {
atexit(&rand_thread_local_state_clear_all);
}
__declspec(allocate(".CRT$XCU")) void(*rand_fips_library_destructor)(void) =
windows_install_rand_thread_local_state_clear_all;
#else
static void rand_thread_local_state_clear_all(void) __attribute__ ((destructor));
#endif

// At process exit not all threads will be scheduled and proper exited. To
// ensure no secret state is left, globally clear all thread-local states. This
// is a FIPS-derived requirement: SSPs must be cleared.
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved
//
// This is problematic because a thread might be scheduled and return
// randomness from a non-valid state. The linked application should obviously
// arrange that all threads are gracefully exited before existing the process.
// Yet, in cases where such graceful exit does not happen we ensure that no
// output can be returned by locking all thread-local states and deliberately
// not releasing the lock. A synchronization step in the core randomness
// generation routine |RAND_bytes_core| then ensures that no randomness
// generation can occur after a thread-local state has been locked. It also
// ensures |rand_thread_local_state_free| cannot free any thread state while we
// own the lock.
static void rand_thread_local_state_clear_all(void) {
CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());
for (struct rand_thread_local_state *state = *thread_states_list_head_bss_get();
state != NULL; state = state->next) {
CRYPTO_MUTEX_lock_write(&state->state_clear_lock);
CTR_DRBG_clear(&state->drbg);
}
}

// thread_local_list_delete_node unlinks |node_delete| from the global
// doubly-linked list of thread-local states. The node itself is not freed,
// and its own next/previous pointers are left untouched; the caller is
// responsible for disposing of the node.
static void thread_local_list_delete_node(
    struct rand_thread_local_state *node_delete) {
  // The list is shared between all threads; synchronize mutations globally.
  CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());

  struct rand_thread_local_state **head = thread_states_list_head_bss_get();

  // Turn [prev] <--> [node_delete] <--> [next] into [prev] <--> [next].
  if (*head == node_delete) {
    // The head has no backwards reference; only the head pointer must move.
    *head = node_delete->next;
  } else if (node_delete->previous != NULL) {
    // Not the head: re-route the predecessor's forward reference, but only
    // if a predecessor actually exists.
    node_delete->previous->next = node_delete->next;
  }

  // Finally, fix up the successor's backwards reference, if a successor
  // exists. When node_delete was the head, node_delete->previous is NULL,
  // which is exactly the backwards reference the new head should carry.
  if (node_delete->next != NULL) {
    node_delete->next->previous = node_delete->previous;
  }

  CRYPTO_STATIC_MUTEX_unlock_write(thread_local_states_list_lock_bss_get());
}

// thread_local_list_add adds the state |node_add| to the linked list. Note that
// |node_add| is not added at the tail of the linked list, but is replacing the
// current head to keep the add operation at low time-complexity.
static void thread_local_list_add_node(
struct rand_thread_local_state **node_add) {
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved

struct rand_thread_local_state *node_add_p = *node_add;

// node_add will be the new head and will not have a backwards reference.
node_add_p->previous = NULL;

// Mutating the global linked list. Need to synchronize over all threads.
CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());

// First get a reference to the pointer of the head of the linked list.
// That is, the pointer to the head node node_head is *thread_states_list.
struct rand_thread_local_state **thread_states_list = thread_states_list_head_bss_get();
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved

// We have [node_head] <--> [node_head->next] and must end up with
// [node_add] <--> [node_head] <--> [node_head->next]
// First make the forward reference
node_add_p->next = *thread_states_list;

// Only add a backwards reference if a head already existed (this might be
// the first add).
if (node_add_p->next != NULL) {
(node_add_p->next)->previous = node_add_p;
}
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved

// The last thing is to assign the new head.
*thread_states_list = node_add_p;

CRYPTO_STATIC_MUTEX_unlock_write(thread_local_states_list_lock_bss_get());
}

// rand_thread_local_state frees a |rand_thread_local_state|. This is called
// when a thread exits.
static void rand_thread_local_state_free(void *state_in) {
Expand All @@ -43,6 +162,8 @@ static void rand_thread_local_state_free(void *state_in) {
return;
}

thread_local_list_delete_node(state);

// Potentially, something could kill the thread before an entropy source has
// been associated with the thread-local randomness generator object.
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved
if (state->entropy_source != NULL) {
Expand Down Expand Up @@ -200,6 +321,7 @@ static void rand_state_initialize(struct rand_thread_local_state *state) {
state->reseed_calls_since_initialization = 0;
state->generate_calls_since_seed = 0;
state->generation_number = 0;
CRYPTO_MUTEX_init(&state->state_clear_lock);

OPENSSL_cleanse(seed, CTR_DRBG_ENTROPY_LEN);
OPENSSL_cleanse(personalization_string, CTR_DRBG_ENTROPY_LEN);
Expand Down Expand Up @@ -250,7 +372,12 @@ static void RAND_bytes_core(
assert(first_pred_resistance_len == 0 ||
first_pred_resistance_len == RAND_PRED_RESISTANCE_LEN);

// TODO: lock here
// Synchronize with |rand_thread_local_state_clear_all|. In case a
// thread-local state has been zeroized, thread execution will block here
// because there is no secure way to generate randomness from that state.
// Note that this lock is thread-local and therefore not contended except at
// process exit.
CRYPTO_MUTEX_lock_read(&state->state_clear_lock);

// Iterate CTR-DRBG generate until |out_len| bytes of randomness have been
// generated. CTR_DRBG_generate can maximally generate
Expand All @@ -267,18 +394,29 @@ static void RAND_bytes_core(
if (must_reseed_before_generate == 1 ||
(state->generate_calls_since_seed + 1) > kCtrDrbgReseedInterval) {

must_reseed_before_generate = 0;

// TODO: unlock here
// An unlock-lock cycle is located here to not acquire any locks while we
// might perform system calls (e.g. when sourcing OS entropy). This
// shields against known bugs. For example, glibc can implement locks
// using memory transactions on powerpc that has been observed to break
// when reaching |getrandom| through |syscall|. For this, see
// https://github.com/google/boringssl/commit/17ce286e0792fc2855fb7e34a968bed17ae914af
// https://www.kernel.org/doc/Documentation/powerpc/transactional_memory.txt
//
// Even though the unlock-lock cycle is under the loop iteration,
// practically a request size (i.e. the value of |out_len|), will
// almost-always be strictly less than |CTR_DRBG_MAX_GENERATE_LENGTH|.
// Hence, practically, only one lock-unlock rotation will be required.
CRYPTO_MUTEX_unlock_read(&state->state_clear_lock);
uint8_t seed[CTR_DRBG_ENTROPY_LEN];
uint8_t additional_data[CTR_DRBG_ENTROPY_LEN];
size_t additional_data_len = 0;
rand_get_ctr_drbg_seed_entropy(state->entropy_source, seed,
additional_data, &additional_data_len);
CRYPTO_MUTEX_lock_read(&state->state_clear_lock);

// TODO: lock here
rand_ctr_drbg_reseed(state, seed, additional_data,
additional_data_len);
must_reseed_before_generate = 0;

OPENSSL_cleanse(seed, CTR_DRBG_ENTROPY_LEN);
OPENSSL_cleanse(additional_data, CTR_DRBG_ENTROPY_LEN);
Expand All @@ -301,7 +439,7 @@ static void RAND_bytes_core(
abort();
}

// TODO: unlock here
CRYPTO_MUTEX_unlock_read(&state->state_clear_lock);
}

static void RAND_bytes_private(uint8_t *out, size_t out_len,
Expand All @@ -324,6 +462,7 @@ static void RAND_bytes_private(uint8_t *out, size_t out_len,
}

rand_state_initialize(state);
thread_local_list_add_node(&state);
}

RAND_bytes_core(state, out, out_len, user_pred_resistance,
Expand Down
25 changes: 23 additions & 2 deletions crypto/fipsmodule/rand/new_rand_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
#include "new_rand_internal.h"
#include "../../ube/internal.h"

#include <thread>

// TODO
// Remove when promoting to default
#if !defined(BORINGSSL_PREFIX)
Expand All @@ -17,8 +19,7 @@

#define MAX_REQUEST_SIZE (CTR_DRBG_MAX_GENERATE_LENGTH * 2 + 1)

TEST(NewRand, Basic) {

static void randBasicTests(bool *returnFlag) {
uint8_t randomness[MAX_REQUEST_SIZE] = {0};
uint8_t user_personalization_string[RAND_PRED_RESISTANCE_LEN] = {0};

Expand All @@ -37,6 +38,26 @@ TEST(NewRand, Basic) {
ASSERT_TRUE(RAND_bytes_with_additional_data(randomness, i, user_personalization_string));
ASSERT_TRUE(RAND_bytes_with_user_prediction_resistance(randomness, i, user_personalization_string));
}

*returnFlag = true;
}

TEST(NewRand, Basic) {
#if defined(OPENSSL_THREADS)
  // Run the basic tests concurrently from several threads to exercise the
  // thread-local state management (list add/delete, per-thread DRBGs).
  constexpr size_t kNumThreads = 10;
  bool myFlags[kNumThreads] = {false};
  std::thread myThreads[kNumThreads];

  for (size_t i = 0; i < kNumThreads; i++) {
    myThreads[i] = std::thread(randBasicTests, &myFlags[i]);
  }
  for (size_t i = 0; i < kNumThreads; i++) {
    myThreads[i].join();
    ASSERT_TRUE(myFlags[i]) << "Thread " << i << " failed.";
  }
#else
  // |randBasicTests| takes a success-flag out-parameter; calling it with no
  // argument (as the original code did) does not compile in the
  // single-threaded configuration, and the result must still be checked.
  bool myFlag = false;
  randBasicTests(&myFlag);
  ASSERT_TRUE(myFlag);
#endif
}

TEST(NewRand, ReseedInterval) {
Expand Down
Loading