Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Globally clear thread-local states #1919

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 143 additions & 6 deletions crypto/fipsmodule/rand/new_rand.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "internal.h"
#include "../../internal.h"
#include "../../ube/internal.h"
#include "../delocate.h"

#include "new_rand_prefix.h"
#include "entropy/internal.h"
Expand All @@ -32,8 +33,124 @@ struct rand_thread_local_state {

// Entropy source. UBE volatile state.
const struct entropy_source *entropy_source;

// Backward and forward references to nodes in a doubly-linked list.
struct rand_thread_local_state *next;
struct rand_thread_local_state *previous;

// Lock used when globally clearing (zeroising) all thread-local states at
// process exit.
CRYPTO_MUTEX state_clear_lock;
};

DEFINE_BSS_GET(struct rand_thread_local_state *, thread_states_list_head)
DEFINE_STATIC_MUTEX(thread_local_states_list_lock)

#if defined(_MSC_VER)
// MSVC has no __attribute__((destructor)). Instead, place a function pointer
// in the ".CRT$XCU" section: the CRT invokes every pointer in that section
// during C runtime initialization. The installed function then registers the
// real cleanup routine with atexit(), so it runs at process exit.
#pragma section(".CRT$XCU", read)
static void rand_thread_local_state_clear_all(void);
// Runs at CRT start-up; defers the actual clearing to process exit.
static void windows_install_rand_thread_local_state_clear_all(void) {
atexit(&rand_thread_local_state_clear_all);
}
// The pointer itself must live in ".CRT$XCU" for the CRT to pick it up.
__declspec(allocate(".CRT$XCU")) void(*rand_fips_library_destructor)(void) =
windows_install_rand_thread_local_state_clear_all;
#else
// GCC/Clang: run the cleanup routine automatically at process exit.
static void rand_thread_local_state_clear_all(void) __attribute__ ((destructor));
#endif

// At process exit, not all threads will have been scheduled and properly
// exited. To
// ensure no secret state is left, globally clear all thread-local states. This
// is a FIPS-derived requirement: SSPs must be cleared.
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved
//
// This is problematic because a thread might be scheduled and return
// randomness from a non-valid state. The linked application should obviously
// arrange that all threads are gracefully exited before exiting the process.
// Yet, in cases where such graceful exit does not happen we ensure that no
// output can be returned by locking all thread-local states and deliberately
// not releasing the lock. A synchronization step in the core randomness
// generation routine |RAND_bytes_core| then ensures that no randomness
// generation can occur after a thread-local state has been locked. It also
// ensures |rand_thread_local_state_free| cannot free any thread state while we
// own the lock.
static void rand_thread_local_state_clear_all(void) {
  // Deliberately acquire, and never release, the global list lock. After this
  // point no thread can add or delete a node (and hence no thread state can
  // be freed while it is being zeroized here).
  CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());

  struct rand_thread_local_state *node = *thread_states_list_head_bss_get();
  while (node != NULL) {
    // Likewise, each per-thread lock is taken and intentionally kept: any
    // thread that later tries to generate randomness from this (now cleared)
    // state blocks forever instead of emitting output from invalid state.
    CRYPTO_MUTEX_lock_write(&node->state_clear_lock);
    CTR_DRBG_clear(&node->drbg);
    node = node->next;
  }
}

// thread_local_list_delete_node unlinks |node_delete| from the global
// doubly-linked list of thread-local states.
static void thread_local_list_delete_node(
    struct rand_thread_local_state *node_delete) {

  // The global list is about to be mutated; synchronize across all threads.
  CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());

  struct rand_thread_local_state **head = thread_states_list_head_bss_get();

  // Unlink the forward direction:
  // [prev] <--> [node_delete] <--> [next]  becomes  [prev] --> [next].
  if (*head == node_delete) {
    // node_delete is the head: there is no predecessor, but the head pointer
    // must advance to the successor.
    *head = node_delete->next;
  } else if (node_delete->previous != NULL) {
    // Interior node: the predecessor's forward reference skips node_delete.
    node_delete->previous->next = node_delete->next;
  }

  // Fix the backward direction for the successor, if one exists. When
  // node_delete was the head, node_delete->previous is NULL — exactly the
  // backward reference the new head must carry.
  if (node_delete->next != NULL) {
    node_delete->next->previous = node_delete->previous;
  }

  CRYPTO_STATIC_MUTEX_unlock_write(thread_local_states_list_lock_bss_get());
}

// thread_local_list_add_node inserts |node_add| into the global linked list.
// Insertion is at the front (push-front), replacing the current head, which
// keeps the operation O(1).
static void thread_local_list_add_node(
    struct rand_thread_local_state *node_add) {

  // The new node becomes the head and therefore has no predecessor.
  node_add->previous = NULL;

  // The global list is about to be mutated; synchronize across all threads.
  CRYPTO_STATIC_MUTEX_lock_write(thread_local_states_list_lock_bss_get());

  // *head is the current head node (possibly NULL on the very first add).
  struct rand_thread_local_state **head = thread_states_list_head_bss_get();

  // Link forward: [node_add] --> [old head] --> ...
  node_add->next = *head;

  // Link backward, but only if a head already existed.
  if (*head != NULL) {
    (*head)->previous = node_add;
  }

  // Publish node_add as the new head.
  *head = node_add;

  CRYPTO_STATIC_MUTEX_unlock_write(thread_local_states_list_lock_bss_get());
}

// rand_thread_local_state frees a |rand_thread_local_state|. This is called
// when a thread exits.
static void rand_thread_local_state_free(void *state_in) {
Expand All @@ -43,6 +160,8 @@ static void rand_thread_local_state_free(void *state_in) {
return;
}

thread_local_list_delete_node(state);

// Potentially, something could kill the thread before an entropy source has
// been associated to the thread-local randomness generator object.
torben-hansen marked this conversation as resolved.
Show resolved Hide resolved
if (state->entropy_source != NULL) {
Expand Down Expand Up @@ -200,6 +319,7 @@ static void rand_state_initialize(struct rand_thread_local_state *state) {
state->reseed_calls_since_initialization = 0;
state->generate_calls_since_seed = 0;
state->generation_number = 0;
CRYPTO_MUTEX_init(&state->state_clear_lock);

OPENSSL_cleanse(seed, CTR_DRBG_ENTROPY_LEN);
OPENSSL_cleanse(personalization_string, CTR_DRBG_ENTROPY_LEN);
Expand Down Expand Up @@ -250,7 +370,12 @@ static void RAND_bytes_core(
assert(first_pred_resistance_len == 0 ||
first_pred_resistance_len == RAND_PRED_RESISTANCE_LEN);

// TODO: lock here
// Synchronize with |rand_thread_local_state_clear_all|. In case a
// thread-local state has been zeroized, thread execution will block here
// because there is no secure way to generate randomness from that state.
// Note that this lock is thread-local and therefore not contended except at
// process exit.
CRYPTO_MUTEX_lock_read(&state->state_clear_lock);

// Iterate CTR-DRBG generate until |out_len| bytes of randomness have been
// generated. CTR_DRBG_generate can maximally generate
Expand All @@ -267,18 +392,29 @@ static void RAND_bytes_core(
if (must_reseed_before_generate == 1 ||
(state->generate_calls_since_seed + 1) > kCtrDrbgReseedInterval) {

must_reseed_before_generate = 0;

// TODO: unlock here
// An unlock-lock cycle is located here to not acquire any locks while we
// might perform system calls (e.g. when sourcing OS entropy). This
// shields against known bugs. For example, glibc can implement locks
// using memory transactions on powerpc that has been observed to break
// when reaching |getrandom| through |syscall|. For this, see
// https://github.com/google/boringssl/commit/17ce286e0792fc2855fb7e34a968bed17ae914af
// https://www.kernel.org/doc/Documentation/powerpc/transactional_memory.txt
//
// Even though the unlock-lock cycle is under the loop iteration,
// practically a request size (i.e. the value of |out_len|), will
// almost-always be strictly less than |CTR_DRBG_MAX_GENERATE_LENGTH|.
// Hence, practically, only one lock-unlock rotation will be required.
CRYPTO_MUTEX_unlock_read(&state->state_clear_lock);
uint8_t seed[CTR_DRBG_ENTROPY_LEN];
uint8_t additional_data[CTR_DRBG_ENTROPY_LEN];
size_t additional_data_len = 0;
rand_get_ctr_drbg_seed_entropy(state->entropy_source, seed,
additional_data, &additional_data_len);
CRYPTO_MUTEX_lock_read(&state->state_clear_lock);

// TODO: lock here
rand_ctr_drbg_reseed(state, seed, additional_data,
additional_data_len);
must_reseed_before_generate = 0;

OPENSSL_cleanse(seed, CTR_DRBG_ENTROPY_LEN);
OPENSSL_cleanse(additional_data, CTR_DRBG_ENTROPY_LEN);
Expand All @@ -301,7 +437,7 @@ static void RAND_bytes_core(
abort();
}

// TODO: unlock here
CRYPTO_MUTEX_unlock_read(&state->state_clear_lock);
}

static void RAND_bytes_private(uint8_t *out, size_t out_len,
Expand All @@ -324,6 +460,7 @@ static void RAND_bytes_private(uint8_t *out, size_t out_len,
}

rand_state_initialize(state);
thread_local_list_add_node(state);
}

RAND_bytes_core(state, out, out_len, user_pred_resistance,
Expand Down
37 changes: 32 additions & 5 deletions crypto/fipsmodule/rand/new_rand_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,13 @@
#include <gtest/gtest.h>

#include <openssl/ctrdrbg.h>
#include <openssl/mem.h>

#include "new_rand_internal.h"
#include "../../ube/internal.h"

#include <thread>

// TODO
// Remove when promoting to default
#if !defined(BORINGSSL_PREFIX)
Expand All @@ -17,9 +20,11 @@

#define MAX_REQUEST_SIZE (CTR_DRBG_MAX_GENERATE_LENGTH * 2 + 1)

TEST(NewRand, Basic) {

uint8_t randomness[MAX_REQUEST_SIZE] = {0};
static void randBasicTests(bool *returnFlag) {
// Do not use stack arrays for these. For example, Alpine OS has too low
// default thread stack size limit to accommodate.
uint8_t *randomness = (uint8_t *) OPENSSL_zalloc(MAX_REQUEST_SIZE);
bssl::UniquePtr<uint8_t> deleter(randomness);
uint8_t user_personalization_string[RAND_PRED_RESISTANCE_LEN] = {0};

for (size_t i = 0; i < 65; i++) {
Expand All @@ -37,10 +42,31 @@ TEST(NewRand, Basic) {
ASSERT_TRUE(RAND_bytes_with_additional_data(randomness, i, user_personalization_string));
ASSERT_TRUE(RAND_bytes_with_user_prediction_resistance(randomness, i, user_personalization_string));
}

*returnFlag = true;
}

TEST(NewRand, Basic) {
#if defined(OPENSSL_THREADS)
  // Run the basic tests from several threads concurrently to exercise the
  // thread-local state machinery (list add/delete, per-thread DRBG).
  constexpr size_t kNumThreads = 10;
  bool myFlags[kNumThreads] = {false};
  std::thread myThreads[kNumThreads];

  for (size_t i = 0; i < kNumThreads; i++) {
    myThreads[i] = std::thread(randBasicTests, &myFlags[i]);
  }
  for (size_t i = 0; i < kNumThreads; i++) {
    myThreads[i].join();
    ASSERT_TRUE(myFlags[i]) << "Thread " << i << " failed.";
  }
#else
  // Bug fix: |randBasicTests| takes a |bool *| out-parameter, but the
  // non-threaded branch called it with no argument, which does not compile
  // on builds without OPENSSL_THREADS.
  bool myFlag = false;
  randBasicTests(&myFlag);
  ASSERT_TRUE(myFlag);
#endif
}

TEST(NewRand, ReseedInterval) {
uint8_t randomness[CTR_DRBG_MAX_GENERATE_LENGTH * 5 + 1] = {0};
uint8_t *randomness = (uint8_t *) OPENSSL_zalloc(CTR_DRBG_MAX_GENERATE_LENGTH * 5 + 1);
bssl::UniquePtr<uint8_t> deleter(randomness);
uint64_t reseed_calls_since_initialization = get_thread_reseed_calls_since_initialization();
uint64_t generate_calls_since_seed = get_thread_generate_calls_since_seed();

Expand Down Expand Up @@ -82,7 +108,8 @@ static void MockedUbeDetection(std::function<void(uint64_t)> set_detection_metho
const size_t request_size_one_generate = 10;
const size_t request_size_two_generate = CTR_DRBG_MAX_GENERATE_LENGTH + 1;
uint64_t current_reseed_calls = 0;
uint8_t randomness[MAX_REQUEST_SIZE] = {0};
uint8_t *randomness = (uint8_t *) OPENSSL_zalloc(CTR_DRBG_MAX_GENERATE_LENGTH * 5 + 1);
bssl::UniquePtr<uint8_t> deleter(randomness);

// Make sure things are initialized and at default values. Cache
// current_reseed_calls last in case RAND_bytes() invokes a reseed.
Expand Down
Loading