Skip to content

Commit

Permalink
Add C11 standard atomic support (#1645)
Browse files Browse the repository at this point in the history
* Add runtime support for stdatomics

* Fix lock calculation and enable atomic_flag support
  • Loading branch information
sgstreet authored Jul 8, 2024
1 parent 86aab81 commit 01dec6f
Show file tree
Hide file tree
Showing 5 changed files with 387 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/rp2_common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ if (NOT PICO_BARE_METAL)
pico_add_subdirectory(pico_malloc)
pico_add_subdirectory(pico_printf)
pico_add_subdirectory(pico_rand)
pico_add_subdirectory(pico_atomic)

pico_add_subdirectory(pico_stdio)
pico_add_subdirectory(pico_stdio_semihosting)
Expand Down
11 changes: 11 additions & 0 deletions src/rp2_common/pico_atomic/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Define the pico_atomic library once; the guard makes repeated inclusion of this file a no-op.
if (NOT TARGET pico_atomic)
pico_add_library(pico_atomic)

# The implementation is attached as an INTERFACE source, so it is compiled as part of
# whichever target links pico_atomic.
target_sources(pico_atomic INTERFACE
${CMAKE_CURRENT_LIST_DIR}/pico_atomic.c
)

# Expose this directory's include/ folder (it contains the stdatomic.h wrapper).
target_include_directories(pico_atomic_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)

# pico_atomic depends on pico_sync.
target_link_libraries(pico_atomic INTERFACE pico_sync)
endif()
27 changes: 27 additions & 0 deletions src/rp2_common/pico_atomic/include/stdatomic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
* Copyright (c) 2024 Stephen Street ([email protected]).
*
* SPDX-License-Identifier: BSD-3-Clause
*/

/*
 * Wrapper around the toolchain's <stdatomic.h>: it includes the next stdatomic.h on the
 * include path and then redirects the atomic_flag operations to the out-of-line helpers
 * __atomic_test_and_set_m0() / __atomic_clear_m0().
 */
#ifndef __STDATOMIC_H
#define __STDATOMIC_H

/* Pull in the real header first; everything except the atomic_flag macros is left as-is. */
#include_next <stdatomic.h>

/* Drop the toolchain's atomic_flag macros so they can be redefined below. */
#undef atomic_flag_test_and_set
#undef atomic_flag_test_and_set_explicit
#undef atomic_flag_clear
#undef atomic_flag_clear_explicit

/*
 * Replacement helpers. `mem` is the address of the flag and `model` is a __ATOMIC_*
 * memory-order constant.
 * NOTE(review): the _m0 suffix presumably refers to Cortex-M0/M0+ - confirm.
 */
extern _Bool __atomic_test_and_set_m0(volatile void *mem, int model);
extern void __atomic_clear_m0 (volatile void *mem, int model);

/* The non-explicit forms request sequentially consistent ordering. */
#define atomic_flag_test_and_set(PTR) __atomic_test_and_set_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_test_and_set_explicit(PTR, MO) __atomic_test_and_set_m0((PTR), (MO))

#define atomic_flag_clear(PTR) __atomic_clear_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear_m0((PTR), (MO))

#endif
345 changes: 345 additions & 0 deletions src/rp2_common/pico_atomic/pico_atomic.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,345 @@
/*
* Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
* Copyright (c) 2024 Stephen Street ([email protected]).
*
* SPDX-License-Identifier: BSD-3-Clause
*/

#include <stdbool.h>
#include <stdint.h>

#include "hardware/address_mapped.h"
#include "hardware/regs/watchdog.h"
#include "hardware/sync.h"

#include "pico/config.h"

/* Fallback definition: compile the annotated functions with -Os regardless of build type. */
#ifndef __optimize
#define __optimize __attribute__((optimize("-Os")))
#endif

/*
 * Lock striping parameters. Must be powers of 2.
 *
 * ATOMIC_STRIPE      number of consecutive bytes that share one lock
 * ATOMIC_LOCKS       number of locks packed into the lock register
 * ATOMIC_LOCK_WIDTH  bits per lock (one bit per core)
 */
#define ATOMIC_STRIPE 4UL
#define ATOMIC_LOCKS 16UL
#define ATOMIC_LOCK_WIDTH 2UL

/*
 * log2(ATOMIC_STRIPE): the right shift that turns an address into a stripe number.
 * __builtin_clzl() is used because the operand is an unsigned long and the bit width
 * is taken from sizeof(unsigned long); mixing that width with the unsigned int
 * variant gives the wrong shift wherever long is wider than int.
 */
#define ATOMIC_LOCK_IDX_Pos ((sizeof(unsigned long) * 8) - (__builtin_clzl(ATOMIC_STRIPE - 1)))
/* Mask that reduces a stripe number to a lock index in [0, ATOMIC_LOCKS). */
#define ATOMIC_LOCK_IDX_Msk (ATOMIC_LOCKS - 1UL)
/* 32-bit register that holds all of the lock bits. */
#define ATOMIC_LOCK_REG ((io_rw_32 *)(WATCHDOG_BASE + WATCHDOG_SCRATCH3_OFFSET))

/* Enforce the constraints stated above at compile time. */
_Static_assert(ATOMIC_STRIPE >= 2UL && (ATOMIC_STRIPE & (ATOMIC_STRIPE - 1UL)) == 0UL,
               "ATOMIC_STRIPE must be a power of 2 and at least 2");
_Static_assert(ATOMIC_LOCKS >= 1UL && (ATOMIC_LOCKS & (ATOMIC_LOCKS - 1UL)) == 0UL,
               "ATOMIC_LOCKS must be a power of 2");
_Static_assert(ATOMIC_LOCKS * ATOMIC_LOCK_WIDTH <= 32UL,
               "all locks must fit in the 32-bit lock register");

/*
 * Clears every lock bit in ATOMIC_LOCK_REG.
 *
 * The function is never called directly: a function with this signature is emitted into
 * the ".preinit_array.00030" section.
 * NOTE(review): presumably the runtime startup code runs it from that section before
 * main(), and the clear is needed because the watchdog scratch register may hold a stale
 * value after a reset - confirm against the runtime init code and the datasheet.
 */
static __used __attribute__((section(".preinit_array.00030"))) void __atomic_init(void) {
*ATOMIC_LOCK_REG = 0;
}

/*
To eliminate interference with existing hardware spinlock usage and reduce multicore contention on
unique atomic variables, we use one of the watchdog scratch registers (WATCHDOG_SCRATCH3) to
implement 16, 2 bit, multicore locks, via a variation of Dekker's algorithm
(see https://en.wikipedia.org/wiki/Dekker%27s_algorithm). The lock is selected as a
function of the variable address and the stripe width, which hashes variable
addresses to lock numbers.

Each 2 bit lock holds one bit per core (bit position = lock position + core number). A core
owns the lock when its own bit is the only bit set in that 2 bit field.

__atomic_lock() disables interrupts, spins until the calling core owns the lock that guards
`mem`, and returns the value from save_and_disable_interrupts(). Interrupts are still disabled
on return; the caller must hand the returned value to __atomic_unlock().
*/
static __optimize uint32_t __atomic_lock(volatile void *mem) {
const uint32_t core = get_core_num();
/* Address -> stripe number -> lock index. */
const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
/* Bit offset of this lock's field inside the register. */
const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
/* All bits of this lock's field (both cores). */
const uint32_t lock_mask = ((1UL << ATOMIC_LOCK_WIDTH) - 1) << lock_pos;
/* This core's bit within the field. */
const uint32_t locked_mask = 1UL << (lock_pos + core);

/* Interrupts are disabled before the loop and remain disabled for the whole spin. */
uint32_t state = save_and_disable_interrupts();
while (true) {

/* First set the bit */
/* NOTE(review): presumably hw_set_bits() sets only these bits without disturbing the
   other locks in the register - confirm against hardware/address_mapped.h. */
hw_set_bits(ATOMIC_LOCK_REG, locked_mask);
__dmb();

/* Did we get the lock? */
/* Success means our bit is set and the other core's bit in this field is clear. */
if ((*ATOMIC_LOCK_REG & lock_mask) == locked_mask)
break;

/* Nope, clear our side */
__dmb();
hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);

/* Need to break any ties if the cores are in lock step, is this really required? */
/* The back-off is core * 2 nops, so core 0 retries immediately and core 1 waits 2 nops. */
for (uint32_t i = core * 2; i > 0; --i)
asm volatile ("nop");
}

return state;
}

/*
 * Release the stripe lock that guards `mem` and restore the interrupt state.
 *
 * mem    the same address that was passed to __atomic_lock()
 * state  the value __atomic_lock() returned
 */
static __optimize void __atomic_unlock(volatile void *mem, uint32_t state) {
    /* Re-derive this core's bit exactly as the acquire path does. */
    const uint32_t stripe_lock = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t field_pos = stripe_lock * ATOMIC_LOCK_WIDTH;
    const uint32_t owner_bit = 1UL << (field_pos + get_core_num());

    /* Barrier first, so the protected accesses are complete before the lock bit is dropped. */
    __dmb();
    hw_clear_bits(ATOMIC_LOCK_REG, owner_bit);

    /* Interrupts come back only after the lock has been released. */
    restore_interrupts(state);
}

/*
 * Atomically add `val` to the 8-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the addition.
 */
__optimize uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint8_t previous = *target;
    *target += val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically subtract `val` from the 8-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the subtraction.
 */
__optimize uint8_t __atomic_fetch_sub_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint8_t previous = *target;
    *target -= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically AND `val` into the 8-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint8_t __atomic_fetch_and_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint8_t previous = *target;
    *target &= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically OR `val` into the 8-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint8_t __atomic_fetch_or_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint8_t previous = *target;
    *target |= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically replace the 8-bit object at `mem` with `val`.
 *
 * The read and the write are performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the store.
 */
__optimize uint8_t __atomic_exchange_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint8_t previous = *target;
    *target = val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomic compare-and-swap on the 8-bit object at `mem`.
 *
 * If the object equals `*expected` it is overwritten with `desired`; otherwise the
 * object's current value is copied into `*expected`. The whole sequence runs while
 * holding the stripe lock for `mem` (interrupts disabled). `weak`, `success` and
 * `failure` are ignored.
 *
 * Returns true when the swap happened, false otherwise.
 */
__optimize bool __atomic_compare_exchange_1(volatile void *mem, void *expected, uint8_t desired, __unused bool weak, __unused int success, __unused int failure) {
    volatile uint8_t *target = mem;
    uint8_t *expected_value = expected;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const bool swapped = (*target == *expected_value);
    if (swapped) {
        *target = desired;
    } else {
        /* Report what was actually found so the caller can retry. */
        *expected_value = *target;
    }
    __atomic_unlock(mem, irq_state);
    return swapped;
}

/*
 * Atomically add `val` to the 16-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the addition.
 */
__optimize uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint16_t previous = *target;
    *target += val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically subtract `val` from the 16-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the subtraction.
 */
__optimize uint16_t __atomic_fetch_sub_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint16_t previous = *target;
    *target -= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically AND `val` into the 16-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint16_t __atomic_fetch_and_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint16_t previous = *target;
    *target &= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically OR `val` into the 16-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint16_t __atomic_fetch_or_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint16_t previous = *target;
    *target |= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically replace the 16-bit object at `mem` with `val`.
 *
 * The read and the write are performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the store.
 */
__optimize uint16_t __atomic_exchange_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *target = mem;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint16_t previous = *target;
    *target = val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomic compare-and-swap on the 16-bit object at `mem`.
 *
 * If the object equals `*expected` it is overwritten with `desired`; otherwise the
 * object's current value is copied into `*expected`. The whole sequence runs while
 * holding the stripe lock for `mem` (interrupts disabled). `weak`, `success` and
 * `failure` are ignored.
 *
 * Returns true when the swap happened, false otherwise.
 */
__optimize bool __atomic_compare_exchange_2(volatile void *mem, void *expected, uint16_t desired, __unused bool weak, __unused int success, __unused int failure) {
    volatile uint16_t *target = mem;
    uint16_t *expected_value = expected;
    /* The saved interrupt state is a uint32_t; keep it at full width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const bool swapped = (*target == *expected_value);
    if (swapped) {
        *target = desired;
    } else {
        /* Report what was actually found so the caller can retry. */
        *expected_value = *target;
    }
    __atomic_unlock(mem, irq_state);
    return swapped;
}

/*
 * Atomically add `val` to the 32-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the addition.
 */
__optimize uint32_t __atomic_fetch_add_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    const uint32_t previous = *target;
    *target += val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically subtract `val` from the 32-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the subtraction.
 */
__optimize uint32_t __atomic_fetch_sub_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    const uint32_t previous = *target;
    *target -= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically AND `val` into the 32-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint32_t __atomic_fetch_and_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    const uint32_t previous = *target;
    *target &= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically OR `val` into the 32-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint32_t __atomic_fetch_or_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    const uint32_t previous = *target;
    *target |= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically replace the 32-bit object at `mem` with `val`.
 *
 * The read and the write are performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the store.
 */
__optimize uint32_t __atomic_exchange_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    const uint32_t previous = *target;
    *target = val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomic compare-and-swap on the 32-bit object at `mem`.
 *
 * If the object equals `*expected` it is overwritten with `desired`; otherwise the
 * object's current value is copied into `*expected`. The whole sequence runs while
 * holding the stripe lock for `mem` (interrupts disabled). `weak`, `success` and
 * `failure` are ignored.
 *
 * Returns true when the swap happened, false otherwise.
 */
__optimize bool __atomic_compare_exchange_4(volatile void *mem, void *expected, uint32_t desired, __unused bool weak, __unused int success, __unused int failure) {
    volatile uint32_t *target = mem;
    uint32_t *expected_value = expected;
    const uint32_t irq_state = __atomic_lock(mem);
    const bool swapped = (*target == *expected_value);
    if (swapped) {
        *target = desired;
    } else {
        /* Report what was actually found so the caller can retry. */
        *expected_value = *target;
    }
    __atomic_unlock(mem, irq_state);
    return swapped;
}

/*
 * Atomically add `val` to the 64-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the addition.
 */
__optimize uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint64_t previous = *target;
    *target += val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically subtract `val` from the 64-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the subtraction.
 */
__optimize uint64_t __atomic_fetch_sub_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint64_t previous = *target;
    *target -= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically AND `val` into the 64-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint64_t __atomic_fetch_and_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint64_t previous = *target;
    *target &= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically OR `val` into the 64-bit object at `mem`.
 *
 * The read-modify-write is performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the operation.
 */
__optimize uint64_t __atomic_fetch_or_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint64_t previous = *target;
    *target |= val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomically replace the 64-bit object at `mem` with `val`.
 *
 * The read and the write are performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the value the object held before the store.
 */
__optimize uint64_t __atomic_exchange_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const uint64_t previous = *target;
    *target = val;
    __atomic_unlock(mem, irq_state);
    return previous;
}

/*
 * Atomic compare-and-swap on the 64-bit object at `mem`.
 *
 * If the object equals `*expected` it is overwritten with `desired`; otherwise the
 * object's current value is copied into `*expected`. The whole sequence runs while
 * holding the stripe lock for `mem` (interrupts disabled). `weak`, `success` and
 * `failure` are ignored.
 *
 * Returns true when the swap happened, false otherwise.
 */
__optimize bool __atomic_compare_exchange_8(volatile void *mem, void *expected, uint64_t desired, __unused bool weak, __unused int success, __unused int failure) {
    volatile uint64_t *target = mem;
    uint64_t *expected_value = expected;
    /* The saved interrupt state is a uint32_t, independent of the operand width. */
    const uint32_t irq_state = __atomic_lock(mem);
    const bool swapped = (*target == *expected_value);
    if (swapped) {
        *target = desired;
    } else {
        /* Report what was actually found so the caller can retry. */
        *expected_value = *target;
    }
    __atomic_unlock(mem, irq_state);
    return swapped;
}

/*
 * Atomically read the 64-bit object at `mem`.
 *
 * The read is performed while holding the stripe lock for `mem` (interrupts
 * disabled), so it cannot observe a half-written value from the other locked
 * 64-bit helpers. `model` is ignored.
 *
 * Returns the full 64-bit value.
 */
__optimize uint64_t __atomic_load_8(volatile void *mem, __unused int model) {
    volatile uint64_t *source = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    /* The temporary must be 64 bits wide; a 32-bit one would discard the upper word. */
    const uint64_t value = *source;
    __atomic_unlock(mem, irq_state);
    return value;
}

/*
 * Atomically write `val` to the 64-bit object at `mem`.
 *
 * The store is performed while holding the stripe lock for `mem` (interrupts
 * disabled). `model` is ignored.
 */
__optimize void __atomic_store_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *target = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    *target = val;
    __atomic_unlock(mem, irq_state);
}

/*
 * Atomically set the flag at `mem` and report whether it was already set.
 *
 * The read and the write are performed while holding the stripe lock for `mem`
 * (interrupts disabled). `model` is ignored.
 *
 * Returns the flag's value before it was set.
 */
__optimize bool __atomic_test_and_set_m0(volatile void *mem, __unused int model) {
    volatile bool *flag = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    /* A plain local is enough: only the access through `flag` needs to be volatile. */
    const bool was_set = *flag;
    *flag = true;
    __atomic_unlock(mem, irq_state);
    return was_set;
}

/*
 * Atomically clear the flag at `mem`.
 *
 * The store is made while holding the same stripe lock that
 * __atomic_test_and_set_m0() uses. Without the lock, a clear issued by one core could
 * land between the read and the write of a test-and-set running on the other core and
 * be overwritten, leaving the flag set with no owner. __atomic_lock() and
 * __atomic_unlock() issue the barriers around the store. `model` is ignored.
 */
__optimize void __atomic_clear_m0(volatile void *mem, __unused int model) {
    volatile bool *flag = mem;
    const uint32_t irq_state = __atomic_lock(mem);
    *flag = false;
    __atomic_unlock(mem, irq_state);
}
Loading

0 comments on commit 01dec6f

Please sign in to comment.