Add C11 standard atomic support (#1645)
* Add runtime support for stdatomics
* Fix lock calculation and enable atomic_flag support
Showing 5 changed files with 387 additions and 0 deletions.
CMakeLists.txt (new file)
@@ -0,0 +1,11 @@
if (NOT TARGET pico_atomic)
    pico_add_library(pico_atomic)

    target_sources(pico_atomic INTERFACE
            ${CMAKE_CURRENT_LIST_DIR}/pico_atomic.c
    )

    target_include_directories(pico_atomic_headers INTERFACE ${CMAKE_CURRENT_LIST_DIR}/include)

    target_link_libraries(pico_atomic INTERFACE pico_sync)
endif()
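A minimal usage sketch (illustrative, not part of this diff), assuming an application target that links pico_atomic together with pico_stdlib and pico_multicore: once the library is linked, plain C11 atomics work across both cores, with 32-bit operations routed through __atomic_fetch_add_4 in pico_atomic.c below.

#include <stdatomic.h>
#include <stdio.h>
#include "pico/stdlib.h"
#include "pico/multicore.h"

static atomic_uint counter;  // 32-bit: atomic_fetch_add lowers to __atomic_fetch_add_4

static void core1_entry(void) {
    for (int i = 0; i < 100000; i++)
        atomic_fetch_add(&counter, 1);
}

int main(void) {
    stdio_init_all();
    multicore_launch_core1(core1_entry);
    for (int i = 0; i < 100000; i++)
        atomic_fetch_add(&counter, 1);
    sleep_ms(100);  // crude settle time for core 1 (illustration only)
    printf("counter = %u\n", (unsigned) atomic_load(&counter));
}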
include/stdatomic.h (new file)
@@ -0,0 +1,27 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street ([email protected]).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#ifndef __STDATOMIC_H
#define __STDATOMIC_H

#include_next <stdatomic.h>

#undef atomic_flag_test_and_set
#undef atomic_flag_test_and_set_explicit
#undef atomic_flag_clear
#undef atomic_flag_clear_explicit

extern _Bool __atomic_test_and_set_m0(volatile void *mem, int model);
extern void __atomic_clear_m0(volatile void *mem, int model);

#define atomic_flag_test_and_set(PTR) __atomic_test_and_set_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_test_and_set_explicit(PTR, MO) __atomic_test_and_set_m0((PTR), (MO))

#define atomic_flag_clear(PTR) __atomic_clear_m0((PTR), __ATOMIC_SEQ_CST)
#define atomic_flag_clear_explicit(PTR, MO) __atomic_clear_m0((PTR), (MO))

#endif
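Because the macros above redirect atomic_flag to the __atomic_*_m0 helpers, the usual flag-as-spinlock idiom works on the Cortex-M0+, which lacks the exclusive-access instructions GCC's builtins would otherwise require. A sketch (illustrative, not part of this diff):

#include <stdatomic.h>

static atomic_flag guard = ATOMIC_FLAG_INIT;

void with_guard(void (*fn)(void)) {
    // Expands to __atomic_test_and_set_m0(&guard, memory_order_acquire)
    while (atomic_flag_test_and_set_explicit(&guard, memory_order_acquire))
        ;  // spin until the flag was previously clear
    fn();
    // Expands to __atomic_clear_m0(&guard, memory_order_release)
    atomic_flag_clear_explicit(&guard, memory_order_release);
}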
pico_atomic.c (new file)
@@ -0,0 +1,345 @@
/*
 * Copyright (c) 2024 Raspberry Pi (Trading) Ltd.
 * Copyright (c) 2024 Stephen Street ([email protected]).
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include <stdbool.h>
#include <stdint.h>

#include "hardware/address_mapped.h"
#include "hardware/regs/watchdog.h"
#include "hardware/sync.h"

#include "pico/config.h"

#ifndef __optimize
#define __optimize __attribute__((optimize("-Os")))
#endif

/* Must be powers of 2 */
#define ATOMIC_STRIPE 4UL
#define ATOMIC_LOCKS 16UL
#define ATOMIC_LOCK_WIDTH 2UL
#define ATOMIC_LOCK_IDX_Pos ((sizeof(unsigned long) * 8) - (__builtin_clz(ATOMIC_STRIPE - 1)))
#define ATOMIC_LOCK_IDX_Msk (ATOMIC_LOCKS - 1UL)
#define ATOMIC_LOCK_REG ((io_rw_32 *)(WATCHDOG_BASE + WATCHDOG_SCRATCH3_OFFSET))

static __used __attribute__((section(".preinit_array.00030"))) void __atomic_init(void) {
    *ATOMIC_LOCK_REG = 0;
}
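/* Note: __atomic_init above runs from the .preinit_array during SDK runtime
   init, before main(), so the lock word is cleared before any atomic helper
   can be called; the 00030 suffix orders it among the other preinit steps. */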
/*
 To eliminate interference with existing hardware spinlock usage and to reduce multicore contention on
 unique atomic variables, we use one of the watchdog scratch registers (WATCHDOG_SCRATCH3) to
 implement sixteen 2-bit multicore locks, via a variation of Dekker's algorithm
 (see https://en.wikipedia.org/wiki/Dekker%27s_algorithm). The lock is selected as a
 function of the variable's address and the stripe width, which hashes variable
 addresses to lock numbers.
 */
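/*
 Worked example (illustrative): for a variable at mem = 0x20001234,
 lock_idx = (0x20001234 >> 2) & 0xF = 13 and lock_pos = 26, so this variable
 contends on bits 26 (core 0) and 27 (core 1) of WATCHDOG_SCRATCH3; variables
 that hash to different lock indices never contend with each other.
 */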
static __optimize uint32_t __atomic_lock(volatile void *mem) {
    const uint32_t core = get_core_num();
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t lock_mask = ((1UL << ATOMIC_LOCK_WIDTH) - 1) << lock_pos;
    const uint32_t locked_mask = 1UL << (lock_pos + core);

    uint32_t state = save_and_disable_interrupts();
    while (true) {
        /* First set the bit */
        hw_set_bits(ATOMIC_LOCK_REG, locked_mask);
        __dmb();

        /* Did we get the lock? */
        if ((*ATOMIC_LOCK_REG & lock_mask) == locked_mask)
            break;

        /* Nope, clear our side */
        __dmb();
        hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);

        /* Need to break any ties if the cores are in lock step, is this really required? */
        for (uint32_t i = core * 2; i > 0; --i)
            asm volatile ("nop");
    }

    return state;
}
static __optimize void __atomic_unlock(volatile void *mem, uint32_t state) {
    const uint32_t lock_idx = (((uintptr_t)mem) >> ATOMIC_LOCK_IDX_Pos) & ATOMIC_LOCK_IDX_Msk;
    const uint32_t lock_pos = lock_idx * ATOMIC_LOCK_WIDTH;
    const uint32_t locked_mask = 1UL << (lock_pos + get_core_num());

    __dmb();
    hw_clear_bits(ATOMIC_LOCK_REG, locked_mask);
    restore_interrupts(state);
}
__optimize uint8_t __atomic_fetch_add_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_sub_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_and_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_fetch_or_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint8_t __atomic_exchange_1(volatile void *mem, uint8_t val, __unused int model) {
    volatile uint8_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint8_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_1(volatile void *mem, void *expected, uint8_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint8_t *ptr = mem;
    uint8_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
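/*
 Illustrative note (not part of this diff): the compare-exchange helpers are
 what C11 atomic_compare_exchange_strong/weak lower to on the M0+, e.g.

     static _Atomic uint8_t owner;   // hypothetical lock word, 0 = free
     uint8_t expected = 0;
     bool got_it = atomic_compare_exchange_strong(&owner, &expected, my_id);
     // calls __atomic_compare_exchange_1(&owner, &expected, my_id, ...)
 */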
__optimize uint16_t __atomic_fetch_add_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_sub_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_and_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_fetch_or_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint16_t __atomic_exchange_2(volatile void *mem, uint16_t val, __unused int model) {
    volatile uint16_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint16_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_2(volatile void *mem, void *expected, uint16_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint16_t *ptr = mem;
    uint16_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
__optimize uint32_t __atomic_fetch_add_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_sub_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_and_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_fetch_or_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint32_t __atomic_exchange_4(volatile void *mem, uint32_t val, __unused int model) {
    volatile uint32_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint32_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_4(volatile void *mem, void *expected, uint32_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint32_t *ptr = mem;
    uint32_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
__optimize uint64_t __atomic_fetch_add_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr += val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_sub_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr -= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_and_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr &= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_fetch_or_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr |= val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize uint64_t __atomic_exchange_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    *ptr = val;
    __atomic_unlock(mem, state);
    return result;
}

__optimize bool __atomic_compare_exchange_8(volatile void *mem, void *expected, uint64_t desired, __unused bool weak, __unused int success, __unused int failure) {
    bool result = false;
    volatile uint64_t *ptr = mem;
    uint64_t *e_ptr = expected;
    uint32_t state = __atomic_lock(mem);
    if (*ptr == *e_ptr) {
        *ptr = desired;
        result = true;
    } else
        *e_ptr = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
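/* Unlike the 1-, 2- and 4-byte cases, plain 64-bit loads and stores also need
   the lock: on the M0+ they compile to a pair of 32-bit accesses and could
   otherwise tear between cores or against an interrupt. */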
__optimize uint64_t __atomic_load_8(volatile void *mem, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    uint64_t result = *ptr;
    __atomic_unlock(mem, state);
    return result;
}
__optimize void __atomic_store_8(volatile void *mem, uint64_t val, __unused int model) {
    volatile uint64_t *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    *ptr = val;
    __atomic_unlock(mem, state);
}
__optimize bool __atomic_test_and_set_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    uint32_t state = __atomic_lock(mem);
    bool result = *ptr;
    *ptr = true;
    __atomic_unlock(mem, state);
    return result;
}
__optimize void __atomic_clear_m0(volatile void *mem, __unused int model) {
    volatile bool *ptr = mem;
    *ptr = false;
    __dmb();
}
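A final sketch (illustrative, not part of this diff): with the helpers above, 64-bit atomics work out of the box, e.g. a tick counter shared between a hypothetical interrupt handler and the main loop:

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t ticks;

void on_tick(void) {                // hypothetical timer callback
    atomic_fetch_add(&ticks, 1);    // -> __atomic_fetch_add_8
}

uint64_t read_ticks(void) {
    return atomic_load(&ticks);     // -> __atomic_load_8, cannot tear
}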