diff --git a/arch/Kconfig b/arch/Kconfig index f3e4f14dfbc89f..f1295eec38e87b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -24,6 +24,7 @@ config ARC imply XIP select ARCH_HAS_THREAD_LOCAL_STORAGE select ARCH_SUPPORTS_ROM_START + select ARCH_HAS_DIRECTED_IPIS help ARC architecture @@ -50,6 +51,7 @@ config ARM64 select USE_SWITCH_SUPPORTED select IRQ_OFFLOAD_NESTED if IRQ_OFFLOAD select BARRIER_OPERATIONS_ARCH + select ARCH_HAS_DIRECTED_IPIS help ARM64 (AArch64) architecture @@ -115,6 +117,7 @@ config RISCV select USE_SWITCH_SUPPORTED select USE_SWITCH select SCHED_IPI_SUPPORTED if SMP + select ARCH_HAS_DIRECTED_IPIS select BARRIER_OPERATIONS_BUILTIN imply XIP help @@ -129,6 +132,7 @@ config XTENSA select ARCH_HAS_CODE_DATA_RELOCATION select ARCH_HAS_TIMING_FUNCTIONS select ARCH_MEM_DOMAIN_DATA if USERSPACE + select ARCH_HAS_DIRECTED_IPIS help Xtensa architecture @@ -746,6 +750,13 @@ config ARCH_HAS_RESERVED_PAGE_FRAMES memory mappings. The architecture will need to implement arch_reserved_pages_update(). +config ARCH_HAS_DIRECTED_IPIS + bool + help + This hidden configuration should be selected by the architecture if + it has an implementation for arch_sched_directed_ipi() which allows + for IPIs to be directed to specific CPUs. + config CPU_HAS_DCACHE bool help diff --git a/arch/arc/core/smp.c b/arch/arc/core/smp.c index 9f8ee38a4a1055..aa12623db8014a 100644 --- a/arch/arc/core/smp.c +++ b/arch/arc/core/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -130,21 +131,27 @@ static void sched_ipi_handler(const void *unused) z_sched_ipi(); } -/* arch implementation of sched_ipi */ -void arch_sched_ipi(void) +void arch_sched_directed_ipi(uint32_t cpu_bitmap) { - uint32_t i; + unsigned int i; + unsigned int num_cpus = arch_num_cpus(); - /* broadcast sched_ipi request to other cores + /* Send sched_ipi request to other cores * if the target is current core, hardware will ignore it */ - unsigned int num_cpus = arch_num_cpus(); for (i = 0U; i < num_cpus; i++) { - z_arc_connect_ici_generate(i); + if ((cpu_bitmap & BIT(i)) != 0) { + z_arc_connect_ici_generate(i); + } } } +void arch_sched_broadcast_ipi(void) +{ + arch_sched_directed_ipi(IPI_ALL_CPUS_MASK); +} + int arch_smp_init(void) { struct arc_connect_bcr bcr; diff --git a/arch/arc/include/kernel_arch_func.h b/arch/arc/include/kernel_arch_func.h index 65a497e02d0786..ca382a274f4b1b 100644 --- a/arch/arc/include/kernel_arch_func.h +++ b/arch/arc/include/kernel_arch_func.h @@ -64,8 +64,6 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1, extern void z_arc_fatal_error(unsigned int reason, const struct arch_esf *esf); -extern void arch_sched_ipi(void); - extern void z_arc_switch(void *switch_to, void **switched_from); static inline void arch_switch(void *switch_to, void **switched_from) diff --git a/arch/arm/core/cortex_a_r/Kconfig b/arch/arm/core/cortex_a_r/Kconfig index 3ec57cc408e1bc..4095a277c61388 100644 --- a/arch/arm/core/cortex_a_r/Kconfig +++ b/arch/arm/core/cortex_a_r/Kconfig @@ -131,6 +131,7 @@ config AARCH32_ARMV8_R bool select ATOMIC_OPERATIONS_BUILTIN select SCHED_IPI_SUPPORTED if SMP + select ARCH_HAS_DIRECTED_IPIS help This option signifies the use of an ARMv8-R AArch32 processor implementation. 
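The per-architecture changes in this patch all follow one pattern: the architecture selects ARCH_HAS_DIRECTED_IPIS, implements arch_sched_directed_ipi() by walking the CPU bitmap and triggering the SoC-specific IPI for each set bit, and implements arch_sched_broadcast_ipi() as a directed IPI to every CPU. A minimal sketch of that shape is below; soc_trigger_ipi() is a hypothetical stand-in for the real per-core trigger (z_arc_connect_ici_generate() on ARC, SGI writes on Arm/ARM64, the MSIP register on RISC-V), and is not part of this patch.

/*
 * Sketch of the new arch IPI hooks for a hypothetical port.
 * soc_trigger_ipi() stands in for the SoC-specific register write
 * that raises an IPI on a single core.
 */
#include <zephyr/kernel.h>
#include <zephyr/sys/util.h>
#include <ipi.h>	/* kernel-internal header; IPI_ALL_CPUS_MASK is added there by this patch */

void soc_trigger_ipi(unsigned int cpu);	/* hypothetical SoC primitive */

void arch_sched_directed_ipi(uint32_t cpu_bitmap)
{
	unsigned int num_cpus = arch_num_cpus();

	for (unsigned int i = 0; i < num_cpus; i++) {
		if ((cpu_bitmap & BIT(i)) != 0) {
			/* Raise an IPI on each CPU named in the bitmap */
			soc_trigger_ipi(i);
		}
	}
}

void arch_sched_broadcast_ipi(void)
{
	/* Broadcast is simply "directed at every CPU" */
	arch_sched_directed_ipi(IPI_ALL_CPUS_MASK);
}

Where the hardware ignores a self-targeted IPI (as the ARC comment below notes), the loop can stay this simple; ports that must skip the current CPU or offline CPUs (as the RISC-V change does) add those checks inside the loop.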
diff --git a/arch/arm/core/cortex_a_r/smp.c b/arch/arm/core/cortex_a_r/smp.c index 9e06730f91396c..379b7663d016b5 100644 --- a/arch/arm/core/cortex_a_r/smp.c +++ b/arch/arm/core/cortex_a_r/smp.c @@ -7,6 +7,7 @@ #include #include #include +#include #include "boot.h" #include "zephyr/cache.h" #include "zephyr/kernel/thread_stack.h" @@ -210,7 +211,7 @@ void arch_secondary_cpu_init(void) #ifdef CONFIG_SMP -static void broadcast_ipi(unsigned int ipi) +static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap) { uint32_t mpidr = MPIDR_TO_CORE(GET_MPIDR()); @@ -220,6 +221,10 @@ static void broadcast_ipi(unsigned int ipi) unsigned int num_cpus = arch_num_cpus(); for (int i = 0; i < num_cpus; i++) { + if ((cpu_bitmap & BIT(i)) == 0) { + continue; + } + uint32_t target_mpidr = cpu_map[i]; uint8_t aff0; @@ -239,10 +244,14 @@ void sched_ipi_handler(const void *unused) z_sched_ipi(); } -/* arch implementation of sched_ipi */ -void arch_sched_ipi(void) +void arch_sched_broadcast_ipi(void) +{ + send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK); +} + +void arch_sched_directed_ipi(uint32_t cpu_bitmap) { - broadcast_ipi(SGI_SCHED_IPI); + send_ipi(SGI_SCHED_IPI, cpu_bitmap); } int arch_smp_init(void) diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c index 8777c400766fce..31dfcf337e4212 100644 --- a/arch/arm64/core/smp.c +++ b/arch/arm64/core/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -180,7 +181,7 @@ void arch_secondary_cpu_init(int cpu_num) #ifdef CONFIG_SMP -static void broadcast_ipi(unsigned int ipi) +static void send_ipi(unsigned int ipi, uint32_t cpu_bitmap) { uint64_t mpidr = MPIDR_TO_CORE(GET_MPIDR()); @@ -190,6 +191,10 @@ static void broadcast_ipi(unsigned int ipi) unsigned int num_cpus = arch_num_cpus(); for (int i = 0; i < num_cpus; i++) { + if ((cpu_bitmap & BIT(i)) == 0) { + continue; + } + uint64_t target_mpidr = cpu_map[i]; uint8_t aff0; @@ -209,10 +214,14 @@ void sched_ipi_handler(const void *unused) z_sched_ipi(); } -/* arch implementation of sched_ipi */ -void arch_sched_ipi(void) +void arch_sched_broadcast_ipi(void) +{ + send_ipi(SGI_SCHED_IPI, IPI_ALL_CPUS_MASK); +} + +void arch_sched_directed_ipi(uint32_t cpu_bitmap) { - broadcast_ipi(SGI_SCHED_IPI); + send_ipi(SGI_SCHED_IPI, cpu_bitmap); } #ifdef CONFIG_USERSPACE @@ -232,7 +241,7 @@ void mem_cfg_ipi_handler(const void *unused) void z_arm64_mem_cfg_ipi(void) { - broadcast_ipi(SGI_MMCFG_IPI); + send_ipi(SGI_MMCFG_IPI, IPI_ALL_CPUS_MASK); } #endif diff --git a/arch/riscv/core/smp.c b/arch/riscv/core/smp.c index 68147f8880a653..b5b94aac25cf09 100644 --- a/arch/riscv/core/smp.c +++ b/arch/riscv/core/smp.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -86,14 +87,15 @@ static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS]; #define IPI_SCHED 0 #define IPI_FPU_FLUSH 1 -void arch_sched_ipi(void) +void arch_sched_directed_ipi(uint32_t cpu_bitmap) { unsigned int key = arch_irq_lock(); unsigned int id = _current_cpu->id; unsigned int num_cpus = arch_num_cpus(); for (unsigned int i = 0; i < num_cpus; i++) { - if (i != id && _kernel.cpus[i].arch.online) { + if ((i != id) && _kernel.cpus[i].arch.online && + ((cpu_bitmap & BIT(i)) != 0)) { atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED); MSIP(_kernel.cpus[i].arch.hartid) = 1; } @@ -102,6 +104,11 @@ void arch_sched_ipi(void) arch_irq_unlock(key); } +void arch_sched_broadcast_ipi(void) +{ + arch_sched_directed_ipi(IPI_ALL_CPUS_MASK); +} + #ifdef CONFIG_FPU_SHARING void arch_flush_fpu_ipi(unsigned int cpu) { diff 
--git a/arch/x86/core/intel64/smp.c b/arch/x86/core/intel64/smp.c index a73ba9c8f38c36..b0232f21984132 100644 --- a/arch/x86/core/intel64/smp.c +++ b/arch/x86/core/intel64/smp.c @@ -34,7 +34,7 @@ int arch_smp_init(void) * it is not clear exactly how/where/why to abstract this, as it * assumes the use of a local APIC (but there's no other mechanism). */ -void arch_sched_ipi(void) +void arch_sched_broadcast_ipi(void) { z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_SCHED_IPI_VECTOR); } diff --git a/doc/kernel/services/smp/smp.rst b/doc/kernel/services/smp/smp.rst index ca1e0149ad55e6..4b178432bd5597 100644 --- a/doc/kernel/services/smp/smp.rst +++ b/doc/kernel/services/smp/smp.rst @@ -180,13 +180,17 @@ handle the newly-runnable load. So where possible, Zephyr SMP architectures should implement an interprocessor interrupt. The current framework is very simple: the -architecture provides a :c:func:`arch_sched_ipi` call, which when invoked -will flag an interrupt on all CPUs (except the current one, though -that is allowed behavior) which will then invoke the :c:func:`z_sched_ipi` -function implemented in the scheduler. The expectation is that these -APIs will evolve over time to encompass more functionality -(e.g. cross-CPU calls), and that the scheduler-specific calls here -will be implemented in terms of a more general framework. +architecture provides at least an :c:func:`arch_sched_broadcast_ipi` call, +which when invoked will flag an interrupt on all CPUs (except the current one, +though that is allowed behavior). If the architecture supports directed IPIs +(see :kconfig:option:`CONFIG_ARCH_HAS_DIRECTED_IPIS`), then the +architecture also provides a :c:func:`arch_sched_directed_ipi` call, which +when invoked will flag an interrupt on the specified CPUs. When an interrupt is +flagged on the CPUs, the :c:func:`z_sched_ipi` function implemented in the +scheduler will be invoked on those CPUs. The expectation is that these +APIs will evolve over time to encompass more functionality (e.g. cross-CPU +calls), and that the scheduler-specific calls here will be implemented in +terms of a more general framework. Note that not all SMP architectures will have a usable IPI mechanism (either missing, or just undocumented/unimplemented). In those cases diff --git a/include/zephyr/arch/arch_interface.h b/include/zephyr/arch/arch_interface.h index 797a60bbaa58c7..d7c33e511ce503 100644 --- a/include/zephyr/arch/arch_interface.h +++ b/include/zephyr/arch/arch_interface.h @@ -494,10 +494,18 @@ static inline uint32_t arch_proc_id(void); /** * Broadcast an interrupt to all CPUs * - * This will invoke z_sched_ipi() on other CPUs in the system. + * This will invoke z_sched_ipi() on all other CPUs in the system. */ -void arch_sched_ipi(void); +void arch_sched_broadcast_ipi(void); +/** + * Direct IPIs to the specified CPUs + * + * This will invoke z_sched_ipi() on the CPUs identified by @a cpu_bitmap.
+ * + * @param cpu_bitmap A bitmap indicating which CPUs need the IPI + */ +void arch_sched_directed_ipi(uint32_t cpu_bitmap); int arch_smp_init(void); diff --git a/include/zephyr/kernel_structs.h b/include/zephyr/kernel_structs.h index baa2046f07c8e0..cf7daff9a6cf79 100644 --- a/include/zephyr/kernel_structs.h +++ b/include/zephyr/kernel_structs.h @@ -240,8 +240,8 @@ struct z_kernel { #endif #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED) - /* Need to signal an IPI at the next scheduling point */ - bool pending_ipi; + /* Identify CPUs to send IPIs to at the next scheduling point */ + atomic_t pending_ipi; #endif }; diff --git a/kernel/Kconfig.smp b/kernel/Kconfig.smp index 22279270b19f0c..da83d1624e060a 100644 --- a/kernel/Kconfig.smp +++ b/kernel/Kconfig.smp @@ -56,12 +56,11 @@ config MP_MAX_NUM_CPUS config SCHED_IPI_SUPPORTED bool help - True if the architecture supports a call to - arch_sched_ipi() to broadcast an interrupt that will call - z_sched_ipi() on other CPUs in the system. Required for - k_thread_abort() to operate with reasonable latency - (otherwise we might have to wait for the other thread to - take an interrupt, which can be arbitrarily far in the + True if the architecture supports a call to arch_sched_broadcast_ipi() + to broadcast an interrupt that will call z_sched_ipi() on other CPUs + in the system. Required for k_thread_abort() to operate with + reasonable latency (otherwise we might have to wait for the other + thread to take an interrupt, which can be arbitrarily far in the future). config TRACE_SCHED_IPI @@ -73,6 +72,24 @@ config TRACE_SCHED_IPI depends on SCHED_IPI_SUPPORTED depends on MP_MAX_NUM_CPUS>1 +config IPI_OPTIMIZE + bool "Optimize IPI delivery" + default n + depends on SCHED_IPI_SUPPORTED && MP_MAX_NUM_CPUS>1 + help + When selected, the kernel will attempt to determine the minimum + set of CPUs that need an IPI to trigger a reschedule in response to + a thread newly made ready for execution. This increases the + computation required at every scheduler operation by a value that is + O(N) in the number of CPUs, and in exchange reduces the number of + interrupts delivered. Which to choose is going to depend on + application behavior. If the architecture also supports directing + IPIs to specific CPUs then this has the potential to significantly + reduce the number of IPIs (and consequently ISRs) processed by the + system as the number of CPUs increases. If not, the only benefit + would be to not issue any IPIs if the newly readied thread is of + lower priority than all the threads currently executing on other CPUs. + config KERNEL_COHERENCE bool "Place all shared data into coherent memory" depends on ARCH_HAS_COHERENCE diff --git a/kernel/include/ipi.h b/kernel/include/ipi.h index 77105cac16834e..b353a676d4624d 100644 --- a/kernel/include/ipi.h +++ b/kernel/include/ipi.h @@ -7,13 +7,25 @@ #ifndef ZEPHYR_KERNEL_INCLUDE_IPI_H_ #define ZEPHYR_KERNEL_INCLUDE_IPI_H_ +#include +#include +#include + +#define IPI_ALL_CPUS_MASK ((1 << CONFIG_MP_MAX_NUM_CPUS) - 1) + +#define IPI_CPU_MASK(cpu_id) \ + (IS_ENABLED(CONFIG_IPI_OPTIMIZE) ?
BIT(cpu_id) : IPI_ALL_CPUS_MASK) + + /* defined in ipi.c when CONFIG_SMP=y */ #ifdef CONFIG_SMP -void flag_ipi(void); +void flag_ipi(uint32_t ipi_mask); void signal_pending_ipi(void); +atomic_val_t ipi_mask_create(struct k_thread *thread); #else -#define flag_ipi() do { } while (false) +#define flag_ipi(ipi_mask) do { } while (false) #define signal_pending_ipi() do { } while (false) #endif /* CONFIG_SMP */ + #endif /* ZEPHYR_KERNEL_INCLUDE_IPI_H_ */ diff --git a/kernel/ipi.c b/kernel/ipi.c index 99693c0ecbfcfb..ee01c4594251ca 100644 --- a/kernel/ipi.c +++ b/kernel/ipi.c @@ -13,15 +13,58 @@ extern void z_trace_sched_ipi(void); #endif -void flag_ipi(void) +void flag_ipi(uint32_t ipi_mask) { #if defined(CONFIG_SCHED_IPI_SUPPORTED) if (arch_num_cpus() > 1) { - _kernel.pending_ipi = true; + atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask); } #endif /* CONFIG_SCHED_IPI_SUPPORTED */ } +/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */ +atomic_val_t ipi_mask_create(struct k_thread *thread) +{ + if (!IS_ENABLED(CONFIG_IPI_OPTIMIZE)) { + return (CONFIG_MP_MAX_NUM_CPUS > 1) ? IPI_ALL_CPUS_MASK : 0; + } + + uint32_t ipi_mask = 0; + uint32_t num_cpus = (uint32_t)arch_num_cpus(); + uint32_t id = _current_cpu->id; + struct k_thread *cpu_thread; + bool executable_on_cpu = true; + + for (uint32_t i = 0; i < num_cpus; i++) { + if (id == i) { + continue; + } + + /* + * An IPI absolutely does not need to be sent if ... + * 1. the CPU is not active, or + * 2. can not execute on the target CPU + * ... and might not need to be sent if ... + * 3. the target CPU's active thread is not preemptible, or + * 4. the target CPU's active thread has a higher priority + * (Items 3 & 4 may be overridden by a metaIRQ thread) + */ + +#if defined(CONFIG_SCHED_CPU_MASK) + executable_on_cpu = ((thread->base.cpu_mask & BIT(i)) != 0); +#endif + + cpu_thread = _kernel.cpus[i].current; + if ((cpu_thread != NULL) && + (((z_sched_prio_cmp(cpu_thread, thread) < 0) && + (thread_is_preemptible(cpu_thread))) || + thread_is_metairq(thread)) && executable_on_cpu) { + ipi_mask |= BIT(i); + } + } + + return (atomic_val_t)ipi_mask; +} void signal_pending_ipi(void) { @@ -34,9 +77,15 @@ void signal_pending_ipi(void) */ #if defined(CONFIG_SCHED_IPI_SUPPORTED) if (arch_num_cpus() > 1) { - if (_kernel.pending_ipi) { - _kernel.pending_ipi = false; - arch_sched_ipi(); + uint32_t cpu_bitmap; + + cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi); + if (cpu_bitmap != 0) { +#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS + arch_sched_directed_ipi(cpu_bitmap); +#else + arch_sched_broadcast_ipi(); +#endif } } #endif /* CONFIG_SCHED_IPI_SUPPORTED */ diff --git a/kernel/sched.c b/kernel/sched.c index 506ad57a141b21..67e5645bc6f245 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -348,11 +348,11 @@ static void update_cache(int preempt_ok) #endif /* CONFIG_SMP */ } -static bool thread_active_elsewhere(struct k_thread *thread) +static struct _cpu *thread_active_elsewhere(struct k_thread *thread) { - /* True if the thread is currently running on another CPU. - * There are more scalable designs to answer this question in - * constant time, but this is fine for now. + /* Returns pointer to _cpu if the thread is currently running on + * another CPU. There are more scalable designs to answer this + * question in constant time, but this is fine for now. 
*/ #ifdef CONFIG_SMP int currcpu = _current_cpu->id; @@ -362,12 +362,12 @@ static bool thread_active_elsewhere(struct k_thread *thread) for (int i = 0; i < num_cpus; i++) { if ((i != currcpu) && (_kernel.cpus[i].current == thread)) { - return true; + return &_kernel.cpus[i]; } } #endif /* CONFIG_SMP */ ARG_UNUSED(thread); - return false; + return NULL; } static void ready_thread(struct k_thread *thread) @@ -384,13 +384,14 @@ static void ready_thread(struct k_thread *thread) queue_thread(thread); update_cache(0); - flag_ipi(); + + flag_ipi(ipi_mask_create(thread)); } } void z_ready_thread_locked(struct k_thread *thread) { - if (!thread_active_elsewhere(thread)) { + if (thread_active_elsewhere(thread) == NULL) { ready_thread(thread); } } @@ -398,7 +399,7 @@ void z_ready_thread_locked(struct k_thread *thread) void z_ready_thread(struct k_thread *thread) { K_SPINLOCK(&_sched_spinlock) { - if (!thread_active_elsewhere(thread)) { + if (thread_active_elsewhere(thread) == NULL) { ready_thread(thread); } } @@ -466,11 +467,18 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key, * halt itself in the IPI. Otherwise it's unscheduled, so we * can clean it up directly. */ - if (thread_active_elsewhere(thread)) { + + struct _cpu *cpu = thread_active_elsewhere(thread); + + if (cpu != NULL) { thread->base.thread_state |= (terminate ? _THREAD_ABORTING : _THREAD_SUSPENDING); #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED) - arch_sched_ipi(); +#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS + arch_sched_directed_ipi(IPI_CPU_MASK(cpu->id)); +#else + arch_sched_broadcast_ipi(); +#endif #endif if (arch_is_in_isr()) { thread_halt_spin(thread, key); @@ -731,19 +739,38 @@ void z_unpend_thread(struct k_thread *thread) bool z_thread_prio_set(struct k_thread *thread, int prio) { bool need_sched = 0; + int old_prio = thread->base.prio; K_SPINLOCK(&_sched_spinlock) { need_sched = z_is_thread_ready(thread); if (need_sched) { - /* Don't requeue on SMP if it's the running thread */ if (!IS_ENABLED(CONFIG_SMP) || z_is_thread_queued(thread)) { dequeue_thread(thread); thread->base.prio = prio; queue_thread(thread); + + if (old_prio > prio) { + flag_ipi(ipi_mask_create(thread)); + } } else { + /* + * This is a running thread on SMP. Update its + * priority, but do not requeue it. An IPI is + * needed if the priority is both being lowered + * and it is running on another CPU. + */ + thread->base.prio = prio; + + struct _cpu *cpu; + + cpu = thread_active_elsewhere(thread); + if ((cpu != NULL) && (old_prio < prio)) { + flag_ipi(IPI_CPU_MASK(cpu->id)); + } } + update_cache(1); } else { thread->base.prio = prio; @@ -1006,8 +1033,8 @@ void z_impl_k_thread_priority_set(k_tid_t thread, int prio) bool need_sched = z_thread_prio_set((struct k_thread *)thread, prio); - flag_ipi(); - if (need_sched && (_current->base.sched_locked == 0U)) { + if ((need_sched) && (IS_ENABLED(CONFIG_SMP) || + (_current->base.sched_locked == 0U))) { z_reschedule_unlocked(); } } @@ -1219,7 +1246,7 @@ void z_impl_k_wakeup(k_tid_t thread) z_mark_thread_as_not_suspended(thread); - if (!thread_active_elsewhere(thread)) { + if (thread_active_elsewhere(thread) == NULL) { ready_thread(thread); } diff --git a/kernel/timeslicing.c b/kernel/timeslicing.c index 07ae497c7f91e6..be91d9606f51e2 100644 --- a/kernel/timeslicing.c +++ b/kernel/timeslicing.c @@ -58,11 +58,10 @@ static void slice_timeout(struct _timeout *timeout) slice_expired[cpu] = true; /* We need an IPI if we just handled a timeslice expiration - * for a different CPU. 
Ideally this would be able to target - * the specific core, but that's not part of the API yet. + * for a different CPU. */ - if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) { - flag_ipi(); + if (cpu != _current_cpu->id) { + flag_ipi(IPI_CPU_MASK(cpu)); } } diff --git a/soc/espressif/esp32/esp32-mp.c b/soc/espressif/esp32/esp32-mp.c index c380df6c8b77a6..ca2de23e1e8b86 100644 --- a/soc/espressif/esp32/esp32-mp.c +++ b/soc/espressif/esp32/esp32-mp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -290,10 +291,12 @@ void arch_cpu_start(int cpu_num, k_thread_stack_t *stack, int sz, smp_log("ESP32: APPCPU initialized"); } -void arch_sched_ipi(void) +void arch_sched_directed_ipi(uint32_t cpu_bitmap) { const int core_id = esp_core_id(); + ARG_UNUSED(cpu_bitmap); + if (core_id == 0) { DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0); } else { @@ -301,6 +304,11 @@ void arch_sched_ipi(void) } } +void arch_sched_broadcast_ipi(void) +{ + arch_sched_directed_ipi(IPI_ALL_CPUS_MASK); +} + IRAM_ATTR bool arch_cpu_active(int cpu_num) { return cpus_active[cpu_num]; diff --git a/soc/intel/intel_adsp/ace/multiprocessing.c b/soc/intel/intel_adsp/ace/multiprocessing.c index 68b8693a520d4b..1c9b3fa3d32256 100644 --- a/soc/intel/intel_adsp/ace/multiprocessing.c +++ b/soc/intel/intel_adsp/ace/multiprocessing.c @@ -21,6 +21,7 @@ #include #include #include +#include #define CORE_POWER_CHECK_NUM 128 @@ -209,7 +210,7 @@ void soc_mp_startup(uint32_t cpu) #ifndef CONFIG_XTENSA_MMU ALWAYS_INLINE #endif -static void send_ipi(uint32_t msg) +static void send_ipi(uint32_t msg, uint32_t cpu_bitmap) { uint32_t curr = arch_proc_id(); @@ -217,24 +218,30 @@ static void send_ipi(uint32_t msg) unsigned int num_cpus = arch_num_cpus(); for (int core = 0; core < num_cpus; core++) { - if (core != curr && soc_cpus_active[core]) { + if ((core != curr) && soc_cpus_active[core] && + ((cpu_bitmap & BIT(core)) != 0)) { IDC[core].agents[1].ipc.idr = msg | INTEL_ADSP_IPC_BUSY; } } } -void arch_sched_ipi(void) -{ - send_ipi(0); -} - #if defined(CONFIG_XTENSA_MMU) && (CONFIG_MP_MAX_NUM_CPUS > 1) void xtensa_mmu_tlb_ipi(void) { - send_ipi(IPI_TLB_FLUSH); + send_ipi(IPI_TLB_FLUSH, IPI_ALL_CPUS_MASK); } #endif +void arch_sched_broadcast_ipi(void) +{ + send_ipi(0, IPI_ALL_CPUS_MASK); +} + +void arch_sched_directed_ipi(uint32_t cpu_bitmap) +{ + send_ipi(0, cpu_bitmap); +} + #if CONFIG_MP_MAX_NUM_CPUS > 1 int soc_adsp_halt_cpu(int id) { diff --git a/soc/intel/intel_adsp/cavs/multiprocessing.c b/soc/intel/intel_adsp/cavs/multiprocessing.c index 2a38f20355da01..d87cd435e57598 100644 --- a/soc/intel/intel_adsp/cavs/multiprocessing.c +++ b/soc/intel/intel_adsp/cavs/multiprocessing.c @@ -8,6 +8,7 @@ #include #include #include +#include /* IDC power up message to the ROM firmware. 
This isn't documented * anywhere, it's basically just a magic number (except the high bit, @@ -121,18 +122,29 @@ void soc_start_core(int cpu_num) IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP; } -void arch_sched_ipi(void) +static void send_ipi(uint32_t cpu_bitmap) { uint32_t curr = arch_proc_id(); unsigned int num_cpus = arch_num_cpus(); for (int c = 0; c < num_cpus; c++) { - if (c != curr && soc_cpus_active[c]) { + if ((c != curr) && soc_cpus_active[c] && + ((cpu_bitmap & BIT(c)) != 0)) { IDC[curr].core[c].itc = BIT(31); } } } +void arch_sched_broadcast_ipi(void) +{ + send_ipi(IPI_ALL_CPUS_MASK); +} + +void arch_sched_directed_ipi(uint32_t cpu_bitmap) +{ + send_ipi(cpu_bitmap); +} + void idc_isr(const void *param) { ARG_UNUSED(param); diff --git a/submanifests/optional.yaml b/submanifests/optional.yaml index 35f5d931167694..a6a9048d17f4e5 100644 --- a/submanifests/optional.yaml +++ b/submanifests/optional.yaml @@ -34,7 +34,7 @@ manifest: groups: - optional - name: sof - revision: a44758883f3f6cfb6c67b19bc76fcb01f77ca50b + revision: 3f1716b0da7a48358bc265586b90b2252745c14c path: modules/audio/sof remote: upstream groups: diff --git a/tests/kernel/ipi_optimize/CMakeLists.txt b/tests/kernel/ipi_optimize/CMakeLists.txt new file mode 100644 index 00000000000000..f32de519289c3b --- /dev/null +++ b/tests/kernel/ipi_optimize/CMakeLists.txt @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 + +cmake_minimum_required(VERSION 3.20.0) +find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE}) +project(smp) + +target_sources(app PRIVATE src/main.c) + +target_include_directories(app PRIVATE + ${ZEPHYR_BASE}/kernel/include + ${ZEPHYR_BASE}/arch/${ARCH}/include + ) diff --git a/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf b/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf new file mode 100644 index 00000000000000..f0ee34b467edd4 --- /dev/null +++ b/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.conf @@ -0,0 +1,4 @@ +# Copyright (c) 2022 Carlo Caione +# SPDX-License-Identifier: Apache-2.0 + +CONFIG_MP_MAX_NUM_CPUS=4 diff --git a/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.overlay b/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.overlay new file mode 100644 index 00000000000000..5bb497069dd8fd --- /dev/null +++ b/tests/kernel/ipi_optimize/boards/qemu_cortex_a53_qemu_cortex_a53_smp.overlay @@ -0,0 +1,19 @@ +/* Copyright 2022 Carlo Caione + * SPDX-License-Identifier: Apache-2.0 + */ + +/ { + cpus { + cpu@2 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <2>; + }; + + cpu@3 { + device_type = "cpu"; + compatible = "arm,cortex-a53"; + reg = <3>; + }; + }; +}; diff --git a/tests/kernel/ipi_optimize/prj.conf b/tests/kernel/ipi_optimize/prj.conf new file mode 100644 index 00000000000000..f337c89ff5bb41 --- /dev/null +++ b/tests/kernel/ipi_optimize/prj.conf @@ -0,0 +1,5 @@ +CONFIG_ZTEST=y +CONFIG_SMP=y +CONFIG_TRACE_SCHED_IPI=y +CONFIG_IPI_OPTIMIZE=y +CONFIG_SYS_CLOCK_TICKS_PER_SEC=50 diff --git a/tests/kernel/ipi_optimize/src/main.c b/tests/kernel/ipi_optimize/src/main.c new file mode 100644 index 00000000000000..029b79b6d3a85a --- /dev/null +++ b/tests/kernel/ipi_optimize/src/main.c @@ -0,0 +1,475 @@ +/* + * Copyright (c) 2024 Intel Corporation. 
+ * + * SPDX-License-Identifier: Apache-2.0 + */ + +#include +#include +#include +#include +#include +#include + +#define STACK_SIZE (1024 + CONFIG_TEST_EXTRA_STACK_SIZE) + +#define NUM_THREADS (CONFIG_MP_MAX_NUM_CPUS - 1) + +#define DELAY_FOR_IPIS 200 + +static struct k_thread thread[NUM_THREADS]; +static struct k_thread alt_thread; + +static bool alt_thread_created; + +static K_THREAD_STACK_ARRAY_DEFINE(stack, NUM_THREADS, STACK_SIZE); +static K_THREAD_STACK_DEFINE(alt_stack, STACK_SIZE); + +static uint32_t ipi_count[CONFIG_MP_MAX_NUM_CPUS]; +static struct k_spinlock ipilock; +static atomic_t busy_started; +static volatile bool alt_thread_done; + +static K_SEM_DEFINE(sem, 0, 1); + +void z_trace_sched_ipi(void) +{ + k_spinlock_key_t key; + + key = k_spin_lock(&ipilock); + ipi_count[_current_cpu->id]++; + k_spin_unlock(&ipilock, key); +} + +static void clear_ipi_counts(void) +{ + k_spinlock_key_t key; + + key = k_spin_lock(&ipilock); + memset(ipi_count, 0, sizeof(ipi_count)); + k_spin_unlock(&ipilock, key); +} + +static void get_ipi_counts(uint32_t *set, size_t n_elem) +{ + k_spinlock_key_t key; + + key = k_spin_lock(&ipilock); + memcpy(set, ipi_count, n_elem * sizeof(*set)); + k_spin_unlock(&ipilock, key); +} + +static void busy_thread_entry(void *p1, void *p2, void *p3) +{ + int key; + uint32_t id; + + key = arch_irq_lock(); + id = _current_cpu->id; + arch_irq_unlock(key); + + atomic_or(&busy_started, BIT(id)); + + while (1) { + } +} + +static bool wait_until_busy_threads_ready(uint32_t id) +{ + uint32_t all; + uint32_t value; + unsigned int i; + + all = IPI_ALL_CPUS_MASK ^ BIT(id); + for (i = 0; i < 10; i++) { + k_busy_wait(1000); + + value = (uint32_t)atomic_get(&busy_started); + if (value == all) { + break; + } + } + + return (i < 10); +} + +static void pending_thread_entry(void *p1, void *p2, void *p3) +{ + int key; + + k_sem_take(&sem, K_FOREVER); + + while (!alt_thread_done) { + key = arch_irq_lock(); + arch_spin_relax(); + arch_irq_unlock(key); + } +} + +static void alt_thread_create(int priority, const char *desc) +{ + k_thread_create(&alt_thread, alt_stack, STACK_SIZE, + pending_thread_entry, NULL, NULL, NULL, + priority, 0, K_NO_WAIT); + alt_thread_created = true; + + /* Verify alt_thread is pending */ + + k_busy_wait(10000); + zassert_true(z_is_thread_pending(&alt_thread), + "%s priority thread has not pended.\n", desc); +} + +uint32_t busy_threads_create(int priority) +{ + unsigned int i; + uint32_t id; + int key; + + atomic_clear(&busy_started); + + for (i = 0; i < NUM_THREADS; i++) { + k_thread_create(&thread[i], stack[i], STACK_SIZE, + busy_thread_entry, NULL, NULL, NULL, + priority, 0, K_NO_WAIT); + } + + /* Align to tick boundary to minimize probability of timer ISRs */ + + k_sleep(K_TICKS(1)); + key = arch_irq_lock(); + id = _current_cpu->id; + arch_irq_unlock(key); + + /* + * Spin until all busy threads are ready. It is assumed that as this + * thread and the busy threads are cooperative that they will not be + * rescheduled to execute on a different CPU. + */ + + zassert_true(wait_until_busy_threads_ready(id), + "1 or more 'busy threads' not ready.\n"); + + return id; +} + +void busy_threads_priority_set(int priority, int delta) +{ + unsigned int i; + + for (i = 0; i < NUM_THREADS; i++) { + k_thread_priority_set(&thread[i], priority); + priority += delta; + } +} + +/** + * Verify that arch_sched_broadcast_ipi() broadcasts IPIs as expected. 
+ */ +ZTEST(ipi, test_arch_sched_broadcast_ipi) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int j; + + priority = k_thread_priority_get(k_current_get()); + + id = busy_threads_create(priority - 1); + + /* Broadcast the IPI. All other CPUs ought to receive and process it */ + + clear_ipi_counts(); + arch_sched_broadcast_ipi(); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + for (j = 0; j < CONFIG_MP_MAX_NUM_CPUS; j++) { + if (id == j) { + zassert_true(set[j] == 0, + "Broadcast-Expected 0, got %u\n", + set[j]); + } else { + zassert_true(set[j] == 1, + "Broadcast-Expected 1, got %u\n", + set[j]); + } + } +} + +#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS +/** + * Verify that arch_sched_directed_ipi() directs IPIs as expected. + */ +ZTEST(ipi, test_arch_sched_directed_ipi) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int j; + + priority = k_thread_priority_get(k_current_get()); + + id = busy_threads_create(priority - 1); + + /* + * Send an IPI to each CPU, one at a time. Verify that only the + * targeted CPU received the IPI. + */ + for (unsigned int i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) { + if (i == id) { + continue; + } + + clear_ipi_counts(); + arch_sched_directed_ipi(BIT(i)); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + for (j = 0; j < CONFIG_MP_MAX_NUM_CPUS; j++) { + if (i == j) { + zassert_true(set[j] == 1, + "Direct-Expected 1, got %u\n", + set[j]); + } else { + zassert_true(set[j] == 0, + "Direct-Expected 0, got %u\n", + set[j]); + } + } + } +} +#endif + +/** + * Verify that waking a thread whose priority is lower than any other + * currently executing thread does not result in any IPIs being sent. + */ +ZTEST(ipi, test_low_thread_wakes_no_ipis) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int i; + + priority = k_thread_priority_get(k_current_get()); + atomic_clear(&busy_started); + + alt_thread_create(5, "Low"); + + id = busy_threads_create(priority - 1); + + /* + * Lower the priority of the busy threads now that we know that they + * have started. As this is expected to generate IPIs, busy wait for + * some small amount of time to give them time to be processed. + */ + + busy_threads_priority_set(0, 0); + k_busy_wait(DELAY_FOR_IPIS); + + /* + * Low priority thread is pended. Current thread is cooperative. + * Other CPUs are executing preemptible threads @ priority 0. + */ + + clear_ipi_counts(); + k_sem_give(&sem); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + zassert_true(z_is_thread_ready(&alt_thread), + "Low priority thread is not ready.\n"); + + alt_thread_done = true; + + for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) { + zassert_true(set[i] == 0, + "CPU %u unexpectedly received IPI.\n", i); + } +} + +/** + * Verify that waking a thread whose priority is higher than all currently + * executing threads results in the proper IPIs being sent and processed. + */ +ZTEST(ipi, test_high_thread_wakes_some_ipis) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int i; + + priority = k_thread_priority_get(k_current_get()); + atomic_clear(&busy_started); + + alt_thread_create(priority - 1 - NUM_THREADS, "High"); + + id = busy_threads_create(priority - 1); + + /* + * Lower the priority of the busy threads now that we know that they + * have started and are busy waiting. 
As this is expected to generate + * IPIs, busy wait for some small amount of time to give them time to + * be processed. + */ + + busy_threads_priority_set(0, 1); + k_busy_wait(DELAY_FOR_IPIS); + + /* + * High priority thread is pended. Current thread is cooperative. + * Other CPUs are executing preemptible threads. + */ + + clear_ipi_counts(); + k_sem_give(&sem); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + zassert_true(z_is_thread_ready(&alt_thread), + "High priority thread is not ready.\n"); + + alt_thread_done = true; + + for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) { + if (i == id) { + continue; + } + + zassert_true(set[i] == 1, "CPU%u got %u IPIs", i, set[i]); + } + + zassert_true(set[id] == 0, "Current CPU got %u IPI(s).\n", set[id]); +} + +/** + * Verify that lowering the priority of an active thread results in an IPI. + * If directed IPIs are enabled, then only the CPU executing that active + * thread ought to receive the IPI. Otherwise if IPIs are broadcast, then all + * other CPUs save the current CPU ought to receive IPIs. + */ +ZTEST(ipi, test_thread_priority_set_lower) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int i; + + priority = k_thread_priority_get(k_current_get()); + + id = busy_threads_create(priority - 1); + + clear_ipi_counts(); + k_thread_priority_set(&thread[0], priority); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) { + if (i == id) { + continue; + } + +#ifdef CONFIG_ARCH_HAS_DIRECTED_IPIS + unsigned int j; + + for (j = 0; j < NUM_THREADS; j++) { + if (_kernel.cpus[i].current == &thread[j]) { + break; + } + } + + zassert_true(j < NUM_THREADS, + "CPU%u not executing expected thread\n", i); + + if (j == 0) { + zassert_true(set[i] == 1, "CPU%u got %u IPIs.\n", + i, set[i]); + } else { + zassert_true(set[i] == 0, "CPU%u got %u IPI(s).\n", + i, set[i]); + } +#else + zassert_true(set[i] == 1, "CPU%u got %u IPIs", i, set[i]); +#endif + } + + zassert_true(set[id] == 0, "Current CPU got %u IPI(s).\n", set[id]); +} + +/* + * Verify that IPIs are not sent to CPUs that are executing cooperative + * threads. + */ +ZTEST(ipi, test_thread_coop_no_ipis) +{ + uint32_t set[CONFIG_MP_MAX_NUM_CPUS]; + uint32_t id; + int priority; + unsigned int i; + + priority = k_thread_priority_get(k_current_get()); + atomic_clear(&busy_started); + + alt_thread_create(priority - 1 - NUM_THREADS, "High"); + + id = busy_threads_create(priority - 1); + + /* + * High priority thread is pended. Current thread is cooperative. + * Other CPUs are executing lower priority cooperative threads. + */ + + clear_ipi_counts(); + k_sem_give(&sem); + k_busy_wait(DELAY_FOR_IPIS); + get_ipi_counts(set, CONFIG_MP_MAX_NUM_CPUS); + + zassert_true(z_is_thread_ready(&alt_thread), + "High priority thread is not ready.\n"); + + alt_thread_done = true; + + for (i = 0; i < CONFIG_MP_MAX_NUM_CPUS; i++) { + zassert_true(set[i] == 0, "CPU%u got %u IPIs", i, set[i]); + } +} + +static void *ipi_tests_setup(void) +{ + /* + * Sleep a bit to guarantee that all CPUs enter an idle thread + * from which they can exit correctly to run the test. + */ + + k_sleep(K_MSEC(20)); + + return NULL; +} + +static void cleanup_threads(void *fixture) +{ + unsigned int i; + + ARG_UNUSED(fixture); + + /* + * Ensure that spawned busy threads are aborted before + * proceeding to the next test. 
+ */ + + for (i = 0; i < NUM_THREADS; i++) { + k_thread_abort(&thread[i]); + } + + /* Ensure alt_thread ,if it was created, also gets aborted */ + + if (alt_thread_created) { + k_thread_abort(&alt_thread); + } + alt_thread_created = false; + + alt_thread_done = false; +} + +ZTEST_SUITE(ipi, NULL, ipi_tests_setup, NULL, cleanup_threads, NULL); diff --git a/tests/kernel/ipi_optimize/testcase.yaml b/tests/kernel/ipi_optimize/testcase.yaml new file mode 100644 index 00000000000000..49227a720cbac8 --- /dev/null +++ b/tests/kernel/ipi_optimize/testcase.yaml @@ -0,0 +1,6 @@ +tests: + kernel.ipi_optimize.smp: + tags: + - kernel + - smp + filter: (CONFIG_MP_MAX_NUM_CPUS > 1) diff --git a/tests/kernel/smp/src/main.c b/tests/kernel/smp/src/main.c index f73a1dfdbbb6ea..7f556793e670c2 100644 --- a/tests/kernel/smp/src/main.c +++ b/tests/kernel/smp/src/main.c @@ -695,8 +695,8 @@ void z_trace_sched_ipi(void) * - To verify architecture layer provides a mechanism to issue an interprocessor * interrupt to all other CPUs in the system that calls the scheduler IPI. * We simply add a hook in z_sched_ipi(), in order to check if it has been - * called once in another CPU except the caller, when arch_sched_ipi() is - * called. + * called once in another CPU except the caller, when arch_sched_broadcast_ipi() + * is called. * * Testing techniques: * - Interface testing, function and block box testing, @@ -711,7 +711,7 @@ void z_trace_sched_ipi(void) * * Test Procedure: * -# In main thread, given a global variable sched_ipi_has_called equaled zero. - * -# Call arch_sched_ipi() then sleep for 100ms. + * -# Call arch_sched_broadcast_ipi() then sleep for 100ms. * -# In z_sched_ipi() handler, increment the sched_ipi_has_called. * -# In main thread, check the sched_ipi_has_called is not equaled to zero. * -# Repeat step 1 to 4 for 3 times. @@ -727,7 +727,7 @@ void z_trace_sched_ipi(void) * - This test using for the platform that support SMP, in our current scenario * , only x86_64 and arc supported. * - * @see arch_sched_ipi() + * @see arch_sched_broadcast_ipi() */ #ifdef CONFIG_SCHED_IPI_SUPPORTED ZTEST(smp, test_smp_ipi) @@ -741,7 +741,7 @@ ZTEST(smp, test_smp_ipi) for (int i = 0; i < 3 ; i++) { /* issue a sched ipi to tell other CPU to run thread */ sched_ipi_has_called = 0; - arch_sched_ipi(); + arch_sched_broadcast_ipi(); /* Need to wait longer than we think, loaded CI * systems need to wait for host scheduling to run the