From 3d20b9de7e339065c3fb956421facea68083cae9 Mon Sep 17 00:00:00 2001
From: Peter Mitsis
Date: Mon, 4 Mar 2024 10:52:24 -0500
Subject: [PATCH] kernel: Add cpu_bitmap parameter to arch_sched_ipi()

Update the arch_sched_ipi() API so that implementations may choose to
either target IPIs to specific CPUs or broadcast them to all other
CPUs.

Signed-off-by: Peter Mitsis
---
 arch/arc/core/smp.c                         | 11 +++++++----
 arch/arc/include/kernel_arch_func.h         |  2 +-
 arch/arm/core/cortex_a_r/smp.c              |  4 +++-
 arch/arm64/core/smp.c                       |  4 +++-
 arch/riscv/core/smp.c                       |  8 +++++---
 arch/x86/core/intel64/smp.c                 |  4 +++-
 doc/kernel/services/smp/smp.rst             | 11 +++++------
 include/zephyr/sys/arch_interface.h         | 10 +++++++---
 kernel/Kconfig                              |  2 +-
 kernel/sched.c                              |  4 ++--
 soc/espressif/esp32/esp32-mp.c              |  4 +++-
 soc/intel/intel_adsp/ace/multiprocessing.c  |  8 +++++---
 soc/intel/intel_adsp/cavs/multiprocessing.c |  8 +++++---
 tests/kernel/smp/src/main.c                 |  7 +++++--
 14 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/arch/arc/core/smp.c b/arch/arc/core/smp.c
index 6bc89883fad999d..1c7c6cf3681725b 100644
--- a/arch/arc/core/smp.c
+++ b/arch/arc/core/smp.c
@@ -131,16 +131,19 @@ static void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t i;
+	uint32_t bit = 1;
 
-	/* broadcast sched_ipi request to other cores
+	/* Send sched_ipi request to the cores set in cpu_bitmap;
 	 * if the target is current core, hardware will ignore it
 	 */
 	unsigned int num_cpus = arch_num_cpus();
 
-	for (i = 0U; i < num_cpus; i++) {
-		z_arc_connect_ici_generate(i);
+	for (i = 0U; i < num_cpus; i++, bit <<= 1) {
+		if ((cpu_bitmap & bit) != 0) {
+			z_arc_connect_ici_generate(i);
+		}
 	}
 }
 
diff --git a/arch/arc/include/kernel_arch_func.h b/arch/arc/include/kernel_arch_func.h
index 1c46423cb4f03f4..a7326e6db282742 100644
--- a/arch/arc/include/kernel_arch_func.h
+++ b/arch/arc/include/kernel_arch_func.h
@@ -64,7 +64,7 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1,
 
 extern void z_arc_fatal_error(unsigned int reason, const z_arch_esf_t *esf);
 
-extern void arch_sched_ipi(void);
+extern void arch_sched_ipi(uint32_t cpu_bitmap);
 
 extern void z_arc_switch(void *switch_to, void **switched_from);
 
diff --git a/arch/arm/core/cortex_a_r/smp.c b/arch/arm/core/cortex_a_r/smp.c
index f581c7703104060..0c01afce07b2138 100644
--- a/arch/arm/core/cortex_a_r/smp.c
+++ b/arch/arm/core/cortex_a_r/smp.c
@@ -240,8 +240,10 @@ void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	broadcast_ipi(SGI_SCHED_IPI);
 }
 
diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c
index 97fd60b42363396..e6b7a1ce3ca5503 100644
--- a/arch/arm64/core/smp.c
+++ b/arch/arm64/core/smp.c
@@ -210,8 +210,10 @@ void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	broadcast_ipi(SGI_SCHED_IPI);
 }
 
diff --git a/arch/riscv/core/smp.c b/arch/riscv/core/smp.c
index 54de29c05515f62..71c133b673e0247 100644
--- a/arch/riscv/core/smp.c
+++ b/arch/riscv/core/smp.c
@@ -86,14 +86,16 @@ static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
 #define IPI_SCHED     0
 #define IPI_FPU_FLUSH 1
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	unsigned int key = arch_irq_lock();
 	unsigned int id = _current_cpu->id;
 	unsigned int num_cpus = arch_num_cpus();
+	uint32_t bit = 1;
 
-	for (unsigned int i = 0; i < num_cpus; i++) {
-		if (i != id && _kernel.cpus[i].arch.online) {
+	for (unsigned int i = 0; i < num_cpus; i++, bit <<= 1) {
+		if ((i != id) && _kernel.cpus[i].arch.online &&
+		    ((cpu_bitmap & bit) != 0)) {
 			atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
 			MSIP(_kernel.cpus[i].arch.hartid) = 1;
 		}
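The ARC and RISC-V loops above (and the Intel ADSP loops later in this
patch) all follow the same targeted-delivery pattern. Distilled into a
minimal standalone sketch, with arch_ipi_trigger() as a hypothetical
stand-in for the per-CPU trigger primitive (z_arc_connect_ici_generate()
on ARC, the MSIP write on RISC-V):

    #include <zephyr/kernel.h>
    #include <zephyr/sys/util.h>

    /* Walk the bitmap and interrupt only the requested CPUs; bit N of
     * cpu_bitmap corresponds to CPU N. arch_ipi_trigger() is a
     * hypothetical per-CPU trigger, not a real Zephyr API.
     */
    void arch_sched_ipi(uint32_t cpu_bitmap)
    {
            unsigned int num_cpus = arch_num_cpus();

            for (unsigned int i = 0; i < num_cpus; i++) {
                    if ((cpu_bitmap & BIT(i)) != 0) {
                            arch_ipi_trigger(i);
                    }
            }
    }

The broadcast-only architectures (Cortex-A/R, arm64, x86, ESP32) instead
take ARG_UNUSED(cpu_bitmap) and keep their existing all-CPU delivery,
which the new API contract still permits.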
diff --git a/arch/x86/core/intel64/smp.c b/arch/x86/core/intel64/smp.c
index a73ba9c8f38c363..32b8285a8aac03f 100644
--- a/arch/x86/core/intel64/smp.c
+++ b/arch/x86/core/intel64/smp.c
@@ -34,8 +34,10 @@ int arch_smp_init(void)
  * it is not clear exactly how/where/why to abstract this, as it
  * assumes the use of a local APIC (but there's no other mechanism).
  */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_SCHED_IPI_VECTOR);
 }
 
diff --git a/doc/kernel/services/smp/smp.rst b/doc/kernel/services/smp/smp.rst
index 0a94ed022b0dabe..269d401c968a166 100644
--- a/doc/kernel/services/smp/smp.rst
+++ b/doc/kernel/services/smp/smp.rst
@@ -181,12 +181,11 @@ handle the newly-runnable load.
 So where possible, Zephyr SMP architectures should implement an
 interprocessor interrupt.  The current framework is very simple: the
 architecture provides a :c:func:`arch_sched_ipi` call, which when invoked
-will flag an interrupt on all CPUs (except the current one, though
-that is allowed behavior) which will then invoke the :c:func:`z_sched_ipi`
-function implemented in the scheduler.  The expectation is that these
-APIs will evolve over time to encompass more functionality
-(e.g. cross-CPU calls), and that the scheduler-specific calls here
-will be implemented in terms of a more general framework.
+will flag an interrupt on at least the specified CPUs, which will then
+invoke the :c:func:`z_sched_ipi` function implemented in the scheduler.
+The expectation is that these APIs will evolve over time to encompass
+more functionality (e.g. cross-CPU calls), and that the scheduler-specific
+calls here will be implemented in terms of a more general framework.
 
 Note that not all SMP architectures will have a usable IPI mechanism
 (either missing, or just undocumented/unimplemented).  In those cases
diff --git a/include/zephyr/sys/arch_interface.h b/include/zephyr/sys/arch_interface.h
index 0ffc95c663bc455..e8f1d5dbd099d8e 100644
--- a/include/zephyr/sys/arch_interface.h
+++ b/include/zephyr/sys/arch_interface.h
@@ -489,11 +489,15 @@ static inline struct _cpu *arch_curr_cpu(void);
 static inline uint32_t arch_proc_id(void);
 
 /**
- * Broadcast an interrupt to all CPUs
+ * Send an interrupt to the specified CPUs
  *
- * This will invoke z_sched_ipi() on other CPUs in the system.
+ * This will invoke z_sched_ipi() on other CPUs in the system. Whether the IPIs
+ * are targeted to specific CPUs or broadcast to all other cores is up to the
+ * specific implementation.
+ *
+ * @param cpu_bitmap A hint indicating which CPUs need the IPI (bit N is CPU N)
 */
-void arch_sched_ipi(void);
+void arch_sched_ipi(uint32_t cpu_bitmap);
 
 int arch_smp_init(void);
 
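Because the header documents the parameter as a hint, a broadcasting
implementation remains conforming; callers just describe the minimal set
of CPUs that need the interrupt. A minimal caller-side sketch of the
accumulate-and-flush pattern that the kernel/sched.c hunk below relies on
(flag_ipi() and flush_ipi() are illustrative names, not part of this
patch):

    #include <zephyr/sys/atomic.h>
    #include <zephyr/sys/util.h>

    static atomic_t pending_ipi;

    /* Producer side: record that CPU 'cpu' needs an IPI. */
    static inline void flag_ipi(uint32_t cpu)
    {
            (void)atomic_or(&pending_ipi, (atomic_val_t)BIT(cpu));
    }

    /* Flush point: clear the pending mask and issue one IPI call
     * covering every CPU flagged since the last flush.
     */
    static inline void flush_ipi(void)
    {
            uint32_t cpu_bitmap = (uint32_t)atomic_clear(&pending_ipi);

            if (cpu_bitmap != 0) {
                    arch_sched_ipi(cpu_bitmap);
            }
    }

Batching through an atomic mask means several scheduling events between
flushes collapse into a single arch_sched_ipi() call.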
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 1620a3c9aa4c696..73b762c094c7ab5 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -1200,7 +1200,7 @@ config SCHED_IPI_SUPPORTED
 	bool
 	help
 	  True if the architecture supports a call to
-	  arch_sched_ipi() to broadcast an interrupt that will call
+	  arch_sched_ipi() to send an interrupt that will call
 	  z_sched_ipi() on other CPUs in the system.
 	  Required for k_thread_abort() to operate with reasonable latency
 	  (otherwise we might have to wait for the other thread to
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cace822ac41715..fd7c5509cb7879b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -264,7 +264,7 @@ static void signal_pending_ipi(void)
 
 		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
 		if (cpu_bitmap != 0) {
-			arch_sched_ipi();
+			arch_sched_ipi(cpu_bitmap);
 		}
 	}
 #endif
@@ -721,7 +721,7 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 	 * here, not deferred!
 	 */
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
-		arch_sched_ipi();
+		arch_sched_ipi(1U << thread->base.cpu);
 #endif
 	}
 
diff --git a/soc/espressif/esp32/esp32-mp.c b/soc/espressif/esp32/esp32-mp.c
index 7922e6f18cdcef3..792e0c841d2e3c1 100644
--- a/soc/espressif/esp32/esp32-mp.c
+++ b/soc/espressif/esp32/esp32-mp.c
@@ -290,10 +290,12 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
 	smp_log("ESP32: APPCPU initialized");
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	const int core_id = esp_core_id();
 
+	ARG_UNUSED(cpu_bitmap);
+
 	if (core_id == 0) {
 		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
 	} else {
diff --git a/soc/intel/intel_adsp/ace/multiprocessing.c b/soc/intel/intel_adsp/ace/multiprocessing.c
index 3170a8f1090f650..f8bbe9ccc592b77 100644
--- a/soc/intel/intel_adsp/ace/multiprocessing.c
+++ b/soc/intel/intel_adsp/ace/multiprocessing.c
@@ -194,15 +194,17 @@ void soc_mp_startup(uint32_t cpu)
 #endif /* CONFIG_ADSP_IDLE_CLOCK_GATING */
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
+	uint32_t bit = 1;
 
 	/* Signal agent B[n] to cause an interrupt from agent A[n] */
 	unsigned int num_cpus = arch_num_cpus();
 
-	for (int core = 0; core < num_cpus; core++) {
-		if (core != curr && soc_cpus_active[core]) {
+	for (int core = 0; core < num_cpus; core++, bit <<= 1) {
+		if ((core != curr) && soc_cpus_active[core] &&
+		    ((cpu_bitmap & bit) != 0)) {
 			IDC[core].agents[1].ipc.idr = INTEL_ADSP_IPC_BUSY;
 		}
 	}
diff --git a/soc/intel/intel_adsp/cavs/multiprocessing.c b/soc/intel/intel_adsp/cavs/multiprocessing.c
index 2a38f20355da013..756777ab8f49b13 100644
--- a/soc/intel/intel_adsp/cavs/multiprocessing.c
+++ b/soc/intel/intel_adsp/cavs/multiprocessing.c
@@ -121,13 +121,15 @@ void soc_start_core(int cpu_num)
 	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP;
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
 	unsigned int num_cpus = arch_num_cpus();
+	uint32_t bit = 1;
 
-	for (int c = 0; c < num_cpus; c++) {
-		if (c != curr && soc_cpus_active[c]) {
+	for (int c = 0; c < num_cpus; c++, bit <<= 1) {
+		if ((c != curr) && soc_cpus_active[c] &&
+		    ((cpu_bitmap & bit) != 0)) {
 			IDC[curr].core[c].itc = BIT(31);
 		}
 	}
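The targeted loops in the RISC-V, ACE, and cAVS hunks above all gate each
core on the same three conditions. Expressed as a standalone predicate (a
sketch only: should_ipi() is an illustrative name, and soc_cpus_active
stands in for the per-SoC "core is powered" tables used above):

    #include <stdbool.h>
    #include <zephyr/sys/util.h>

    extern bool soc_cpus_active[];  /* per-SoC core-powered table (assumed) */

    /* A core receives the IPI only when all three conditions hold. */
    static bool should_ipi(uint32_t core, uint32_t curr, uint32_t cpu_bitmap)
    {
            return (core != curr) &&                 /* never IPI ourselves     */
                   soc_cpus_active[core] &&          /* skip powered-down cores */
                   ((cpu_bitmap & BIT(core)) != 0);  /* caller requested it     */
    }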
diff --git a/tests/kernel/smp/src/main.c b/tests/kernel/smp/src/main.c
index 2749d76fb3f43a7..fd32440601e16cb 100644
--- a/tests/kernel/smp/src/main.c
+++ b/tests/kernel/smp/src/main.c
@@ -732,16 +732,19 @@ void z_trace_sched_ipi(void)
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
 ZTEST(smp, test_smp_ipi)
 {
+	uint32_t num_cpus;
+
 #ifndef CONFIG_TRACE_SCHED_IPI
 	ztest_test_skip();
 #endif
 
-	TC_PRINT("cpu num=%d", arch_num_cpus());
+	num_cpus = arch_num_cpus();
+	TC_PRINT("cpu num=%u", num_cpus);
 
 	for (int i = 0; i < 3 ; i++) {
 		/* issue a sched ipi to tell other CPU to run thread */
 		sched_ipi_has_called = 0;
-		arch_sched_ipi();
+		arch_sched_ipi((1U << num_cpus) - 1);
 
 		/* Need to wait longer than we think, loaded CI
 		 * systems need to wait for host scheduling to run the
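The test drives the new parameter with an all-CPUs mask rather than a
single target. As a worked example of the mask arithmetic (the value 4 is
a hypothetical CPU count, chosen only for illustration):

    unsigned int num_cpus = arch_num_cpus();   /* e.g. 4 on a 4-CPU target */
    uint32_t all_cpus = (1U << num_cpus) - 1;  /* (1 << 4) - 1 == 0x0F     */

    /* Bits 0..3 set: every CPU, including the caller, is requested.
     * This is safe because each implementation either skips the current
     * core explicitly (the i != id checks) or relies on the hardware
     * ignoring a self-IPI, as on ARC.
     */
    arch_sched_ipi(all_cpus);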