diff --git a/arch/arc/core/smp.c b/arch/arc/core/smp.c
index 6bc89883fad999d..1c7c6cf3681725b 100644
--- a/arch/arc/core/smp.c
+++ b/arch/arc/core/smp.c
@@ -131,16 +131,20 @@ static void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t i;
+	uint32_t bit = 1;
 
-	/* broadcast sched_ipi request to other cores
+	/* Send sched_ipi request to other cores
 	 * if the target is current core, hardware will ignore it
 	 */
 	unsigned int num_cpus = arch_num_cpus();
 
-	for (i = 0U; i < num_cpus; i++) {
-		z_arc_connect_ici_generate(i);
+	for (i = 0U; i < num_cpus; i++, bit <<= 1) {
+		/* Bit i of cpu_bitmap selects core i; skip unrequested cores */
+		if ((cpu_bitmap & bit) != 0U) {
+			z_arc_connect_ici_generate(i);
+		}
 	}
 }
diff --git a/arch/arc/include/kernel_arch_func.h b/arch/arc/include/kernel_arch_func.h
index 1c46423cb4f03f4..a7326e6db282742 100644
--- a/arch/arc/include/kernel_arch_func.h
+++ b/arch/arc/include/kernel_arch_func.h
@@ -64,7 +64,7 @@ extern void z_arc_userspace_enter(k_thread_entry_t user_entry, void *p1,
 
 extern void z_arc_fatal_error(unsigned int reason, const z_arch_esf_t *esf);
 
-extern void arch_sched_ipi(void);
+extern void arch_sched_ipi(uint32_t cpu_bitmap);
 
 extern void z_arc_switch(void *switch_to, void **switched_from);
 
diff --git a/arch/arm/core/cortex_a_r/smp.c b/arch/arm/core/cortex_a_r/smp.c
index f581c7703104060..0c01afce07b2138 100644
--- a/arch/arm/core/cortex_a_r/smp.c
+++ b/arch/arm/core/cortex_a_r/smp.c
@@ -240,8 +240,10 @@ void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	broadcast_ipi(SGI_SCHED_IPI);
 }
 
diff --git a/arch/arm64/core/smp.c b/arch/arm64/core/smp.c
index 97fd60b42363396..e6b7a1ce3ca5503 100644
--- a/arch/arm64/core/smp.c
+++ b/arch/arm64/core/smp.c
@@ -210,8 +210,10 @@ void sched_ipi_handler(const void *unused)
 }
 
 /* arch implementation of sched_ipi */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	broadcast_ipi(SGI_SCHED_IPI);
 }
 
diff --git a/arch/riscv/core/smp.c b/arch/riscv/core/smp.c
index 54de29c05515f62..71c133b673e0247 100644
--- a/arch/riscv/core/smp.c
+++ b/arch/riscv/core/smp.c
@@ -86,14 +86,16 @@ static atomic_val_t cpu_pending_ipi[CONFIG_MP_MAX_NUM_CPUS];
 #define IPI_SCHED	0
 #define IPI_FPU_FLUSH	1
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	unsigned int key = arch_irq_lock();
 	unsigned int id = _current_cpu->id;
 	unsigned int num_cpus = arch_num_cpus();
+	uint32_t bit = 1;
 
-	for (unsigned int i = 0; i < num_cpus; i++) {
-		if (i != id && _kernel.cpus[i].arch.online) {
+	for (unsigned int i = 0; i < num_cpus; i++, bit <<= 1) {
+		if ((i != id) && _kernel.cpus[i].arch.online &&
+		    ((cpu_bitmap & bit) != 0)) {
 			atomic_set_bit(&cpu_pending_ipi[i], IPI_SCHED);
 			MSIP(_kernel.cpus[i].arch.hartid) = 1;
 		}
diff --git a/arch/x86/core/intel64/smp.c b/arch/x86/core/intel64/smp.c
index a73ba9c8f38c363..32b8285a8aac03f 100644
--- a/arch/x86/core/intel64/smp.c
+++ b/arch/x86/core/intel64/smp.c
@@ -34,8 +34,10 @@ int arch_smp_init(void)
  * it is not clear exactly how/where/why to abstract this, as it
  * assumes the use of a local APIC (but there's no other mechanism).
  */
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
+	ARG_UNUSED(cpu_bitmap);
+
 	z_loapic_ipi(0, LOAPIC_ICR_IPI_OTHERS, CONFIG_SCHED_IPI_VECTOR);
 }
 
diff --git a/doc/kernel/services/smp/smp.rst b/doc/kernel/services/smp/smp.rst
index 0a94ed022b0dabe..269d401c968a166 100644
--- a/doc/kernel/services/smp/smp.rst
+++ b/doc/kernel/services/smp/smp.rst
@@ -181,12 +181,11 @@ handle the newly-runnable load.
 So where possible, Zephyr SMP architectures should implement an
 interprocessor interrupt.  The current framework is very simple: the
 architecture provides a :c:func:`arch_sched_ipi` call, which when invoked
-will flag an interrupt on all CPUs (except the current one, though
-that is allowed behavior) which will then invoke the :c:func:`z_sched_ipi`
-function implemented in the scheduler.  The expectation is that these
-APIs will evolve over time to encompass more functionality
-(e.g. cross-CPU calls), and that the scheduler-specific calls here
-will be implemented in terms of a more general framework.
+will flag an interrupt on at least the specified CPUs, which will then invoke
+the :c:func:`z_sched_ipi` function implemented in the scheduler.  The
+expectation is that these APIs will evolve over time to encompass more
+functionality (e.g. cross-CPU calls), and that the scheduler-specific calls
+here will be implemented in terms of a more general framework.
 
 Note that not all SMP architectures will have a usable IPI mechanism
 (either missing, or just undocumented/unimplemented).  In those cases
diff --git a/include/zephyr/sys/arch_interface.h b/include/zephyr/sys/arch_interface.h
index 0ffc95c663bc455..e8f1d5dbd099d8e 100644
--- a/include/zephyr/sys/arch_interface.h
+++ b/include/zephyr/sys/arch_interface.h
@@ -489,11 +489,15 @@ static inline struct _cpu *arch_curr_cpu(void);
 static inline uint32_t arch_proc_id(void);
 
 /**
- * Broadcast an interrupt to all CPUs
+ * Send an interrupt to specified CPUs
  *
- * This will invoke z_sched_ipi() on other CPUs in the system.
+ * This will invoke z_sched_ipi() on other CPUs in the system. Whether the IPIs
+ * are targeted to specific CPUs or broadcast to all other cores is up to the
+ * specific implementation.
+ *
+ * @param cpu_bitmap A hint indicating which CPUs need the IPI (bit N = CPU N)
  */
-void arch_sched_ipi(void);
+void arch_sched_ipi(uint32_t cpu_bitmap);
 
 
 int arch_smp_init(void);
diff --git a/kernel/Kconfig b/kernel/Kconfig
index 1620a3c9aa4c696..73b762c094c7ab5 100644
--- a/kernel/Kconfig
+++ b/kernel/Kconfig
@@ -1200,7 +1200,7 @@ config SCHED_IPI_SUPPORTED
 	bool
 	help
 	  True if the architecture supports a call to
-	  arch_sched_ipi() to broadcast an interrupt that will call
+	  arch_sched_ipi() to send an interrupt that will call
 	  z_sched_ipi() on other CPUs in the system.  Required for
 	  k_thread_abort() to operate with reasonable latency
 	  (otherwise we might have to wait for the other thread to
diff --git a/kernel/sched.c b/kernel/sched.c
index 1cace822ac41715..fd7c5509cb7879b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -264,7 +264,7 @@ static void signal_pending_ipi(void)
 
 		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
 		if (cpu_bitmap != 0) {
-			arch_sched_ipi();
+			arch_sched_ipi(cpu_bitmap);
 		}
 	}
 #endif
@@ -721,7 +721,7 @@ static void z_thread_halt(struct k_thread *thread, k_spinlock_key_t key,
 		 * here, not deferred!
 		 */
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
-		arch_sched_ipi();
+		arch_sched_ipi(1U << thread->base.cpu);
 #endif
 	}
 
diff --git a/soc/espressif/esp32/esp32-mp.c b/soc/espressif/esp32/esp32-mp.c
index 7922e6f18cdcef3..792e0c841d2e3c1 100644
--- a/soc/espressif/esp32/esp32-mp.c
+++ b/soc/espressif/esp32/esp32-mp.c
@@ -290,10 +290,12 @@ void arch_start_cpu(int cpu_num, k_thread_stack_t *stack, int sz,
 	smp_log("ESP32: APPCPU initialized");
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	const int core_id = esp_core_id();
 
+	ARG_UNUSED(cpu_bitmap);
+
 	if (core_id == 0) {
 		DPORT_WRITE_PERI_REG(DPORT_CPU_INTR_FROM_CPU_0_REG, DPORT_CPU_INTR_FROM_CPU_0);
 	} else {
diff --git a/soc/intel/intel_adsp/ace/multiprocessing.c b/soc/intel/intel_adsp/ace/multiprocessing.c
index 3170a8f1090f650..f8bbe9ccc592b77 100644
--- a/soc/intel/intel_adsp/ace/multiprocessing.c
+++ b/soc/intel/intel_adsp/ace/multiprocessing.c
@@ -194,15 +194,17 @@ void soc_mp_startup(uint32_t cpu)
 #endif /* CONFIG_ADSP_IDLE_CLOCK_GATING */
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
+	uint32_t bit = 1;
 
 	/* Signal agent B[n] to cause an interrupt from agent A[n] */
 	unsigned int num_cpus = arch_num_cpus();
 
-	for (int core = 0; core < num_cpus; core++) {
-		if (core != curr && soc_cpus_active[core]) {
+	for (int core = 0; core < num_cpus; core++, bit <<= 1) {
+		if ((core != curr) && soc_cpus_active[core] &&
+		    ((cpu_bitmap & bit) != 0)) {
 			IDC[core].agents[1].ipc.idr = INTEL_ADSP_IPC_BUSY;
 		}
 	}
diff --git a/soc/intel/intel_adsp/cavs/multiprocessing.c b/soc/intel/intel_adsp/cavs/multiprocessing.c
index 2a38f20355da013..756777ab8f49b13 100644
--- a/soc/intel/intel_adsp/cavs/multiprocessing.c
+++ b/soc/intel/intel_adsp/cavs/multiprocessing.c
@@ -121,13 +121,15 @@ void soc_start_core(int cpu_num)
 	IDC[curr_cpu].core[cpu_num].itc = IDC_MSG_POWER_UP;
 }
 
-void arch_sched_ipi(void)
+void arch_sched_ipi(uint32_t cpu_bitmap)
 {
 	uint32_t curr = arch_proc_id();
 	unsigned int num_cpus = arch_num_cpus();
+	uint32_t bit = 1;
 
-	for (int c = 0; c < num_cpus; c++) {
-		if (c != curr && soc_cpus_active[c]) {
+	for (int c = 0; c < num_cpus; c++, bit <<= 1) {
+		if ((c != curr) && soc_cpus_active[c] &&
+		    ((cpu_bitmap & bit) != 0)) {
 			IDC[curr].core[c].itc = BIT(31);
 		}
 	}
diff --git a/tests/kernel/smp/src/main.c b/tests/kernel/smp/src/main.c
index 2749d76fb3f43a7..fd32440601e16cb 100644
--- a/tests/kernel/smp/src/main.c
+++ b/tests/kernel/smp/src/main.c
@@ -732,16 +732,19 @@ void z_trace_sched_ipi(void)
 #ifdef CONFIG_SCHED_IPI_SUPPORTED
 ZTEST(smp, test_smp_ipi)
 {
+	unsigned int num_cpus;
+
 #ifndef CONFIG_TRACE_SCHED_IPI
 	ztest_test_skip();
 #endif
 
-	TC_PRINT("cpu num=%d", arch_num_cpus());
+	num_cpus = arch_num_cpus();
+	TC_PRINT("cpu num=%u", num_cpus);
 
 	for (int i = 0; i < 3 ; i++) {
 		/* issue a sched ipi to tell other CPU to run thread */
 		sched_ipi_has_called = 0;
-		arch_sched_ipi();
+		arch_sched_ipi((1U << num_cpus) - 1U);
 
 		/* Need to wait longer than we think, loaded CI
 		 * systems need to wait for host scheduling to run the