kernel: Refine flagging pending IPIs
Instead of marking a single flag that indicates an IPI must be broadcast
to all other CPUs, let each bit of the kernel's pending_ipi field
indicate whether an IPI must be sent to the corresponding CPU.

When an IPI is needed to force a reschedule because of time slicing,
the target CPU is easy to identify.

When a thread is made ready or its priority changes, the CPUs that
need to receive an IPI are those currently executing a lower-priority
thread.

Though this does not change IPIs from broadcast to targeted delivery,
in some circumstances it may result in fewer IPIs being broadcast.

Signed-off-by: Peter Mitsis <[email protected]>
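The change reduces to a flag/consume pattern on an atomic bitmask. Below is a minimal, self-contained sketch of that pattern written with plain C11 atomics; the names flag_cpus() and consume_and_send() are invented for illustration, while the commit itself uses Zephyr's atomic_or() and atomic_clear() on _kernel.pending_ipi, as shown in the diff that follows.

/* Illustrative sketch only: the flag/consume pattern behind this commit,
 * using C11 atomics and hypothetical names rather than Zephyr's API.
 */
#include <stdatomic.h>
#include <stdint.h>

static atomic_uint_fast32_t pending_ipi;

/* Record that each CPU whose bit is set in cpu_bitmap needs an IPI. */
static void flag_cpus(uint32_t cpu_bitmap)
{
        atomic_fetch_or(&pending_ipi, cpu_bitmap);
}

/* At the next scheduling point: fetch and zero the mask in one atomic
 * step so bits set by concurrent flaggers are never lost, and send an
 * IPI only if at least one CPU was flagged.
 */
static void consume_and_send(void)
{
        uint32_t cpu_bitmap = (uint32_t)atomic_exchange(&pending_ipi, 0);

        if (cpu_bitmap != 0) {
                /* send_ipi(cpu_bitmap) would go here; with this commit
                 * arch_sched_ipi() still broadcasts, so the mask only
                 * gates whether any IPI is sent at all.
                 */
        }
}

The atomic read-and-clear is what keeps the pattern race-free: a bit set between the exchange and the IPI simply survives into the next scheduling point.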
peter-mitsis committed Mar 4, 2024
1 parent 9077b92 commit 36ef61b
Showing 2 changed files with 41 additions and 11 deletions.
include/zephyr/kernel_structs.h (2 additions, 2 deletions)
@@ -237,8 +237,8 @@ struct z_kernel {
 #endif
 
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
-	/* Need to signal an IPI at the next scheduling point */
-	bool pending_ipi;
+	/* Identify CPUs to send IPIs to at the next scheduling point */
+	atomic_t pending_ipi;
 #endif
 };

kernel/sched.c (39 additions, 9 deletions)
@@ -260,8 +260,10 @@ static void signal_pending_ipi(void)
 	 */
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		if (_kernel.pending_ipi) {
-			_kernel.pending_ipi = false;
+		uint32_t cpu_bitmap;
+
+		cpu_bitmap = (uint32_t)atomic_clear(&_kernel.pending_ipi);
+		if (cpu_bitmap != 0) {
 			arch_sched_ipi();
 		}
 	}
@@ -397,15 +399,43 @@ static void move_thread_to_end_of_prio_q(struct k_thread *thread)
 	update_cache(thread == _current);
 }
 
-static void flag_ipi(void)
+static void flag_ipi(uint32_t ipi_mask)
 {
 #if defined(CONFIG_SMP) && defined(CONFIG_SCHED_IPI_SUPPORTED)
 	if (arch_num_cpus() > 1) {
-		_kernel.pending_ipi = true;
+		atomic_or(&_kernel.pending_ipi, (atomic_val_t)ipi_mask);
 	}
 #endif
 }
 
+/* Create a bitmask of CPUs that need an IPI. Note: sched_spinlock is held. */
+static atomic_val_t ipi_mask_create(struct k_thread *thread)
+{
+#if defined(CONFIG_SMP) && (CONFIG_MP_MAX_NUM_CPUS > 1)
+	uint32_t ipi_mask = 0;
+	struct k_thread *cpu_thread;
+	unsigned int num_cpus = arch_num_cpus();
+	uint32_t id = arch_proc_id();
+
+	for (uint32_t i = 0; i < num_cpus; i++) {
+		if (id == i) {
+			continue;
+		}
+
+		cpu_thread = _kernel.cpus[i].current;
+		if ((cpu_thread != NULL) &&
+		    (cpu_thread->base.prio > thread->base.prio)) {
+			ipi_mask |= (1 << i);
+		}
+	}
+
+	return (atomic_val_t)ipi_mask;
+#else
+	ARG_UNUSED(thread);
+	return 0;
+#endif
+}
+
 #ifdef CONFIG_TIMESLICING
 
 static int slice_ticks = DIV_ROUND_UP(CONFIG_TIMESLICE_SIZE * Z_HZ_ticks, Z_HZ_ms);
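As a worked example of the mask construction above: in Zephyr, a numerically greater prio value means a lower priority, so a CPU is flagged exactly when its current thread's prio is greater than that of the thread being readied. The mock below is hypothetical (mock_ipi_mask() and its parameters stand in for _kernel.cpus[] state) but follows the same logic:

/* Hypothetical host-side mock of the mask construction: Zephyr's
 * per-CPU state (_kernel.cpus[i].current->base.prio) is replaced by
 * a plain array of priorities. Lower numeric value = higher priority.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t mock_ipi_mask(const int *cpu_prio, unsigned int num_cpus,
                              uint32_t self_id, int new_thread_prio)
{
        uint32_t mask = 0;

        for (uint32_t i = 0; i < num_cpus; i++) {
                if (i == self_id) {
                        continue; /* the local CPU reschedules directly */
                }
                /* Flag CPU i only if it runs something of lower priority
                 * (numerically greater) than the newly readied thread.
                 */
                if (cpu_prio[i] > new_thread_prio) {
                        mask |= (1U << i);
                }
        }
        return mask;
}

int main(void)
{
        int prio[4] = { 0, 5, -2, 7 }; /* current prio on CPUs 0..3 */

        /* A prio-3 thread readied from CPU 0: CPUs 1 and 3 run lower
         * priority work, so the mask is 0b1010 and this prints 0xa.
         */
        printf("mask = 0x%x\n", mock_ipi_mask(prio, 4, 0, 3));
        return 0;
}

Here CPU 2 (priority -2) is left alone because it is running something more important than the new thread, and CPU 0 is skipped as the local CPU.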
@@ -458,11 +488,10 @@ static void slice_timeout(struct _timeout *t)
 	slice_expired[cpu] = true;
 
 	/* We need an IPI if we just handled a timeslice expiration
-	 * for a different CPU. Ideally this would be able to target
-	 * the specific core, but that's not part of the API yet.
+	 * for a different CPU.
 	 */
 	if (IS_ENABLED(CONFIG_SMP) && cpu != _current_cpu->id) {
-		flag_ipi();
+		flag_ipi(1 << cpu);
 	}
 }

@@ -616,7 +645,8 @@ static void ready_thread(struct k_thread *thread)
 
 		queue_thread(thread);
 		update_cache(0);
-		flag_ipi();
+
+		flag_ipi(ipi_mask_create(thread));
 	}
 }

@@ -1003,7 +1033,7 @@ bool z_set_prio(struct k_thread *thread, int prio)
 			thread->base.prio = prio;
 		}
 
-		flag_ipi();
+		flag_ipi(ipi_mask_create(thread));
 		update_cache(1);
 	} else {
 		thread->base.prio = prio;
