-
-
Notifications
You must be signed in to change notification settings - Fork 52
/
0628-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch
326 lines (297 loc) · 10.2 KB
/
0628-x86-mwait-idle-Use-ACPI-for-CPUs-without-hardcoded-C.patch
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
From 2b4cd4c5ce29c48f49ddb63d09a4c720b6cfe11e Mon Sep 17 00:00:00 2001
:
: It's currently unclear if it's really needed or not. Keep it disabled
: by default but compile it in such that testing with it is easy,
: especially because this is closer to what Linux does. If it turn's out
: to be needed, we need to check what exactly is needed and then discuss
: this again [1] with upstream.
:
: [1]: https://lore.kernel.org/xen-devel/[email protected]/
:
From: Simon Gaiser <[email protected]>
Date: Mon, 14 Aug 2023 10:09:59 +0200
Subject: [PATCH] x86/mwait-idle: Use ACPI for CPUs without hardcoded C-state
table
mwait-idle includes a hardcoded config for many CPUs. But some are
missing, for example Tiger Lake. Linux' driver reads the config from
ACPI in those cases. This adds this to Xen's implementation.
The Linux driver also has a feature to combine the internal table with
the infos from ACPI. This is not implemented here, for CPUs with
internal config nothing is changed.
Signed-off-by: Simon Gaiser <[email protected]>
---
xen/arch/x86/acpi/cpu_idle.c | 58 ++++++++++-----
xen/arch/x86/cpu/mwait-idle.c | 116 +++++++++++++++++++++++++----
xen/arch/x86/include/asm/cpuidle.h | 2 +-
3 files changed, 142 insertions(+), 34 deletions(-)
diff --git a/xen/arch/x86/acpi/cpu_idle.c b/xen/arch/x86/acpi/cpu_idle.c
index ca0b1217913b..4f7530d25be1 100644
--- a/xen/arch/x86/acpi/cpu_idle.c
+++ b/xen/arch/x86/acpi/cpu_idle.c
@@ -78,6 +78,7 @@
static void cf_check lapic_timer_nop(void) { }
void (*__read_mostly lapic_timer_off)(void);
void (*__read_mostly lapic_timer_on)(void);
+static struct notifier_block cpu_nfb;
bool lapic_timer_init(void)
{
@@ -1316,6 +1317,26 @@ static void print_cx_pminfo(uint32_t cpu, struct xen_processor_power *power)
#define print_cx_pminfo(c, p)
#endif
+
+static void repark_cpu(int cpu_id)
+{
+ uint32_t apic_id = x86_cpu_to_apicid[cpu_id];
+
+ /*
+ * If we've just learned of more available C states, wake the CPU if
+ * it's parked, so it can go back to sleep in perhaps a deeper state.
+ */
+ if ( park_offline_cpus && apic_id != BAD_APICID )
+ {
+ unsigned long flags;
+
+ local_irq_save(flags);
+ apic_wait_icr_idle();
+ apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, apic_id);
+ local_irq_restore(flags);
+ }
+}
+
long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power)
{
XEN_GUEST_HANDLE(xen_processor_cx_t) states;
@@ -1363,24 +1384,27 @@ long set_cx_pminfo(uint32_t acpi_id, struct xen_processor_power *power)
set_cx(acpi_power, &xen_cx);
}
- if ( !cpu_online(cpu_id) )
- {
- uint32_t apic_id = x86_cpu_to_apicid[cpu_id];
-
- /*
- * If we've just learned of more available C states, wake the CPU if
- * it's parked, so it can go back to sleep in perhaps a deeper state.
- */
- if ( park_offline_cpus && apic_id != BAD_APICID )
- {
- unsigned long flags;
-
- local_irq_save(flags);
- apic_wait_icr_idle();
- apic_icr_write(APIC_DM_NMI | APIC_DEST_PHYSICAL, apic_id);
- local_irq_restore(flags);
+ if ( cpu_id == 0 && pm_idle_save == NULL ) {
+ /* Now that we have the ACPI info from dom0, try again to setup
+ * mwait-idle*/
+ ret = mwait_idle_init(&cpu_nfb, true);
+ if (ret >= 0) {
+ unsigned int cpu;
+ /* mwait-idle took over, call it's initializer for all CPUs*/
+ for_each_present_cpu ( cpu )
+ {
+ cpu_nfb.notifier_call(&cpu_nfb, CPU_UP_PREPARE, (void *)(long)cpu);
+ cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, (void *)(long)cpu);
+ if ( !cpu_online(cpu) ) {
+ repark_cpu(cpu);
+ }
+ }
+ return 0;
}
}
+
+ if ( !cpu_online(cpu_id) )
+ repark_cpu(cpu_id);
else if ( cpuidle_current_governor->enable )
{
ret = cpuidle_current_governor->enable(acpi_power);
@@ -1680,7 +1704,7 @@ static int __init cf_check cpuidle_presmp_init(void)
if ( !xen_cpuidle )
return 0;
- mwait_idle_init(&cpu_nfb);
+ mwait_idle_init(&cpu_nfb, false);
cpu_nfb.notifier_call(&cpu_nfb, CPU_UP_PREPARE, cpu);
cpu_nfb.notifier_call(&cpu_nfb, CPU_ONLINE, cpu);
register_cpu_notifier(&cpu_nfb);
diff --git a/xen/arch/x86/cpu/mwait-idle.c b/xen/arch/x86/cpu/mwait-idle.c
index ae6987117169..511c666f2d6b 100644
--- a/xen/arch/x86/cpu/mwait-idle.c
+++ b/xen/arch/x86/cpu/mwait-idle.c
@@ -60,15 +60,20 @@
#undef PREFIX
#define PREFIX "mwait-idle: "
+#define pr_err(fmt...) printk(KERN_ERR fmt)
+
#ifdef DEBUG
# define pr_debug(fmt...) printk(KERN_DEBUG fmt)
#else
# define pr_debug(fmt...)
#endif
-static __initdata bool opt_mwait_idle = true;
+static bool opt_mwait_idle = true;
boolean_param("mwait-idle", opt_mwait_idle);
+static bool opt_mwait_idle_acpi = false;
+boolean_param("mwait-idle-acpi", opt_mwait_idle_acpi);
+
static unsigned int mwait_substates;
/*
@@ -81,7 +86,7 @@ static unsigned int mwait_substates;
* exclusive C-states, this parameter has no effect.
*/
static unsigned int __ro_after_init preferred_states_mask;
-static char __initdata preferred_states[64];
+static char preferred_states[64];
string_param("preferred-cstates", preferred_states);
#define LAPIC_TIMER_ALWAYS_RELIABLE 0xFFFFFFFF
@@ -1140,6 +1145,9 @@ static const struct idle_cpu idle_cpu_snr = {
.c1e_promotion = C1E_PROMOTION_DISABLE,
};
+static struct idle_cpu __read_mostly idle_cpu_acpi = {
+};
+
#define ICPU(model, cpu) \
{ X86_VENDOR_INTEL, 6, INTEL_FAM6_ ## model, X86_FEATURE_ALWAYS, \
&idle_cpu_ ## cpu}
@@ -1425,21 +1433,92 @@ static void __init mwait_idle_state_table_update(void)
}
}
-static int __init mwait_idle_probe(void)
+static int mwait_idle_state_table_from_acpi(void) {
+ // Linux tries every CPU until it finds one that declares FFH as entry
+ // method for all C-states in it's ACPI table. It assumes that the
+ // config is identical for all CPUs. So let's just check the first CPU.
+
+ int rc = -EINVAL;
+ struct acpi_processor_power *acpi_power = processor_powers[0];
+ struct cpuidle_state *state_table = xzalloc_array(
+ struct cpuidle_state,
+ acpi_power->count + 1 /* NULL at end */ - 1 /* no C0 */
+ );
+
+ if (state_table == NULL) {
+ pr_err(PREFIX "failed to allocate state table\n");
+ rc = -ENOMEM;
+ goto ret;
+ }
+
+ for (unsigned int cstate = 1; cstate < acpi_power->count; ++cstate) {
+ struct acpi_processor_cx *acpi_cx = &acpi_power->states[cstate];
+ struct cpuidle_state *idle_cx = &state_table[cstate - 1];
+ if (acpi_cx->entry_method != ACPI_CSTATE_EM_FFH) {
+ pr_debug(PREFIX "ACPI based config not usable: Entry method for C-state %u isn't FFH\n", cstate);
+ rc = -ENODEV;
+ goto ret;
+ }
+
+ snprintf(idle_cx->name, sizeof(idle_cx->name), "C%u", cstate);
+
+ idle_cx->flags = MWAIT2flg(acpi_cx->address);
+ if (acpi_cx->type > ACPI_STATE_C2)
+ idle_cx->flags |= CPUIDLE_FLAG_TLB_FLUSHED;
+ // Like Linux we don't set CPUIDLE_FLAG_IBRS
+
+ idle_cx->exit_latency = acpi_cx->latency;
+
+ idle_cx->target_residency = acpi_cx->latency;
+ if (acpi_cx->type > ACPI_STATE_C1)
+ idle_cx->target_residency *= 3;
+ }
+
+ idle_cpu_acpi.state_table = state_table;
+ rc = 0;
+ pr_debug(PREFIX "config read from ACPI\n");
+
+ret:
+ if (rc < 0 && state_table != NULL) {
+ xfree(state_table);
+ }
+ return rc;
+}
+
+static int mwait_idle_probe(bool from_acpi)
{
unsigned int eax, ebx, ecx;
- const struct x86_cpu_id *id = x86_match_cpu(intel_idle_ids);
const char *str;
- if (!id) {
- pr_debug(PREFIX "does not run on family %d model %d\n",
- boot_cpu_data.x86, boot_cpu_data.x86_model);
- return -ENODEV;
- }
+ if (from_acpi) {
+ int rc;
- if (!boot_cpu_has(X86_FEATURE_MONITOR)) {
- pr_debug(PREFIX "Please enable MWAIT in BIOS SETUP\n");
- return -ENODEV;
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
+ boot_cpu_data.x86 != 6 ||
+ !boot_cpu_has(X86_FEATURE_MONITOR)) {
+ pr_debug(PREFIX "skipping ACPI check on unsupported CPU\n");
+ return -ENODEV;
+ }
+
+ rc = mwait_idle_state_table_from_acpi();
+ if (rc < 0)
+ return rc;
+
+ icpu = &idle_cpu_acpi;
+ } else {
+ const struct x86_cpu_id *id = x86_match_cpu(intel_idle_ids);
+ if (!id) {
+ pr_debug(PREFIX "no interal config for family %d model %d\n",
+ boot_cpu_data.x86, boot_cpu_data.x86_model);
+ return -ENODEV;
+ }
+
+ if (!boot_cpu_has(X86_FEATURE_MONITOR)) {
+ pr_debug(PREFIX "Please enable MWAIT in BIOS SETUP\n");
+ return -ENODEV;
+ }
+
+ icpu = id->driver_data;
}
if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
@@ -1459,7 +1538,6 @@ static int __init mwait_idle_probe(void)
pr_debug(PREFIX "MWAIT substates: %#x\n", mwait_substates);
- icpu = id->driver_data;
cpuidle_state_table = icpu->state_table;
if (boot_cpu_has(X86_FEATURE_ARAT))
@@ -1504,7 +1582,8 @@ static int __init mwait_idle_probe(void)
if (str[0])
printk("unrecognized \"preferred-cstates=%s\"\n", str);
- mwait_idle_state_table_update();
+ if (!from_acpi)
+ mwait_idle_state_table_update();
return 0;
}
@@ -1613,14 +1692,19 @@ static int cf_check mwait_idle_cpu_init(
return NOTIFY_DONE;
}
-int __init mwait_idle_init(struct notifier_block *nfb)
+int mwait_idle_init(struct notifier_block *nfb, bool from_acpi)
{
int err;
+ if (from_acpi && !opt_mwait_idle_acpi) {
+ pr_debug(PREFIX "ACPI based config disabled\n");
+ return -EPERM;
+ }
+
if (pm_idle_save)
return -ENODEV;
- err = mwait_idle_probe();
+ err = mwait_idle_probe(from_acpi);
if (!err && !boot_cpu_has(X86_FEATURE_ARAT)) {
hpet_broadcast_init();
if (xen_cpuidle < 0 && !hpet_broadcast_is_available())
diff --git a/xen/arch/x86/include/asm/cpuidle.h b/xen/arch/x86/include/asm/cpuidle.h
index 707b3e948d45..3f5cd40fd596 100644
--- a/xen/arch/x86/include/asm/cpuidle.h
+++ b/xen/arch/x86/include/asm/cpuidle.h
@@ -15,7 +15,7 @@ extern void (*lapic_timer_on)(void);
extern uint64_t (*cpuidle_get_tick)(void);
-int mwait_idle_init(struct notifier_block *nfb);
+int mwait_idle_init(struct notifier_block *nfb, bool);
int cpuidle_init_cpu(unsigned int cpu);
void cf_check default_dead_idle(void);
void cf_check acpi_dead_idle(void);
--
2.44.0