From 0769444f90b06ab824a45abace252832f2d1d76f Mon Sep 17 00:00:00 2001 From: Nikolay Martyanov Date: Tue, 1 Oct 2024 11:59:06 -0700 Subject: [PATCH] pillar: Adapt pillar to use native cpupinning Update pillar's handling of QEMU guests to take advantage of native CPU pinning options introduced in the newer version of QEMU (8.0.4). This eliminates the need for our custom patches to QEMU for CPU pinning. The following changes are included: Removing custom qemu cpu-pinning patches to include the native qemu-pinning feature (1) Refactoring CPU pinning to use integer slices instead of comma-separated strings (2) Updating QEMU command-line arguments to include native CPU pinning options (3) Ensuring compatibility with both KVM and Xen hypervisors (4) This change allows us to leverage upstream QEMU improvements and simplifies the codebase by removing custom patches and complex string manipulations. Tested with QEMU version 8.0.4 Co-authored-by: Roja Eswaran Signed-off-by: Roja Eswaran --- pkg/pillar/cmd/domainmgr/domainmgr.go | 48 ++--- pkg/pillar/containerd/oci.go | 8 +- pkg/pillar/hypervisor/kvm.go | 20 +- pkg/pillar/hypervisor/xen.go | 8 +- pkg/pillar/types/domainmgrtypes.go | 2 +- ...12-remove-vanillaqemu4.19-cpupinning.patch | 201 ----------------- ...qemu-Add-cpu-pin-and-cpumask-options.patch | 152 ------------- .../14-qemu-Init-CPU-mask-per-VCPU.patch | 202 ------------------ ...inity-of-QEMU-threads-according-to-t.patch | 101 --------- 9 files changed, 44 insertions(+), 698 deletions(-) delete mode 100644 pkg/xen-tools/patches-4.19.0/12-remove-vanillaqemu4.19-cpupinning.patch delete mode 100644 pkg/xen-tools/patches-4.19.0/13-qemu-Add-cpu-pin-and-cpumask-options.patch delete mode 100644 pkg/xen-tools/patches-4.19.0/14-qemu-Init-CPU-mask-per-VCPU.patch delete mode 100644 pkg/xen-tools/patches-4.19.0/15-qemu-Set-the-affinity-of-QEMU-threads-according-to-t.patch diff --git a/pkg/pillar/cmd/domainmgr/domainmgr.go b/pkg/pillar/cmd/domainmgr/domainmgr.go index 
fcfabd3e86..bfd3d8fbde 100644 --- a/pkg/pillar/cmd/domainmgr/domainmgr.go +++ b/pkg/pillar/cmd/domainmgr/domainmgr.go @@ -1243,38 +1243,24 @@ func setCgroupCpuset(config *types.DomainConfig, status *types.DomainStatus) err log.Warnf("Failed to find cgroups directory for %s", config.DisplayName) return nil } - err = controller.Update(&specs.LinuxResources{CPU: &specs.LinuxCPU{Cpus: status.VmConfig.CPUs}}) + // Convert a list of CPUs to a CPU string + cpuStrings := make([]string, 0) + for _, cpu := range status.VmConfig.CPUs { + cpuStrings = append(cpuStrings, strconv.Itoa(cpu)) + } + cpuMask := strings.Join(cpuStrings, ",") + + err = controller.Update(&specs.LinuxResources{CPU: &specs.LinuxCPU{Cpus: cpuMask}}) if err != nil { log.Warnf("Failed to update CPU set for %s", config.DisplayName) return err } - log.Functionf("Adjust the cgroups cpuset of %s to %s", config.DisplayName, status.VmConfig.CPUs) + log.Functionf("Adjust the cgroups cpuset of %s to %v", config.DisplayName, status.VmConfig.CPUs) return nil } -// constructNonPinnedCpumaskString returns a cpumask that contains at least CPUs reserved for the system -// services. Hence, it can never be empty. 
-func constructNonPinnedCpumaskString(ctx *domainContext) string { - result := "" - for _, cpu := range ctx.cpuAllocator.GetAllFree() { - addToMask(cpu, &result) - } - return result -} - -func addToMask(cpu int, s *string) { - if s == nil { - return - } - if *s == "" { - *s = fmt.Sprintf("%d", cpu) - } else { - *s = fmt.Sprintf("%s,%d", *s, cpu) - } -} - func updateNonPinnedCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) error { - status.VmConfig.CPUs = constructNonPinnedCpumaskString(ctx) + status.VmConfig.CPUs = ctx.cpuAllocator.GetAllFree() err := setCgroupCpuset(config, status) if err != nil { return errors.New("failed to redistribute CPUs between VMs, can affect the inter-VM isolation") @@ -1292,10 +1278,10 @@ func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.Do return errors.New("failed to allocate necessary amount of CPUs") } for _, cpu := range cpusToAssign { - addToMask(cpu, &status.VmConfig.CPUs) + status.VmConfig.CPUs = append(status.VmConfig.CPUs, cpu) } } else { // VM has no pinned CPUs, assign all the CPUs from the shared set - status.VmConfig.CPUs = constructNonPinnedCpumaskString(ctx) + status.VmConfig.CPUs = ctx.cpuAllocator.GetAllFree() } return nil } @@ -1303,12 +1289,12 @@ func assignCPUs(ctx *domainContext, config *types.DomainConfig, status *types.Do // releaseCPUs releases the CPUs that were previously assigned to the VM. // The cpumask in the *status* is updated accordingly, and the CPUs are released in the CPUAllocator context. 
func releaseCPUs(ctx *domainContext, config *types.DomainConfig, status *types.DomainStatus) { - if ctx.cpuPinningSupported && config.VmConfig.CPUsPinned && status.VmConfig.CPUs != "" { + if ctx.cpuPinningSupported && config.VmConfig.CPUsPinned && len(status.VmConfig.CPUs) != 0 { if err := ctx.cpuAllocator.Free(config.UUIDandVersion.UUID); err != nil { log.Errorf("Failed to free CPUs for %s: %s", config.DisplayName, err) } } - status.VmConfig.CPUs = "" + status.VmConfig.CPUs = nil } func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) { @@ -1330,7 +1316,7 @@ func handleCreate(ctx *domainContext, key string, config *types.DomainConfig) { Service: config.Service, } - status.VmConfig.CPUs = "" + status.VmConfig.CPUs = make([]int, 0) // Note that the -emu interface doesn't exist until after boot of the domU, but we // initialize the VifList here with the VifUsed. @@ -1545,7 +1531,7 @@ func doActivate(ctx *domainContext, config types.DomainConfig, publishDomainStatus(ctx, status) return } - log.Functionf("CPUs for %s assigned: %s", config.DisplayName, status.VmConfig.CPUs) + log.Functionf("CPUs for %s assigned: %v", config.DisplayName, status.VmConfig.CPUs) } if errDescription := reserveAdapters(ctx, config); errDescription != nil { @@ -1932,7 +1918,7 @@ func doCleanup(ctx *domainContext, status *types.DomainStatus) { } triggerCPUNotification() } - status.VmConfig.CPUs = "" + status.VmConfig.CPUs = nil } releaseAdapters(ctx, status.IoAdapterList, status.UUIDandVersion.UUID, status) diff --git a/pkg/pillar/containerd/oci.go b/pkg/pillar/containerd/oci.go index 0f273c9f93..5c39aa1940 100644 --- a/pkg/pillar/containerd/oci.go +++ b/pkg/pillar/containerd/oci.go @@ -486,8 +486,12 @@ func (s *ociSpec) UpdateFromDomain(dom *types.DomainConfig, status *types.Domain s.Linux.Resources.Memory.Limit = &m s.Linux.Resources.CPU.Period = &p s.Linux.Resources.CPU.Quota = &q - if status.VmConfig.CPUs != "" { - s.Linux.Resources.CPU.Cpus = status.VmConfig.CPUs + if
len(status.VmConfig.CPUs) != 0 { + cpusAsString := make([]string, len(status.VmConfig.CPUs)) + for i, cpu := range status.VmConfig.CPUs { + cpusAsString[i] = fmt.Sprintf("%d", cpu) + } + s.Linux.Resources.CPU.Cpus = strings.Join(cpusAsString, ",") } s.Linux.CgroupsPath = fmt.Sprintf("/%s/%s", ctrdServicesNamespace, dom.GetTaskName()) diff --git a/pkg/pillar/hypervisor/kvm.go b/pkg/pillar/hypervisor/kvm.go index 953a108eed..173ec99e80 100644 --- a/pkg/pillar/hypervisor/kvm.go +++ b/pkg/pillar/hypervisor/kvm.go @@ -70,12 +70,6 @@ const qemuConfTemplate = `# This file is automatically generated by domainmgr [machine] type = "{{.Machine}}" dump-guest-core = "off" -{{- if .DomainStatus.CPUs }} - cpumask = "{{.DomainStatus.CPUs}}" -{{- end -}} -{{- if .DomainConfig.CPUsPinned }} - cpu-pin = "on" -{{- end -}} {{- if eq .Machine "virt" }} accel = "kvm:tcg" gic-version = "host" @@ -842,6 +836,20 @@ func (ctx KvmContext) Setup(status types.DomainStatus, config types.DomainConfig "-readconfig", file.Name(), "-pidfile", kvmStateDir+domainName+"/pid") + // Add CPUs affinity as a parameter to qemu. + // It's not supported to be configured in the .ini file so we need to add it here. 
+ // The arguments are in the format of: -object thread-context,id=tc1,cpu-affinity=0-1,cpu-affinity=6-7 + // The thread-context object is introduced in qemu 7.2 + if config.CPUsPinned { + // Create the thread-context object string + threadContext := "thread-context,id=tc1" + for _, cpu := range status.CPUs { + // Add the cpu-affinity arguments to the thread-context object + threadContext += fmt.Sprintf(",cpu-affinity=%d", cpu) + } + args = append(args, "-object", threadContext) + } + spec, err := ctx.setupSpec(&status, &config, status.OCIConfigDir) if err != nil { diff --git a/pkg/pillar/hypervisor/xen.go b/pkg/pillar/hypervisor/xen.go index f25fa63339..7abd8df334 100644 --- a/pkg/pillar/hypervisor/xen.go +++ b/pkg/pillar/hypervisor/xen.go @@ -260,8 +260,12 @@ func (ctx xenContext) CreateDomConfig(domainName string, maxCpus = vCpus } file.WriteString(fmt.Sprintf("maxvcpus = %d\n", maxCpus)) - if config.CPUs != "" { - file.WriteString(fmt.Sprintf("cpus = \"%s\"\n", config.CPUs)) + if len(config.CPUs) > 0 { + cpusString := make([]string, 0) + for _, curCPU := range config.CPUs { + cpusString = append(cpusString, strconv.Itoa(curCPU)) + } + file.WriteString(fmt.Sprintf("cpus = \"%s\"\n", strings.Join(cpusString, ","))) } if config.DeviceTree != "" { file.WriteString(fmt.Sprintf("device_tree = \"%s\"\n", diff --git a/pkg/pillar/types/domainmgrtypes.go b/pkg/pillar/types/domainmgrtypes.go index 3e7bd762cc..678a0957ca 100644 --- a/pkg/pillar/types/domainmgrtypes.go +++ b/pkg/pillar/types/domainmgrtypes.go @@ -242,7 +242,7 @@ type VmConfig struct { ExtraArgs string // added to bootargs BootLoader string // default "" // For CPU pinning - CPUs string // default "", list of "1,2" + CPUs []int // default nil, list of [1,2] // Needed for device passthru DeviceTree string // default ""; sets device_tree // Example: device_tree="guest-gpio.dtb" diff --git a/pkg/xen-tools/patches-4.19.0/12-remove-vanillaqemu4.19-cpupinning.patch 
b/pkg/xen-tools/patches-4.19.0/12-remove-vanillaqemu4.19-cpupinning.patch deleted file mode 100644 index 0bdd83e6bd..0000000000 --- a/pkg/xen-tools/patches-4.19.0/12-remove-vanillaqemu4.19-cpupinning.patch +++ /dev/null @@ -1,201 +0,0 @@ -From: Roja Eswaran -Date: Mon, 19 Sep 2024 11:00:50 +0530 -Subject: [PATCH] 12-remove-vanillaqemu4.19-cpupinning.patch removes new qemu_thread_set_affinity implementation (QEMU 8.0.4) - -diff --git a/tools/qemu-xen/include/qemu/thread.h b/tools/qemu-xen/include/qemu/thread.h -index dd3822d..a5b9ca8 100644 ---- a/tools/qemu-xen/include/qemu/thread.h -+++ b/tools/qemu-xen/include/qemu/thread.h -@@ -189,10 +189,6 @@ void qemu_event_destroy(QemuEvent *ev); - void qemu_thread_create(QemuThread *thread, const char *name, - void *(*start_routine)(void *), - void *arg, int mode); --int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, -- unsigned long nbits); --int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, -- unsigned long *nbits); - void *qemu_thread_join(QemuThread *thread); - void qemu_thread_get_self(QemuThread *thread); - bool qemu_thread_is_self(QemuThread *thread); -diff --git a/tools/qemu-xen/util/qemu-thread-posix.c b/tools/qemu-xen/util/qemu-thread-posix.c -index b2e26e2..0a563ba 100644 ---- a/tools/qemu-xen/util/qemu-thread-posix.c -+++ b/tools/qemu-xen/util/qemu-thread-posix.c -@@ -589,75 +589,6 @@ void qemu_thread_create(QemuThread *thread, const char *name, - pthread_attr_destroy(&attr); - } - --int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, -- unsigned long nbits) --{ --#if defined(CONFIG_PTHREAD_AFFINITY_NP) -- const size_t setsize = CPU_ALLOC_SIZE(nbits); -- unsigned long value; -- cpu_set_t *cpuset; -- int err; -- -- cpuset = CPU_ALLOC(nbits); -- g_assert(cpuset); -- -- CPU_ZERO_S(setsize, cpuset); -- value = find_first_bit(host_cpus, nbits); -- while (value < nbits) { -- CPU_SET_S(value, setsize, cpuset); -- value = find_next_bit(host_cpus, 
nbits, value + 1); -- } -- -- err = pthread_setaffinity_np(thread->thread, setsize, cpuset); -- CPU_FREE(cpuset); -- return err; --#else -- return -ENOSYS; --#endif --} -- --int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, -- unsigned long *nbits) --{ --#if defined(CONFIG_PTHREAD_AFFINITY_NP) -- unsigned long tmpbits; -- cpu_set_t *cpuset; -- size_t setsize; -- int i, err; -- -- tmpbits = CPU_SETSIZE; -- while (true) { -- setsize = CPU_ALLOC_SIZE(tmpbits); -- cpuset = CPU_ALLOC(tmpbits); -- g_assert(cpuset); -- -- err = pthread_getaffinity_np(thread->thread, setsize, cpuset); -- if (err) { -- CPU_FREE(cpuset); -- if (err != -EINVAL) { -- return err; -- } -- tmpbits *= 2; -- } else { -- break; -- } -- } -- -- /* Convert the result into a proper bitmap. */ -- *nbits = tmpbits; -- *host_cpus = bitmap_new(tmpbits); -- for (i = 0; i < tmpbits; i++) { -- if (CPU_ISSET(i, cpuset)) { -- set_bit(i, *host_cpus); -- } -- } -- CPU_FREE(cpuset); -- return 0; --#else -- return -ENOSYS; --#endif --} -- - void qemu_thread_get_self(QemuThread *thread) - { - thread->thread = pthread_self(); -diff --git a/tools/qemu-xen/util/qemu-thread-win32.c b/tools/qemu-xen/util/qemu-thread-win32.c -index a7fe3cc..2c02a1b 100644 ---- a/tools/qemu-xen/util/qemu-thread-win32.c -+++ b/tools/qemu-xen/util/qemu-thread-win32.c -@@ -507,18 +507,6 @@ void qemu_thread_create(QemuThread *thread, const char *name, - thread->data = data; - } - --int qemu_thread_set_affinity(QemuThread *thread, unsigned long *host_cpus, -- unsigned long nbits) --{ -- return -ENOSYS; --} -- --int qemu_thread_get_affinity(QemuThread *thread, unsigned long **host_cpus, -- unsigned long *nbits) --{ -- return -ENOSYS; --} -- - void qemu_thread_get_self(QemuThread *thread) - { - thread->data = qemu_thread_data; -diff --git a/tools/qemu-xen/util/thread-context.c b/tools/qemu-xen/util/thread-context.c -index 2bc7883..ba443f5 100644 ---- a/tools/qemu-xen/util/thread-context.c -+++ 
b/tools/qemu-xen/util/thread-context.c -@@ -113,21 +113,7 @@ static void thread_context_set_cpu_affinity(Object *obj, Visitor *v, - set_bit(l->value, bitmap); - } - -- if (tc->thread_id != -1) { -- /* -- * Note: we won't be adjusting the affinity of any thread that is still -- * around, but only the affinity of the context thread. -- */ -- ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits); -- if (ret) { -- error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); -- } -- } else { -- tc->init_cpu_bitmap = bitmap; -- bitmap = NULL; -- tc->init_cpu_nbits = nbits; -- } --out: -+ out: - g_free(bitmap); - qapi_free_uint16List(host_cpus); - } -@@ -147,12 +133,6 @@ static void thread_context_get_cpu_affinity(Object *obj, Visitor *v, - return; - } - -- ret = qemu_thread_get_affinity(&tc->thread, &bitmap, &nbits); -- if (ret) { -- error_setg(errp, "Getting CPU affinity failed: %s", strerror(ret)); -- return; -- } -- - value = find_first_bit(bitmap, nbits); - while (value < nbits) { - QAPI_LIST_APPEND(tail, value); -@@ -213,21 +193,7 @@ static void thread_context_set_node_affinity(Object *obj, Visitor *v, - goto out; - } - -- if (tc->thread_id != -1) { -- /* -- * Note: we won't be adjusting the affinity of any thread that is still -- * around for now, but only the affinity of the context thread. 
-- */ -- ret = qemu_thread_set_affinity(&tc->thread, bitmap, nbits); -- if (ret) { -- error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); -- } -- } else { -- tc->init_cpu_bitmap = bitmap; -- bitmap = NULL; -- tc->init_cpu_nbits = nbits; -- } --out: -+ out: - g_free(bitmap); - qapi_free_uint16List(host_nodes); - #else -@@ -262,15 +228,6 @@ static void thread_context_instance_complete(UserCreatable *uc, Error **errp) - qemu_sem_wait(&tc->sem); - } - -- if (tc->init_cpu_bitmap) { -- ret = qemu_thread_set_affinity(&tc->thread, tc->init_cpu_bitmap, -- tc->init_cpu_nbits); -- if (ret) { -- error_setg(errp, "Setting CPU affinity failed: %s", strerror(ret)); -- } -- g_free(tc->init_cpu_bitmap); -- tc->init_cpu_bitmap = NULL; -- } - } - - static void thread_context_class_init(ObjectClass *oc, void *data) diff --git a/pkg/xen-tools/patches-4.19.0/13-qemu-Add-cpu-pin-and-cpumask-options.patch b/pkg/xen-tools/patches-4.19.0/13-qemu-Add-cpu-pin-and-cpumask-options.patch deleted file mode 100644 index 0c77f63167..0000000000 --- a/pkg/xen-tools/patches-4.19.0/13-qemu-Add-cpu-pin-and-cpumask-options.patch +++ /dev/null @@ -1,152 +0,0 @@ -From ccfea3c5e131b3ee623aa5e4e16e13bf8d0291d2 Mon Sep 17 00:00:00 2001 -From: Nikolay Martyanov -Date: Wed, 28 Sep 2022 15:47:11 +0200 -Subject: [PATCH 13/15] qemu: Add 'cpu-pin' and 'cpumask' options. - -Add the 'cpumask' option to set the CPU mask for the threads created by QEMU. -The mask affects all the threads belonging to a VM: both VCPU threads and -non-VCPU threads. If the option is not provided, it's considered to be '-1', -which corresponds to all the available CPUs. -The CPU mask in represented in the form "d[[,-]d]*". E.g. "0-2" or "0-2,5,6". -CPUs start with 0. For example, the mask "0,3" would mean that only -physical CPUs 0 and 3 are available for the VM. - -Add the 'cpu-pin' option to pin any VCPU thread to a specific CPU. If the -option is set, any VCPU thread will be assigned to a CPU provided with the -'cpumask'. 
If it's not provided, the VPCU threads can migrate from a CPU to a -CPU within the set of CPUs provided by the 'cpumask' option. - -Signed-off-by: Nikolay Martyanov ---- - tools/qemu-xen/hw/core/machine.c | 38 ++++++++++++++++++++++++++++++ - tools/qemu-xen/include/hw/boards.h | 2 ++ - tools/qemu-xen/qemu-options.hx | 18 ++++++++++++++ - tools/qemu-xen/softmmu/vl.c | 4 ++++ - 4 files changed, 62 insertions(+) - -diff --git a/tools/qemu-xen/hw/core/machine.c b/tools/qemu-xen/hw/core/machine.c -index 8d1a90c..2f9993e 100644 ---- a/tools/qemu-xen/hw/core/machine.c -+++ b/tools/qemu-xen/hw/core/machine.c -@@ -372,6 +372,35 @@ static void machine_set_graphics(Object *obj, bool value, Error **errp) - ms->enable_graphics = value; - } - -+static bool machine_get_cpu_pin(Object *obj, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ return ms->cpu_pin; -+} -+ -+static void machine_set_cpu_pin(Object *obj, bool value, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ ms->cpu_pin = value; -+} -+ -+static char *machine_get_cpumask(Object *obj, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ return g_strdup(ms->cpumask_str); -+} -+ -+static void machine_set_cpumask(Object *obj, const char *value, Error **errp) -+{ -+ MachineState *ms = MACHINE(obj); -+ -+ g_free(ms->cpumask_str); -+ ms->cpumask_str = g_strdup(value); -+} -+ - static char *machine_get_firmware(Object *obj, Error **errp) - { - MachineState *ms = MACHINE(obj); -@@ -841,6 +870,15 @@ static void machine_class_init(ObjectClass *oc, void *data) - object_class_property_set_description(oc, "usb", - "Set on/off to enable/disable usb"); - -+ object_class_property_add_bool(oc, "cpu-pin", -+ machine_get_cpu_pin, machine_set_cpu_pin); -+ object_class_property_set_description(oc, "cpu-pin", -+ "Set on/off to enable/disable CPU pinning"); -+ -+ object_class_property_add_str(oc, "cpumask", -+ machine_get_cpumask, machine_set_cpumask); -+ object_class_property_set_description(oc, "cpumask", "CPU 
Mask"); -+ - object_class_property_add_bool(oc, "graphics", - machine_get_graphics, machine_set_graphics); - object_class_property_set_description(oc, "graphics", -diff --git a/tools/qemu-xen/include/hw/boards.h b/tools/qemu-xen/include/hw/boards.h -index 426ce5f..b06f13e 100644 ---- a/tools/qemu-xen/include/hw/boards.h -+++ b/tools/qemu-xen/include/hw/boards.h -@@ -294,6 +294,8 @@ struct MachineState { - char *kernel_filename; - char *kernel_cmdline; - char *initrd_filename; -+ bool cpu_pin; -+ char *cpumask_str; - const char *cpu_type; - AccelState *accelerator; - CPUArchIdList *possible_cpus; -diff --git a/tools/qemu-xen/qemu-options.hx b/tools/qemu-xen/qemu-options.hx -index 708583b..a33ee1e 100644 ---- a/tools/qemu-xen/qemu-options.hx -+++ b/tools/qemu-xen/qemu-options.hx -@@ -509,6 +509,24 @@ SRST - Preallocate memory when using -mem-path. - ERST - -+DEF("cpu-pin", 0, QEMU_OPTION_cpu_pin, -+ "-cpu-pin pin any VPCU thread to a physical CPU\n", -+ QEMU_ARCH_ALL) -+SRST -+``-cpu-pin`` -+ Pin any VCPU thread to a physical CPU. -+ERST -+ -+DEF("cpumask", HAS_ARG, QEMU_OPTION_cpumask, -+ "-cpumask=value define the set of CPUs used by the VM\n", -+ QEMU_ARCH_ALL) -+SRST -+``-cpumask=value`` -+ CPU mask in form "d[[,-]d]*". E.g. "0-2" or "0-2,5,6". CPUs start with 0. -+ For example, the mask "0,3" would mean that only physical CPUs 0 and 3 are -+ available for the VM. 
-+ERST -+ - DEF("k", HAS_ARG, QEMU_OPTION_k, - "-k language use keyboard layout (for example 'fr' for French)\n", - QEMU_ARCH_ALL) -diff --git a/tools/qemu-xen/softmmu/vl.c b/tools/qemu-xen/softmmu/vl.c -index ea20b23..59ff7c7 100644 ---- a/tools/qemu-xen/softmmu/vl.c -+++ b/tools/qemu-xen/softmmu/vl.c -@@ -173,6 +173,7 @@ static QemuPluginList plugin_list = QTAILQ_HEAD_INITIALIZER(plugin_list); - static BlockdevOptionsQueue bdo_queue = QSIMPLEQ_HEAD_INITIALIZER(bdo_queue); - static bool nographic = false; - static int mem_prealloc; /* force preallocation of physical target memory */ -+static bool cpu_pin = false; - static const char *vga_model = NULL; - static DisplayOptions dpy; - static int num_serial_hds; -@@ -3360,6 +3361,10 @@ void qemu_init(int argc, char **argv) - case QEMU_OPTION_nodefaults: - has_defaults = 0; - break; -+ case QEMU_OPTION_cpu_pin: -+ cpu_pin = true; -+ break; -+ - case QEMU_OPTION_xen_domid: - if (!(accel_find("xen")) && !(accel_find("kvm"))) { - error_report("Option not supported for this target"); - -base-commit: 9c55fdd5e54c5ea4bf238cee787f13a03eac1c86 --- -2.35.1 - diff --git a/pkg/xen-tools/patches-4.19.0/14-qemu-Init-CPU-mask-per-VCPU.patch b/pkg/xen-tools/patches-4.19.0/14-qemu-Init-CPU-mask-per-VCPU.patch deleted file mode 100644 index f6a52c9fdb..0000000000 --- a/pkg/xen-tools/patches-4.19.0/14-qemu-Init-CPU-mask-per-VCPU.patch +++ /dev/null @@ -1,202 +0,0 @@ -From bea4aaef5c4dd309799055da9c5d7036579b89e9 Mon Sep 17 00:00:00 2001 -From: Nikolay Martyanov -Date: Wed, 28 Sep 2022 15:52:24 +0200 -Subject: [PATCH 14/15] qemu: Init CPU mask per VCPU. 
- -Signed-off-by: Nikolay Martyanov ---- - tools/qemu-xen/include/hw/boards.h | 1 + - tools/qemu-xen/include/hw/core/cpu.h | 2 + - tools/qemu-xen/softmmu/cpus.c | 31 ++++++++++++ - tools/qemu-xen/softmmu/vl.c | 75 ++++++++++++++++++++++++++++ - 4 files changed, 109 insertions(+) - -diff --git a/tools/qemu-xen/include/hw/boards.h b/tools/qemu-xen/include/hw/boards.h -index b06f13e..a4b4f11 100644 ---- a/tools/qemu-xen/include/hw/boards.h -+++ b/tools/qemu-xen/include/hw/boards.h -@@ -296,6 +296,7 @@ struct MachineState { - char *initrd_filename; - bool cpu_pin; - char *cpumask_str; -+ uint64_t cpumask; - const char *cpu_type; - AccelState *accelerator; - CPUArchIdList *possible_cpus; -diff --git a/tools/qemu-xen/include/hw/core/cpu.h b/tools/qemu-xen/include/hw/core/cpu.h -index 397fd3a..1bf357b 100644 ---- a/tools/qemu-xen/include/hw/core/cpu.h -+++ b/tools/qemu-xen/include/hw/core/cpu.h -@@ -344,6 +344,9 @@ struct CPUState { - bool created; - bool stop; - bool stopped; -+ bool pinned; -+ uint64_t cpumask; -+ - - /* Should CPU start in powered-off state? 
*/ - bool start_powered_off; - -diff --git a/tools/qemu-xen/softmmu/cpus.c b/tools/qemu-xen/softmmu/cpus.c -index 9cbc817..2867325 100644 ---- a/tools/qemu-xen/softmmu/cpus.c -+++ b/tools/qemu-xen/softmmu/cpus.c -@@ -72,6 +72,25 @@ static QemuMutex qemu_global_mutex; - */ - static const AccelOpsClass *cpus_accel; - -+ -+static inline void cpumask_clear_bit(uint64_t *mask, uint8_t bit) -+{ -+ *mask &= ~(1ul << bit); -+} -+ -+static inline long cpumask_get_min_bit(uint64_t mask) -+{ -+ return __builtin_ffsll(mask) - 1; -+ } -+ -+static long pick_pcpu(uint64_t *cpumask) -+{ -+ long ret = cpumask_get_min_bit(*cpumask); -+ cpumask_clear_bit(cpumask, ret); -+ return ret; -+} -+ -+ - bool cpu_is_stopped(CPUState *cpu) - { - return cpu->stopped || !runstate_is_running(); -@@ -635,6 +654,21 @@ void qemu_init_vcpu(CPUState *cpu) - cpu->stopped = true; - cpu->random_seed = qemu_guest_random_seed_thread_part1(); - -+ cpu->pinned = ms->cpu_pin; -+ static uint64_t vm_cpumask; -+ uint64_t vcpu_cpumask; -+ if (!vm_cpumask) -+ vm_cpumask = ms->cpumask; -+ if (!cpu->pinned) { -+ /* If the CPUs are not pinned, assign the whole CPU mask to the VCPU */ -+ vcpu_cpumask = vm_cpumask; -+ } else { -+ /* If the CPUs are pinned, pick only one CPU for this VCPU */ -+ vcpu_cpumask = 1ull << pick_pcpu(&vm_cpumask); -+ } -+ -+ cpu->cpumask = vcpu_cpumask; -+ - if (!cpu->as) { - /* If the target cpu hasn't set up any address spaces itself, - * give it the default one. - - -diff --git a/tools/qemu-xen/softmmu/vl.c b/tools/qemu-xen/softmmu/vl.c -index ea20b23..98287f4 100644 ---- a/tools/qemu-xen/softmmu/vl.c -+++ b/tools/qemu-xen/softmmu/vl.c -@@ -2631,6 +2631,68 @@ void qmp_x_exit_preconfig(Error **errp) - } - } - -+static inline void cpumask_set_bit(uint64_t *mask, uint8_t bit) -+{ -+ *mask |= 1ull << bit ; -+} -+ -+/* Parse d[[,-]d]* mask (0-2 or 0-2,5,6). CPUs start with 0. 
-+ * Return 0 in case of error, bitmask if ok -+ */ -+static uint64_t cpumask_parse(const char* cpumask_str) -+{ -+ uint64_t cpumask = 0; -+ const char *cur = cpumask_str; -+ bool range = false; -+ -+ assert(cpumask_str != NULL); -+ -+ if (strcmp(cpumask_str, "") == 0) -+ return 0; -+ -+ if (cpumask_str[0] == '-') { -+ warn_report("The CPU mask cannot start with -\n"); -+ return 0; -+ } -+ -+ uint8_t last_set; -+ while (*cur) { -+ unsigned long num; -+ char *end; -+ if (*cur == '-') { -+ cur++; -+ range = true; -+ continue; -+ } -+ if (*cur == ',') { -+ cur++; -+ continue; -+ } -+ num = strtoul(cur, &end, 10); -+ if (num > UCHAR_MAX) { -+ warn_report ("Too big CPU number is provided! Numbers more than %d " -+ "are not supported at the moment!\n", UCHAR_MAX); -+ return 0; -+ } -+ if (end != cur) { -+ if (num == 0 || num > sizeof(cpumask) * BITS_PER_BYTE) -+ return 0; -+ if (range) { -+ range = false; -+ for (int i = last_set + 1; i < num; i++) -+ cpumask_set_bit(&cpumask, i); -+ } -+ cpumask_set_bit(&cpumask, num); -+ last_set = num; -+ cur = end; -+ continue; -+ } -+ warn_report("The CPU mask option is broken!\n"); -+ return 0; -+ } -+ return cpumask; -+} -+ - void qemu_init(int argc, char **argv) - { - QemuOpts *opts; -@@ -3357,6 +3419,7 @@ void qemu_init(int argc, char **argv) - case QEMU_OPTION_only_migratable: - only_migratable = 1; - break; -+ - case QEMU_OPTION_nodefaults: - has_defaults = 0; - break; -@@ -3596,6 +3659,21 @@ void qemu_init(int argc, char **argv) - if (cpu_option) { - current_machine->cpu_type = parse_cpu_option(cpu_option); - } -+ -+ -+ current_machine->cpumask = 0; -+ if (current_machine->cpumask_str) { -+ current_machine->cpumask = cpumask_parse(current_machine->cpumask_str); -+ if (!current_machine->cpumask) { -+ current_machine->cpumask = ~0ull; -+ } -+ } -+ if (current_machine->cpu_pin) { -+ if (!current_machine->cpumask_str) { -+ current_machine->cpumask = ~0ull; -+ } -+ } -+ - /* NB: for machine none cpu_type could STILL be NULL here! 
*/ - - qemu_resolve_machine_memdev(); - - --- -2.35.1 - diff --git a/pkg/xen-tools/patches-4.19.0/15-qemu-Set-the-affinity-of-QEMU-threads-according-to-t.patch b/pkg/xen-tools/patches-4.19.0/15-qemu-Set-the-affinity-of-QEMU-threads-according-to-t.patch deleted file mode 100644 index b9526c8338..0000000000 --- a/pkg/xen-tools/patches-4.19.0/15-qemu-Set-the-affinity-of-QEMU-threads-according-to-t.patch +++ /dev/null @@ -1,101 +0,0 @@ -From f6861fa011836c9101977e2b0ad1aadaf8d45a57 Mon Sep 17 00:00:00 2001 -From: Nikolay Martyanov -Date: Wed, 28 Sep 2022 16:01:48 +0200 -Subject: [PATCH 15/15] qemu: Set the affinity of QEMU threads according to the - CPU mask options. - -Signed-off-by: Nikolay Martyanov ---- - tools/qemu-xen/include/qemu/thread.h | 2 ++ - tools/qemu-xen/softmmu/cpus.c | 6 ++++ - tools/qemu-xen/util/qemu-thread-posix.c | 37 +++++++++++++++++++++++++ - 3 files changed, 45 insertions(+) - -diff --git a/tools/qemu-xen/include/qemu/thread.h b/tools/qemu-xen/include/qemu/thread.h -index 4baf4d1..3d1a76e 100644 ---- a/tools/qemu-xen/include/qemu/thread.h -+++ b/tools/qemu-xen/include/qemu/thread.h -@@ -174,6 +174,8 @@ void qemu_event_destroy(QemuEvent *ev); - void qemu_thread_create(QemuThread *thread, const char *name, - void *(*start_routine)(void *), - void *arg, int mode); -+/* TODO implemented for POSIX only by now */ -+void qemu_thread_set_affinity(QemuThread *thread, unsigned int cpumask); - void *qemu_thread_join(QemuThread *thread); - void qemu_thread_get_self(QemuThread *thread); - bool qemu_thread_is_self(QemuThread *thread); -diff --git a/tools/qemu-xen/softmmu/cpus.c b/tools/qemu-xen/softmmu/cpus.c -index da56052..000df00 100644 ---- a/tools/qemu-xen/softmmu/cpus.c -+++ b/tools/qemu-xen/softmmu/cpus.c -@@ -2073,6 +2073,12 @@ void qemu_init_vcpu(CPUState *cpu) - qemu_dummy_start_vcpu(cpu); - } - -+ if(cpu_can_run(cpu)) -+ warn_report("Change a CPU affinity after the CPU may have been running for a while\n"); -+ -+ if (cpu->cpumask) -+ 
qemu_thread_set_affinity(cpu->thread, cpu->cpumask); -+ - while (!cpu->created) { - qemu_cond_wait(&qemu_cpu_cond, &qemu_global_mutex); - } -diff --git a/tools/qemu-xen/util/qemu-thread-posix.c b/tools/qemu-xen/util/qemu-thread-posix.c -index b4c2359..c404ff4 100644 ---- a/tools/qemu-xen/util/qemu-thread-posix.c -+++ b/tools/qemu-xen/util/qemu-thread-posix.c -@@ -17,6 +17,8 @@ - #include "qemu-thread-common.h" - #include "qemu/tsan.h" - -+#include "hw/core/cpu.h" -+ - static bool name_threads; - - void qemu_thread_naming(bool enable) -@@ -523,6 +525,41 @@ static void *qemu_thread_start(void *args) - return r; - } - -+static inline unsigned get_max_cpu_in_mask(unsigned int cpumask) -+{ -+ assert(cpumask != 0); -+ return (sizeof (cpumask) * BITS_PER_BYTE) - __builtin_clz (cpumask) - 1; -+} -+ -+void qemu_thread_set_affinity(QemuThread *thread, unsigned int cpumask) -+{ -+ int err; -+ size_t cpu_set_size; -+ cpu_set_t cpu_set; -+ unsigned int max_pcpu; -+ unsigned int cpumask_tmp = cpumask; -+ -+ CPU_ZERO(&cpu_set); -+ -+ /* set the CPU_SET according to mask */ -+ int cur_pcpu = 0; -+ while(cpumask_tmp) { -+ if (cpumask_tmp & 1) -+ CPU_SET (cur_pcpu, &cpu_set); -+ cpumask_tmp >>= 1; -+ cur_pcpu += 1; -+ } -+ -+ /* Count the size of the necessary CPU_SET */ -+ max_pcpu = get_max_cpu_in_mask(cpumask); -+ cpu_set_size = DIV_ROUND_UP(max_pcpu + 1, BITS_PER_BYTE); -+ -+ err = pthread_setaffinity_np(thread->thread, cpu_set_size, &cpu_set); -+ -+ if (err) -+ error_exit (err, __func__); -+} -+ - void qemu_thread_create(QemuThread *thread, const char *name, - void *(*start_routine)(void*), - void *arg, int mode) --- -2.35.1 -