diff --git a/pkg/pillar/hypervisor/kubevirt.go b/pkg/pillar/hypervisor/kubevirt.go index ae3c3e128f..14907416c8 100644 --- a/pkg/pillar/hypervisor/kubevirt.go +++ b/pkg/pillar/hypervisor/kubevirt.go @@ -191,6 +191,11 @@ func (ctx kubevirtContext) Setup(status types.DomainStatus, config types.DomainC logrus.Debugf("Setup called for Domain: %s, vmmode %v", domainName, config.VirtualizationMode) + if config.EnforceNetworkInterfaceOrder { + return logError("enforcing user-defined network interface order is not supported " + + "with the KubeVirt hypervisor") + } + if config.VirtualizationMode == types.NOHYPER { if err := ctx.CreatePodConfig(domainName, config, status, diskStatusList, aa, file); err != nil { return logError("failed to build kube pod config: %v", err) diff --git a/pkg/pillar/hypervisor/kvm.go b/pkg/pillar/hypervisor/kvm.go index 5fd59bd350..b73836b3f2 100644 --- a/pkg/pillar/hypervisor/kvm.go +++ b/pkg/pillar/hypervisor/kvm.go @@ -415,7 +415,7 @@ const qemuPCIPassthruTemplate = ` {{- end -}} {{- if .Xopregion }} x-igd-opregion = "on" -{{- end -}} +{{- end}} ` const qemuSerialTemplate = ` @@ -1068,40 +1068,295 @@ func getFmlCustomResolution(status *types.DomainStatus, globalConfig *types.Conf return "", fmt.Errorf("invalid fml resolution %s", fmlResolutions) } +type virtualNetwork struct { + types.VifConfig + networkID int + pciDeviceID int +} + +type pciAddressAllocator struct { + pciAssignments []pciDevice + virtualNetworks []virtualNetwork + multifunctionDevices multifunctionDevs + firstFreePCIID int + enforceNetInterfaceOrder bool +} + +// allocate sets pciDeviceID and pciBridgeID for every pciDevice and virtualNetwork +// based on user-configured ordering requirements. +func (a *pciAddressAllocator) allocate() error { + if !a.enforceNetInterfaceOrder { + // Fallback to legacy ordering of PCI devices. + return a.allocateLegacy() + } + + // Determine PCI addresses for virtual network interfaces, which are connected + // to the root bus using root ports. + for i := range a.virtualNetworks { + pciDeviceID := a.firstFreePCIID + // Increment pciDeviceID by 1 for every (virtual or assigned) PCI device + // which should have lower PCI address. + // For a multifunction PCI device, we increment by 1 for the entire group of + // functions, as they share a single address on the root bus through their bridge. + for j := range a.virtualNetworks { + if i == j { + continue + } + if a.virtualNetworks[j].VifOrder < a.virtualNetworks[i].VifOrder { + pciDeviceID++ + } + } + for pciAddr, md := range a.multifunctionDevices { + isBefore, isAfter := md.compareOrderWithVirtNet(a.virtualNetworks[i]) + invalidOrder := isBefore && isAfter + if invalidOrder { + return logError("user-defined network interface order "+ + "for VIF %s breaks order of the multifunction PCI device %s", + a.virtualNetworks[i].Vif, pciAddr) + } + if isBefore { + pciDeviceID++ + } + } + a.virtualNetworks[i].pciDeviceID = pciDeviceID + } + + // Determine PCI addresses for direct PCI assignments. + for i := range a.pciAssignments { + pciLongWoFunc, err := a.pciAssignments[i].pciLongWOFunction() + if err != nil { + logrus.Warnf("retrieving pci address without function failed: %v", err) + continue + } + md := a.multifunctionDevices[pciLongWoFunc] + if md == nil { + // Even when device is not multifunction, it still should have entry + // in the multifunctionDevices map. + logrus.Warnf("missing multifunctionDevices entry for pci address: %s", + pciLongWoFunc) + continue + } + // Increment pciDeviceOrBridgeID by 1 for every (virtual or assigned) PCI device + // which should have lower PCI address. + // For a multifunction PCI device, we increment by 1 for the entire group of + // functions, as they share a single address on the root bus through their bridge. + pciDeviceOrBridgeID := a.firstFreePCIID + for _, virtNet := range a.virtualNetworks { + // Order validity already checked when addresses for virtual networks + // were determined. + if isBefore, _ := md.compareOrderWithVirtNet(virtNet); !isBefore { + // Also increased when order is undefined, i.e. this PCI device + // does not have network function. Non-networking PCI devices + // are placed after virtual network interfaces. + pciDeviceOrBridgeID++ + } + } + thisHasNetFunc := md.hasNetworkFunction() + for pciAddr2, md2 := range a.multifunctionDevices { + if pciLongWoFunc == pciAddr2 { + continue + } + theOtherHasNetFunc := md2.hasNetworkFunction() + if !thisHasNetFunc { + if theOtherHasNetFunc { + // Network functions take priority in the order. + pciDeviceOrBridgeID++ + } else { + // Between non-networking devices, order by PCI addresses + // lexicographically. + if pciLongWoFunc > pciAddr2 { + pciDeviceOrBridgeID++ + } + } + continue + } + if !theOtherHasNetFunc { + // The other non-network device is ordered after this network device. + continue + } + // Both devices have at least one network function. + theOtherIsBefore, theOtherIsAfter := md2.compareOrder(*md) + invalidOrder := theOtherIsBefore && theOtherIsAfter + if invalidOrder { + return logError("user-defined network interface order "+ + "between devices %s and %s breaks order of PCI functions", + pciLongWoFunc, pciAddr2) + } + if theOtherIsBefore { + pciDeviceOrBridgeID++ + } + } + if len(md.devs) > 1 { + // pciDeviceOrBridgeID is for the bridge wrt. the root bus. + a.pciAssignments[i].pciBridgeID = pciDeviceOrBridgeID + // Determine device address on the secondary bus provided by the bridge. + // Skip PCI address 0 which is unsupported for standard hotplug controller. + pciDeviceID := 1 + devIndex := md.index(a.pciAssignments[i]) + thisIsNetDev := a.pciAssignments[i].ioType.IsNet() + for dev2Index, dev2 := range md.devs { + theOtherIsNetDev := dev2.ioType.IsNet() + if !thisIsNetDev { + if theOtherIsNetDev { + // Network functions take priority in the order. + pciDeviceID++ + } else { + // Between non-networking functions, preserve the order received + // from zedagent. + if dev2Index < devIndex { + pciDeviceID++ + } + } + continue + } + if !theOtherIsNetDev { + // The other non-network device is ordered after this network device. + continue + } + // Both devices are of the networking type. + if dev2.netIntfOrder < a.pciAssignments[i].netIntfOrder { + pciDeviceID++ + } + } + a.pciAssignments[i].pciDeviceID = pciDeviceID + } else { + // Not multifunction PCI device. + a.pciAssignments[i].pciBridgeID = 0 + a.pciAssignments[i].pciDeviceID = pciDeviceOrBridgeID + } + } + return nil +} + +func (a *pciAddressAllocator) allocateLegacy() error { + // Virtual network interfaces precede PCI-passthrough devices in the PCI topology. + // Among virtual interfaces, the order received from zedagent is preserved. + pciDeviceID := a.firstFreePCIID + for i := range a.virtualNetworks { + a.virtualNetworks[i].pciDeviceID = pciDeviceID + pciDeviceID++ + } + + // Preserve order of PCI assignments as received from zedagent, but group + // functions of the same multifunction PCI device under the same bridge. + pciBridgeIDs := make(map[string]int) // key = PCI address without function suffix + for i := range a.pciAssignments { + pciLongWoFunc, err := a.pciAssignments[i].pciLongWOFunction() + if err != nil { + logrus.Warnf("retrieving pci address without function failed: %v", err) + continue + } + md := a.multifunctionDevices[pciLongWoFunc] + if md == nil { + // Even when device is not multifunction, it still should have entry + // in the a.multifunctionDevices map. + logrus.Warnf("missing multifunctionDevices entry for pci address: %s", + pciLongWoFunc) + continue + } + if len(md.devs) > 1 { + // Multi-function PCI device. + pciBridgeID, bridgeIDAllocated := pciBridgeIDs[pciLongWoFunc] + if !bridgeIDAllocated { + pciBridgeID = pciDeviceID + pciBridgeIDs[pciLongWoFunc] = pciBridgeID + pciDeviceID++ + } + a.pciAssignments[i].pciBridgeID = pciBridgeID + // Skip PCI address 0 which is unsupported for standard hotplug controller. + a.pciAssignments[i].pciDeviceID = md.index(a.pciAssignments[i]) + 1 + } else { + // Not multifunction PCI device. + a.pciAssignments[i].pciBridgeID = 0 + a.pciAssignments[i].pciDeviceID = pciDeviceID + pciDeviceID++ + } + } + return nil +} + type pciDevicesWithBridge struct { bridgeBus string devs []*pciDevice } -type multifunctionDevs map[string]*pciDevicesWithBridge // key: pci long without function number - -func (md multifunctionDevs) isMultiFunction(p pciDevice) bool { - pciLongWOFunc, err := p.pciLongWOFunction() - if err != nil { - return false +func (pd pciDevicesWithBridge) index(p pciDevice) int { + for i, dev := range pd.devs { + if p.sameDevice(*dev) { + return i + } } - devs, found := md[pciLongWOFunc] - return found && len(devs.devs) > 1 + return -1 } -func (md multifunctionDevs) index(p pciDevice) int { - pciLongWOFunc, err := p.pciLongWOFunction() - if err != nil { - logrus.Warnf("retrieving pci address without function failed: %v", err) - return -1 +// compareOrder determines if this (possibly multifunction) device should be ordered +// (in the PCI hierarchy) before the other given device. +// Please note that if both booleans are true, then the user-defined order is invalid +// and would break the (multifunction) device if applied. +// If both return values are false, the order is undefined. This occurs when one or both +// devices lack a network function (user only specifies order for network interfaces). +func (pd pciDevicesWithBridge) compareOrder( + pd2 pciDevicesWithBridge) (isBefore, isAfter bool) { + for _, dev := range pd.devs { + if !dev.ioType.IsNet() { + continue + } + for _, dev2 := range pd2.devs { + if !dev2.ioType.IsNet() { + continue + } + if dev.netIntfOrder < dev2.netIntfOrder { + isBefore = true + } + if dev.netIntfOrder > dev2.netIntfOrder { + isAfter = true + } + } } - pciDevices, found := md[pciLongWOFunc] - if !found { - return -1 + return isBefore, isAfter +} + +// compareOrderWithVirtNet determines if this (possibly multifunction) device should be +// ordered (in the PCI hierarchy) before or after the given virtual network device. +// Please note that if both booleans are true, then the user-defined order is invalid +// and would break the (multifunction) device if applied. +// If both return values are false, the order is undefined. This occurs when this device +// lacks a network function (user only specifies order for network interfaces). +func (pd pciDevicesWithBridge) compareOrderWithVirtNet( + virtNet virtualNetwork) (isBefore, isAfter bool) { + for _, dev := range pd.devs { + if !dev.ioType.IsNet() { + continue + } + if dev.netIntfOrder < virtNet.VifOrder { + isBefore = true + } + if dev.netIntfOrder > virtNet.VifOrder { + isAfter = true + } } + return isBefore, isAfter +} - for i, dev := range pciDevices.devs { - if p.ioType == dev.ioType && p.pciLong == dev.pciLong { - return i +// Return true if device has at least one network function. +func (pd pciDevicesWithBridge) hasNetworkFunction() bool { + for _, dev := range pd.devs { + if dev.ioType.IsNet() { + return true } } + return false +} - return -1 +type multifunctionDevs map[string]*pciDevicesWithBridge // key: pci long without function number + +func (md multifunctionDevs) isMultiFunction(p pciDevice) bool { + pciLongWOFunc, err := p.pciLongWOFunction() + if err != nil { + return false + } + devs, found := md[pciLongWOFunc] + return found && len(devs.devs) > 1 } func multifunctionDevGroup(pcis []pciDevice) multifunctionDevs { @@ -1127,45 +1382,32 @@ func multifunctionDevGroup(pcis []pciDevice) multifunctionDevs { return mds } -func (p pciDevice) pciLongWOFunction() (string, error) { - pciLongSplit := strings.Split(p.pciLong, ".") - if len(pciLongSplit) == 0 { - return "", fmt.Errorf("could not split %s", p.pciLong) - } - pciWithoutFunction := strings.Join(pciLongSplit[0:len(pciLongSplit)-1], ".") - - return pciWithoutFunction, nil -} - type pciAssignmentsTemplateFiller struct { multifunctionDevices multifunctionDevs file io.Writer } func (f *pciAssignmentsTemplateFiller) pciEBridge(pciID int, pciWOFunction string) error { - pcieBridgeContext := tQemuPCIeBridgeContext{PCIId: 0, Bus: pciID} + pcieBridgeContext := tQemuPCIeBridgeContext{Bus: pciID} if err := tQemuPCIeBridge.Execute(f.file, pcieBridgeContext); err != nil { return fmt.Errorf("can't write PCIe bridge Passthrough to config file (%w)", err) } f.multifunctionDevices[pciWOFunction].bridgeBus = fmt.Sprintf("pcie-bridge.%d", pciID) - return nil } -func (f *pciAssignmentsTemplateFiller) do(file io.Writer, pciAssignments []pciDevice, pciID int) error { +func (f *pciAssignmentsTemplateFiller) do(file io.Writer, pciAssignments []pciDevice) error { if len(pciAssignments) == 0 { return nil } - var pcieRPContext tQemuPCIeRootPortContext - var pciPTContext tQemuPCIPassthruContext pciEBridgeForMultiFuncDevCreated := make(map[string]struct{}) // key: pci long without function number - for i, pa := range pciAssignments { - pcieRPContext.PCIId = pciID + i - pciPTContext.Xopregion = false - pciPTContext.Xvga = pa.isVGA() - pciPTContext.PciShortAddr = types.PCILongToShort(pa.pciLong) - + for _, pa := range pciAssignments { + pciPTContext := tQemuPCIPassthruContext{ + PciShortAddr: types.PCILongToShort(pa.pciLong), + Xvga: pa.isVGA(), + Xopregion: false, + } if vendor, err := pa.vid(); err == nil { // check for Intel vendor if vendor == "0x8086" { @@ -1184,29 +1426,32 @@ func (f *pciAssignmentsTemplateFiller) do(file io.Writer, pciAssignments []pciDe } _, pciEBridgeCreated := pciEBridgeForMultiFuncDevCreated[pciLongWoFunc] pciEBridgeForMultiFuncDevCreated[pciLongWoFunc] = struct{}{} - - // for non-multifunction devices, every pci device gets a "pcie-root-port" - if !f.multifunctionDevices.isMultiFunction(pa) || !pciEBridgeCreated { + isMultifunctionDev := f.multifunctionDevices.isMultiFunction(pa) + + // Connect device using PCI Express Root Port either directly or via bridge if it + // has multiple functions. + if !isMultifunctionDev || !pciEBridgeCreated { + var pcieRPContext tQemuPCIeRootPortContext + if isMultifunctionDev { + pcieRPContext.PCIId = pa.pciBridgeID + } else { + pcieRPContext.PCIId = pa.pciDeviceID + } if err := tQemuPCIeRootPort.Execute(file, pcieRPContext); err != nil { - return logError("can't write Root Port PCI Passthrough to config file (%v)", err) + return logError("can't write PCIe Root Port to config file (%v)", err) } } - if f.multifunctionDevices.isMultiFunction(pa) { + if isMultifunctionDev { if !pciEBridgeCreated { - err := f.pciEBridge(pcieRPContext.PCIId, pciLongWoFunc) + err := f.pciEBridge(pa.pciBridgeID, pciLongWoFunc) if err != nil { logrus.Warnf("could not write template: %v, skipping", err) } } - pciPTContext.Bus = f.multifunctionDevices[pciLongWoFunc].bridgeBus - funcIndex := f.multifunctionDevices.index(pa) - if funcIndex < 0 { - logrus.Warn("PCI function index is less than 0 - skipping") - } - pciPTContext.Addr = fmt.Sprintf("0x%x", funcIndex+1) // Unsupported PCI slot 0 for standard hotplug controller + pciPTContext.Addr = fmt.Sprintf("0x%x", pa.pciDeviceID) } else { - pciPTContext.Bus = fmt.Sprintf("pci.%d", pcieRPContext.PCIId) + pciPTContext.Bus = fmt.Sprintf("pci.%d", pa.pciDeviceID) pciPTContext.Addr = "0x0" } if err := tQemuPCIPassthru.Execute(file, pciPTContext); err != nil { @@ -1288,32 +1533,9 @@ func (ctx KvmContext) CreateDomConfig(domainName string, diskContext.DiskID = diskContext.DiskID + 1 } - // render network device model settings - netContext := tQemuNetContext{PCIId: diskContext.PCIId} - for _, net := range config.VifList { - netContext.Mac = net.Mac.String() - netContext.Bridge = net.Bridge - netContext.Vif = net.Vif - if config.VirtualizationMode == types.LEGACY { - netContext.Driver = "e1000" - } else { - netContext.Driver = "virtio-net-pci" - } - netContext.MTU = net.MTU - if err := tQemuNet.Execute(file, netContext); err != nil { - return logError("can't write to config file %s (%v)", file.Name(), err) - } - netContext.PCIId = netContext.PCIId + 1 - netContext.NetID = netContext.NetID + 1 - } - - // Gather all PCI assignments into a single line var pciAssignments []pciDevice - // Gather all serial assignments into a single line var serialAssignments []string - // Gather all CAN Bus assignments into a single line canBusAssignments := make(map[string]string) - for _, adapter := range config.IoAdapterList { logrus.Debugf("processing adapter %d %s\n", adapter.Type, adapter.Name) list := aa.LookupIoBundleAny(adapter.Name) @@ -1334,6 +1556,9 @@ func (ctx KvmContext) CreateDomConfig(domainName string, if ib.PciLong != "" && ib.UsbAddr == "" { logrus.Infof("Adding PCI device <%v>\n", ib.PciLong) tap := pciDevice{pciLong: ib.PciLong, ioType: ib.Type} + if ib.Type.IsNet() { + tap.netIntfOrder = adapter.IntfOrder + } pciAssignments = addNoDuplicatePCI(pciAssignments, tap) } if ib.Serial != "" { @@ -1357,12 +1582,60 @@ func (ctx KvmContext) CreateDomConfig(domainName string, } } + // Group functions of the same multifunction PCI device together. + multifunctionDevices := multifunctionDevGroup(pciAssignments) + + // Prepare a list of virtual interfaces connecting the application with network + // instances. + var virtualNetworks []virtualNetwork + var networkID int + for _, vif := range config.VifList { + virtualNetworks = append(virtualNetworks, virtualNetwork{ + VifConfig: vif, + networkID: networkID, + }) + networkID++ + } + + // Determine PCI addresses for all virtual networks and direct PCI assignments. + addrAllocator := pciAddressAllocator{ + pciAssignments: pciAssignments, + virtualNetworks: virtualNetworks, + multifunctionDevices: multifunctionDevices, + firstFreePCIID: diskContext.PCIId, + enforceNetInterfaceOrder: config.EnforceNetworkInterfaceOrder, + } + // Set pciDeviceID and pciBridgeID for every item in pciAssignments and virtualNetworks. + if err = addrAllocator.allocate(); err != nil { + return logError(err.Error()) + } + + // Render virtual network interfaces. + for _, virtNet := range virtualNetworks { + netContext := tQemuNetContext{ + PCIId: virtNet.pciDeviceID, + NetID: virtNet.networkID, + Mac: virtNet.Mac.String(), + Bridge: virtNet.Bridge, + Vif: virtNet.Vif, + MTU: virtNet.MTU, + } + if config.VirtualizationMode == types.LEGACY { + netContext.Driver = "e1000" + } else { + netContext.Driver = "virtio-net-pci" + } + if err := tQemuNet.Execute(file, netContext); err != nil { + return logError("can't write to config file %s (%v)", file.Name(), err) + } + } + + // Render PCI assignments. pciAssignmentsFiller := pciAssignmentsTemplateFiller{ - multifunctionDevices: multifunctionDevGroup(pciAssignments), + multifunctionDevices: multifunctionDevices, file: file, } - - err = pciAssignmentsFiller.do(file, pciAssignments, netContext.PCIId) + err = pciAssignmentsFiller.do(file, pciAssignments) if err != nil { return fmt.Errorf("writing to template file %s failed: %w", file.Name(), err) } diff --git a/pkg/pillar/hypervisor/kvm_test.go b/pkg/pillar/hypervisor/kvm_test.go index 35b01aa73c..c931d7afd7 100644 --- a/pkg/pillar/hypervisor/kvm_test.go +++ b/pkg/pillar/hypervisor/kvm_test.go @@ -12,6 +12,7 @@ import ( "github.com/google/go-cmp/cmp" zconfig "github.com/lf-edge/eve-api/go/config" "github.com/lf-edge/eve/pkg/pillar/types" + . "github.com/onsi/gomega" uuid "github.com/satori/go.uuid" ) @@ -1403,6 +1404,7 @@ func domConfigArm64() string { host = "f3:00.0" bus = "pci.10" addr = "0x0" + [chardev "charserial-usr0"] backend = "serial" path = "/dev/ttyS0" @@ -1702,6 +1704,7 @@ func domConfigAmd64FML() string { host = "f3:00.0" bus = "pci.10" addr = "0x0" + [device "pci.11"] driver = "pcie-root-port" port = "111" @@ -1715,6 +1718,7 @@ func domConfigAmd64FML() string { host = "f4:00.0" bus = "pci.11" addr = "0x0" + [chardev "charserial-usr0"] backend = "serial" path = "/dev/ttyS0" @@ -2003,6 +2007,7 @@ func domConfigAmd64Legacy() string { host = "f3:00.0" bus = "pci.10" addr = "0x0" + [chardev "charserial-usr0"] backend = "serial" path = "/dev/ttyS0" @@ -2289,6 +2294,7 @@ func domConfigAmd64() string { host = "f3:00.0" bus = "pci.10" addr = "0x0" + [chardev "charserial-usr0"] backend = "serial" path = "/dev/ttyS0" @@ -2577,6 +2583,7 @@ func domConfigContainerVNC() string { host = "f3:00.0" bus = "pci.9" addr = "0x0" + [chardev "charserial-usr0"] backend = "serial" path = "/dev/ttyS0" @@ -2788,6 +2795,7 @@ func expectedMultifunctionDevice() string { host = "00:0a.0" bus = "pci.0" addr = "0x0" + [device "pci.1"] driver = "pcie-root-port" port = "11" @@ -2806,6 +2814,7 @@ func expectedMultifunctionDevice() string { host = "00:0d.0" bus = "pcie-bridge.1" addr = "0x1" + [device "pci.2"] driver = "pcie-root-port" port = "12" @@ -2819,11 +2828,13 @@ func expectedMultifunctionDevice() string { host = "00:0b.0" bus = "pci.2" addr = "0x0" + [device] driver = "vfio-pci" host = "00:0d.2" bus = "pcie-bridge.1" - addr = "0x2"` + addr = "0x2" +` } func TestPCIAssignmentsTemplateFillMultifunctionDevice(t *testing.T) { @@ -2847,11 +2858,20 @@ func TestPCIAssignmentsTemplateFillMultifunctionDevice(t *testing.T) { } wr := bytes.Buffer{} + multifunctionDevices := multifunctionDevGroup(pciAssignments) + addrAllocator := pciAddressAllocator{ + pciAssignments: pciAssignments, + multifunctionDevices: multifunctionDevices, + } + if err := addrAllocator.allocate(); err != nil { + t.Error(err) + } + p := pciAssignmentsTemplateFiller{ - multifunctionDevices: multifunctionDevGroup(pciAssignments), + multifunctionDevices: multifunctionDevices, file: &wr, } - p.do(&wr, pciAssignments, 0) + p.do(&wr, pciAssignments) if wr.String() != expectedMultifunctionDevice() { t.Fatalf("not equal, diff: \n%s\ncomplete:\n%s", cmp.Diff(wr.String(), expectedMultifunctionDevice()), wr.String()) @@ -2897,3 +2917,133 @@ func TestConvertToMultifunctionPCIDevices(t *testing.T) { t.Fatal("expected one device") } } + +func TestPCIAddressAllocator(t *testing.T) { + g := NewGomegaWithT(t) + virtualNetworks := []virtualNetwork{ + { + VifConfig: types.VifConfig{ + Vif: "nbu1x1", + VifOrder: 1, + }, + }, + { + VifConfig: types.VifConfig{ + Vif: "nbu2x1", + VifOrder: 4, + }, + }, + } + pciAssignments := []pciDevice{ + { + pciLong: "0000:06:00.0", + ioType: types.IoNetEth, + netIntfOrder: 2, + }, + { + pciLong: "0000:00:15.0", + ioType: types.IoUSB, + }, + { + pciLong: "0000:06:00.2", + ioType: types.IoOther, + }, + { + pciLong: "0000:06:00.1", + ioType: types.IoNetEth, + netIntfOrder: 3, + }, + { + pciLong: "0000:08:00.0", + ioType: types.IoNetWWAN, + netIntfOrder: 0, + }, + } + multifunctionDevices := multifunctionDevGroup(pciAssignments) + g.Expect(multifunctionDevices).To(HaveLen(3)) + addrAllocator := pciAddressAllocator{ + pciAssignments: pciAssignments, + virtualNetworks: virtualNetworks, + multifunctionDevices: multifunctionDevices, + firstFreePCIID: 5, + // First test the legacy order. + enforceNetInterfaceOrder: false, + } + err := addrAllocator.allocate() + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(virtualNetworks[0].pciDeviceID).To(Equal(5)) + g.Expect(virtualNetworks[1].pciDeviceID).To(Equal(6)) + g.Expect(pciAssignments[0].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[0].pciDeviceID).To(Equal(1)) + g.Expect(pciAssignments[1].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[1].pciDeviceID).To(Equal(8)) + g.Expect(pciAssignments[2].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[2].pciDeviceID).To(Equal(2)) + g.Expect(pciAssignments[3].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[3].pciDeviceID).To(Equal(3)) + g.Expect(pciAssignments[4].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[4].pciDeviceID).To(Equal(9)) + + // Test enforced user-defined network interface order. + addrAllocator.enforceNetInterfaceOrder = true + err = addrAllocator.allocate() + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(virtualNetworks[0].pciDeviceID).To(Equal(6)) + g.Expect(virtualNetworks[1].pciDeviceID).To(Equal(8)) + g.Expect(pciAssignments[0].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[0].pciDeviceID).To(Equal(1)) + g.Expect(pciAssignments[1].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[1].pciDeviceID).To(Equal(9)) + g.Expect(pciAssignments[2].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[2].pciDeviceID).To(Equal(3)) + g.Expect(pciAssignments[3].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[3].pciDeviceID).To(Equal(2)) + g.Expect(pciAssignments[4].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[4].pciDeviceID).To(Equal(5)) + + // Try re-ordering functions of a multifunction device. This should be allowed. + pciAssignments[0].netIntfOrder = 3 + pciAssignments[3].netIntfOrder = 2 + err = addrAllocator.allocate() + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(virtualNetworks[0].pciDeviceID).To(Equal(6)) + g.Expect(virtualNetworks[1].pciDeviceID).To(Equal(8)) + g.Expect(pciAssignments[0].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[0].pciDeviceID).To(Equal(2)) // 1 -> 2 + g.Expect(pciAssignments[1].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[1].pciDeviceID).To(Equal(9)) + g.Expect(pciAssignments[2].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[2].pciDeviceID).To(Equal(3)) + g.Expect(pciAssignments[3].pciBridgeID).To(Equal(7)) + g.Expect(pciAssignments[3].pciDeviceID).To(Equal(1)) // 2 -> 1 + g.Expect(pciAssignments[4].pciBridgeID).To(Equal(0)) + g.Expect(pciAssignments[4].pciDeviceID).To(Equal(5)) + + // Try to put virtual interface in-between functions of a multifunction device. + // This should not be allowed. + virtualNetworks[1].VifOrder = 3 + pciAssignments[0].netIntfOrder = 4 + + err = addrAllocator.allocate() + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(Equal("user-defined network interface order for VIF " + + "nbu2x1 breaks order of the multifunction PCI device 0000:06:00")) + // Revert the invalid config change. + virtualNetworks[1].VifOrder = 4 + pciAssignments[0].netIntfOrder = 3 + err = addrAllocator.allocate() + g.Expect(err).ToNot(HaveOccurred()) + + // Try to put PCI-passthrough network device in-between functions of a multifunction + // device. This should not be allowed. + pciAssignments[4].netIntfOrder = 3 + pciAssignments[0].netIntfOrder = 4 + + err = addrAllocator.allocate() + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(Equal("user-defined network interface order between " + + "devices 0000:06:00 and 0000:08:00 breaks order of PCI functions")) +} diff --git a/pkg/pillar/hypervisor/pci.go b/pkg/pillar/hypervisor/pci.go index ba91826d1e..4a663dac46 100644 --- a/pkg/pillar/hypervisor/pci.go +++ b/pkg/pillar/hypervisor/pci.go @@ -75,6 +75,23 @@ func addNoDuplicatePCI(list []pciDevice, tap pciDevice) []pciDevice { type pciDevice struct { pciLong string ioType types.IoType + // netIntfOrder is only applied to network devices when EnforceNetworkInterfaceOrder + // is enabled. + netIntfOrder uint32 + + // pciBridgeID and pciDeviceID are set by pciAddressAllocator. + + // PCI bridge is only used for multifunction PCI devices. + // In that case pciBridgeID is address of the bridge on the root bus, while + // pciDeviceID is device address inside the secondary bus provided by the bridge. + // For single-function PCI devices, bridge is unused, pciBridgeID is unset + // and pciDeviceID points to device address on the root bus. + pciBridgeID int + pciDeviceID int +} + +func (d pciDevice) sameDevice(d2 pciDevice) bool { + return d.ioType == d2.ioType && d.pciLong == d2.pciLong } // check if the PCI device is a VGA device @@ -102,6 +119,16 @@ func (d pciDevice) devid() (string, error) { return strings.TrimSpace(strings.TrimSuffix(string(devID), "\n")), nil } +// Return PCI long address without the function suffix. +func (d pciDevice) pciLongWOFunction() (string, error) { + pciLongSplit := strings.Split(d.pciLong, ".") + if len(pciLongSplit) == 0 { + return "", fmt.Errorf("could not split %s", d.pciLong) + } + pciWithoutFunction := strings.Join(pciLongSplit[0:len(pciLongSplit)-1], ".") + return pciWithoutFunction, nil +} + // isBridge checks if the given PCI device is a bridge. // It reads the device's class from the sysfs filesystem and returns true if the class // starts with "0x06", which is the PCI base class code for bridges. diff --git a/pkg/pillar/hypervisor/xen.go b/pkg/pillar/hypervisor/xen.go index 565dca6f14..0dcc29bc61 100644 --- a/pkg/pillar/hypervisor/xen.go +++ b/pkg/pillar/hypervisor/xen.go @@ -163,6 +163,11 @@ func (ctx xenContext) CreateDomConfig(domainName string, xenGlobal := "" uuidStr := fmt.Sprintf("appuuid=%s ", config.UUIDandVersion.UUID) + if config.EnforceNetworkInterfaceOrder { + return logError("enforcing user-defined network interface order is not supported " + + "with the Xen hypervisor") + } + switch config.VirtualizationMode { case types.PV: xenType = "pv"