Skip to content

Commit

Permalink
Merge pull request #17988 from wanyaoqi/fix/nvidia-vgpu-check
Browse files: browse the repository at this point in the history
fix(region): check nvidia vgpu count and conflict with other gpus
  • Loading branch information
zexi authored Sep 12, 2023
2 parents dd740a0 + 4888f81 commit baf8742
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
29 changes: 29 additions & 0 deletions pkg/compute/models/guest_actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -2046,6 +2046,20 @@ func (self *SGuest) startAttachIsolatedDevices(ctx context.Context, userCred mcc
}
}

if dev.DevType == api.LEGACY_VGPU_TYPE {
devs, err := self.GetIsolatedDevices()
if err != nil {
return errors.Wrap(err, "get isolated devices")
}
for i := range devs {
if devs[i].DevType == api.LEGACY_VGPU_TYPE {
return httperrors.NewBadRequestError("Nvidia vgpu count exceed > 1")
} else if utils.IsInStringArray(devs[i].DevType, api.VALID_GPU_TYPES) {
return httperrors.NewBadRequestError("Nvidia vgpu can't passthrough with other gpus")
}
}
}

defer func() { go host.ClearSchedDescCache() }()
for i := 0; i < len(devs); i++ {
err = self.attachIsolatedDevice(ctx, userCred, &devs[i], nil, nil)
Expand Down Expand Up @@ -2085,6 +2099,21 @@ func (self *SGuest) startAttachIsolatedDevGeneral(ctx context.Context, userCred
if !utils.IsInStringArray(self.GetStatus(), []string{api.VM_READY, api.VM_RUNNING}) {
return httperrors.NewInvalidStatusError("Can't attach GPU when status is %q", self.GetStatus())
}

if dev.DevType == api.LEGACY_VGPU_TYPE {
devs, err := self.GetIsolatedDevices()
if err != nil {
return errors.Wrap(err, "get isolated devices")
}
for i := range devs {
if devs[i].DevType == api.LEGACY_VGPU_TYPE {
return httperrors.NewBadRequestError("Nvidia vgpu count exceed > 1")
} else if utils.IsInStringArray(devs[i].DevType, api.VALID_GPU_TYPES) {
return httperrors.NewBadRequestError("Nvidia vgpu can't passthrough with other gpus")
}
}
}

host, _ := self.GetHost()
lockman.LockObject(ctx, host)
defer lockman.ReleaseObject(ctx, host)
Expand Down
17 changes: 17 additions & 0 deletions pkg/compute/models/guests.go
Original file line number Diff line number Diff line change
Expand Up @@ -1828,6 +1828,23 @@ func (manager *SGuestManager) validateCreateData(
input.IsolatedDevices[idx] = devConfig
}

nvidiaVgpuCnt := 0
gpuCnt := 0
for i := 0; i < len(input.IsolatedDevices); i++ {
if input.IsolatedDevices[i].DevType == api.LEGACY_VGPU_TYPE {
nvidiaVgpuCnt += 1
} else if utils.IsInStringArray(input.IsolatedDevices[i].DevType, api.VALID_GPU_TYPES) {
gpuCnt += 1
}
}

if nvidiaVgpuCnt > 1 {
return nil, httperrors.NewBadRequestError("Nvidia vgpu count exceed > 1")
}
if nvidiaVgpuCnt > 0 && gpuCnt > 0 {
return nil, httperrors.NewBadRequestError("Nvidia vgpu can't passthrough with other gpus")
}

keypairId := input.KeypairId
if len(keypairId) > 0 {
keypairObj, err := KeypairManager.FetchByIdOrName(userCred, keypairId)
Expand Down

0 comments on commit baf8742

Please sign in to comment.