Skip to content

Commit

Permalink
CP-44320 scaffolding for NVidia Virtual Compute Service (VCS)
Browse files Browse the repository at this point in the history
NVidia now supports vGPUs that can run as compute vGPUs. These are
marked in NVidia's vgpuConfig.xml with a new value "Compute" for the
existing "class" attribute:

<vgpuType id="307" name="GRID P100C-12C" class="Compute">

So far we neither parse nor use this attribute, but we need to in order
to support VCS. This commit extends the parser to recognise the class
attribute and to pass it on to xenopsd under the internal name "vclass".
Xenopsd's job is to start device emulators accordingly; selecting the
correct emulators is future work. For now, xenopsd only logs the vclass
it receives.

Unit tests have been updated.

This commit should have no impact on current functionality, be safe to
merge, and lay groundwork for extending xenopsd to support VCS.

Signed-off-by: Christian Lindig <[email protected]>
  • Loading branch information
Christian Lindig committed Oct 16, 2023
1 parent 3d32794 commit 7563664
Show file tree
Hide file tree
Showing 8 changed files with 47 additions and 7 deletions.
5 changes: 5 additions & 0 deletions ocaml/tests/common/test_vgpu_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ let k100 =
; vdev_id= 0x0fe7
; vsubdev_id= 0x101e
; sriov= false
; vclass= "NVS"
}
)
; experimental= false
Expand All @@ -59,6 +60,7 @@ let k140q =
; vdev_id= 0x0ff7
; vsubdev_id= 0x1037
; sriov= false
; vclass= "NVS"
}
)
; experimental= false
Expand All @@ -85,6 +87,7 @@ let k200 =
; vdev_id= 0x118d
; vsubdev_id= 0x101d
; sriov= false
; vclass= "NVS"
}
)
; experimental= false
Expand All @@ -111,6 +114,7 @@ let k240q =
; vdev_id= 0x11b0
; vsubdev_id= 0x101a
; sriov= false
; vclass= "NVS"
}
)
; experimental= false
Expand All @@ -137,6 +141,7 @@ let k260q =
; vdev_id= 0x11b0
; vsubdev_id= 0x101b
; sriov= false
; vclass= "NVS"
}
)
; experimental= false
Expand Down
4 changes: 4 additions & 0 deletions ocaml/tests/test_vgpu_type.ml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ module NvidiaTest = struct
; vdev_id= 0x1111
; vsubdev_id= 0x2222
; sriov= false
; vclass= "NVS"
}
; framebufferlength= 0x10000000L
; num_heads= 2L
Expand All @@ -92,6 +93,7 @@ module NvidiaTest = struct
; vdev_id= 0x1111
; vsubdev_id= 0x2222
; sriov= false
; vclass= "NVS"
}
; framebufferlength= 0x10000000L
; num_heads= 2L
Expand All @@ -117,6 +119,7 @@ module NvidiaTest = struct
; vdev_id= 0x1112
; vsubdev_id= 0x2223
; sriov= false
; vclass= "NVS"
}
; framebufferlength= 0x20000000L
; num_heads= 4L
Expand All @@ -138,6 +141,7 @@ module NvidiaTest = struct
; vdev_id= 0x1111
; vsubdev_id= 0x2222
; sriov= false
; vclass= "NVS"
}
; framebufferlength= 0x10000000L
; num_heads= 2L
Expand Down
7 changes: 7 additions & 0 deletions ocaml/tests/test_xenopsd_metadata.ml
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ module GenerateVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 0)
; extra_args= ""
; vclass= None
}
)
]
Expand Down Expand Up @@ -364,6 +365,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 0)
; extra_args= ""
; vclass= None
}
)
; Xenops_interface.Vgpu.(
Expand All @@ -375,6 +377,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 1)
; extra_args= ""
; vclass= None
}
)
]
Expand All @@ -398,6 +401,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 0)
; extra_args= ""
; vclass= None
}
)
; Xenops_interface.Vgpu.(
Expand All @@ -409,6 +413,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 1)
; extra_args= ""
; vclass= None
}
)
; Xenops_interface.Vgpu.(
Expand All @@ -420,6 +425,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 2)
; extra_args= ""
; vclass= None
}
)
; Xenops_interface.Vgpu.(
Expand All @@ -431,6 +437,7 @@ module GenerateMultiVGPUMetadata = Generic.MakeStateful (struct
; type_id= Some "type_id_1"
; uuid= Some (uuid_with_index 3)
; extra_args= ""
; vclass= None
}
)
]
Expand Down
1 change: 1 addition & 0 deletions ocaml/xapi-idl/xen/xenops_types.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ module Vgpu = struct
[@default {domain= 0000; bus= 0; dev= 11; fn= 0}]
; type_id: string option
; uuid: string option
; vclass: string option (** from vgpu: Compute, NVS, Quadro *)
; extra_args: string [@default ""]
(** string is passed on as is and no structure is assumed *)
}
Expand Down
3 changes: 3 additions & 0 deletions ocaml/xapi/xapi_globs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,9 @@ let pass_through_pif_carrier = ref false

(** key under which a VGPU_type's internal_config stores the vGPU type id *)
let vgpu_type_id = "type_id"

(** key for NVidia vgpu "class" attribute in vgpuConfig.xml; the value is
    stored under this key in a VGPU_type's internal_config and passed on
    to xenopsd as the optional [vclass] of an NVidia vGPU *)
let vgpu_type_vclass = "vclass"

let igd_passthru_key = "igd_passthrough"

let vgt_low_gm_sz = "vgt_low_gm_sz"
Expand Down
20 changes: 18 additions & 2 deletions ocaml/xapi/xapi_vgpu_type.ml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ module Identifier = struct
; vdev_id: int (** vgpuType/deviceId in XML *)
; vsubdev_id: int (** vgpuType/subsystemId in XML *)
; sriov: bool (** true if SRIOV mode to be used *)
; vclass: string (** vgpuType/class in XML: Quadro, NVS, Compute *)
}

type gvt_g_id = {
Expand Down Expand Up @@ -399,6 +400,7 @@ let read_whitelist_line_by_line ~whitelist ~device_id ~parse_line
[]

module Vendor_nvidia = struct
(* represents a vgpuType declaration found in vgpuConfig.xml *)
type vgpu_conf = {
identifier: Identifier.nvidia_id
; framebufferlength: int64
Expand Down Expand Up @@ -652,6 +654,7 @@ module Vendor_nvidia = struct
)
in
let devid = find_one_by_name "devId" vgpu_type in
let vclass = get_attr "class" vgpu_type in
let identifier =
Identifier.
{
Expand All @@ -669,6 +672,7 @@ module Vendor_nvidia = struct
false
)
(* don't use SRIOV *)
; vclass
}
in

Expand Down Expand Up @@ -760,7 +764,11 @@ module Vendor_nvidia = struct
; max_resolution_x= conf.max_x
; max_resolution_y= conf.max_y
; size= Int64.div Constants.pgpu_default_size conf.max_instance
; internal_config= [(Xapi_globs.vgpu_type_id, conf.type_id)]
; internal_config=
[
(Xapi_globs.vgpu_type_id, conf.type_id)
; (Xapi_globs.vgpu_type_vclass, conf.identifier.vclass)
]
; identifier= Nvidia conf.identifier
; experimental= false
; compatible_model_names_in_vm= conf.compatible_model_names_in_vm
Expand Down Expand Up @@ -999,7 +1007,15 @@ module Nvidia_compat = struct
Scanf.sscanf (List.assoc "plugin0.vdev_id" args) {|"0x%x:0x%x"|}
(fun vdev_id vsubdev_id ->
Identifier.(
Nvidia {pdev_id; psubdev_id; vdev_id; vsubdev_id; sriov= false}
Nvidia
{
pdev_id
; psubdev_id
; vdev_id
; vsubdev_id
; sriov= false
; vclass= ""
}
)
)
with e -> raise (Parse_error e)
Expand Down
10 changes: 6 additions & 4 deletions ocaml/xapi/xapi_xenops.ml
Original file line number Diff line number Diff line change
Expand Up @@ -946,10 +946,11 @@ module MD = struct
let physical_pci_address = get_target_pci_address ~__context vgpu in
let virtual_pci_address = get_virtual_pci_address ~__context vgpu in
let vgpu_type = vgpu.Db_actions.vGPU_type in
let type_id, config_file =
Db.VGPU_type.get_internal_config ~__context ~self:vgpu_type |> fun x ->
( List.assoc_opt Xapi_globs.vgpu_type_id x
, List.assoc_opt Xapi_globs.nvidia_compat_config_file_key x
let type_id, config_file, vclass =
Db.VGPU_type.get_internal_config ~__context ~self:vgpu_type |> fun kv ->
( List.assoc_opt Xapi_globs.vgpu_type_id kv
, List.assoc_opt Xapi_globs.nvidia_compat_config_file_key kv
, List.assoc_opt Xapi_globs.vgpu_type_vclass kv
)
in
let uuid = vgpu.Db_actions.vGPU_uuid in
Expand All @@ -964,6 +965,7 @@ module MD = struct
; type_id
; uuid= Some uuid
; extra_args
; vclass (* from vgpuType class attribute in vgpuConfig.xml *)
}
in
{
Expand Down
4 changes: 3 additions & 1 deletion ocaml/xenopsd/xc/device.ml
Original file line number Diff line number Diff line change
Expand Up @@ -3635,7 +3635,9 @@ module Dm = struct
let start_vgpu ~xc:_ ~xs task ?(restore = false) domid vgpus vcpus profile =
let open Xenops_interface.Vgpu in
match vgpus with
| {implementation= Nvidia _; _} :: _ ->
| {implementation= Nvidia {vclass; _}; _} :: _ ->
let vclass = Option.value ~default:"unknown" vclass in
info "NVidia vgpu vclass=%s" vclass ;
(* Start DEMU and wait until it has reached the desired state *)
if not (Service.Vgpu.is_running ~xs domid) then (
let pcis = List.map (fun x -> x.physical_pci_address) vgpus in
Expand Down

0 comments on commit 7563664

Please sign in to comment.