Add support for MLProgram in ort_coreml (#116)
It enables fp16 computation on the ANE instead of assigning all work to the CPU. However, MLProgram is not well supported yet: it covers far fewer operators than the regular NeuralNetwork format.
yuygfgg authored Nov 29, 2024
1 parent c684113 commit a2b1a88
Showing 4 changed files with 33 additions and 4 deletions.
.github/workflows/macos-ort.yml (2 changes: 1 addition & 1 deletion)
@@ -103,7 +103,7 @@ jobs:
     - name: Setup ONNX Runtime
       run: |
-        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-osx-arm64-1.19.2.tgz
+        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.20.0/onnxruntime-osx-arm64-1.20.0.tgz
         tar -xf ort.tgz
         mv onnxruntime-* onnxruntime
scripts/vsmlrt.py (2 changes: 2 additions & 0 deletions)
@@ -273,6 +273,7 @@ class ORT_COREML:
     verbosity: int = 0
     fp16: bool = False
     fp16_blacklist_ops: typing.Optional[typing.Sequence[str]] = None
+    ml_program: int = 0

     # internal backend attributes
     supports_onnx_serialization: bool = True
@@ -2483,6 +2484,7 @@ def _inference(
             fp16=backend.fp16,
             path_is_serialization=path_is_serialization,
             fp16_blacklist_ops=backend.fp16_blacklist_ops,
+            ml_program=backend.ml_program,
             **kwargs
         )
     elif isinstance(backend, Backend.ORT_CUDA):
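For reference, a minimal usage sketch of the new attribute through the vsmlrt wrapper; the model path and input clip below are placeholders, not part of this commit:

```python
# Sketch: run an ONNX model on the CoreML backend with the new
# MLProgram format so that fp16 can execute on the ANE.
# "model.onnx" and the blank clip are placeholder inputs.
import vapoursynth as vs
from vsmlrt import Backend, inference

core = vs.core
clip = core.std.BlankClip(format=vs.RGBS, width=1280, height=720)

# 0 = NeuralNetwork (default), 1 = MLProgram
backend = Backend.ORT_COREML(fp16=True, ml_program=1)
output = inference(clip, network_path="model.onnx", backend=backend)
```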
vsort/README.md (7 changes: 6 additions & 1 deletion)
@@ -27,7 +27,9 @@ Arguments:
 - `string provider`: Specifies the device to run the inference on.
   - `"CPU"` or `""`: pure CPU backend
   - `"CUDA"`: CUDA GPU backend, requires Nvidia Maxwell+ GPUs.
-- `int device_id`: select the GPU device for the CUDA backend.
+  - `"DML"`: DirectML backend
+  - `"COREML"`: CoreML backend
+- `int device_id`: select the GPU device for the CUDA backend.
 - `int verbosity`: specify the verbosity of logging, the default is warning.
   - 0: fatal error only, `ORT_LOGGING_LEVEL_FATAL`
   - 1: also errors, `ORT_LOGGING_LEVEL_ERROR`
@@ -40,6 +42,9 @@ Arguments:
 - `bint fp16`: whether to quantize model to fp16 for faster and memory efficient computation.
 - `bint path_is_serialization`: whether the `network_path` argument specifies an onnx serialization of type `bytes`.
 - `bint use_cuda_graph`: whether to use CUDA Graphs to improve performance and reduce CPU overhead in CUDA backend. Not all models are supported.
+- `int ml_program`: select the CoreML model format.
+  - 0: NeuralNetwork
+  - 1: MLProgram

 When `overlap` and `tilesize` are not specified, the filter will internally try to resize the network to fit the input clips. This might not always work (for example, the network might require the width to be divisible by 8), and the filter will error out in this case.
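The new argument can also be passed to the plugin directly; a sketch, assuming the plugin is loaded under its usual `ort` namespace and with "model.onnx" as a placeholder path:

```python
# Sketch: direct plugin invocation with the CoreML provider and the
# new ml_program switch (1 = MLProgram, 0 = NeuralNetwork).
output = core.ort.Model(
    clip,
    network_path="model.onnx",
    provider="COREML",
    fp16=True,
    ml_program=1,
)
```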

vsort/vs_onnxruntime.cpp (26 changes: 24 additions & 2 deletions)
@@ -454,6 +454,10 @@ struct vsOrtData {
     std::vector<VSNodeRef *> nodes;
     std::unique_ptr<VSVideoInfo> out_vi;

+#ifdef ENABLE_COREML
+    bool ml_program;
+#endif // ENABLE_COREML
+
     int overlap_w, overlap_h;

     OrtEnv * environment;
@@ -908,6 +912,19 @@ static void VS_CC vsOrtCreate(
     if (error) {
         verbosity = ORT_LOGGING_LEVEL_WARNING;
     }
+#ifdef ENABLE_COREML
+    auto ml_program = vsapi->propGetInt(in, "ml_program", 0, &error);
+
+    if (error) {
+        d->ml_program = false;
+    } else if (ml_program == 0) {
+        d->ml_program = false;
+    } else if (ml_program == 1) {
+        d->ml_program = true;
+    } else {
+        return set_error("\"ml_program\" must be 0 or 1");
+    }
+#endif // ENABLE_COREML

     // match verbosity of vs-trt
     verbosity = static_cast<OrtLoggingLevel>(4 - static_cast<int>(verbosity));
@@ -1232,10 +1249,12 @@ static void VS_CC vsOrtCreate(
     }
 #endif // ENABLE_CUDA
 #ifdef ENABLE_COREML
+    uint32_t coreml_flag = 0;
+    if (ml_program) coreml_flag |= 0x010;
     if (d->backend == Backend::COREML) {
         checkError(OrtSessionOptionsAppendExecutionProvider_CoreML(
             session_options,
-            0
+            coreml_flag
         ));
     }
 #endif // ENABLE_COREML
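A note on the magic number: `0x010` matches `COREML_FLAG_CREATE_MLPROGRAM` in onnxruntime's `coreml_provider_factory.h`, the bit that switches the CoreML execution provider from the NeuralNetwork format to MLProgram; using the named constant here would be equivalent.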
@@ -1394,7 +1413,7 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
"network_path:data;"
"overlap:int[]:opt;"
"tilesize:int[]:opt;"
"provider:data:opt;" // "": Default (CPU), "CUDA": CUDA
"provider:data:opt;" // "": Default (CPU), "CUDA": CUDA, "COREML": COREML, "DML": DML
"device_id:int:opt;"
"num_streams:int:opt;"
"verbosity:int:opt;"
@@ -1409,6 +1428,9 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
"output_format:int:opt;"
"tf32:int:opt;"
"flexible_output_prop:data:opt;"
#ifdef ENABLE_COREML
"ml_program:int:opt;"
#endif //ENABLE_COREML
, vsOrtCreate,
nullptr,
plugin