From a2b1a88a22deba036b3a8caabe0a68e1dbfb4b4b Mon Sep 17 00:00:00 2001
From: yuygfgg <140488233+yuygfgg@users.noreply.github.com>
Date: Fri, 29 Nov 2024 09:49:45 +0800
Subject: [PATCH] Add support for MLProgram in ort_coreml (#116)

This enables fp16 computation on the ANE instead of assigning everything
to the CPU. Note, however, that MLProgram is not yet well supported and
covers far fewer ops than the regular NeuralNetwork format.
---
 .github/workflows/macos-ort.yml |  2 +-
 scripts/vsmlrt.py               |  2 ++
 vsort/README.md                 |  7 ++++++-
 vsort/vs_onnxruntime.cpp        | 26 ++++++++++++++++++++++++--
 4 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/macos-ort.yml b/.github/workflows/macos-ort.yml
index 0714abf..3889eb3 100644
--- a/.github/workflows/macos-ort.yml
+++ b/.github/workflows/macos-ort.yml
@@ -103,7 +103,7 @@ jobs:
 
     - name: Setup ONNX Runtime
       run: |
-        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-osx-arm64-1.19.2.tgz
+        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.20.0/onnxruntime-osx-arm64-1.20.0.tgz
         tar -xf ort.tgz
         mv onnxruntime-* onnxruntime
 
diff --git a/scripts/vsmlrt.py b/scripts/vsmlrt.py
index 232da5f..7598914 100644
--- a/scripts/vsmlrt.py
+++ b/scripts/vsmlrt.py
@@ -273,6 +273,7 @@ class ORT_COREML:
     verbosity: int = 0
     fp16: bool = False
     fp16_blacklist_ops: typing.Optional[typing.Sequence[str]] = None
+    ml_program: int = 0
 
     # internal backend attributes
     supports_onnx_serialization: bool = True
@@ -2483,6 +2484,7 @@ def _inference(
             fp16=backend.fp16,
             path_is_serialization=path_is_serialization,
             fp16_blacklist_ops=backend.fp16_blacklist_ops,
+            ml_program=backend.ml_program,
             **kwargs
         )
     elif isinstance(backend, Backend.ORT_CUDA):
diff --git a/vsort/README.md b/vsort/README.md
index cb5dbad..33ca0f6 100644
--- a/vsort/README.md
+++ b/vsort/README.md
@@ -27,7 +27,9 @@ Arguments:
   - `string provider`: Specifies the device to run the inference on.
     - `"CPU"` or `""`: pure CPU backend
     - `"CUDA"`: CUDA GPU backend, requires Nvidia Maxwell+ GPUs.
-  - `int device_id`: select the GPU device for the CUDA backend.
+    - `"DML"`: DirectML backend
+    - `"COREML"`: CoreML backend
+  - `int device_id`: select the GPU device for the CUDA backend.
   - `int verbosity`: specify the verbosity of logging, the default is warning.
     - 0: fatal error only, `ORT_LOGGING_LEVEL_FATAL`
     - 1: also errors, `ORT_LOGGING_LEVEL_ERROR`
@@ -40,6 +42,9 @@ Arguments:
   - `bint fp16`: whether to quantize model to fp16 for faster and memory efficient computation.
   - `bint path_is_serialization`: whether the `network_path` argument specifies an onnx serialization of type `bytes`.
   - `bint use_cuda_graph`: whether to use CUDA Graphs to improve performance and reduce CPU overhead in CUDA backend. Not all models are supported.
+  - `int ml_program`: select the model format used by the CoreML provider.
+    - 0: NeuralNetwork
+    - 1: MLProgram
 
 When `overlap` and `tilesize` are not specified, the filter will internally try to resize the network to fit the input clips. This might not always work (for example, the network might require the width to be divisible by 8), and the filter will error out in this case.
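Usage note: the `ml_program` field added to the `ORT_COREML` backend above is plumbed straight through to the vsort plugin. A minimal sketch of how a script might opt in, assuming vsmlrt's generic `inference()` helper and a hypothetical `model.onnx` path:

```python
import vapoursynth as vs
from vsmlrt import Backend, inference

core = vs.core

# Hypothetical input clip; any clip in a format the model accepts works.
clip = core.std.BlankClip(format=vs.RGBS, width=1280, height=720)

# ml_program=1 requests the MLProgram model format, which permits fp16
# execution on the ANE; the default ml_program=0 keeps NeuralNetwork.
backend = Backend.ORT_COREML(fp16=True, ml_program=1)

output = inference(clip, network_path="model.onnx", backend=backend)
```

Pairing `ml_program=1` with `fp16=True` is what lets the network run in fp16 on the ANE, per the commit message.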
diff --git a/vsort/vs_onnxruntime.cpp b/vsort/vs_onnxruntime.cpp
index 9acedc5..d3cd173 100644
--- a/vsort/vs_onnxruntime.cpp
+++ b/vsort/vs_onnxruntime.cpp
@@ -454,6 +454,10 @@ struct vsOrtData {
     std::vector<VSNodeRef *> nodes;
     std::unique_ptr<VSVideoInfo> out_vi;
 
+#ifdef ENABLE_COREML
+    bool ml_program;
+#endif // ENABLE_COREML
+
     int overlap_w, overlap_h;
 
     OrtEnv * environment;
@@ -908,6 +912,19 @@ static void VS_CC vsOrtCreate(
     if (error) {
         verbosity = ORT_LOGGING_LEVEL_WARNING;
     }
+#ifdef ENABLE_COREML
+    auto ml_program = vsapi->propGetInt(in, "ml_program", 0, &error);
+
+    if (error) {
+        d->ml_program = false;
+    } else if (ml_program == 0) {
+        d->ml_program = false;
+    } else if (ml_program == 1) {
+        d->ml_program = true;
+    } else {
+        return set_error("\"ml_program\" must be 0 or 1");
+    }
+#endif // ENABLE_COREML
 
     // match verbosity of vs-trt
     verbosity = static_cast<OrtLoggingLevel>(4 - static_cast<int>(verbosity));
@@ -1232,10 +1249,12 @@ static void VS_CC vsOrtCreate(
     }
 #endif // ENABLE_CUDA
 #ifdef ENABLE_COREML
+    uint32_t coreml_flag = 0;
+    if (d->ml_program) coreml_flag |= 0x010; // COREML_FLAG_CREATE_MLPROGRAM
     if (d->backend == Backend::COREML) {
         checkError(OrtSessionOptionsAppendExecutionProvider_CoreML(
             session_options,
-            0
+            coreml_flag
         ));
     }
 #endif // ENABLE_COREML
@@ -1394,7 +1413,7 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
         "network_path:data;"
         "overlap:int[]:opt;"
         "tilesize:int[]:opt;"
-        "provider:data:opt;" // "": Default (CPU), "CUDA": CUDA
+        "provider:data:opt;" // "": Default (CPU), "CUDA": CUDA, "COREML": CoreML, "DML": DirectML
         "device_id:int:opt;"
         "num_streams:int:opt;"
         "verbosity:int:opt;"
@@ -1409,6 +1428,9 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
         "output_format:int:opt;"
         "tf32:int:opt;"
         "flexible_output_prop:data:opt;"
+#ifdef ENABLE_COREML
+        "ml_program:int:opt;"
+#endif // ENABLE_COREML
         , vsOrtCreate, nullptr, plugin
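For completeness, the same switch is reachable without vsmlrt through the `ml_program:int:opt` argument registered above. A sketch of a direct plugin call, assuming a CoreML-enabled build of vsort and the same hypothetical model path:

```python
import vapoursynth as vs

core = vs.core

# Hypothetical input clip, as in the previous sketch.
clip = core.std.BlankClip(format=vs.RGBS, width=1280, height=720)

# ml_program=1 sets COREML_FLAG_CREATE_MLPROGRAM (0x010) on the CoreML EP;
# values other than 0 or 1 make the filter error out.
output = core.ort.Model(
    clip,
    network_path="model.onnx",
    provider="COREML",
    fp16=True,
    ml_program=1,
)
```

Note that `ml_program` is only registered when the plugin is compiled with `ENABLE_COREML`, so builds without CoreML support will reject the argument.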