Add support for MLProgram in ort_coreml (#116)
It enables fp16 computation on the ANE instead of assigning all work to the CPU. However, MLProgram is not well supported yet: it covers far fewer operators than the regular NeuralNetwork format.
yuygfgg authored Nov 29, 2024
1 parent c684113 commit a2b1a88
Showing 4 changed files with 33 additions and 4 deletions.
.github/workflows/macos-ort.yml (2 changes: 1 addition & 1 deletion)
@@ -103,7 +103,7 @@ jobs:
     - name: Setup ONNX Runtime
       run: |
-        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.19.2/onnxruntime-osx-arm64-1.19.2.tgz
+        curl -L -o ort.tgz https://github.com/microsoft/onnxruntime/releases/download/v1.20.0/onnxruntime-osx-arm64-1.20.0.tgz
         tar -xf ort.tgz
         mv onnxruntime-* onnxruntime
scripts/vsmlrt.py (2 changes: 2 additions & 0 deletions)
@@ -273,6 +273,7 @@ class ORT_COREML:
     verbosity: int = 0
     fp16: bool = False
     fp16_blacklist_ops: typing.Optional[typing.Sequence[str]] = None
+    ml_program: int = 0

     # internal backend attributes
     supports_onnx_serialization: bool = True
@@ -2483,6 +2484,7 @@ def _inference(
             fp16=backend.fp16,
             path_is_serialization=path_is_serialization,
             fp16_blacklist_ops=backend.fp16_blacklist_ops,
+            ml_program=backend.ml_program,
             **kwargs
         )
     elif isinstance(backend, Backend.ORT_CUDA):
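For reference, a minimal usage sketch of the new attribute through the vsmlrt wrapper; the model path and input clip below are placeholders, not part of this commit:

```python
# Sketch: run an ONNX model on the CoreML backend with the new
# MLProgram format so that fp16 can execute on the ANE.
# "model.onnx" and the blank clip are placeholder inputs.
import vapoursynth as vs
from vsmlrt import Backend, inference

core = vs.core
clip = core.std.BlankClip(format=vs.RGBS, width=1280, height=720)

# 0 = NeuralNetwork (default), 1 = MLProgram
backend = Backend.ORT_COREML(fp16=True, ml_program=1)
output = inference(clip, network_path="model.onnx", backend=backend)
```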
vsort/README.md (7 changes: 6 additions & 1 deletion)
@@ -27,7 +27,9 @@ Arguments:
 - `string provider`: Specifies the device to run the inference on.
   - `"CPU"` or `""`: pure CPU backend
   - `"CUDA"`: CUDA GPU backend, requires Nvidia Maxwell+ GPUs.
-- `int device_id`: select the GPU device for the CUDA backend.
+  - `"DML"`: DirectML backend
+  - `"COREML"`: CoreML backend
+- `int device_id`: select the GPU device for the CUDA backend.
 - `int verbosity`: specify the verbosity of logging, the default is warning.
   - 0: fatal error only, `ORT_LOGGING_LEVEL_FATAL`
   - 1: also errors, `ORT_LOGGING_LEVEL_ERROR`
@@ -40,6 +42,9 @@ Arguments:
 - `bint fp16`: whether to quantize model to fp16 for faster and memory efficient computation.
 - `bint path_is_serialization`: whether the `network_path` argument specifies an onnx serialization of type `bytes`.
 - `bint use_cuda_graph`: whether to use CUDA Graphs to improve performance and reduce CPU overhead in CUDA backend. Not all models are supported.
+- `int ml_program`: select the CoreML model format.
+  - 0: NeuralNetwork
+  - 1: MLProgram

 When `overlap` and `tilesize` are not specified, the filter will internally try to resize the network to fit the input clips. This might not always work (for example, the network might require the width to be divisible by 8), and the filter will error out in this case.
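The new argument can also be passed to the plugin directly; a sketch, assuming the plugin is loaded under its usual `ort` namespace and with "model.onnx" as a placeholder path:

```python
# Sketch: direct plugin invocation with the CoreML provider and the
# new ml_program switch (1 = MLProgram, 0 = NeuralNetwork).
output = core.ort.Model(
    clip,
    network_path="model.onnx",
    provider="COREML",
    fp16=True,
    ml_program=1,
)
```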

vsort/vs_onnxruntime.cpp (26 changes: 24 additions & 2 deletions)
@@ -454,6 +454,10 @@ struct vsOrtData {
     std::vector<VSNodeRef *> nodes;
     std::unique_ptr<VSVideoInfo> out_vi;

+#ifdef ENABLE_COREML
+    bool ml_program;
+#endif // ENABLE_COREML
+
     int overlap_w, overlap_h;

     OrtEnv * environment;
@@ -908,6 +912,19 @@ static void VS_CC vsOrtCreate(
     if (error) {
         verbosity = ORT_LOGGING_LEVEL_WARNING;
     }
+#ifdef ENABLE_COREML
+    auto ml_program = vsapi->propGetInt(in, "ml_program", 0, &error);
+
+    if (error) {
+        d->ml_program = false;
+    } else if (ml_program == 0) {
+        d->ml_program = false;
+    } else if (ml_program == 1) {
+        d->ml_program = true;
+    } else {
+        return set_error("\"ml_program\" must be 0 or 1");
+    }
+#endif // ENABLE_COREML

     // match verbosity of vs-trt
     verbosity = static_cast<OrtLoggingLevel>(4 - static_cast<int>(verbosity));
@@ -1232,10 +1249,12 @@ static void VS_CC vsOrtCreate(
     }
 #endif // ENABLE_CUDA
 #ifdef ENABLE_COREML
+    uint32_t coreml_flag = 0;
+    if (ml_program) coreml_flag |= 0x010;
     if (d->backend == Backend::COREML) {
         checkError(OrtSessionOptionsAppendExecutionProvider_CoreML(
             session_options,
-            0
+            coreml_flag
         ));
     }
 #endif // ENABLE_COREML
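A note on the magic number: `0x010` matches `COREML_FLAG_CREATE_MLPROGRAM` in onnxruntime's `coreml_provider_factory.h`, the bit that switches the CoreML execution provider from the NeuralNetwork format to MLProgram; using the named constant here would be equivalent.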
@@ -1394,7 +1413,7 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
"network_path:data;"
"overlap:int[]:opt;"
"tilesize:int[]:opt;"
"provider:data:opt;" // "": Default (CPU), "CUDA": CUDA
"provider:data:opt;" // "": Default (CPU), "CUDA": CUDA, "COREML": COREML, "DML": DML
"device_id:int:opt;"
"num_streams:int:opt;"
"verbosity:int:opt;"
@@ -1409,6 +1428,9 @@ VS_EXTERNAL_API(void) VapourSynthPluginInit(
"output_format:int:opt;"
"tf32:int:opt;"
"flexible_output_prop:data:opt;"
#ifdef ENABLE_COREML
"ml_program:int:opt;"
#endif //ENABLE_COREML
, vsOrtCreate,
nullptr,
plugin