diff --git a/.circleci/config.yml b/.circleci/config.yml index 2cf3366a..72829bd5 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -45,9 +45,11 @@ jobs: Pop-Location - restore_cache: keys: + - v2338-downs-{{ .Branch }} - v2338-downs-{{ checksum ".circleci/config.yml" }} - restore_cache: keys: + - v2338-bins-{{ .Branch }} - v2338-bins-{{ checksum ".circleci/config.yml" }} - run: name: "Install: port-setup.txt" @@ -58,6 +60,7 @@ jobs: --overlay-ports="$env:CIRCLE_WORKING_DIRECTORY/ports" ` $(Get-Content "$env:CIRCLE_WORKING_DIRECTORY/.circleci/port-setup.txt") working_directory: vcpkg + no_output_timeout: 1h - save_cache: key: v2338-downs-{{ checksum ".circleci/config.yml" }} paths: @@ -112,13 +115,13 @@ jobs: equal: [ main, << pipeline.git.branch >> ] steps: - save_cache: - key: v2338-bins-{{ .Branch }} + key: v2338-bins-{{ checksum ".circleci/config.yml" }} paths: - C:/vcpkg-bins - save_cache: key: v2338-downs-{{ .Branch }} paths: - - C:/vcpkg-bins + - C:/vcpkg-downs - save_cache: key: v2338-bins-{{ .Branch }} paths: diff --git a/.github/workflows/build-macos.yml b/.github/workflows/build-macos.yml index 75706165..36283eee 100644 --- a/.github/workflows/build-macos.yml +++ b/.github/workflows/build-macos.yml @@ -13,7 +13,7 @@ jobs: triplet: [x64-osx, arm64-ios] env: VCPKG_DOWNLOADS: "/usr/local/share/vcpkg-downloads" - VCPKG_DEFAULT_BINARY_CACHE: "/usr/local/share/vcpkg-downloads" + VCPKG_DEFAULT_BINARY_CACHE: "/usr/local/share/vcpkg-archives" VCPKG_OVERLAY_PORTS: "${{ github.workspace }}/ports" VCPKG_OVERLAY_TRIPLETS: ${{ github.workspace }}/triplets steps: @@ -24,9 +24,10 @@ jobs: mkdir -p ${VCPKG_DEFAULT_BINARY_CACHE} - uses: actions/cache@v3 with: - key: "vcpkg-2023.08-${{ runner.os }}" + key: "v2338-${{ runner.os }}" path: | /usr/local/share/vcpkg-downloads + /usr/local/share/vcpkg-archives - uses: ConorMacBride/install-package@v1 with: brew: ninja autoconf automake libtool diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index 52386fde..bf5b3b64 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -12,7 +12,7 @@ jobs: matrix: triplet: [x64-windows] env: - VCPKG_DOWNLOADS: "C:/vcpkg/archives" + VCPKG_DOWNLOADS: "C:/vcpkg/downloads" VCPKG_DEFAULT_BINARY_CACHE: "C:/vcpkg/archives" VCPKG_OVERLAY_PORTS: "${{ github.workspace }}/ports" VCPKG_OVERLAY_TRIPLETS: ${{ github.workspace }}/triplets @@ -24,8 +24,9 @@ jobs: New-Item -Type Directory -Force ${env:VCPKG_DEFAULT_BINARY_CACHE} - uses: actions/cache@v3 with: - key: "vcpkg-2023.08-${{ runner.os }}" + key: "v2338-${{ runner.os }}" path: | + C:/vcpkg/downloads C:/vcpkg/archives - uses: microsoft/setup-msbuild@v1.1 with: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 19fade51..a138669d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -75,9 +75,9 @@ stages: - powershell: New-Item -Type Directory -Force "$env:VCPKG_DEFAULT_BINARY_CACHE" - task: Cache@2 inputs: - key: '"2338-bin-mac-host"' + key: '"v2338-bin-mac-host"' restoreKeys: | - "2338-bin-mac-host" + "v2338-bin-mac-host" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 displayName: "coreml-tools" @@ -124,15 +124,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-windows"' + key: '"v2338-down-windows"' restoreKeys: | - "2338-down-windows" + "v2338-down-windows" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-windows" | "$(vcpkg.default.triplet)"' + key: '"v2338-bin-windows" | 
"$(vcpkg.default.triplet)"' restoreKeys: | - "2338-bin-windows" | "$(vcpkg.default.triplet)" + "v2338-bin-windows" | "$(vcpkg.default.triplet)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 inputs: @@ -186,15 +186,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-uwp"' + key: '"v2338-down-uwp"' restoreKeys: | - "2338-down-uwp" + "v2338-down-uwp" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-uwp" | "$(vcpkg.default.triplet)"' + key: '"v2338-bin-uwp" | "$(vcpkg.default.triplet)"' restoreKeys: | - "2338-bin-uwp" | "$(vcpkg.default.triplet)" + "v2338-bin-uwp" | "$(vcpkg.default.triplet)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 inputs: @@ -253,15 +253,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-ubuntu"' + key: '"v2338-down-ubuntu"' restoreKeys: | - "2338-down-ubuntu" + "v2338-down-ubuntu" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-ubuntu" | "$(vcpkg.default.triplet)_$(cc)"' + key: '"v2338-bin-ubuntu" | "$(vcpkg.default.triplet)_$(cc)"' restoreKeys: | - "2338-bin-ubuntu" | "$(vcpkg.default.triplet)_$(cc)" + "v2338-bin-ubuntu" | "$(vcpkg.default.triplet)_$(cc)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 displayName: "tensorflow-lite" @@ -311,15 +311,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-android"' + key: '"v2338-down-android"' restoreKeys: | - "2338-down-android" + "v2338-down-android" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-android" | "$(vcpkg.default.triplet)"' + key: '"v2338-bin-android" | "$(vcpkg.default.triplet)"' restoreKeys: | - "2338-bin-android" | "$(vcpkg.default.triplet)" + "v2338-bin-android" | "$(vcpkg.default.triplet)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 inputs: @@ -373,15 +373,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-apple"' + key: '"v2338-down-apple"' restoreKeys: | - "2338-down-apple" + "v2338-down-apple" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-osx" | "$(vcpkg.default.triplet)"' + key: '"v2338-bin-osx" | "$(vcpkg.default.triplet)"' restoreKeys: | - "2338-bin-osx" | "$(vcpkg.default.triplet)" + "v2338-bin-osx" | "$(vcpkg.default.triplet)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 inputs: @@ -433,15 +433,15 @@ stages: vcpkgGitCommitId: $(vcpkg.commit) - task: Cache@2 inputs: - key: '"2338-down-apple"' + key: '"v2338-down-apple"' restoreKeys: | - "2338-down-apple" + "v2338-down-apple" path: $(Build.BinariesDirectory)/vcpkg/downloads - task: Cache@2 inputs: - key: '"2338-bin-ios" | "$(vcpkg.default.triplet)"' + key: '"v2338-bin-ios" | "$(vcpkg.default.triplet)"' restoreKeys: | - "2338-bin-ios" | "$(vcpkg.default.triplet)" + "v2338-bin-ios" | "$(vcpkg.default.triplet)" path: $(vcpkg.default.binary.cache) - task: run-vcpkg@0 displayName: "arm64-ios" diff --git a/ports/tensorflow-lite/fix-absl.patch b/ports/tensorflow-lite/fix-absl.patch deleted file mode 100644 index b21cb697..00000000 --- a/ports/tensorflow-lite/fix-absl.patch +++ /dev/null @@ -1,227 +0,0 @@ -diff --git a/tensorflow/lite/delegates/gpu/gl/compiler.cc b/tensorflow/lite/delegates/gpu/gl/compiler.cc -index d6e670e0..bafa99fb 100644 ---- a/tensorflow/lite/delegates/gpu/gl/compiler.cc -+++ b/tensorflow/lite/delegates/gpu/gl/compiler.cc -@@ -196,7 +196,7 @@ class 
CompilerImpl : public Compiler { - // Prepare readonly objects and check whether object types are supported. - for (auto node : compiled_graph_.nodes()) { - auto& attr = -- std::any_cast(node->operation.attributes); -+ absl::any_cast(node->operation.attributes); - - // Set workload explicitly. - if (attr.code.workload == uint3()) { -@@ -251,7 +251,7 @@ class CompilerImpl : public Compiler { - ShaderCodegen codegen(options_, gpu_info_); - for (auto node : compiled_graph_.nodes()) { - auto& attr = -- std::any_cast(node->operation.attributes); -+ absl::any_cast(node->operation.attributes); - if (attr.code.source_code.empty()) { - // noop. Skip this node. - continue; -diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc -index 761fb8b4..c7a7de9a 100644 ---- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc -+++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc -@@ -46,7 +46,7 @@ std::pair MakeDataReplacement(int n, int k) { - - TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { - auto& node_attr = -- std::any_cast(node->operation.attributes); -+ absl::any_cast(node->operation.attributes); - auto& node_code = node_attr.code; - - if (node_code.input != IOStructure::AUTO) { -@@ -75,7 +75,7 @@ TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { - if (graph->FindOutputs(input_producer->id).size() != 1) { - continue; // input node has more than one output - } -- auto& input_producer_attr = std::any_cast( -+ auto& input_producer_attr = absl::any_cast( - input_producer->operation.attributes); - if (input_producer_attr.code.output != IOStructure::AUTO) { - continue; -@@ -143,7 +143,7 @@ TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { - for (auto input_and_num : nodes_to_fuse) { - auto& input = input_and_num.first; - auto& attr = -- std::any_cast(input->operation.attributes); -+ absl::any_cast(input->operation.attributes); - auto super_inputs = graph->FindInputs(input->id); - - // Replace all internal references in the input source code. For example: -diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc -index f227ab21..486d4544 100644 ---- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc -+++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc -@@ -40,9 +40,9 @@ TransformResult FuseAutoOutputWithInline::ApplyToNodesSequence( - Node* node1 = sequence.front(); - Node* node2 = sequence.back(); - auto& attr1 = -- std::any_cast(node1->operation.attributes); -+ absl::any_cast(node1->operation.attributes); - auto& attr2 = -- std::any_cast(node2->operation.attributes); -+ absl::any_cast(node2->operation.attributes); - - if (attr1.code.output != IOStructure::AUTO || - graph->FindInputs(node2->id).size() != 1 || -diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc -index 1e27404b..b7719c49 100644 ---- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc -+++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc -@@ -81,7 +81,7 @@ class InplaceCodeRewrite : public InlineRewrite { - TransformResult RemoveUnusedInplaceUpdates::ApplyToNode(Node* node, - GraphFloat32* graph) { - auto& attr = -- std::any_cast(node->operation.attributes); -+ absl::any_cast(node->operation.attributes); - // Remove inplace block by rewriting to empty string. 
- EmptyInplaceRewrite rewrite; - TextPreprocessor preprocessor('$', true); -@@ -100,9 +100,9 @@ TransformResult FuseInplaceUpdate::ApplyToNodesSequence( - Node* node1 = sequence.front(); - Node* node2 = sequence.back(); - auto& attr1 = -- std::any_cast(node1->operation.attributes); -+ absl::any_cast(node1->operation.attributes); - auto& attr2 = -- std::any_cast(node2->operation.attributes); -+ absl::any_cast(node2->operation.attributes); - - if (graph->FindInputs(node2->id).size() != 1 || - graph->FindOutputs(node2->id).size() != 1 || -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc -index a14d7f24..faf3c0e3 100644 ---- a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc -@@ -42,11 +42,11 @@ class Add : public NodeShader { - absl::Status GenerateCode(const GenerationContext& ctx, - GeneratedCode* generated_code) const final { - const auto& attr = std::any_cast(ctx.op_attr); -- auto adds = std::get_if>(&attr.param); -- auto scalar = std::get_if(&attr.param); -+ auto adds = absl::get_if>(&attr.param); -+ auto scalar = absl::get_if(&attr.param); - - const auto* hwc_tensor = -- std::get_if>(&attr.param); -+ absl::get_if>(&attr.param); - - if (hwc_tensor) { - std::string code; -@@ -69,7 +69,7 @@ class Add : public NodeShader { - uint3(hwc_tensor->shape.w, hwc_tensor->shape.h, - DivideRoundUp(hwc_tensor->shape.c, 4)), - ConvertToPHWC4( -- std::get>(attr.param)))}}, -+ absl::get>(attr.param)))}}, - /*shared_variables=*/{}, - // Declare workload explicitly because shader depends on gid.z. - /*workload=*/ -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc -index b2a6a997..4bc34fc5 100644 ---- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc -@@ -159,10 +159,10 @@ class ElementwiseTwoArguments : public NodeShader { - argument1 = "$input_data_1[0, 0, gid.z]$"; - } else { // Scalar of const vector case - const auto& attr = -- std::any_cast(ctx.op_attr); -+ absl::any_cast(ctx.op_attr); - const auto* tensor = -- std::get_if>(&attr.param); -- const auto* scalar = std::get_if(&attr.param); -+ absl::get_if>(&attr.param); -+ const auto* scalar = absl::get_if(&attr.param); - if (!tensor && !scalar) { - return absl::InvalidArgumentError( - "Couldn't read scalar of const vector data from the attributes."); -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc -index 3d21a0ae..410ea0c8 100644 ---- a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc -@@ -87,9 +87,9 @@ absl::Status GenerateMultiplyScalarCode( - const NodeShader::GenerationContext& ctx, GeneratedCode* generated_code) { - const auto& attr = std::any_cast(ctx.op_attr); - -- if (std::holds_alternative(attr.param)) { -+ if (absl::holds_alternative(attr.param)) { - *generated_code = { -- /*parameters=*/{{"scalar", std::get(attr.param)}}, -+ /*parameters=*/{{"scalar", absl::get(attr.param)}}, - /*objects=*/{}, - /*shared_variables=*/{}, - /*workload=*/uint3(), -@@ -101,13 +101,13 @@ absl::Status GenerateMultiplyScalarCode( - return absl::OkStatus(); - } - -- if (std::holds_alternative>(attr.param)) { -+ if (absl::holds_alternative>(attr.param)) { - *generated_code = { - /*parameters=*/{}, - /*objects=*/ - {{"mul_buffer", - MakeReadonlyObject( -- std::get>(attr.param).data)}}, -+ 
absl::get>(attr.param).data)}}, - /*shared_variables=*/{}, - // Declare workload explicitly because shader depends on gid.z. - /*workload=*/ -@@ -122,7 +122,7 @@ absl::Status GenerateMultiplyScalarCode( - return absl::OkStatus(); - } - -- if (std::holds_alternative>(attr.param)) { -+ if (absl::holds_alternative>(attr.param)) { - *generated_code = { - /*parameters=*/{}, - /*objects=*/ -@@ -132,7 +132,7 @@ absl::Status GenerateMultiplyScalarCode( - static_cast(ctx.input_shapes[0][1]), - DivideRoundUp(static_cast(ctx.input_shapes[0][3]), 4)), - ConvertToPHWC4( -- std::get>(attr.param)))}}, -+ absl::get>(attr.param)))}}, - /*shared_variables=*/{}, - // Declare workload explicitly because shader depends on gid.z. - /*workload=*/ -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc -index 58882ba1..c71579ea 100644 ---- a/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc -@@ -40,8 +40,8 @@ class PReLULinearAlpha : public NodeShader { - public: - absl::Status GenerateCode(const GenerationContext& ctx, - GeneratedCode* generated_code) const final { -- const auto& attr = std::any_cast(ctx.op_attr); -- auto alpha = std::get_if>(&attr.alpha); -+ const auto& attr = absl::any_cast(ctx.op_attr); -+ auto alpha = absl::get_if>(&attr.alpha); - if (!alpha) { - return absl::InvalidArgumentError("Alpha is missing"); - } -@@ -75,8 +75,8 @@ class PReLUFull : public NodeShader { - public: - absl::Status GenerateCode(const GenerationContext& ctx, - GeneratedCode* generated_code) const final { -- const auto& attr = std::any_cast(ctx.op_attr); -- auto alpha = std::get_if>(&attr.alpha); -+ const auto& attr = absl::any_cast(ctx.op_attr); -+ auto alpha = absl::get_if>(&attr.alpha); - if (!alpha) { - return absl::InvalidArgumentError("Alpha is missing"); - } -@@ -118,8 +118,8 @@ class PReLU : public NodeShader { - public: - absl::Status GenerateCode(const GenerationContext& ctx, - GeneratedCode* generated_code) const final { -- const auto& attr = std::any_cast(ctx.op_attr); -- auto* alpha = std::get_if>(&attr.alpha); -+ const auto& attr = absl::any_cast(ctx.op_attr); -+ auto* alpha = absl::get_if>(&attr.alpha); - return alpha ? 
full_.GenerateCode(ctx, generated_code) - : linear_.GenerateCode(ctx, generated_code); - } diff --git a/ports/tensorflow-lite/fix-cmake-c-api.patch b/ports/tensorflow-lite/fix-cmake-c-api.patch new file mode 100644 index 00000000..4d595ffb --- /dev/null +++ b/ports/tensorflow-lite/fix-cmake-c-api.patch @@ -0,0 +1,50 @@ +diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt +index f8709c6a..161e3421 100644 +--- a/tensorflow/lite/CMakeLists.txt ++++ b/tensorflow/lite/CMakeLists.txt +@@ -640,6 +640,7 @@ if(TFLITE_ENABLE_INSTALL) + EXPORT ${PROJECT_NAME}Targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} ++ RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} + ) + + foreach(hdr ${_ALL_TFLITE_HDRS}) +@@ -671,6 +672,37 @@ if(TFLITE_ENABLE_INSTALL) + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + ) + endif() ++ ++# see tensorflow/lite/c/CMakeLists.txt ++target_sources(tensorflow-lite PRIVATE ++ ${TFLITE_SOURCE_DIR}/core/c/c_api.cc ++ ${TFLITE_SOURCE_DIR}/core/c/c_api_experimental.cc ++ ${TFLITE_SOURCE_DIR}/core/c/common.cc ++ ${TFLITE_SOURCE_DIR}/core/c/registration_external.cc ++ c/builtin_op_data.h ++ c/c_api.h ++ c/c_api_experimental.h ++ c/c_api_internal.h ++ c/c_api_types.h ++ c/common.h ++) ++target_include_directories(tensorflow-lite PRIVATE ++ c ++) ++# set_target_properties(tensorflow-lite PROPERTEIS ++# OUTPUT_NAME tensorflowlite_c ++# ) ++ ++if (BUILD_SHARED_LIBS) ++ if (WIN32) ++ target_compile_definitions(tensorflow-lite PRIVATE TFL_COMPILE_LIBRARY) ++ elseif (APPLE) ++ target_link_options(tensorflow-lite PRIVATE "-Wl,-exported_symbols_list,${TFLITE_SOURCE_DIR}/c/exported_symbols.lds") ++ else () ++ target_link_options(tensorflow-lite PRIVATE "-Wl,--version-script,${TFLITE_SOURCE_DIR}/c/version_script.lds") ++ endif() ++endif() ++ + return() # drop following targets + # The kernel tests. + if(TFLITE_KERNEL_TEST) diff --git a/ports/tensorflow-lite/fix-cmake-gpu.patch b/ports/tensorflow-lite/fix-cmake-gpu.patch new file mode 100644 index 00000000..1227f62c --- /dev/null +++ b/ports/tensorflow-lite/fix-cmake-gpu.patch @@ -0,0 +1,161 @@ +diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt +index 161e3421..27760a43 100644 +--- a/tensorflow/lite/CMakeLists.txt ++++ b/tensorflow/lite/CMakeLists.txt +@@ -26,7 +26,7 @@ + # - Many features in experimental + # - Host Tools (i.e conversion / analysis tools etc.) + +-cmake_minimum_required(VERSION 3.16) ++cmake_minimum_required(VERSION 3.27) + if(NOT CMAKE_BUILD_TYPE) + message(STATUS "Setting build type to Release, for debug builds use" + "'-DCMAKE_BUILD_TYPE=Debug'.") +@@ -268,18 +268,61 @@ populate_tflite_source_vars("core/tools" TFLITE_CORE_TOOLS_SRCS) + populate_tflite_source_vars("c" TFLITE_C_SRCS) + populate_tflite_source_vars("delegates" TFLITE_DELEGATES_SRCS) + if(TFLITE_ENABLE_GPU) +- find_package(opencl_headers REQUIRED) +- find_package(vulkan_headers REQUIRED) +- find_package(fp16_headers REQUIRED) ++ find_package(OpenCL REQUIRED) # OpenCL::OpenCL ++ find_package(Vulkan COMPONENTS Headers) # Vulkan::Headers Vulkan::Vulkan ++ if(Vulkan_FOUND) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES Vulkan::Headers) ++ endif(Vulkan_FOUND) + # Android NDK already has OpenGL, EGL headers. 
+- if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android") +- find_package(opengl_headers REQUIRED) +- find_package(egl_headers REQUIRED) +- endif() ++ # In Windows, we use Google/ANGLE from vcpkg ++ if(ANDROID OR WIN32) ++ set(OPENGL_USE_EGL true) ++ find_path(OPENGL_EGL_INCLUDE_DIRS "EGL/egl.h" REQUIRED) ++ find_library(OPENGL_egl_LIBRARY NAMES EGL libEGL REQUIRED) ++ # it's using "version 310 es" ++ find_path(OPENGL_GLES3_INCLUDE_DIR "GLES3/gl3.h" REQUIRED) ++ find_library(OPENGL_gles3_LIBRARY NAMES GLESv3 libGLESv3 GLESv2 libGLESv2 REQUIRED) ++ find_library(OPENGL_gl_LIBRARY NAMES GLESv3 libGLESv3 GLESv2 libGLESv2 REQUIRED) ++ find_package(OpenGL REQUIRED COMPONENTS GLES3) # OpenGL::GLES3 ++ list(APPEND TFLITE_TARGET_DEPENDENCIES OpenGL::GLES3 ${OPENGL_egl_LIBRARY}) ++ list(APPEND TFLITE_TARGET_PRIVATE_OPTIONS "-DEGL_EGLEXT_PROTOTYPES") ++ # include GPU sources ... ++ populate_tflite_source_vars("delegates/gpu/gl" TFLITE_DELEGATES_GPU_GL_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("delegates/gpu/gl/compiler" TFLITE_DELEGATES_GPU_GL_COMPILER_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("delegates/gpu/gl/converters" TFLITE_DELEGATES_GPU_GL_CONVERTERS_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("delegates/gpu/gl/kernels" TFLITE_DELEGATES_GPU_GL_KERNELS_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("delegates/gpu/gl/workgroups" TFLITE_DELEGATES_GPU_GL_WORKGROUPS_SRCS FILTER "(_test)\\.(cc|h)$") ++ list(APPEND TFLITE_DELEGATES_GPU_SRCS ++ ${TFLITE_DELEGATES_GPU_GL_SRCS} ++ ${TFLITE_DELEGATES_GPU_GL_COMPILER_SRCS} ++ ${TFLITE_DELEGATES_GPU_GL_CONVERTERS_SRCS} ++ ${TFLITE_DELEGATES_GPU_GL_KERNELS_SRCS} ++ ${TFLITE_DELEGATES_GPU_GL_WORKGROUPS_SRCS} ++ ) ++ endif() ++ if(ANDROID) ++ populate_tflite_source_vars("core/async/interop/c" TFLITE_CORE_ASYNC_INTEROP_C_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("delegates/utils" TFLITE_DELEGATES_UTILS_SRCS FILTER "(_test)\\.(cc|h)$") ++ populate_tflite_source_vars("async" TFLITE_ASYNC_SRCS FILTER "(_test)\\.(cc|h)$") ++ list(APPEND TFLITE_DELEGATES_GPU_SRCS ++ ${TFLITE_CORE_ASYNC_INTEROP_C_SRCS} ++ ${TFLITE_DELEGATES_UTILS_SRCS} ++ ${TFLITE_ASYNC_SRCS} ++ ) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES android nativewindow) ++ endif() +- populate_tflite_source_vars( +- "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS +- FILTER "(_test|gl_interop|gpu_api_delegate|egl_sync)\\.(cc|h)$" +- ) ++ if(NOT (ANDROID OR WIN32)) ++ populate_tflite_source_vars( ++ "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS ++ FILTER "(_test|gl_interop|gpu_api_delegate|egl_sync)\\.(cc|h)$" ++ ) ++ list(APPEND TFLITE_TARGET_PRIVATE_OPTIONS "-DCL_DELEGATE_NO_GL") ++ else() ++ populate_tflite_source_vars( ++ "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS ++ FILTER "(_test)\\.(cc|h)$" ++ ) ++ endif() + populate_tflite_source_vars( + "delegates/gpu/cl/default" TFLITE_DELEGATES_GPU_CL_DEFAULT_SRCS + FILTER "(_test)\\.(cc|h)$" +@@ -355,11 +398,13 @@ if(TFLITE_ENABLE_GPU) + ${TFLITE_SOURCE_DIR}/delegates/gpu/common + ${TFLITE_SOURCE_DIR}/delegates/gpu/common/task + ) +- if(TFLITE_ENABLE_METAL AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") ++ if(TFLITE_ENABLE_METAL AND APPLE) + # + # libmetal_delegate library + # + enable_language(OBJCXX) ++ # Xcode will apply -fobjc-arc ++ set(CMAKE_XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES") + list(APPEND TFLITE_DELEGATES_METAL_SRCS + ${TFLITE_SOURCE_DIR}/delegates/gpu/metal_delegate.mm + ${TFLITE_SOURCE_DIR}/delegates/gpu/metal/buffer.cc +@@ -375,10 +420,13 @@ 
if(TFLITE_ENABLE_GPU) + ${TFLITE_DELEGATES_METAL_SRCS} + ) + target_include_directories(metal_delegate PUBLIC +- ${CMAKE_BINARY_DIR}/abseil-cpp +- ${CMAKE_BINARY_DIR}/flatbuffers/include + PRIVATE ${TENSORFLOW_SOURCE_DIR} + ) ++ target_link_libraries(metal_delegate PUBLIC ++ absl::status flatbuffers::flatbuffers ++ "-framework Foundation" "-framework Metal" ++ ) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES metal_delegate) + # + # generate flatbuffers header for inference_context + # +@@ -415,12 +463,13 @@ if(TFLITE_ENABLE_GPU) + foreach(lib_name ${CC_SRCS}) + set_source_files_properties(${METAL_DELEGATE_PATH}${lib_name}.cc PROPERTIES LANGUAGE OBJCXX) + add_library("${lib_name}" STATIC ${METAL_DELEGATE_PATH}${lib_name}.cc) +- target_include_directories("${lib_name}" PUBLIC +- ${CMAKE_BINARY_DIR}/abseil-cpp +- ${CMAKE_BINARY_DIR}/flatbuffers/include +- ) ++ target_include_directories("${lib_name}" PRIVATE ${TENSORFLOW_SOURCE_DIR}) ++ target_link_libraries("${lib_name}" PUBLIC ++ absl::base ++ flatbuffers::flatbuffers ++ ) + set_target_properties(${lib_name} PROPERTIES LINKER_LANGUAGE OBJCXX) +- target_link_libraries(${lib_name}) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES "${lib_name}") + endforeach() + + list(APPEND MM_SRCS +@@ -429,15 +478,18 @@ if(TFLITE_ENABLE_GPU) + ) + foreach(lib_name ${MM_SRCS}) + add_library("${lib_name}" STATIC ${METAL_DELEGATE_PATH}${lib_name}.mm) +- target_include_directories("${lib_name}" PUBLIC +- ${CMAKE_BINARY_DIR}/abseil-cpp +- ${CMAKE_BINARY_DIR}/flatbuffers/include +- ) +- target_link_libraries(${lib_name}) ++ target_include_directories("${lib_name}" PRIVATE ${TENSORFLOW_SOURCE_DIR}) ++ target_link_libraries("${lib_name}" PUBLIC ++ absl::base ++ flatbuffers::flatbuffers ++ ) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES "${lib_name}") + endforeach() + endif() + list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DCL_DELEGATE_NO_GL" "-DEGL_NO_X11") +- list(APPEND TFLITE_TARGET_DEPENDENCIES ++ message(STATUS "using CL_TARGET_OPENCL_VERSION: ${OpenCL_VERSION_MAJOR}${OpenCL_VERSION_MINOR}0") ++ list(APPEND TFLITE_TARGET_PRIVATE_OPTIONS "-DCL_TARGET_OPENCL_VERSION=${OpenCL_VERSION_MAJOR}${OpenCL_VERSION_MINOR}0") ++ list(APPEND TFLITE_TARGET_DEPENDENCIES OpenCL::OpenCL + absl::any + absl::flat_hash_map + ) diff --git a/ports/tensorflow-lite/fix-cmake-nnapi.patch b/ports/tensorflow-lite/fix-cmake-nnapi.patch new file mode 100644 index 00000000..a1c46355 --- /dev/null +++ b/ports/tensorflow-lite/fix-cmake-nnapi.patch @@ -0,0 +1,23 @@ +diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt +index 38f94e3a..15fdb224 100644 +--- a/tensorflow/lite/CMakeLists.txt ++++ b/tensorflow/lite/CMakeLists.txt +@@ -505,9 +505,17 @@ if(_TFLITE_ENABLE_NNAPI) + ) + + list(APPEND TFLITE_NNAPI_SRCS ++ "${TF_SOURCE_DIR}/tsl/platform/default/logging.cc" ++ "${TF_SOURCE_DIR}/tsl/platform/default/mutex.cc" + "${TFLITE_SOURCE_DIR}/nnapi/sl/SupportLibrary.cc" + ) +- ++ # tsl::mutex requires nsync_cpp ++ find_library(NSYNC_LIBRARY NAMES nsync_cpp REQUIRED) ++ list(APPEND TFLITE_TARGET_DEPENDENCIES ${NSYNC_LIBRARY}) ++ # disable ml_dtypes/float8.h which requries libeigen3 master branch ... 
++ set_source_files_properties(${TFLITE_NNAPI_SRCS} PROPERTIES ++ COMPILE_DEFINITIONS "TENSORFLOW_TSL_PLATFORM_FLOAT8_H_;TENSORFLOW_CORE_PLATFORM_FLOAT8_H_" ++ ) + if(${TFLITE_ENABLE_NNAPI_VERBOSE_VALIDATION}) + list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DNNAPI_VERBOSE_VALIDATION") + endif() diff --git a/ports/tensorflow-lite/fix-cmake-use-vcpkg.patch b/ports/tensorflow-lite/fix-cmake-use-vcpkg.patch new file mode 100644 index 00000000..268062d2 --- /dev/null +++ b/ports/tensorflow-lite/fix-cmake-use-vcpkg.patch @@ -0,0 +1,85 @@ +diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt +index 0924ca9d..c2b50631 100644 +--- a/tensorflow/lite/CMakeLists.txt ++++ b/tensorflow/lite/CMakeLists.txt +@@ -50,14 +50,7 @@ if(NOT TENSORFLOW_SOURCE_DIR) + endif() + set(TF_SOURCE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow") + set(TFLITE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}") +-set(CMAKE_MODULE_PATH +- "${TFLITE_SOURCE_DIR}/tools/cmake/modules" +- ${CMAKE_MODULE_PATH} +-) +-set(CMAKE_PREFIX_PATH +- "${TFLITE_SOURCE_DIR}/tools/cmake/modules" +- ${CMAKE_PREFIX_PATH} +-) ++# CMake modules/scripts are replaced to vcpkg portfile patches + include(GNUInstallDirs) + include(CMakeDependentOption) + option(TFLITE_ENABLE_INSTALL "Enable install rule" OFF) +@@ -143,6 +136,7 @@ set(OVERRIDABLE_FETCH_CONTENT_LICENSE_CHECK ON) + # Additional library dependencies based upon enabled features. + set(TFLITE_TARGET_DEPENDENCIES "") + # Find TensorFlow Lite dependencies. ++find_path(FP16_INCLUDE_DIRS "fp16.h" REQUIRED) + find_package(absl REQUIRED) + find_package(Eigen3 REQUIRED) + find_package(farmhash REQUIRED) +@@ -449,7 +443,7 @@ endif() + ) + endif() + if(_TFLITE_ENABLE_NNAPI) +- find_package(fp16_headers REQUIRED) ++ find_path(FP16_INCLUDE_DIRS "fp16.h" REQUIRED) + populate_tflite_source_vars("delegates/nnapi" + TFLITE_DELEGATES_NNAPI_SRCS + FILTER "(_test_list|_disabled)\\.(cc|h)$" +@@ -474,14 +468,14 @@ else() + ) + endif() + if(TFLITE_ENABLE_XNNPACK) +- find_package(fp16_headers REQUIRED) +- find_package(XNNPACK REQUIRED) ++ find_path(FP16_INCLUDE_DIRS "fp16.h" REQUIRED) ++ find_library(XNNPACK_LIBRARY NAMES XNNPACK REQUIRED) + populate_tflite_source_vars("delegates/xnnpack" + TFLITE_DELEGATES_XNNPACK_SRCS + FILTER ".*(_test|_tester)\\.(cc|h)" + ) + list(APPEND TFLITE_TARGET_DEPENDENCIES +- XNNPACK ++ ${XNNPACK_LIBRARY} + ) + list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFLITE_BUILD_WITH_XNNPACK_DELEGATE") + endif() +@@ -607,9 +601,10 @@ set(_ALL_TFLITE_HDRS ${_ALL_TFLITE_SRCS}) + list(FILTER _ALL_TFLITE_HDRS INCLUDE REGEX ".*\\.h$") + target_include_directories(tensorflow-lite + PUBLIC $ $ ++ PRIVATE ${FP16_INCLUDE_DIRS} + ) + target_link_libraries(tensorflow-lite +- PUBLIC ++ PRIVATE + Eigen3::Eigen + absl::flags + absl::hash +@@ -623,7 +618,7 @@ target_link_libraries(tensorflow-lite + gemmlowp::gemmlowp + ml_dtypes + ruy::ruy +- pthreadpool ++ ${PTHREADPOOL_LIB} + ${CMAKE_DL_LIBS} + ${TFLITE_TARGET_DEPENDENCIES} + ) +@@ -676,7 +671,7 @@ if(TFLITE_ENABLE_INSTALL) + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}" + ) + endif() +- ++return() # drop following targets + # The kernel tests. 
+ if(TFLITE_KERNEL_TEST) + enable_testing() diff --git a/ports/tensorflow-lite/fix-cmake.patch b/ports/tensorflow-lite/fix-cmake.patch deleted file mode 100644 index 0fe0d2c0..00000000 --- a/ports/tensorflow-lite/fix-cmake.patch +++ /dev/null @@ -1,459 +0,0 @@ -diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt -index 073b29c4..9cb7f5fd 100644 ---- a/tensorflow/lite/CMakeLists.txt -+++ b/tensorflow/lite/CMakeLists.txt -@@ -50,14 +50,6 @@ if(NOT TENSORFLOW_SOURCE_DIR) - endif() - set(TF_SOURCE_DIR "${TENSORFLOW_SOURCE_DIR}/tensorflow") - set(TFLITE_SOURCE_DIR "${CMAKE_CURRENT_LIST_DIR}") --set(CMAKE_MODULE_PATH -- "${TFLITE_SOURCE_DIR}/tools/cmake/modules" -- ${CMAKE_MODULE_PATH} --) --set(CMAKE_PREFIX_PATH -- "${TFLITE_SOURCE_DIR}/tools/cmake/modules" -- ${CMAKE_PREFIX_PATH} --) - include(GNUInstallDirs) - include(CMakeDependentOption) - option(TFLITE_ENABLE_INSTALL "Enable install rule" OFF) -@@ -139,15 +131,16 @@ macro(populate_tf_source_vars RELATIVE_DIR SOURCES_VAR) - ) - endmacro() - # Find TensorFlow Lite dependencies. --find_package(absl REQUIRED) --find_package(Eigen3 REQUIRED) --find_package(farmhash REQUIRED) --find_package(fft2d REQUIRED) --find_package(Flatbuffers REQUIRED) --find_package(gemmlowp REQUIRED) --find_package(NEON_2_SSE REQUIRED) --find_package(cpuinfo REQUIRED) #CPUINFO is used by XNNPACK and RUY library --find_package(ruy REQUIRED) -+find_package(absl CONFIG REQUIRED) -+find_package(Eigen3 3.4 CONFIG REQUIRED) -+find_library(FARMHASH_LIBPATH NAMES farmhash REQUIRED) -+find_library(FFT2D_SG2D_LIBPATH NAMES fft2d_fftsg2d REQUIRED) -+find_library(FFT2D_SG_LIBPATH NAMES fft2d_fftsg REQUIRED) -+find_package(Flatbuffers 2.0 CONFIG REQUIRED) -+find_package(gemmlowp CONFIG REQUIRED) -+find_package(NEON_2_SSE CONFIG REQUIRED) -+find_package(cpuinfo CONFIG REQUIRED) -+find_package(ruy CONFIG REQUIRED) - set(TF_TARGET_PRIVATE_OPTIONS "") - if(CMAKE_CXX_COMPILER_ID MATCHES "Clang$") - # TensorFlow uses a heap of deprecated proto fields so surpress these -@@ -197,11 +190,15 @@ if(CMAKE_SYSTEM_NAME MATCHES "Windows") - endif() - endif() - if(CMAKE_SYSTEM_NAME MATCHES "Android") -- find_library(ANDROID_LOG_LIB log) -+ find_library(ANDROID_LOG_LIB NAMES log REQUIRED) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES ${ANDROID_LOG_LIB}) - endif() - # Build a list of source files to compile into the TF Lite library. - populate_tflite_source_vars("." TFLITE_SRCS) - -+# Exclude tensorflow::profiler usage -+list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tensorflow_profiler_logger\\.cc$") -+ - # This particular file is excluded because the more explicit approach to enable - # XNNPACK delegate is preferred to the weak-symbol one. 
- list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*tflite_with_xnnpack\\.cc$") -@@ -214,24 +211,83 @@ if(_TFLITE_ENABLE_MMAP) - else() - list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*mmap_allocation\\.cc$") - endif() --if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android") -- list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_android\\.cc$") --endif() --if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "iOS") -- list(FILTER TFLITE_SRCS EXCLUDE REGEX ".*minimal_logging_ios\\.cc$") -+list(FILTER TFLITE_SRCS EXCLUDE REGEX "minimal_logging_.*\\.cc$") -+if(ANDROID) -+ list(APPEND TFLITE_SRCS ${TFLITE_SOURCE_DIR}/minimal_logging_android.cc) -+elseif(IOS) -+ list(APPEND TFLITE_SRCS ${TFLITE_SOURCE_DIR}/minimal_logging_ios.cc) -+else() -+ list(APPEND TFLITE_SRCS ${TFLITE_SOURCE_DIR}/minimal_logging_default.cc) - endif() - populate_tflite_source_vars("core" TFLITE_CORE_SRCS) - populate_tflite_source_vars("core/api" TFLITE_CORE_API_SRCS) - populate_tflite_source_vars("c" TFLITE_C_SRCS) - populate_tflite_source_vars("delegates" TFLITE_DELEGATES_SRCS) - if(TFLITE_ENABLE_GPU) -- find_package(opencl_headers REQUIRED) -- find_package(vulkan_headers REQUIRED) -- find_package(fp16_headers REQUIRED) -- # Android NDK already has OpenGL, EGL headers. -- if(NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Android") -- find_package(opengl_headers REQUIRED) -- find_package(egl_headers REQUIRED) -+ # OpenCL from vcpkg -+ if(APPLE) -+ find_path(OpenCL_INCLUDE_DIR "CL/opencl.h" REQUIRED) -+ find_package(OpenCL) -+ elseif(ANDROID) -+ find_path(OpenCL_INCLUDE_DIR "CL/opencl.h" REQUIRED) -+ else() -+ find_package(OpenCL 2.0 REQUIRED) -+ endif() -+ if(OpenCL_LIBRARY) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES ${OpenCL_LIBRARY}) -+ endif() -+ # Vulkan headers will be installed with vcpkg -+ find_package(Vulkan) -+ if(Vulkan_FOUND) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES Vulkan::Headers) -+ endif() -+ # OpenGL, EGL headers will be installed with vcpkg -+ if(WIN32 OR ANDROID OR (CMAKE_SYSTEM_NAME STREQUAL Linux)) -+ find_library(EGL_LIBPATH NAMES libEGL EGL REQUIRED) -+ find_library(GLES_LIBPATH NAMES libGLESv3 GLESv3 libGLESv2 GLESv2 REQUIRED) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES ${EGL_LIBPATH} ${GLES_LIBPATH}) -+ list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DEGL_EGLEXT_PROTOTYPES" "-DEGL_NO_X11") -+ elseif(APPLE) -+ enable_language(OBJC) -+ enable_language(OBJCXX) -+ set(CMAKE_OBJC_STANDARD 11) -+ set(CMAKE_OBJCXX_STANDARD 17) -+ list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DCL_DELEGATE_NO_GL") -+ endif() -+ -+ if(TFLITE_ENABLE_METAL AND APPLE) -+ # expect same protobuf version with mlmodel library -+ find_package(Protobuf REQUIRED) -+ # CoreML .proto files from https://github.com/apple/coremltools -+ # note: moved protoc generated code to coreml-tools port -+ find_library(MLMODEL_LIBRARY NAMES mlmodel REQUIRED) -+ -+ file(GLOB TFLITE_OBJCXX_SRCS "${TFLITE_SOURCE_DIR}/objc/sources/*.mm") -+ set_source_files_properties(${TFLITE_OBJCXX_SRCS} PROPERTIES -+ LANGUAGE OBJCXX COMPILE_OPTIONS "-fobjc-arc" -+ ) -+ file(GLOB TFLITE_OBJC_SRCS "${TFLITE_SOURCE_DIR}/objc/sources/*.m") -+ set_source_files_properties(${TFLITE_OBJC_SRCS} PROPERTIES -+ LANGUAGE OBJC COMPILE_OPTIONS "-fobjc-arc" -+ ) -+ populate_tflite_source_vars("delegates/coreml/builders" TFLITE_DELEGATES_COREML_BUILDERS_CXX_SRCS) -+ file(GLOB TFLITE_DELEGATES_COREML_SRCS "${TFLITE_SOURCE_DIR}/delegates/coreml/*.mm") -+ list(APPEND TFLITE_DELEGATES_PROVIDER_SRCS -+ ${TFLITE_SOURCE_DIR}/tools/delegates/coreml_delegate_provider.cc -+ ${TFLITE_SOURCE_DIR}/tools/delegates/external_delegate_provider.cc -+ 
) -+ set_source_files_properties(${TFLITE_DELEGATES_COREML_BUILDERS_CXX_SRCS} ${TFLITE_DELEGATES_COREML_SRCS} ${TFLITE_DELEGATES_PROVIDER_SRCS} PROPERTIES -+ LANGUAGE OBJCXX COMPILE_OPTIONS "-fobjc-arc" -+ ) -+ -+ list(APPEND TFLITE_TARGET_DEPENDENCIES protobuf::libprotobuf ${MLMODEL_LIBRARY} "-framework Foundation" "-framework Metal" "-framework CoreML") -+ list(APPEND TFLITE_DELEGATES_GPU_SRCS -+ ${TFLITE_DELEGATES_COREML_BUILDERS_CXX_SRCS} -+ ${TFLITE_DELEGATES_COREML_SRCS} -+ ${TFLITE_DELEGATES_PROVIDER_SRCS} -+ ${TFLITE_OBJCXX_SRCS} -+ ${TFLITE_OBJC_SRCS} -+ ) - endif() - populate_tflite_source_vars( - "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS -@@ -305,13 +361,18 @@ if(TFLITE_ENABLE_GPU) - ${TFLITE_DELEGATES_GPU_COMMON_TRANSFORMATIONS_SRCS} - ${TFLITE_SOURCE_DIR}/tools/versioning/gpu_compatibility.cc - ${TFLITE_SOURCE_DIR}/tools/versioning/op_signature.cc -+ ${TFLITE_SOURCE_DIR}/tools/evaluation/utils.cc -+ ${TFLITE_SOURCE_DIR}/tools/tool_params.cc -+ ${TFLITE_SOURCE_DIR}/tools/delegates/delegate_provider.cc -+ ${TFLITE_SOURCE_DIR}/tools/delegates/default_execution_provider.cc -+ ${TFLITE_SOURCE_DIR}/tools/command_line_flags.cc - ) - include_directories( - AFTER - ${TFLITE_SOURCE_DIR}/delegates/gpu/common - ${TFLITE_SOURCE_DIR}/delegates/gpu/common/task - ) -- if(TFLITE_ENABLE_METAL AND "${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin") -+ if(TFLITE_ENABLE_METAL AND APPLE) - # - # libmetal_delegate library - # -@@ -319,13 +380,12 @@ if(TFLITE_ENABLE_GPU) - list(APPEND TFLITE_DELEGATES_METAL_SRCS - ${TFLITE_SOURCE_DIR}/delegates/gpu/metal_delegate.mm - ) -- add_library(metal_delegate STATIC -+ add_library(metal_delegate OBJECT - ${TFLITE_DELEGATES_METAL_SRCS} - ) -- target_include_directories(metal_delegate PUBLIC -- ${CMAKE_BINARY_DIR}/abseil-cpp -- ${CMAKE_BINARY_DIR}/flatbuffers/include -- ) -+ target_include_directories(metal_delegate PRIVATE ${TENSORFLOW_SOURCE_DIR}) -+ target_link_libraries(metal_delegate PUBLIC absl::base flatbuffers::flatbuffers) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES metal_delegate) - - # - # supplementary libraries for libmetal_delegate -@@ -334,23 +394,19 @@ if(TFLITE_ENABLE_GPU) - buffer - compute_task - inference_context -- linear_storage - metal_arguments - metal_device - metal_spatial_tensor -- texture2d - ) - SET(METAL_DELEGATE_PATH ${TFLITE_SOURCE_DIR}/delegates/gpu/metal/) - - foreach(lib_name ${CC_SRCS}) - set_source_files_properties(${METAL_DELEGATE_PATH}${lib_name}.cc PROPERTIES LANGUAGE OBJCXX) -- add_library("${lib_name}" STATIC ${METAL_DELEGATE_PATH}${lib_name}.cc) -- target_include_directories("${lib_name}" PUBLIC -- ${CMAKE_BINARY_DIR}/abseil-cpp -- ${CMAKE_BINARY_DIR}/flatbuffers/include -- ) -+ add_library(${lib_name} OBJECT ${METAL_DELEGATE_PATH}${lib_name}.cc) -+ target_link_libraries(${lib_name} PUBLIC absl::base flatbuffers::flatbuffers) -+ target_include_directories(${lib_name} PRIVATE ${TENSORFLOW_SOURCE_DIR}) - set_target_properties(${lib_name} PROPERTIES LINKER_LANGUAGE OBJCXX) -- target_link_libraries(${lib_name}) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES "${lib_name}") - endforeach() - - list(APPEND MM_SRCS -@@ -358,12 +414,10 @@ if(TFLITE_ENABLE_GPU) - common - ) - foreach(lib_name ${MM_SRCS}) -- add_library("${lib_name}" STATIC ${METAL_DELEGATE_PATH}${lib_name}.mm) -- target_include_directories("${lib_name}" PUBLIC -- ${CMAKE_BINARY_DIR}/abseil-cpp -- ${CMAKE_BINARY_DIR}/flatbuffers/include -- ) -- target_link_libraries(${lib_name}) -+ add_library(${lib_name} OBJECT ${METAL_DELEGATE_PATH}${lib_name}.mm) -+ 
target_link_libraries(${lib_name} PUBLIC absl::base flatbuffers::flatbuffers) -+ target_include_directories(${lib_name} PRIVATE ${TENSORFLOW_SOURCE_DIR}) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES "${lib_name}") - endforeach() - endif() - list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DCL_DELEGATE_NO_GL" "-DEGL_NO_X11") -@@ -373,7 +427,7 @@ endif() - ) - endif() - if(_TFLITE_ENABLE_NNAPI) -- find_package(fp16_headers REQUIRED) -+ find_path(FP16_INCLUDE_DIRS "fp16.h" REQUIRED) - populate_tflite_source_vars("delegates/nnapi" - TFLITE_DELEGATES_NNAPI_SRCS - FILTER "(_test_list|_disabled)\\.(cc|h)$" -@@ -383,9 +437,13 @@ if(_TFLITE_ENABLE_NNAPI) - ) - - list(APPEND TFLITE_NNAPI_SRCS -+ ${TF_SOURCE_DIR}/tsl/platform/default/logging.h -+ ${TF_SOURCE_DIR}/tsl/platform/default/logging.cc -+ ${TF_SOURCE_DIR}/tsl/platform/default/mutex.cc - "${TFLITE_SOURCE_DIR}/nnapi/sl/SupportLibrary.cc" - ) -- -+ find_library(NSYNC_LIBRARY NAMES nsync_cpp nsync REQUIRED) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES ${NSYNC_LIBRARY}) - if(${TFLITE_ENABLE_NNAPI_VERBOSE_VALIDATION}) - list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DNNAPI_VERBOSE_VALIDATION") - endif() -@@ -398,14 +456,16 @@ else() - ) - endif() - if(TFLITE_ENABLE_XNNPACK) -- find_package(fp16_headers REQUIRED) -- find_package(XNNPACK REQUIRED) -+ find_path(FP16_INCLUDE_DIRS "fp16.h" REQUIRED) -+ find_package(unofficial-pthreadpool CONFIG REQUIRED) -+ find_package(cpuinfo CONFIG REQUIRED) -+ find_package(xnnpack CONFIG REQUIRED) - populate_tflite_source_vars("delegates/xnnpack" - TFLITE_DELEGATES_XNNPACK_SRCS - FILTER ".*(_test|_tester)\\.(cc|h)" - ) - list(APPEND TFLITE_TARGET_DEPENDENCIES -- XNNPACK -+ xnnpack cpuinfo::clog cpuinfo::cpuinfo unofficial::pthreadpool - ) - list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFLITE_BUILD_WITH_XNNPACK_DELEGATE") - endif() -@@ -470,11 +530,12 @@ if(CMAKE_SYSTEM_NAME MATCHES "Android") - list(APPEND TFLITE_PROFILER_SRCS - ${TFLITE_SOURCE_DIR}/profiling/atrace_profiler.cc - ) --elseif(CMAKE_SYSTEM_NAME MATCHES "iOS") -+elseif(APPLE) - enable_language(OBJCXX) - list(APPEND TFLITE_PROFILER_SRCS - ${TFLITE_SOURCE_DIR}/profiling/signpost_profiler.mm - ) -+ list(APPEND TFLITE_TARGET_DEPENDENCIES "-framework Foundation") - endif() - - # TFLite library -@@ -526,15 +587,43 @@ target_link_libraries(tensorflow-lite - absl::strings - absl::synchronization - absl::variant -- farmhash -- fft2d_fftsg2d -+ ${FARMHASH_LIBPATH} -+ ${FFT2D_SG2D_LIBPATH} ${FFT2D_SG_LIBPATH} - flatbuffers::flatbuffers -- gemmlowp -+ gemmlowp::gemmlowp - ruy::ruy - ${CMAKE_DL_LIBS} -+ PRIVATE - ${TFLITE_TARGET_DEPENDENCIES} - ) - -+# include sources of tensorflowlite_c -+target_sources(tensorflow-lite PRIVATE -+ c/builtin_op_data.h -+ c/common.h -+ c/common.cc -+ c/c_api_types.h -+ c/c_api.h -+ c/c_api.cc -+ c/c_api_experimental.h -+ c/c_api_experimental.cc -+ c/c_api_internal.h -+ c/c_api_internal.cc -+ c/c_api_opaque_internal.cc -+) -+if(WIN32) -+ target_sources(tensorflow-lite PRIVATE ${TF_SOURCE_DIR}/tsl/platform/windows/env_time.cc) -+else() -+ target_sources(tensorflow-lite PRIVATE ${TF_SOURCE_DIR}/tsl/platform/default/env_time.cc) -+endif() -+if (WIN32) -+ target_compile_definitions(tensorflow-lite PRIVATE TFL_COMPILE_LIBRARY) -+ target_link_libraries(tensorflow-lite PRIVATE Kernel32) # for UWP -+elseif (APPLE) -+ target_link_options(tensorflow-lite PRIVATE "-Wl,-exported_symbols_list,${TENSORFLOW_SOURCE_DIR}/tensorflow/lite/c/exported_symbols.lds") -+else() -+ target_link_options(tensorflow-lite PRIVATE 
"-Wl,--version-script,${TENSORFLOW_SOURCE_DIR}/tensorflow/lite/c/version_script.lds") -+endif() - if (NOT BUILD_SHARED_LIBS) - list(APPEND TFLITE_TARGET_PUBLIC_OPTIONS "-DTFL_STATIC_LIBRARY_BUILD") - endif() -@@ -554,14 +643,49 @@ if(TFLITE_ENABLE_INSTALL) - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) - -- foreach(hdr ${_ALL_TFLITE_HDRS}) -- get_filename_component(dir ${hdr} DIRECTORY) -- file(RELATIVE_PATH dir ${CMAKE_CURRENT_SOURCE_DIR} ${dir}) -- install( -- FILES ${hdr} -- DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/${dir}" -- ) -- endforeach() -+ install(DIRECTORY ${TFLITE_SOURCE_DIR}/c -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite -+ FILES_MATCHING PATTERN "*.h" -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/builtin_ops.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/gpu/delegate.h -+ ${TFLITE_SOURCE_DIR}/delegates/gpu/cl/gpu_api_delegate.h -+ ${TFLITE_SOURCE_DIR}/delegates/gpu/gl_delegate.h -+ ${TFLITE_SOURCE_DIR}/delegates/gpu/delegate_options.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/gpu -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/xnnpack/xnnpack_delegate.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/xnnpack -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/external/external_delegate.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/external -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/hexagon/hexagon_delegate.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/hexagon -+ ) -+ if(APPLE) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/gpu/metal_delegate.h -+ ${TFLITE_SOURCE_DIR}/delegates/gpu/metal_delegate_internal.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/gpu -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/coreml/coreml_delegate.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/coreml -+ ) -+ endif() -+ if(ANDROID) -+ install(FILES ${TFLITE_SOURCE_DIR}/nnapi/NeuralNetworksTypes.h -+ ${TFLITE_SOURCE_DIR}/nnapi/NeuralNetworksShim.h -+ ${TFLITE_SOURCE_DIR}/nnapi/nnapi_implementation.h -+ ${TFLITE_SOURCE_DIR}/nnapi/nnapi_util.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/nnapi -+ ) -+ install(FILES ${TFLITE_SOURCE_DIR}/delegates/nnapi/nnapi_delegate.h -+ ${TFLITE_SOURCE_DIR}/delegates/nnapi/nnapi_delegate_c_api.h -+ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/tensorflow/lite/delegates/nnapi -+ ) -+ endif() - - install( - EXPORT ${PROJECT_NAME}Targets -@@ -584,6 +708,7 @@ if(TFLITE_ENABLE_INSTALL) - ) - endif() - -+return() # ----- Discard the other targets ----- - # The kernel tests. 
- if(TFLITE_KERNEL_TEST) - enable_testing() -diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt -index 1267b46c..9f2157ee 100644 ---- a/tensorflow/lite/CMakeLists.txt -+++ b/tensorflow/lite/CMakeLists.txt -@@ -289,6 +289,22 @@ if(TFLITE_ENABLE_GPU) - ${TFLITE_OBJC_SRCS} - ) - endif() -+ if(WIN32 OR ANDROID) -+ populate_tflite_source_vars("delegates/gpu/gl/compiler" TFLITE_DELEGATES_GPU_GL_COMPILER_SRCS) -+ populate_tflite_source_vars("delegates/gpu/gl/converters" TFLITE_DELEGATES_GPU_GL_CONVERTERS_SRCS) -+ populate_tflite_source_vars("delegates/gpu/gl/kernels" TFLITE_DELEGATES_GPU_GL_KERNELS_SRCS) -+ populate_tflite_source_vars("delegates/gpu/gl/runtime" TFLITE_DELEGATES_GPU_GL_RUNTIME_SRCS) -+ populate_tflite_source_vars("delegates/gpu/gl/workgroups" TFLITE_DELEGATES_GPU_GL_WORKGROUPS_SRCS) -+ populate_tflite_source_vars("delegates/gpu/gl" TFLITE_DELEGATES_GPU_GL_SRCS) -+ list(APPEND TFLITE_DELEGATES_GPU_SRCS -+ ${TFLITE_DELEGATES_GPU_GL_COMPILER_SRCS} -+ ${TFLITE_DELEGATES_GPU_GL_CONVERTERS_SRCS} -+ ${TFLITE_DELEGATES_GPU_GL_KERNELS_SRCS} -+ ${TFLITE_DELEGATES_GPU_GL_RUNTIME_SRCS} -+ ${TFLITE_DELEGATES_GPU_GL_WORKGROUPS_SRCS} -+ ${TFLITE_DELEGATES_GPU_GL_SRCS} -+ ) -+ endif() - populate_tflite_source_vars( - "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS - FILTER "(_test|gl_interop|gpu_api_delegate|egl_sync)\\.(cc|h)$" -diff --git a/tensorflow/lite/CMakeLists.txt b/tensorflow/lite/CMakeLists.txt -index 9f2157ee..ebe824d2 100644 ---- a/tensorflow/lite/CMakeLists.txt -+++ b/tensorflow/lite/CMakeLists.txt -@@ -305,6 +305,15 @@ if(TFLITE_ENABLE_GPU) - ${TFLITE_DELEGATES_GPU_GL_SRCS} - ) - endif() -+ if(WITH_MEDIAPIPE) -+ # custom_registry.cc from TFLITE_DELEGATES_GPU_GL_KERNELS_SRCS -+ list(FILTER TFLITE_DELEGATES_GPU_SRCS EXCLUDE REGEX ".*custom_registry\\.cc$") -+ # mediapipe/registry.cc will replace RegisterCustomOps -+ populate_tflite_source_vars("delegates/gpu/gl/kernels/mediapipe" TFLITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_SRCS) -+ list(APPEND TFLITE_DELEGATES_GPU_SRCS -+ ${TFLITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_SRCS} -+ ) -+ endif() - populate_tflite_source_vars( - "delegates/gpu/cl" TFLITE_DELEGATES_GPU_CL_SRCS - FILTER "(_test|gl_interop|gpu_api_delegate|egl_sync)\\.(cc|h)$" diff --git a/ports/tensorflow-lite/fix-source-abseil.patch b/ports/tensorflow-lite/fix-source-abseil.patch new file mode 100644 index 00000000..e50f3567 --- /dev/null +++ b/ports/tensorflow-lite/fix-source-abseil.patch @@ -0,0 +1,583 @@ +diff --git a/tensorflow/compiler/xla/runtime/custom_call.h b/tensorflow/compiler/xla/runtime/custom_call.h +index 400edef0..f01508ff 100644 +--- a/tensorflow/compiler/xla/runtime/custom_call.h ++++ b/tensorflow/compiler/xla/runtime/custom_call.h +@@ -834,7 +834,7 @@ template + struct Decode, checks> { + ABSL_ATTRIBUTE_ALWAYS_INLINE static FailureOr call( + DecodingOffsets& offsets, DecodingContext& ctx) { +- return std::any_cast(ctx.values[offsets.values++]); ++ return absl::any_cast(ctx.values[offsets.values++]); + } + }; + +diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +index 7681cba5..5bd5ff4d 100644 +--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc ++++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc +@@ -1526,7 +1526,7 @@ GpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, + std::vector> results; + + std::any target_config = options.target_config(); +- auto* gpu_target_config = std::any_cast(&target_config); ++ auto* gpu_target_config = 
absl::any_cast(&target_config); + CHECK(gpu_target_config != nullptr || options.executor() != nullptr); + + for (const auto& module : modules) { +@@ -1536,7 +1536,7 @@ GpuCompiler::CompileAheadOfTime(std::unique_ptr module_group, + CompileModuleResults compile_module_results; + + const std::any& target_config = options.target_config(); +- auto* gpu_target_config = std::any_cast(&target_config); ++ auto* gpu_target_config = absl::any_cast(&target_config); + + if (gpu_target_config) { + // CUDA "CC" major value, -1 if not available. +diff --git a/tensorflow/lite/delegates/gpu/common/operations.h b/tensorflow/lite/delegates/gpu/common/operations.h +index 37d8dfa0..588d2fc2 100644 +--- a/tensorflow/lite/delegates/gpu/common/operations.h ++++ b/tensorflow/lite/delegates/gpu/common/operations.h +@@ -119,8 +119,8 @@ std::string ToString(enum OperationType op); + OperationType OperationTypeFromString(const std::string& name); + + template +-using TensorOrScalarBase = std::variant, +- Tensor, t>; ++using TensorOrScalarBase = absl::variant, ++ Tensor, t>; + + using TensorOrScalar = TensorOrScalarBase; + +diff --git a/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc b/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc +index 655c7bba..88027ae6 100644 +--- a/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc ++++ b/tensorflow/lite/delegates/gpu/common/selectors/operation_selector.cc +@@ -310,7 +310,7 @@ absl::Status CreateElementwiseTwoInputWithOneConstant( + const GpuInfo& gpu_info, const OperationDef& op_def, OperationType op_type, + const Node& node, const Value* input, const Value* output, + std::unique_ptr* gpu_op) { +- auto attr = std::any_cast>( ++ auto attr = absl::any_cast>( + node.operation.attributes); + GPUOperation operation; + if (input->tensor.shape != output->tensor.shape) { +diff --git a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc +index ff8cf35f..615827e5 100644 +--- a/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc ++++ b/tensorflow/lite/delegates/gpu/common/tasks/elementwise.cc +@@ -340,11 +340,11 @@ ElementwiseDescriptor CreateElementwiseDesc( + const GpuInfo& gpu_info, const OperationDef& definition, + const OperationType& op_type, + const ElementwiseAttributesBase& attr) { +- const T* scalar = std::get_if(&attr.param); ++ const T* scalar = absl::get_if(&attr.param); + const auto* linear_tensor = +- std::get_if>(&attr.param); ++ absl::get_if>(&attr.param); + const auto* hwc_tensor = +- std::get_if>(&attr.param); ++ absl::get_if>(&attr.param); + + if (scalar) { + return CreateElementwiseOneRuntimeOneScalar(definition, op_type, *scalar, +diff --git a/tensorflow/lite/delegates/gpu/common/tasks/special/conv_pointwise.cc b/tensorflow/lite/delegates/gpu/common/tasks/special/conv_pointwise.cc +index b8c4d05c..bee97ac8 100644 +--- a/tensorflow/lite/delegates/gpu/common/tasks/special/conv_pointwise.cc ++++ b/tensorflow/lite/delegates/gpu/common/tasks/special/conv_pointwise.cc +@@ -126,7 +126,7 @@ absl::Status IsReduceSumNode(const GraphFloat32& graph, Node* node, + RETURN_IF_ERROR( + IsNode(graph, OperationType::REDUCE_SUM, 1, 1, node, node_context)); + auto reduce_attr = +- std::any_cast(node_context->node->operation.attributes); ++ absl::any_cast(node_context->node->operation.attributes); + if (reduce_attr.dims != std::set{Axis::CHANNELS}) { + return absl::InternalError( + "Expected reduce_sum node with channels reduction."); +diff --git 
a/tensorflow/lite/delegates/gpu/gl/compiler.cc b/tensorflow/lite/delegates/gpu/gl/compiler.cc +index d6e670e0..bafa99fb 100644 +--- a/tensorflow/lite/delegates/gpu/gl/compiler.cc ++++ b/tensorflow/lite/delegates/gpu/gl/compiler.cc +@@ -196,7 +196,7 @@ class CompilerImpl : public Compiler { + // Prepare readonly objects and check whether object types are supported. + for (auto node : compiled_graph_.nodes()) { + auto& attr = +- std::any_cast(node->operation.attributes); ++ absl::any_cast(node->operation.attributes); + + // Set workload explicitly. + if (attr.code.workload == uint3()) { +@@ -251,7 +251,7 @@ class CompilerImpl : public Compiler { + ShaderCodegen codegen(options_, gpu_info_); + for (auto node : compiled_graph_.nodes()) { + auto& attr = +- std::any_cast(node->operation.attributes); ++ absl::any_cast(node->operation.attributes); + if (attr.code.source_code.empty()) { + // noop. Skip this node. + continue; +diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc +index 761fb8b4..c7a7de9a 100644 +--- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc ++++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_auto_input.cc +@@ -46,7 +46,7 @@ std::pair MakeDataReplacement(int n, int k) { + + TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { + auto& node_attr = +- std::any_cast(node->operation.attributes); ++ absl::any_cast(node->operation.attributes); + auto& node_code = node_attr.code; + + if (node_code.input != IOStructure::AUTO) { +@@ -75,7 +75,7 @@ TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { + if (graph->FindOutputs(input_producer->id).size() != 1) { + continue; // input node has more than one output + } +- auto& input_producer_attr = std::any_cast( ++ auto& input_producer_attr = absl::any_cast( + input_producer->operation.attributes); + if (input_producer_attr.code.output != IOStructure::AUTO) { + continue; +@@ -143,7 +143,7 @@ TransformResult FuseAutoInput::ApplyToNode(Node* node, GraphFloat32* graph) { + for (auto input_and_num : nodes_to_fuse) { + auto& input = input_and_num.first; + auto& attr = +- std::any_cast(input->operation.attributes); ++ absl::any_cast(input->operation.attributes); + auto super_inputs = graph->FindInputs(input->id); + + // Replace all internal references in the input source code. 
For example: +diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc +index f227ab21..486d4544 100644 +--- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc ++++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inline.cc +@@ -40,9 +40,9 @@ TransformResult FuseAutoOutputWithInline::ApplyToNodesSequence( + Node* node1 = sequence.front(); + Node* node2 = sequence.back(); + auto& attr1 = +- std::any_cast(node1->operation.attributes); ++ absl::any_cast(node1->operation.attributes); + auto& attr2 = +- std::any_cast(node2->operation.attributes); ++ absl::any_cast(node2->operation.attributes); + + if (attr1.code.output != IOStructure::AUTO || + graph->FindInputs(node2->id).size() != 1 || +diff --git a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc +index 1e27404b..b7719c49 100644 +--- a/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc ++++ b/tensorflow/lite/delegates/gpu/gl/compiler/fuse_inplace.cc +@@ -81,7 +81,7 @@ class InplaceCodeRewrite : public InlineRewrite { + TransformResult RemoveUnusedInplaceUpdates::ApplyToNode(Node* node, + GraphFloat32* graph) { + auto& attr = +- std::any_cast(node->operation.attributes); ++ absl::any_cast(node->operation.attributes); + // Remove inplace block by rewriting to empty string. + EmptyInplaceRewrite rewrite; + TextPreprocessor preprocessor('$', true); +@@ -100,9 +100,9 @@ TransformResult FuseInplaceUpdate::ApplyToNodesSequence( + Node* node1 = sequence.front(); + Node* node2 = sequence.back(); + auto& attr1 = +- std::any_cast(node1->operation.attributes); ++ absl::any_cast(node1->operation.attributes); + auto& attr2 = +- std::any_cast(node2->operation.attributes); ++ absl::any_cast(node2->operation.attributes); + + if (graph->FindInputs(node2->id).size() != 1 || + graph->FindOutputs(node2->id).size() != 1 || +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc +index a14d7f24..0a01997c 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/add.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/add.cc +@@ -41,12 +41,12 @@ class Add : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); +- auto adds = std::get_if>(&attr.param); +- auto scalar = std::get_if(&attr.param); ++ const auto& attr = absl::any_cast(ctx.op_attr); ++ auto adds = absl::get_if>(&attr.param); ++ auto scalar = absl::get_if(&attr.param); + + const auto* hwc_tensor = +- std::get_if>(&attr.param); ++ absl::get_if>(&attr.param); + + if (hwc_tensor) { + std::string code; +@@ -69,7 +69,7 @@ class Add : public NodeShader { + uint3(hwc_tensor->shape.w, hwc_tensor->shape.h, + DivideRoundUp(hwc_tensor->shape.c, 4)), + ConvertToPHWC4( +- std::get>(attr.param)))}}, ++ absl::get>(attr.param)))}}, + /*shared_variables=*/{}, + // Declare workload explicitly because shader depends on gid.z. 
+ /*workload=*/ +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc b/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc +index 0513c8ec..c737b490 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/concat.cc +@@ -37,7 +37,7 @@ namespace { + class AlignedConcatByChannels : public NodeShader { + public: + static bool IsSupported(const GenerationContext& ctx) { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + // Implementation supports concatenation by channels only. + if (attr.axis != Axis::CHANNELS) return false; +@@ -95,7 +95,7 @@ class AlignedConcatByChannels : public NodeShader { + class ConcatByAnyChannel : public NodeShader { + public: + static bool IsSupported(const GenerationContext& ctx) { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + // Implementation supports concatenation by channels only. + if (attr.axis != Axis::CHANNELS) return false; +@@ -308,7 +308,7 @@ vec4 val = vec4(0.0f); + class FlatConcatByHeight : public NodeShader { + public: + static bool IsSupported(const GenerationContext& ctx) { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + // Implementation supports concatenation by height only. + if (attr.axis != Axis::HEIGHT) return false; +@@ -367,7 +367,7 @@ class FlatConcatByHeight : public NodeShader { + class FlatConcatByWidth : public NodeShader { + public: + static bool IsSupported(const GenerationContext& ctx) { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + // Implementation supports concatenation by width only. + if (attr.axis != Axis::WIDTH) return false; +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc +index 8522ea25..0e67fe10 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/conv.cc +@@ -47,7 +47,7 @@ class Convolution : public NodeShader { + "Convolution does not support more than 1 runtime tensor"); + } + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + if (attr.groups != 1) { + return absl::UnimplementedError( + "Convolution does not support more than 1 group"); +@@ -179,7 +179,7 @@ class Convolution1x1 : public NodeShader { + "Convolution does not support more than 1 runtime tensor"); + } + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + if (attr.weights.shape.h != 1 || attr.weights.shape.w != 1) { + return absl::UnimplementedError("Height and width should be 1."); + } +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc +index 627aeeec..b86eefff 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/depthwise_conv.cc +@@ -46,7 +46,7 @@ class DepthwiseConvolution : public NodeShader { + "DepthWise Convolution does not support more than 1 runtime tensor"); + } + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + auto weights = attr.weights.shape; + const int offsets_count = weights.h * weights.w; + const bool offsets_count_too_large = offsets_count > kMaxConstArraySize; +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc 
b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +index db6714b0..e65ea47e 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/elementwise.cc +@@ -168,10 +168,10 @@ class ElementwiseTwoArguments : public NodeShader { + argument1 = "$input_data_1[0, 0, gid.z]$"; + } else { // Scalar of const vector case + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + const auto* tensor = +- std::get_if>(&attr.param); +- const auto* scalar = std::get_if(&attr.param); ++ absl::get_if>(&attr.param); ++ const auto* scalar = absl::get_if(&attr.param); + if (!tensor && !scalar) { + return absl::InvalidArgumentError( + "Couldn't read scalar of const vector data from the attributes."); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc b/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc +index 7dacd3e6..256a16b6 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/fully_connected.cc +@@ -40,7 +40,7 @@ class FullyConnectedBuffers : public NodeShader { + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + + const int src_depth = DivideRoundUp(attr.weights.shape.i, 4); + const int dst_depth = DivideRoundUp(attr.weights.shape.o, 4); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc b/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc +index d59a3e7f..d993a8fc 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/max_unpooling.cc +@@ -39,7 +39,7 @@ class MaxUnpooling : public NodeShader { + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + std::vector parameters = { + {"stride", int2(attr.strides.w, attr.strides.h)}, + {"offset", int2(attr.padding.prepended.w, attr.padding.prepended.h)}, +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc +index fbb37e0e..ea3edd01 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/mean.cc +@@ -242,7 +242,7 @@ class Mean : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + if (attr.dims != std::set({Axis::HEIGHT, Axis::WIDTH})) { + return absl::InvalidArgumentError( + "Mean calculation is supported only for height and width."); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc +index fb309862..d6a3fe7e 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/mul.cc +@@ -90,11 +90,11 @@ absl::Status GenerateMultiplyRuntimeTensorCode( + + absl::Status GenerateMultiplyConstantTensorCode( + const NodeShader::GenerationContext& ctx, GeneratedCode* generated_code) { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + +- if (std::holds_alternative(attr.param)) { ++ if (absl::holds_alternative(attr.param)) { + *generated_code = { +- 
/*parameters=*/{{"scalar", std::get(attr.param)}}, ++ /*parameters=*/{{"scalar", absl::get(attr.param)}}, + /*objects=*/{}, + /*shared_variables=*/{}, + /*workload=*/uint3(), +@@ -106,13 +106,13 @@ absl::Status GenerateMultiplyConstantTensorCode( + return absl::OkStatus(); + } + +- if (std::holds_alternative>(attr.param)) { ++ if (absl::holds_alternative>(attr.param)) { + *generated_code = { + /*parameters=*/{}, + /*objects=*/ + {{"mul_buffer", + MakeReadonlyObject( +- std::get>(attr.param).data)}}, ++ absl::get>(attr.param).data)}}, + /*shared_variables=*/{}, + // Declare workload explicitly because shader depends on gid.z. + /*workload=*/ +@@ -127,9 +127,9 @@ absl::Status GenerateMultiplyConstantTensorCode( + return absl::OkStatus(); + } + +- if (std::holds_alternative>(attr.param)) { ++ if (absl::holds_alternative>(attr.param)) { + bool single_channel_mask = +- std::get>(attr.param).shape.c == 1; ++ absl::get>(attr.param).shape.c == 1; + std::string source; + if (single_channel_mask) { + source = "vec4 const_val = $hwc_buffer[gid.x, gid.y, 0]$;"; +@@ -157,7 +157,7 @@ absl::Status GenerateMultiplyConstantTensorCode( + static_cast(ctx.input_shapes[0][1]), + DivideRoundUp(static_cast(ctx.input_shapes[0][3]), 4)), + ConvertToPHWC4( +- std::get>(attr.param)))}}, ++ absl::get>(attr.param)))}}, + /*shared_variables=*/{}, + // Declare workload explicitly because shader depends on gid.z. + /*workload=*/ +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc b/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc +index 537da8c9..66eeb2ae 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/pad.cc +@@ -39,7 +39,7 @@ class Pad : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + if (attr.type != PaddingContentType::ZEROS && + attr.type != PaddingContentType::REFLECT) { +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc b/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc +index ba746f6b..bc662957 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/pooling.cc +@@ -178,7 +178,7 @@ class Pooling : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + switch (attr.type) { + case PoolingType::AVERAGE: + return GenerateAveragePoolingCode(attr, ctx, generated_code); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc +index 58882ba1..c71579ea 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/prelu.cc +@@ -40,8 +40,8 @@ class PReLULinearAlpha : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); +- auto alpha = std::get_if>(&attr.alpha); ++ const auto& attr = absl::any_cast(ctx.op_attr); ++ auto alpha = absl::get_if>(&attr.alpha); + if (!alpha) { + return absl::InvalidArgumentError("Alpha is missing"); + } +@@ -75,8 +75,8 @@ class PReLUFull : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* 
generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); +- auto alpha = std::get_if>(&attr.alpha); ++ const auto& attr = absl::any_cast(ctx.op_attr); ++ auto alpha = absl::get_if>(&attr.alpha); + if (!alpha) { + return absl::InvalidArgumentError("Alpha is missing"); + } +@@ -118,8 +118,8 @@ class PReLU : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); +- auto* alpha = std::get_if>(&attr.alpha); ++ const auto& attr = absl::any_cast(ctx.op_attr); ++ auto* alpha = absl::get_if>(&attr.alpha); + return alpha ? full_.GenerateCode(ctx, generated_code) + : linear_.GenerateCode(ctx, generated_code); + } +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc +index 80f03dde..3af047a0 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/quantize_and_dequantize.cc +@@ -42,7 +42,7 @@ value_0 = value_0 * vec4($quant_scale$) + vec4($quant_min$); + )"; + + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + *generated_code = { + /*parameters=*/{{"quant_min", attr.min}, + {"quant_max", attr.max}, +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc +index 6d05ea89..2ba5f107 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/relu.cc +@@ -38,7 +38,7 @@ class ReLU : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + // clamp(value, min(0, alpha * value), clip) + std::vector params; + std::string min; +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/reshape.cc b/tensorflow/lite/delegates/gpu/gl/kernels/reshape.cc +index 899e7a1f..2bab3e5d 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/reshape.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/reshape.cc +@@ -44,7 +44,7 @@ class Reshape : public NodeShader { + return absl::InvalidArgumentError( + "Number of elements in input & output tensors don't match."); + } +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + if (attr.new_shape.h != ctx.output_shapes[0][1] || + attr.new_shape.w != ctx.output_shapes[0][2] || + attr.new_shape.c != ctx.output_shapes[0][3]) { +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/resize.cc b/tensorflow/lite/delegates/gpu/gl/kernels/resize.cc +index 04485059..f2c96d69 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/resize.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/resize.cc +@@ -38,7 +38,7 @@ class Resize : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + if (ctx.input_shapes[0][2] > ctx.output_shapes[0][2] || + ctx.input_shapes[0][1] > ctx.output_shapes[0][1]) { +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc b/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc +index b0874658..48e98c35 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc ++++ 
b/tensorflow/lite/delegates/gpu/gl/kernels/slice.cc +@@ -38,7 +38,7 @@ class Slice : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + + const int4 channels(attr.starts.c, attr.strides.c, attr.ends.c, 0); + const int4 heights(attr.starts.h, attr.strides.h, attr.ends.h, 0); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc b/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc +index b83dcead..6ad7d607 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/softmax.cc +@@ -44,7 +44,7 @@ class Softmax : public NodeShader { + public: + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { +- const auto& attr = std::any_cast(ctx.op_attr); ++ const auto& attr = absl::any_cast(ctx.op_attr); + if (ctx.input_shapes[0] != ctx.output_shapes[0]) { + return absl::InvalidArgumentError( + "Input and output shapes do not match."); +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/space_to_depth.cc b/tensorflow/lite/delegates/gpu/gl/kernels/space_to_depth.cc +index 60f66d86..f2f0ad85 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/space_to_depth.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/space_to_depth.cc +@@ -36,7 +36,7 @@ class SpaceToDepth : public NodeShader { + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + std::string code = R"( + for (int i = 0; i < 4; ++i) { + int dst_c = 4 * gid.z + i; +@@ -70,7 +70,7 @@ class DepthToSpace : public NodeShader { + absl::Status GenerateCode(const GenerationContext& ctx, + GeneratedCode* generated_code) const final { + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + std::string code = R"( + for (int i = 0; i < 4; ++i) { + int dst_c = 4 * gid.z + i; +diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc b/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc +index 170e5e39..1a8a4126 100644 +--- a/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc ++++ b/tensorflow/lite/delegates/gpu/gl/kernels/transpose_conv.cc +@@ -45,7 +45,7 @@ class ConvolutionTransposedBuffers : public NodeShader { + "Convolution Transposed does not support more than 1 runtime tensor"); + } + const auto& attr = +- std::any_cast(ctx.op_attr); ++ absl::any_cast(ctx.op_attr); + auto weights = attr.weights.shape; + + std::vector parameters = { diff --git a/ports/tensorflow-lite/fix-source-cpp20.patch b/ports/tensorflow-lite/fix-source-cpp20.patch new file mode 100644 index 00000000..45a140d2 --- /dev/null +++ b/ports/tensorflow-lite/fix-source-cpp20.patch @@ -0,0 +1,43 @@ +diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.cc b/tensorflow/lite/delegates/gpu/gl/runtime.cc +index fe3db954..edee334b 100644 +--- a/tensorflow/lite/delegates/gpu/gl/runtime.cc ++++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc +@@ -90,27 +90,22 @@ absl::Status MakeGlTexture(const Object& object, const ObjectData& data, + if (data.size() % 2 != 0) { + return absl::InvalidArgumentError("Texture size is not aligned"); + } +- return std::visit( +- TextureF16Maker{ +- .data = absl::MakeConstSpan( +- reinterpret_cast(data.data()), +- data.size() / 2), +- .gl_texture = gl_texture, +- }, +- 
object.size); ++ TextureF16Maker marker{}; ++ marker.data = absl::MakeConstSpan( ++ reinterpret_cast(data.data()), data.size() / 2); ++ marker.gl_texture = gl_texture; ++ return std::visit(marker, object.size); + } + case DataType::FLOAT32: { + if (data.size() % sizeof(float) != 0) { + return absl::InvalidArgumentError("Texture size is not aligned"); + } +- return std::visit( +- TextureF32Maker{ +- .data = absl::MakeConstSpan( +- reinterpret_cast(data.data()), +- data.size() / sizeof(float)), +- .gl_texture = gl_texture, +- }, +- object.size); ++ TextureF32Maker marker{}; ++ marker.data = ++ absl::MakeConstSpan(reinterpret_cast(data.data()), ++ data.size() / sizeof(float)); ++ marker.gl_texture = gl_texture; ++ return std::visit(marker, object.size); + } + default: + return absl::InvalidArgumentError("Unsupported textures data type."); diff --git a/ports/tensorflow-lite/fix-opencl-extension.patch b/ports/tensorflow-lite/fix-source-gpu.patch similarity index 93% rename from ports/tensorflow-lite/fix-opencl-extension.patch rename to ports/tensorflow-lite/fix-source-gpu.patch index b41e5ff1..41969f42 100644 --- a/ports/tensorflow-lite/fix-opencl-extension.patch +++ b/ports/tensorflow-lite/fix-source-gpu.patch @@ -146,3 +146,16 @@ index b615a9b6..e61432a5 100644 default: return absl::StrCat("Unknown OpenCL error code - ", error_code); +diff --git a/tensorflow/tsl/platform/float8.h b/tensorflow/tsl/platform/float8.h +index 4ea4ff1f..0df70cb7 100644 +--- a/tensorflow/tsl/platform/float8.h ++++ b/tensorflow/tsl/platform/float8.h +@@ -16,7 +16,7 @@ limitations under the License. + #ifndef TENSORFLOW_TSL_PLATFORM_FLOAT8_H_ + #define TENSORFLOW_TSL_PLATFORM_FLOAT8_H_ + +-#include "include/float8.h" // from @ml_dtypes ++#include "ml_dtypes/float8.h" // from @ml_dtypes + + namespace tsl { + using float8_e4m3fn = ml_dtypes::float8_e4m3fn; diff --git a/ports/tensorflow-lite/fix-source.patch b/ports/tensorflow-lite/fix-source.patch deleted file mode 100644 index 49e8ea07..00000000 --- a/ports/tensorflow-lite/fix-source.patch +++ /dev/null @@ -1,108 +0,0 @@ -diff --git a/tensorflow/lite/delegates/telemetry.h b/tensorflow/lite/delegates/telemetry.h -index d7e92be5..6affa9ae 100644 ---- a/tensorflow/lite/delegates/telemetry.h -+++ b/tensorflow/lite/delegates/telemetry.h -@@ -90,6 +90,8 @@ class DelegateStatus { - int32_t code_; - }; - -+using tflite::proto::TFLiteSettings; -+ - // Used by delegates to report their configuration/settings to TFLite. - // Calling this method adds a new GENERAL_RUNTIME_INSTRUMENTATION_EVENT to - // the runtime Profiler. -diff --git a/tensorflow/lite/kernels/random_ops.cc b/tensorflow/lite/kernels/random_ops.cc -index 8636ca20..8f8eb564 100644 ---- a/tensorflow/lite/kernels/random_ops.cc -+++ b/tensorflow/lite/kernels/random_ops.cc -@@ -17,6 +17,7 @@ limitations under the License. 
- #include - #include - -+#include "tensorflow/lite/kernels/internal/constants.h" - #include "tensorflow/core/lib/random/philox_random.h" - #include "tensorflow/core/lib/random/random_distributions_utils.h" - #include "tensorflow/lite/c/builtin_op_data.h" -diff --git a/tensorflow/lite/simple_memory_arena.cc b/tensorflow/lite/simple_memory_arena.cc -index 1c7a0384..59a139ec 100644 ---- a/tensorflow/lite/simple_memory_arena.cc -+++ b/tensorflow/lite/simple_memory_arena.cc -@@ -168,11 +168,17 @@ TfLiteStatus SimpleMemoryArena::ReleaseBuffer() { - return kTfLiteOk; - } - -+#if defined(_DEBUG) -+void DumpArenaInfo(const std::string& name, -+ const std::vector& execution_plan, size_t arena_size, -+ const std::vector& allocs); -+#else - // Using weak symbols to create a pluggable debugging module. - TFLITE_ATTRIBUTE_WEAK void DumpArenaInfo( - const std::string& name, const std::vector& execution_plan, - size_t arena_size, const std::vector& allocs) { - } -+#endif - - void SimpleMemoryArena::DumpDebugInfo( - const std::string& name, const std::vector& execution_plan) const { -diff --git a/tensorflow/lite/simple_memory_arena_debug_dump.cc b/tensorflow/lite/simple_memory_arena_debug_dump.cc -index 0cf80051..26547a0c 100644 ---- a/tensorflow/lite/simple_memory_arena_debug_dump.cc -+++ b/tensorflow/lite/simple_memory_arena_debug_dump.cc -@@ -129,6 +129,7 @@ class TopKLayers { - }; - } // namespace - -+#if defined(_DEBUG) - // Corresponding weak declaration found in lite/simple_memory_arena.cc - void DumpArenaInfo(const std::string& name, - const std::vector& execution_plan, size_t arena_size, -@@ -194,4 +195,5 @@ void DumpArenaInfo(const std::string& name, - top_usage.Print(); - printf("===End of %s ===\n\n", name.c_str()); - } -+#endif - } // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/runtime.cc b/tensorflow/lite/delegates/gpu/gl/runtime.cc -index fe3db954..edee334b 100644 ---- a/tensorflow/lite/delegates/gpu/gl/runtime.cc -+++ b/tensorflow/lite/delegates/gpu/gl/runtime.cc -@@ -90,27 +90,22 @@ absl::Status MakeGlTexture(const Object& object, const ObjectData& data, - if (data.size() % 2 != 0) { - return absl::InvalidArgumentError("Texture size is not aligned"); - } -- return std::visit( -- TextureF16Maker{ -- .data = absl::MakeConstSpan( -- reinterpret_cast(data.data()), -- data.size() / 2), -- .gl_texture = gl_texture, -- }, -- object.size); -+ TextureF16Maker marker{}; -+ marker.data = absl::MakeConstSpan( -+ reinterpret_cast(data.data()), data.size() / 2); -+ marker.gl_texture = gl_texture; -+ return std::visit(marker, object.size); - } - case DataType::FLOAT32: { - if (data.size() % sizeof(float) != 0) { - return absl::InvalidArgumentError("Texture size is not aligned"); - } -- return std::visit( -- TextureF32Maker{ -- .data = absl::MakeConstSpan( -- reinterpret_cast(data.data()), -- data.size() / sizeof(float)), -- .gl_texture = gl_texture, -- }, -- object.size); -+ TextureF32Maker marker{}; -+ marker.data = -+ absl::MakeConstSpan(reinterpret_cast(data.data()), -+ data.size() / sizeof(float)); -+ marker.gl_texture = gl_texture; -+ return std::visit(marker, object.size); - } - default: - return absl::InvalidArgumentError("Unsupported textures data type."); diff --git a/ports/tensorflow-lite/org_tensorflow_compatibility_fixes.diff b/ports/tensorflow-lite/org_tensorflow_compatibility_fixes.diff deleted file mode 100644 index beb18a80..00000000 --- a/ports/tensorflow-lite/org_tensorflow_compatibility_fixes.diff +++ /dev/null @@ -1,25 +0,0 @@ -diff --git 
a/tensorflow/tsl/lib/monitoring/percentile_sampler.cc b/tensorflow/tsl/lib/monitoring/percentile_sampler.cc -index b7c22ae77ba..d0ba7b48b4b 100644 ---- a/tensorflow/tsl/lib/monitoring/percentile_sampler.cc -+++ b/tensorflow/tsl/lib/monitoring/percentile_sampler.cc -@@ -29,7 +29,8 @@ namespace monitoring { - void PercentileSamplerCell::Add(double sample) { - uint64 nstime = EnvTime::NowNanos(); - mutex_lock l(mu_); -- samples_[next_position_] = {nstime, sample}; -+ samples_[next_position_].nstime = nstime; -+ samples_[next_position_].value = sample; - ++next_position_; - if (TF_PREDICT_FALSE(next_position_ >= samples_.size())) { - next_position_ = 0; -@@ -73,7 +74,9 @@ Percentiles PercentileSamplerCell::value() const { - size_t index = std::min( - static_cast(percentile * pct_samples.num_samples / 100.0), - pct_samples.num_samples - 1); -- PercentilePoint pct = {percentile, samples[index].value}; -+ PercentilePoint pct; -+ pct.percentile = percentile; -+ pct.value = samples[index].value; - pct_samples.points.push_back(pct); - } - } diff --git a/ports/tensorflow-lite/org_tensorflow_custom_ops.diff b/ports/tensorflow-lite/org_tensorflow_custom_ops.diff deleted file mode 100644 index 5de25a22..00000000 --- a/ports/tensorflow-lite/org_tensorflow_custom_ops.diff +++ /dev/null @@ -1,3046 +0,0 @@ -diff --git a/tensorflow/lite/delegates/gpu/common/BUILD b/tensorflow/lite/delegates/gpu/common/BUILD -index c49f2ce731d..d72773c0a5b 100644 ---- a/tensorflow/lite/delegates/gpu/common/BUILD -+++ b/tensorflow/lite/delegates/gpu/common/BUILD -@@ -173,7 +173,7 @@ cc_library( - "//tensorflow/lite/kernels:kernel_util", - "//tensorflow/lite/kernels/internal:reference_base", - "//tensorflow/lite/kernels/internal:tensor", -- ] + tf_platform_alias("custom_parsers", "//tensorflow/lite/delegates/gpu/common/"), -+ ] + ["//tensorflow/lite/delegates/gpu/common/mediapipe:custom_parsers"], - ) - - cc_test( -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/BUILD b/tensorflow/lite/delegates/gpu/common/mediapipe/BUILD -new file mode 100644 -index 00000000000..58967ddbb66 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/BUILD -@@ -0,0 +1,93 @@ -+package( -+ default_visibility = ["//visibility:public"], -+ licenses = ["notice"], -+) -+ -+cc_library( -+ name = "custom_parsers", -+ srcs = ["custom_parsers.cc"], -+ hdrs = ["//tensorflow/lite/delegates/gpu/common:custom_parsers.h"], -+ deps = [ -+ ":landmarks_to_transform_matrix", -+ ":transform_landmarks", -+ ":transform_tensor_bilinear", -+ "//tensorflow/lite/delegates/gpu/common:operation_parser", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:unimplemented_operation_parser", -+ "@com_google_absl//absl/memory", -+ "@com_google_absl//absl/strings", -+ "@com_google_absl//absl/types:any", -+ ], -+) -+ -+cc_library( -+ name = "custom_transformations", -+ srcs = ["custom_transformations.cc"], -+ hdrs = ["//tensorflow/lite/delegates/gpu/common:custom_transformations.h"], -+ deps = [ -+ ":landmarks_to_transform_matrix", -+ ":transform_landmarks", -+ ":transform_tensor_bilinear", -+ "//tensorflow/lite/delegates/gpu/common:model_transformer", -+ "@com_google_absl//absl/memory", -+ ], -+) -+ -+cc_library( -+ name = "landmarks_to_transform_matrix", -+ srcs = ["landmarks_to_transform_matrix.cc"], -+ hdrs = ["landmarks_to_transform_matrix.h"], -+ deps = [ -+ "//tensorflow/lite/c:common", -+ "//tensorflow/lite/delegates/gpu/common:model", -+ 
"//tensorflow/lite/delegates/gpu/common:model_builder_helper", -+ "//tensorflow/lite/delegates/gpu/common:model_transformer", -+ "//tensorflow/lite/delegates/gpu/common:object_reader", -+ "//tensorflow/lite/delegates/gpu/common:operation_parser", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:tensor", -+ "//tensorflow/lite/delegates/gpu/common:types", -+ "@com_google_absl//absl/types:any", -+ "@flatbuffers", -+ ], -+) -+ -+cc_library( -+ name = "transform_landmarks", -+ srcs = ["transform_landmarks.cc"], -+ hdrs = ["transform_landmarks.h"], -+ deps = [ -+ "//tensorflow/lite/c:common", -+ "//tensorflow/lite/delegates/gpu/common:model", -+ "//tensorflow/lite/delegates/gpu/common:model_builder_helper", -+ "//tensorflow/lite/delegates/gpu/common:model_transformer", -+ "//tensorflow/lite/delegates/gpu/common:object_reader", -+ "//tensorflow/lite/delegates/gpu/common:operation_parser", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:tensor", -+ "@com_google_absl//absl/types:any", -+ "@flatbuffers", -+ ], -+) -+ -+cc_library( -+ name = "transform_tensor_bilinear", -+ srcs = ["transform_tensor_bilinear.cc"], -+ hdrs = ["transform_tensor_bilinear.h"], -+ deps = [ -+ "//tensorflow/lite/c:common", -+ "//tensorflow/lite/delegates/gpu/common:model", -+ "//tensorflow/lite/delegates/gpu/common:model_builder_helper", -+ "//tensorflow/lite/delegates/gpu/common:model_transformer", -+ "//tensorflow/lite/delegates/gpu/common:object_reader", -+ "//tensorflow/lite/delegates/gpu/common:operation_parser", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:tensor", -+ "@com_google_absl//absl/types:any", -+ "@flatbuffers", -+ ], -+) -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/custom_parsers.cc b/tensorflow/lite/delegates/gpu/common/mediapipe/custom_parsers.cc -new file mode 100644 -index 00000000000..52c11b90fc8 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/custom_parsers.cc -@@ -0,0 +1,34 @@ -+#include "tensorflow/lite/delegates/gpu/common/custom_parsers.h" -+ -+#include -+#include -+ -+#include "absl/memory/memory.h" -+#include "absl/strings/string_view.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/unimplemented_operation_parser.h" -+ -+namespace tflite { -+namespace gpu { -+ -+std::unique_ptr NewCustomOperationParser( -+ absl::string_view op_name) { -+ if (op_name == "Landmarks2TransformMatrix" || -+ op_name == "Landmarks2TransformMatrixV2") { -+ return std::make_unique(); -+ } -+ if (op_name == "TransformLandmarks") { -+ return std::make_unique(); -+ } -+ if (op_name == "TransformTensor" /*for version 1*/ || -+ op_name == "TransformTensorBilinear" /*for version 2*/) { -+ return std::make_unique(); -+ } -+ return absl::make_unique(op_name); -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/custom_transformations.cc b/tensorflow/lite/delegates/gpu/common/mediapipe/custom_transformations.cc -new 
file mode 100644 -index 00000000000..1509ea3bcf3 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/custom_transformations.cc -@@ -0,0 +1,24 @@ -+#include "tensorflow/lite/delegates/gpu/common/custom_transformations.h" -+ -+#include "absl/memory/memory.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h" -+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" -+ -+namespace tflite { -+namespace gpu { -+bool ApplyCustomTransformations(ModelTransformer* transformer) { -+ return transformer->Apply( -+ "transform_landmarks_v2_to_v1", -+ absl::make_unique().get()) && -+ transformer->Apply( -+ "transform_tensor_bilinear_v2_to_v1", -+ absl::make_unique().get()) && -+ transformer->Apply( -+ "landmarks_to_transform_matrix_v2_with_mul", -+ absl::make_unique() -+ .get()); -+} -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.cc b/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.cc -new file mode 100644 -index 00000000000..4e73cf649e6 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.cc -@@ -0,0 +1,182 @@ -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h" -+ -+#include -+#include -+#include -+ -+#include "absl/types/any.h" -+#include "flatbuffers/flexbuffers.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/tensor.h" -+#include "tensorflow/lite/delegates/gpu/common/types.h" -+ -+namespace tflite { -+namespace gpu { -+ -+absl::Status LandmarksToTransformMatrixOperationParser::IsSupported( -+ const TfLiteContext* context, const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) { -+ RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 2)); -+ return CheckInputsOutputs(context, tflite_node, /*runtime_inputs=*/1, -+ /*outputs=*/1); -+} -+ -+absl::Status LandmarksToTransformMatrixOperationParser::Parse( -+ const TfLiteNode* tflite_node, const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) { -+ Node* node = graph->NewNode(); -+ RETURN_IF_ERROR(reader->AddInput(node, 0)); // landmarks -+ RETURN_IF_ERROR(reader->AddOutputs(node)); // transform matrix -+ -+ node->operation.type = kLandmarksToTransformMatrixType; -+ BHWC output_shape; -+ if (registration->version == 2) { -+ LandmarksToTransformMatrixV2Attributes attr; -+ RETURN_IF_ERROR(ParseLandmarksToTransformMatrixV2Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else if (registration->version == 1) { -+ LandmarksToTransformMatrixV1Attributes attr; -+ RETURN_IF_ERROR(ParseLandmarksToTransformMatrixV1Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else { -+ return 
absl::UnimplementedError( -+ "Landmarks To Transform Matrix operation can be of version 1 or 2 " -+ "only."); -+ } -+ -+ auto output_value = graph->FindOutputs(node->id)[0]; -+ output_value->tensor.shape = output_shape; -+ return absl::OkStatus(); -+} -+ -+absl::Status ParseLandmarksToTransformMatrixV1Attributes( -+ const void* data, uint32_t data_size, -+ LandmarksToTransformMatrixV1Attributes* attr, BHWC* output_shape) { -+ const flexbuffers::Map m = -+ flexbuffers::GetRoot(reinterpret_cast(data), data_size) -+ .AsMap(); -+ -+ const auto input_hw = m["input_hw"].AsTypedVector(); -+ attr->input_hw = HW(input_hw[0].AsInt32(), input_hw[1].AsInt32()); -+ -+ const auto output_hw = m["output_hw"].AsTypedVector(); -+ attr->output_hw = HW(output_hw[0].AsInt32(), output_hw[1].AsInt32()); -+ -+ attr->dimensions = m["dimensions"].AsInt32(); -+ attr->landmarks_range = m["landmarks_range"].AsInt32(); -+ attr->bbox_size_multiplier = m["bbox_size_multiplier"].AsFloat(); -+ attr->left_rotation_idx = m["left_rotation_idx"].AsInt32(); -+ attr->right_rotation_idx = m["right_rotation_idx"].AsInt32(); -+ -+ const auto subset = m["subset"].AsTypedVector(); -+ for (int i = 0; i < subset.size() / 2; i++) { -+ attr->subset.emplace_back(subset[i * 2].AsInt32(), -+ subset[i * 2 + 1].AsInt32()); -+ } -+ if (subset.size() % 2 != 0) { -+ attr->subset.emplace_back(subset[subset.size() - 1].AsInt32(), -+ subset[subset.size() - 1].AsInt32()); -+ } -+ *output_shape = BHWC(1, 1, 4, 4); -+ return absl::OkStatus(); -+} -+ -+absl::Status ParseLandmarksToTransformMatrixV2Attributes( -+ const void* data, uint32_t data_size, -+ LandmarksToTransformMatrixV2Attributes* attr, BHWC* output_shape) { -+ const flexbuffers::Map m = -+ flexbuffers::GetRoot(reinterpret_cast(data), data_size) -+ .AsMap(); -+ const auto subset_idxs = m["subset_idxs"].AsTypedVector(); -+ int amount = subset_idxs.size(); -+ for (int i = 0; i < amount / 2; i++) { -+ attr->subset_idxs.emplace_back(subset_idxs[i * 2].AsInt32(), -+ subset_idxs[i * 2 + 1].AsInt32()); -+ } -+ if (amount % 2 != 0) { -+ int previous = amount - 1; -+ attr->subset_idxs.emplace_back(subset_idxs[previous].AsInt32(), -+ subset_idxs[previous].AsInt32()); -+ } -+ attr->left_rotation_idx = m["left_rotation_idx"].AsInt32(); -+ attr->right_rotation_idx = m["right_rotation_idx"].AsInt32(); -+ attr->target_rotation_radians = m["target_rotation_radians"].AsFloat(); -+ attr->output_height = m["output_height"].AsInt32(); -+ attr->output_width = m["output_width"].AsInt32(); -+ attr->scale_x = m["scale_x"].AsFloat(); -+ attr->scale_y = m["scale_y"].AsFloat(); -+ -+ *output_shape = BHWC(1, 1, 4, 4); -+ return absl::OkStatus(); -+} -+ -+TransformResult LandmarksToTransformMatrixV2ToV2WithMul::ApplyToNode( -+ Node* node, GraphFloat32* graph) { -+ // Recognize Landmarks2TransformMatrix.v2 as a root operation of this -+ // transformation. -+ if (node->operation.type != kLandmarksToTransformMatrixType) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ auto* landmarks2tm_attr = -+ absl::any_cast( -+ &node->operation.attributes); -+ if (!landmarks2tm_attr) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ auto node_inputs = graph->FindInputs(node->id); -+ if (node_inputs.size() != 1) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ // Recognize preeceding scalar Mul operation and save the value. 
-+ auto mul = graph->FindProducer(node_inputs[0]->id); -+ if (mul->operation.type != ToString(OperationType::MUL)) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ const auto& mul_attr = -+ absl::any_cast(mul->operation.attributes); -+ float scalar = 0.0; -+ if (!absl::holds_alternative(mul_attr.param)) { -+ return {TransformStatus::SKIPPED, ""}; -+ } else { -+ scalar = absl::get(mul_attr.param); -+ } -+ auto mul_inputs = graph->FindInputs(mul->id); -+ if (mul_inputs.size() != 1) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ // Recognize preceding reshape. -+ auto reshape = graph->FindProducer(mul_inputs[0]->id); -+ if (reshape->operation.type != ToString(OperationType::RESHAPE)) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ // Start modifying the graph. -+ { -+ absl::Status status = RemoveSimpleNodeKeepInput(graph, reshape); -+ if (!status.ok()) { -+ return {TransformStatus::INVALID, -+ "Unable to remove a node: " + std::string(status.message())}; -+ } -+ } -+ { -+ absl::Status status = RemoveSimpleNodeKeepInput(graph, mul); -+ if (!status.ok()) { -+ return {TransformStatus::INVALID, -+ "Unable to remove a node: " + std::string(status.message())}; -+ } -+ } -+ // Update LandmarksToTransformMatrix attributes with a stored multiplier. -+ landmarks2tm_attr->multiplier = scalar; -+ return {TransformStatus::APPLIED, ""}; -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h b/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h -new file mode 100644 -index 00000000000..78c72aea123 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h -@@ -0,0 +1,96 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -+ -+#include -+#include -+ -+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/types.h" -+ -+namespace tflite { -+namespace gpu { -+ -+constexpr const char kLandmarksToTransformMatrixType[] = -+ "landmarks_to_transform_matrix"; -+ -+struct LandmarksToTransformMatrixV1Attributes { -+ int dimensions; -+ int landmarks_range; -+ int left_rotation_idx; -+ int right_rotation_idx; -+ float bbox_size_multiplier; -+ HW input_hw; -+ HW output_hw; -+ std::vector subset; -+}; -+ -+struct LandmarksToTransformMatrixV2Attributes { -+ std::vector subset_idxs; -+ int left_rotation_idx; -+ int right_rotation_idx; -+ float target_rotation_radians; -+ int output_height; -+ int output_width; -+ float scale_x; -+ float scale_y; -+ float multiplier = 1.0; -+}; -+ -+class LandmarksToTransformMatrixOperationParser : public TFLiteOperationParser { -+ public: -+ absl::Status IsSupported(const TfLiteContext* context, -+ const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) final; -+ absl::Status Parse(const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) final; -+}; -+ -+absl::Status ParseLandmarksToTransformMatrixV1Attributes( -+ const 
void* data, uint32_t data_size, -+ LandmarksToTransformMatrixV1Attributes* attr, BHWC* output_shape); -+ -+absl::Status ParseLandmarksToTransformMatrixV2Attributes( -+ const void* data, uint32_t data_size, -+ LandmarksToTransformMatrixV2Attributes* attr, BHWC* output_shape); -+ -+// Converts subgraph of Reshape + Mul + Landmarks2TransformMatrix.v2 into -+// Landmarks2TransformMatrix.v2 with multiplier: -+// Source subgraph: -+// -+// Value_0 [1, 1, 1, 30] -+// | -+// Reshape -+// | -+// Value_1 [1, 10, 3] -+// | -+// Mul (* 0.25) -+// | -+// Value_2 [1, 10, 3] -+// | -+// Landmarks2TransformMatrix.v2 -+// | -+// Value_3 [1, 1, 4] -+// -+// Resulting subgraph: -+// -+// Value_0 [1, 1, 1, 30] -+// | -+// Landmarks2TransformMatrix.v2 -+// | -+// Value_3 [1, 1, 4] -+class LandmarksToTransformMatrixV2ToV2WithMul : public NodeTransformation { -+ public: -+ TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final; -+}; -+ -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.cc b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.cc -new file mode 100644 -index 00000000000..fba7e742998 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.cc -@@ -0,0 +1,169 @@ -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h" -+ -+#include -+#include -+#include -+ -+#include "absl/types/any.h" -+#include "flatbuffers/flexbuffers.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/tensor.h" -+ -+namespace tflite { -+namespace gpu { -+ -+absl::Status TransformLandmarksOperationParser::IsSupported( -+ const TfLiteContext* context, const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) { -+ RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 2)); -+ RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, -+ /*runtime_inputs=*/2, /*outputs=*/1)); -+ return absl::OkStatus(); -+} -+ -+absl::Status TransformLandmarksOperationParser::Parse( -+ const TfLiteNode* tflite_node, const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) { -+ Node* node = graph->NewNode(); -+ RETURN_IF_ERROR(reader->AddInput(node, 0)); // data -+ RETURN_IF_ERROR(reader->AddInput(node, 1)); // bbox -+ RETURN_IF_ERROR(reader->AddOutputs(node)); -+ node->operation.type = kTransformLandmarksType; -+ BHWC output_shape = graph->FindOutputs(node->id)[0]->tensor.shape; -+ if (registration->version == 2) { -+ TransformLandmarksAttributes attr; -+ RETURN_IF_ERROR(ParseTransformLandmarksV2Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else if (registration->version == 1) { -+ TransformLandmarksAttributes attr; -+ RETURN_IF_ERROR(ParseTransformLandmarksV1Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else { -+ return absl::UnimplementedError( -+ "Transform 
Landmarks operation can be of version 1 or 2 only."); -+ } -+ -+ auto output_value = graph->FindOutputs(node->id)[0]; -+ -+ output_value->tensor.shape = graph->FindInputs(node->id)[0]->tensor.shape; -+ return absl::OkStatus(); -+} -+ -+absl::Status ParseTransformLandmarksV1Attributes( -+ const void* data, uint32_t data_size, TransformLandmarksAttributes* attr, -+ BHWC* output_shape) { -+ attr->version = 1; -+ -+ const flexbuffers::Map m = -+ flexbuffers::GetRoot(reinterpret_cast(data), data_size) -+ .AsMap(); -+ const flexbuffers::TypedVector keys = m.Keys(); -+ -+ for (int k = 0; k < keys.size(); ++k) { -+ const std::string key = keys[k].ToString(); -+ const auto value = m[key]; -+ if (key == "dimensions") { -+ attr->dimensions = value.AsInt32(); -+ } -+ if (key == "scale") { -+ attr->scale = value.AsFloat(); -+ } -+ } -+ return absl::OkStatus(); -+} -+ -+absl::Status ParseTransformLandmarksV2Attributes( -+ const void* data, uint32_t data_size, TransformLandmarksAttributes* attr, -+ BHWC* output_shape) { -+ attr->version = 2; -+ attr->dimensions = output_shape->c; -+ attr->scale = 1.0; -+ -+ return absl::OkStatus(); -+} -+ -+TransformResult TransformLandmarksV2ToV1::ApplyToNode(Node* node, -+ GraphFloat32* graph) { -+ // Recognize suitable Transform Landmarks operation. -+ if (node->operation.type != kTransformLandmarksType) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ TransformLandmarksAttributes transform_landmarks_attr = -+ absl::any_cast(node->operation.attributes); -+ if (transform_landmarks_attr.version != 2) { -+ return {TransformStatus::SKIPPED, -+ "Transform Landmarks operation should be of version 2."}; -+ } -+ -+ // Recognize suitable preceding Reshape. -+ std::vector transform_landmarks_inputs = graph->FindInputs(node->id); -+ if (transform_landmarks_inputs.size() != 2) { -+ return {TransformStatus::SKIPPED, -+ "Transform Landmarks operation should have two inputs."}; -+ } -+ Value* landmarks_input_tensor = transform_landmarks_inputs[1]; -+ if (transform_landmarks_inputs[1]->tensor.shape == BHWC(1, 1, 4, 4)) { -+ landmarks_input_tensor = transform_landmarks_inputs[0]; -+ } -+ Node* preceding_reshape = graph->FindProducer(landmarks_input_tensor->id); -+ if (preceding_reshape->operation.type != ToString(OperationType::RESHAPE)) { -+ return {TransformStatus::SKIPPED, -+ "Expected Reshape node to be a producer of the transformation " -+ "matrix input."}; -+ } -+ -+ // Recognize suitable succeeding Reshape. -+ std::vector transform_landmarks_outputs = -+ graph->FindOutputs(node->id); -+ if (transform_landmarks_outputs.size() != 1) { -+ return {TransformStatus::SKIPPED, -+ "Transform Landmarks operation should have one output."}; -+ } -+ Value* landmarks_output_tensor = transform_landmarks_outputs[0]; -+ std::vector landmarks__output_consumers = -+ graph->FindConsumers(landmarks_output_tensor->id); -+ if (landmarks__output_consumers.size() != 1) { -+ return {TransformStatus::SKIPPED, -+ "Transform Landmarks output should be consumed by one operation."}; -+ } -+ Node* succeeding_reshape = landmarks__output_consumers[0]; -+ if (succeeding_reshape->operation.type != ToString(OperationType::RESHAPE)) { -+ return {TransformStatus::SKIPPED, -+ "Expected Reshape node to be a consumer of the Transform " -+ "Landmarks operation's output value."}; -+ } -+ -+ // Delete preceding and succeding Reshape operations. 
-+ absl::Status removed_preceding = -+ RemoveSimpleNodeKeepInput(graph, preceding_reshape); -+ if (!removed_preceding.ok()) { -+ return {TransformStatus::INVALID, -+ "Unable to remove a preceding Reshape node: " + -+ std::string(removed_preceding.message())}; -+ } -+ absl::Status removed_succeeding = -+ RemoveSimpleNodeKeepOutput(graph, succeeding_reshape); -+ if (!removed_succeeding.ok()) { -+ return {TransformStatus::INVALID, -+ "Unable to remove a succeeding Reshape node: " + -+ std::string(removed_succeeding.message())}; -+ } -+ -+ // Switch Transform Landmarks operation back to version 1. -+ transform_landmarks_attr.version = 1; -+ node->operation.attributes = transform_landmarks_attr; -+ -+ return {TransformStatus::APPLIED, ""}; -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h -new file mode 100644 -index 00000000000..f804e14e55d ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h -@@ -0,0 +1,74 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -+ -+#include -+ -+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+ -+namespace tflite { -+namespace gpu { -+ -+constexpr const char kTransformLandmarksType[] = "transform_landmarks"; -+ -+struct TransformLandmarksAttributes { -+ int dimensions = 3; -+ float scale = 1.0; -+ int version = 0; -+}; -+ -+class TransformLandmarksOperationParser : public TFLiteOperationParser { -+ public: -+ absl::Status IsSupported(const TfLiteContext* context, -+ const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) final; -+ absl::Status Parse(const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) final; -+}; -+ -+absl::Status ParseTransformLandmarksV1Attributes( -+ const void* data, uint32_t data_size, TransformLandmarksAttributes* attr, -+ BHWC* output_shape); -+ -+absl::Status ParseTransformLandmarksV2Attributes( -+ const void* data, uint32_t data_size, TransformLandmarksAttributes* attr, -+ BHWC* output_shape); -+ -+// Removes reshapes from subgraph: -+// -+// Value_0 [1, 1, 1, 240] -+// | -+// Reshape -+// | -+// Value_1 [1, 1, 80, 3] Value_2 [1, 1, 4, 4] -+// \ / -+// TransformLandmarks.version_2 -+// | -+// Value_3 [1, 1, 80, 3] -+// | -+// Reshape -+// | -+// Value_4 [1, 1, 1, 240] -+// -+// Resulting subgraph is: -+// -+// Value_0 [1, 1, 1, 240] Value_2 [1, 1, 4, 4] -+// \ / -+// TransformLandmarks.version_1 -+// | -+// Value_4 [1, 1, 1, 240] -+class TransformLandmarksV2ToV1 : public NodeTransformation { -+ public: -+ TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final; -+}; -+ -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.cc b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.cc -new file mode 100644 -index 
00000000000..704ce7d4a47 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.cc -@@ -0,0 +1,142 @@ -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h" -+ -+#include -+#include -+#include -+#include -+ -+#include "absl/types/any.h" -+#include "flatbuffers/flexbuffers.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_builder_helper.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/tensor.h" -+ -+namespace tflite { -+namespace gpu { -+ -+absl::Status TransformTensorBilinearOperationParser::IsSupported( -+ const TfLiteContext* context, const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) { -+ RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 2)); -+ RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node, -+ /*runtime_inputs=*/2, /*outputs=*/1)); -+ return absl::OkStatus(); -+} -+ -+absl::Status TransformTensorBilinearOperationParser::Parse( -+ const TfLiteNode* tflite_node, const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) { -+ Node* node = graph->NewNode(); -+ RETURN_IF_ERROR(reader->AddInput(node, 0)); // data -+ RETURN_IF_ERROR(reader->AddInput(node, 1)); // bbox -+ RETURN_IF_ERROR(reader->AddOutputs(node)); -+ -+ node->operation.type = kTransformTensorBilinearType; -+ BHWC output_shape; -+ if (registration->version == 2) { -+ TransformTensorBilinearAttributes attr; -+ RETURN_IF_ERROR(ParseTransformTensorBilinearV2Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else if (registration->version == 1) { -+ TransformTensorBilinearAttributes attr; -+ RETURN_IF_ERROR(ParseTransformTensorBilinearV1Attributes( -+ tflite_node->custom_initial_data, tflite_node->custom_initial_data_size, -+ &attr, &output_shape)); -+ node->operation.attributes = attr; -+ } else { -+ return absl::UnimplementedError( -+ "Transform Tensor Bilinear operation can be of version 1 or 2 only."); -+ } -+ -+ auto output_value = graph->FindOutputs(node->id)[0]; -+ -+ output_value->tensor.shape = -+ BHWC(1, output_shape.h, output_shape.w, -+ graph->FindInputs(node->id)[0]->tensor.shape.c); -+ return absl::OkStatus(); -+} -+ -+absl::Status ParseTransformTensorBilinearV1Attributes( -+ const void* data, uint32_t data_size, -+ TransformTensorBilinearAttributes* attr, BHWC* output_shape) { -+ attr->version = 1; -+ -+ const flexbuffers::Map m = -+ flexbuffers::GetRoot(reinterpret_cast(data), data_size) -+ .AsMap(); -+ const flexbuffers::TypedVector keys = m.Keys(); -+ -+ for (int k = 0; k < keys.size(); ++k) { -+ const std::string key = keys[k].ToString(); -+ const auto value = m[key]; -+ if (key == "mode") { -+ if (value.AsString().str() != "bilinear") { -+ return absl::UnimplementedError( -+ "TransformTensor operation supports only bilinear interpolation."); -+ } -+ } -+ -+ if (key == "output_size") { -+ attr->output_size = HW(value.AsTypedVector()[0].AsInt32(), -+ value.AsTypedVector()[1].AsInt32()); -+ } -+ } -+ attr->align_corners = false; -+ *output_shape = BHWC(1, attr->output_size.h, attr->output_size.w, 1); -+ return absl::OkStatus(); -+} -+ -+absl::Status 
ParseTransformTensorBilinearV2Attributes( -+ const void* data, uint32_t data_size, -+ TransformTensorBilinearAttributes* attr, BHWC* output_shape) { -+ attr->version = 2; -+ -+ const flexbuffers::Map m = -+ flexbuffers::GetRoot(reinterpret_cast(data), data_size) -+ .AsMap(); -+ const flexbuffers::TypedVector keys = m.Keys(); -+ HW output_size; -+ for (int k = 0; k < keys.size(); ++k) { -+ const std::string key = keys[k].ToString(); -+ const auto value = m[key]; -+ if (key == "output_height") { -+ output_size.h = value.AsInt32(); -+ } -+ if (key == "output_width") { -+ output_size.w = value.AsInt32(); -+ } -+ } -+ attr->output_size = std::move(output_size); -+ attr->align_corners = true; -+ *output_shape = BHWC(1, attr->output_size.h, attr->output_size.w, 1); -+ return absl::OkStatus(); -+} -+ -+TransformResult TransformTensorBilinearV2ToV1::ApplyToNode( -+ Node* node, GraphFloat32* graph) { -+ if (node->operation.type != kTransformTensorBilinearType) { -+ return {TransformStatus::SKIPPED, ""}; -+ } -+ TransformTensorBilinearAttributes transform_tensor_attr = -+ absl::any_cast( -+ node->operation.attributes); -+ -+ if (transform_tensor_attr.version != 2) { -+ return {TransformStatus::SKIPPED, -+ "Transform Tensor Bilinear operation should be of version 2."}; -+ } -+ transform_tensor_attr.version = 1; -+ transform_tensor_attr.align_corners = true; -+ node->operation.attributes = transform_tensor_attr; -+ -+ return {TransformStatus::APPLIED, ""}; -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h -new file mode 100644 -index 00000000000..8a1f840c12f ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h -@@ -0,0 +1,54 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ -+ -+#include -+ -+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_transformer.h" -+#include "tensorflow/lite/delegates/gpu/common/object_reader.h" -+#include "tensorflow/lite/delegates/gpu/common/operation_parser.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+ -+namespace tflite { -+namespace gpu { -+ -+constexpr const char kTransformTensorBilinearType[] = -+ "transform_tensor_bilinear"; -+ -+struct TransformTensorBilinearAttributes { -+ HW output_size; -+ bool align_corners = false; -+ int version = 0; -+}; -+ -+class TransformTensorBilinearOperationParser : public TFLiteOperationParser { -+ public: -+ absl::Status IsSupported(const TfLiteContext* context, -+ const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration) final; -+ absl::Status Parse(const TfLiteNode* tflite_node, -+ const TfLiteRegistration* registration, -+ GraphFloat32* graph, ObjectReader* reader) final; -+}; -+ -+absl::Status ParseTransformTensorBilinearV1Attributes( -+ const void* data, uint32_t data_size, -+ TransformTensorBilinearAttributes* attr, BHWC* output_shape); -+ -+absl::Status ParseTransformTensorBilinearV2Attributes( -+ const void* data, uint32_t data_size, -+ TransformTensorBilinearAttributes* attr, BHWC* output_shape); -+ -+// Converts Transform Tensor Bilinear operation of version 2 to version 1 with -+// align corners parameter set to 
true. -+class TransformTensorBilinearV2ToV1 : public NodeTransformation { -+ public: -+ TransformResult ApplyToNode(Node* node, GraphFloat32* graph) final; -+}; -+ -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ -diff --git a/tensorflow/lite/delegates/gpu/common/selectors/BUILD b/tensorflow/lite/delegates/gpu/common/selectors/BUILD -index ec6c2281b9e..26cf9aab1a9 100644 ---- a/tensorflow/lite/delegates/gpu/common/selectors/BUILD -+++ b/tensorflow/lite/delegates/gpu/common/selectors/BUILD -@@ -45,9 +45,9 @@ cc_library( - "//tensorflow/lite/delegates/gpu/common:model", - "//tensorflow/lite/delegates/gpu/common:model_hints", - "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common/selectors/mediapipe:default_selector", - "//tensorflow/lite/delegates/gpu/common/task:gpu_operation", - "//tensorflow/lite/delegates/gpu/common/task:tensor_desc", -- _selectors_package + ":default_selector", - ], - ) - -diff --git a/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/BUILD b/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/BUILD -new file mode 100644 -index 00000000000..d5a28d6f72e ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/BUILD -@@ -0,0 +1,21 @@ -+package( -+ default_visibility = ["//visibility:public"], -+ licenses = ["notice"], -+) -+ -+cc_library( -+ name = "default_selector", -+ srcs = ["default_selector.cc"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:model", -+ "//tensorflow/lite/delegates/gpu/common:model_hints", -+ "//tensorflow/lite/delegates/gpu/common:operations", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common/selectors:subgraph", -+ "//tensorflow/lite/delegates/gpu/common/task:gpu_operation", -+ "//tensorflow/lite/delegates/gpu/common/tasks/mediapipe:landmarks_to_transform_matrix", -+ "//tensorflow/lite/delegates/gpu/common/tasks/mediapipe:transform_landmarks", -+ "//tensorflow/lite/delegates/gpu/common/tasks/mediapipe:transform_tensor_bilinear", -+ "@com_google_absl//absl/strings", -+ ], -+) -diff --git a/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/default_selector.cc b/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/default_selector.cc -new file mode 100644 -index 00000000000..9c93149f95b ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/selectors/mediapipe/default_selector.cc -@@ -0,0 +1,48 @@ -+#include -+ -+#include "absl/strings/str_cat.h" -+#include "tensorflow/lite/delegates/gpu/common/model.h" -+#include "tensorflow/lite/delegates/gpu/common/model_hints.h" -+#include "tensorflow/lite/delegates/gpu/common/operations.h" -+#include "tensorflow/lite/delegates/gpu/common/selectors/subgraph.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h" -+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.h" -+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.h" -+ -+namespace tflite { -+namespace gpu { -+namespace { -+ -+absl::Status CustomGPUOperationFromNode( -+ const GpuInfo& gpu_info, const OperationDef& op_def, ModelHints hints, -+ const std::vector& inputs, const std::vector& outputs, -+ const Node& node, GPUOperationsSubgraph* gpu_subgraph) { -+ std::unique_ptr* gpu_op = -+ 
InitSingleOpSubgraph(inputs, outputs, gpu_subgraph); -+ if (node.operation.type == kLandmarksToTransformMatrixType) { -+ return CreateLandmarksToTransformMatrixFromNode(op_def, node, gpu_op); -+ } -+ if (node.operation.type == kTransformLandmarksType) { -+ return CreateTransformLandmarksFromNode(op_def, node, gpu_op); -+ } -+ if (node.operation.type == kTransformTensorBilinearType) { -+ return CreateTransformTensorBilinearFromNode(op_def, node, gpu_op); -+ } -+ -+ return absl::UnimplementedError( -+ absl::StrCat("No selector for ", node.operation.type)); -+} -+} // namespace -+ -+absl::Status SelectDefault(const GpuInfo& gpu_info, const OperationDef& op_def, -+ ModelHints hints, const std::vector& inputs, -+ const std::vector& outputs, const Node& node, -+ GPUOperationsSubgraph* gpu_subgraph) { -+ return CustomGPUOperationFromNode(gpu_info, op_def, hints, inputs, outputs, -+ node, gpu_subgraph); -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/BUILD b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/BUILD -new file mode 100644 -index 00000000000..9df0735f0eb ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/BUILD -@@ -0,0 +1,39 @@ -+package( -+ default_visibility = ["//visibility:public"], -+ licenses = ["notice"], -+) -+ -+cc_library( -+ name = "landmarks_to_transform_matrix", -+ srcs = ["landmarks_to_transform_matrix.cc"], -+ hdrs = ["landmarks_to_transform_matrix.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:landmarks_to_transform_matrix", -+ "//tensorflow/lite/delegates/gpu/common/task:gpu_operation", -+ ], -+) -+ -+cc_library( -+ name = "transform_landmarks", -+ srcs = ["transform_landmarks.cc"], -+ hdrs = ["transform_landmarks.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:transform_landmarks", -+ "//tensorflow/lite/delegates/gpu/common/task:gpu_operation", -+ "//tensorflow/lite/delegates/gpu/common/task:work_group_picking", -+ ], -+) -+ -+cc_library( -+ name = "transform_tensor_bilinear", -+ srcs = ["transform_tensor_bilinear.cc"], -+ hdrs = ["transform_tensor_bilinear.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:transform_tensor_bilinear", -+ "//tensorflow/lite/delegates/gpu/common/task:gpu_operation", -+ "//tensorflow/lite/delegates/gpu/common/task:work_group_picking", -+ ], -+) -diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.cc b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.cc -new file mode 100644 -index 00000000000..18f28b19361 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.cc -@@ -0,0 +1,368 @@ -+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.h" -+ -+#include -+#include -+ -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+ -+namespace tflite { -+namespace gpu { -+namespace { -+ -+std::string GetLandmarksToTransformMatrixV1KernelCode( -+ const OperationDef& op_def, -+ const LandmarksToTransformMatrixV1Attributes& attr) { -+ const std::string batch_id = op_def.IsBatchSupported() ? 
"B" : ""; -+ std::string c; -+ c += "#define MAT_MUL_3x3(R0, R1, R2, A0, A1, A2, B0, B1, B2) \\\n"; -+ c += " R0.x = A0.x * B0.x + A1.x * B0.y + A2.x * B0.z; \\\n"; -+ c += " R0.y = A0.y * B0.x + A1.y * B0.y + A2.y * B0.z; \\\n"; -+ c += " R0.z = A0.z * B0.x + A1.z * B0.y + A2.z * B0.z; \\\n"; -+ c += " R1.x = A0.x * B1.x + A1.x * B1.y + A2.x * B1.z; \\\n"; -+ c += " R1.y = A0.y * B1.x + A1.y * B1.y + A2.y * B1.z; \\\n"; -+ c += " R1.z = A0.z * B1.x + A1.z * B1.y + A2.z * B1.z; \\\n"; -+ c += " R2.x = A0.x * B2.x + A1.x * B2.y + A2.x * B2.z; \\\n"; -+ c += " R2.y = A0.y * B2.x + A1.y * B2.y + A2.y * B2.z; \\\n"; -+ c += " R2.z = A0.z * B2.x + A1.z * B2.y + A2.z * B2.z; \n"; -+ -+ c += "MAIN_FUNCTION($0) {\n"; -+ // temporary -+ c += " int dummy_var = GLOBAL_ID_0;\n"; -+ if (op_def.IsBatchSupported()) { -+ c += " int B = GLOBAL_ID_0;\n"; -+ c += " if (B >= args.dst_tensor.Batch()) return;\n"; -+ c += " args.dst_tensor.SetBatchRef(B);\n"; -+ c += " args.src_tensor.SetBatchRef(B);\n"; -+ } -+ // reads x and y coords only. -+ auto read_landmark = [&](const std::string& result, const std::string& id) { -+ c += " {\n"; -+ c += " int start = " + id + " * " + std::to_string(attr.dimensions) + -+ ";\n"; -+ c += " int ZC = start / 4;\n"; -+ if (attr.dimensions == 2) { -+ c += " float4 t_res = args.src_tensor.Read(0, 0, ZC);\n"; -+ c += " " + result + ".xy = t_res.xy;\n"; -+ } else if (attr.dimensions == 3) { -+ c += " float4 t_res = args.src_tensor.Read(0, 0, ZC);\n"; -+ c += " int rem = start % 4;\n"; -+ c += " if (rem == 0) {\n"; -+ c += " " + result + ".xy = t_res.xy;\n"; -+ c += " } else if (rem == 1) {\n"; -+ c += " " + result + ".xy = t_res.yz;\n"; -+ c += " } else if (rem == 2) {\n"; -+ c += " " + result + ".xy = t_res.zw;\n"; -+ c += " } else {\n"; -+ c += " float4 t_res_next = args.src_tensor.Read(0, 0, ZC + " -+ "1);\n"; -+ c += " " + result + ".xy = INIT_FLOAT2v2(t_res.w, t_res_next.x);\n"; -+ c += " }\n"; -+ } -+ c += " }\n"; -+ }; -+ c += " float2 l_pt, r_pt;\n"; -+ read_landmark("l_pt", "args.rotations_idx_x"); -+ read_landmark("r_pt", "args.rotations_idx_y"); -+ c += " float alpha = -atan2(r_pt.y - l_pt.y, r_pt.x - l_pt.x);\n"; -+ c += " float cosa = cos(alpha);\n"; -+ c += " float sina = sin(alpha);\n"; -+ c += " float2 max_value = INIT_FLOAT2v2(-100000.0f, -100000.0f);\n"; -+ c += " float2 min_value = INIT_FLOAT2v2(100000.0f, 100000.0f);\n"; -+ c += " for (int i = 0; i < args.subset_size; i++) {\n"; -+ c += " float2 p0, p1;\n"; -+ c += " int2 subset_v = args.subset.Read(i);\n"; -+ read_landmark("p0", "subset_v.x"); -+ read_landmark("p1", "subset_v.y"); -+ c += " // rotation\n"; -+ c += -+ " p0 = INIT_FLOAT2v2(p0.x*cosa - p0.y*sina, p0.x*sina + p0.y*cosa);\n"; -+ c += -+ " p1 = INIT_FLOAT2v2(p1.x*cosa - p1.y*sina, p1.x*sina + p1.y*cosa);\n"; -+ c += " max_value.x = max(max(p0.x, p1.x), max_value.x);\n"; -+ c += " max_value.y = max(max(p0.y, p1.y), max_value.y);\n"; -+ c += " min_value.x = min(min(p0.x, p1.x), min_value.x);\n"; -+ c += " min_value.y = min(min(p0.y, p1.y), min_value.y);\n"; -+ c += " }\n"; -+ c += " float2 bbox_size = (max_value - min_value) * " -+ "args.bbox_size_multiplier;\n"; -+ c += -+ " float3 scale_mat_c0 = INIT_FLOAT3v3(bbox_size.x / args.l_range, 0.0f, " -+ "0.0f);\n"; -+ c += -+ " float3 scale_mat_c1 = INIT_FLOAT3v3(0.0f, bbox_size.y / args.l_range, " -+ "0.0f);\n"; -+ c += " float3 scale_mat_c2 = INIT_FLOAT3v3(0.0f, 0.0f, 1.0f);\n"; -+ c += " float2 middle = (max_value + min_value) * 0.5f;\n"; -+ c += " float2 rotated_middle;\n"; -+ c += " float cosnega = 
cos(-alpha);\n"; -+ c += " float sinnega = sin(-alpha);\n"; -+ c += " rotated_middle.x = middle.x * cosnega - middle.y * sinnega;\n"; -+ c += " rotated_middle.y = middle.x * sinnega + middle.y * cosnega;\n"; -+ c += " float3 rot_mat_c0 = INIT_FLOAT3v3(cosnega, sinnega, 0.0f);\n"; -+ c += " float3 rot_mat_c1 = INIT_FLOAT3v3(-sinnega, cosnega, 0.0f);\n"; -+ c += " float3 rot_mat_c2 = INIT_FLOAT3v3(rotated_middle.x / args.l_range * " -+ "2.0f - " -+ "1.0f, rotated_middle.y / args.l_range * 2.0f - 1.0f, 1.0f);\n"; -+ c += " float3 to_relative_c0 = INIT_FLOAT3v3(2.0f / (args.output_size_x - " -+ "1.0f), 0.0f, 0.0f);\n"; -+ c += " float3 to_relative_c1 = INIT_FLOAT3v3(0.0f, 2.0f / " -+ "(args.output_size_y - 1.0f), 0.0f);\n"; -+ c += " float3 to_relative_c2 = INIT_FLOAT3v3(-1.0f, -1.0f, 1.0f);\n"; -+ c += " float3 to_absolute_c0 = INIT_FLOAT3v3((args.input_size_x - 1.0f) / " -+ "2.0f, 0.0f, 0.0f);\n"; -+ c += " float3 to_absolute_c1 = INIT_FLOAT3v3(0.0f, (args.input_size_y - " -+ "1.0f) / 2.0f, 0.0f);\n"; -+ c += " float3 to_absolute_c2 = INIT_FLOAT3v3((args.input_size_x - 1.0f) / " -+ "2.0f, (args.input_size_y - 1.0f) / 2.0f, 1.0f);\n"; -+ c += " float3 t0;\n"; -+ c += " float3 t1;\n"; -+ c += " float3 t2;\n"; -+ c += " // t0 = to_absolute * rotation_matrix\n"; -+ c += " MAT_MUL_3x3(t0, t1, t2, to_absolute_c0, to_absolute_c1, " -+ "to_absolute_c2, rot_mat_c0, rot_mat_c1, rot_mat_c2);\n"; -+ c += " float3 u0;\n"; -+ c += " float3 u1;\n"; -+ c += " float3 u2;\n"; -+ c += " // u0 = t0 * scale_matrix\n"; -+ c += " MAT_MUL_3x3(u0, u1, u2, t0, t1, t2, scale_mat_c0, scale_mat_c1, " -+ "scale_mat_c2);\n"; -+ c += " float3 res_c0;\n"; -+ c += " float3 res_c1;\n"; -+ c += " float3 res_c2;\n"; -+ c += " MAT_MUL_3x3(res_c0, res_c1, res_c2, u0, u1, u2, to_relative_c0, " -+ "to_relative_c1, to_relative_c2);\n"; -+ c += " FLT4 r0 = INIT_FLT4v4(res_c0.x, res_c1.x, 0.0f, res_c2.x);\n"; -+ c += " FLT4 r1 = INIT_FLT4v4(res_c0.y, res_c1.y, 0.0f, res_c2.y);\n"; -+ c += " FLT4 r2 = INIT_FLT4v4(res_c0.z, res_c1.z, res_c2.z, 0.0f);\n"; -+ c += " FLT4 r3 = INIT_FLT4v4( 0.0f, 0.0f, 0.0f, 1.0f);\n"; -+ c += " args.dst_tensor.Write(r0, 0, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r1, 1, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r2, 2, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r3, 3, 0, 0);\n"; -+ c += "}\n"; -+ return c; -+} -+ -+std::string GetLandmarksToTransformMatrixV2KernelCode( -+ const OperationDef& op_def, -+ const LandmarksToTransformMatrixV2Attributes& attr) { -+ std::string c; -+ c += "#define MAT_MUL_3x3(R0, R1, R2, A0, A1, A2, B0, B1, B2) \\\n"; -+ c += " R0.x = A0.x * B0.x + A1.x * B0.y + A2.x * B0.z; \\\n"; -+ c += " R0.y = A0.y * B0.x + A1.y * B0.y + A2.y * B0.z; \\\n"; -+ c += " R0.z = A0.z * B0.x + A1.z * B0.y + A2.z * B0.z; \\\n"; -+ c += " R1.x = A0.x * B1.x + A1.x * B1.y + A2.x * B1.z; \\\n"; -+ c += " R1.y = A0.y * B1.x + A1.y * B1.y + A2.y * B1.z; \\\n"; -+ c += " R1.z = A0.z * B1.x + A1.z * B1.y + A2.z * B1.z; \\\n"; -+ c += " R2.x = A0.x * B2.x + A1.x * B2.y + A2.x * B2.z; \\\n"; -+ c += " R2.y = A0.y * B2.x + A1.y * B2.y + A2.y * B2.z; \\\n"; -+ c += " R2.z = A0.z * B2.x + A1.z * B2.y + A2.z * B2.z; \n"; -+ -+ c += "MAIN_FUNCTION($0) {\n"; -+ // temporary -+ c += " int dummy_var = GLOBAL_ID_0;\n"; -+ if (op_def.IsBatchSupported()) { -+ c += " int B = GLOBAL_ID_0;\n"; -+ c += " if (B >= args.dst_tensor.Batch()) return;\n"; -+ c += " args.dst_tensor.SetBatchRef(B);\n"; -+ c += " args.src_tensor.SetBatchRef(B);\n"; -+ } -+ // reads x and y coords only. 
-+ auto read_landmark = [&](const std::string& result, const std::string& id) { -+ c += " {\n"; -+ c += " int start = " + id + " * 3; // only 3 dimensional landmarks\n"; -+ c += " int ZC = start / 4;\n"; -+ c += " float4 t_res = args.src_tensor.Read(0, 0, ZC);\n"; -+ c += " int rem = start % 4;\n"; -+ c += " if (rem == 0) {\n"; -+ c += " " + result + ".xy = t_res.xy;\n"; -+ c += " } else if (rem == 1) {\n"; -+ c += " " + result + ".xy = t_res.yz;\n"; -+ c += " } else if (rem == 2) {\n"; -+ c += " " + result + ".xy = t_res.zw;\n"; -+ c += " } else {\n"; -+ c += " float4 t_res_next = args.src_tensor.Read(0, 0, ZC + " -+ "1);\n"; -+ c += " " + result + ".xy = INIT_FLOAT2v2(t_res.w, t_res_next.x);\n"; -+ c += " }\n"; -+ c += " " + result + " *= args.multiplier;\n"; -+ c += " }\n"; -+ }; -+ c += " float2 left_landmark, right_landmark;\n"; -+ read_landmark("left_landmark", "args.left_rotation_idx"); -+ read_landmark("right_landmark", "args.right_rotation_idx"); -+ c += " float diff_y = right_landmark.y - left_landmark.y;\n"; -+ c += " float diff_x = right_landmark.x - left_landmark.x;\n"; -+ c += " float rotation = 0.0;\n"; -+ c += " if (diff_y != 0.0 && diff_x != 0.0) {" -+ " rotation = atan2(diff_y, diff_x);\n" -+ " }"; -+ c += " float r = args.target_rotation_radians - rotation;\n"; -+ c += " float cosr = cos(r);\n"; -+ c += " float sinr = sin(r);\n"; -+ c += " float2 max_value = INIT_FLOAT2v2(-100000.0f, -100000.0f);\n"; -+ c += " float2 min_value = INIT_FLOAT2v2(100000.0f, 100000.0f);\n"; -+ c += " for (int i = 0; i < args.subset_idxs_size; i++) {\n"; -+ c += " float2 p0, p1;\n"; -+ c += " int2 subset_idxs_v = args.subset_idxs.Read(i);\n"; -+ read_landmark("p0", "subset_idxs_v.x"); -+ read_landmark("p1", "subset_idxs_v.y"); -+ c += " // rotation\n"; -+ c += -+ " p0 = INIT_FLOAT2v2(p0.x*cosr - p0.y*sinr, p0.x*sinr + p0.y*cosr);\n"; -+ c += -+ " p1 = INIT_FLOAT2v2(p1.x*cosr - p1.y*sinr, p1.x*sinr + p1.y*cosr);\n"; -+ c += " max_value.x = max(max(p0.x, p1.x), max_value.x);\n"; -+ c += " max_value.y = max(max(p0.y, p1.y), max_value.y);\n"; -+ c += " min_value.x = min(min(p0.x, p1.x), min_value.x);\n"; -+ c += " min_value.y = min(min(p0.y, p1.y), min_value.y);\n"; -+ c += " }\n"; -+ c += " float crop_width = max_value.x - min_value.x;\n"; -+ c += " float crop_height = max_value.y - min_value.y;\n"; -+ c += " float2 crop_xy1 = (max_value + min_value) / 2.0f;\n"; -+ c += " float crop_x = cos(-r) * crop_xy1.x - sin(-r) * crop_xy1.y;\n"; -+ c += " float crop_y = sin(-r) * crop_xy1.x + cos(-r) * crop_xy1.y;\n"; -+ c += " float3 shift_c0 = INIT_FLOAT3v3(1.0, 0.0, 0.0);\n"; -+ c += " float3 shift_c1 = INIT_FLOAT3v3(0.0, 1.0, 0.0);\n"; -+ c += " float3 shift_c2 = INIT_FLOAT3v3(crop_x, crop_y, 1.0);\n"; -+ c += " r = -r;\n"; -+ c += " float3 rotation_c0 = INIT_FLOAT3v3(cos(r), sin(r), 0.0);\n"; -+ c += " float3 rotation_c1 = INIT_FLOAT3v3(-sin(r), cos(r), 0.0);\n"; -+ c += " float3 rotation_c2 = INIT_FLOAT3v3(0.0, 0.0, 1.0);\n"; -+ c += " float3 t0;\n"; -+ c += " float3 t1;\n"; -+ c += " float3 t2;\n"; -+ c += " MAT_MUL_3x3(t0, t1, t2, shift_c0, shift_c1, shift_c2, " -+ " rotation_c0, rotation_c1, rotation_c2);\n"; -+ c += " float cs_x = args.scale_x * crop_width / args.output_width;\n"; -+ c += " float cs_y = args.scale_y * crop_height / args.output_height;\n"; -+ c += " float3 scale_c0 = INIT_FLOAT3v3(cs_x, 0.0, 0.0);\n"; -+ c += " float3 scale_c1 = INIT_FLOAT3v3(0.0, cs_y, 0.0);\n"; -+ c += " float3 scale_c2 = INIT_FLOAT3v3(0.0, 0.0, 1.0);\n"; -+ c += " MAT_MUL_3x3(t0, t1, t2, t0, t1, t2, " -+ " 
scale_c0, scale_c1, scale_c2);\n"; -+ c += " float shift_x = -1.0 * (args.output_width / 2.0);\n"; -+ c += " float shift_y = -1.0 * (args.output_height / 2.0);\n"; -+ c += " float3 shift2_c0 = INIT_FLOAT3v3(1.0, 0.0, 0.0);\n"; -+ c += " float3 shift2_c1 = INIT_FLOAT3v3(0.0, 1.0, 0.0);\n"; -+ c += " float3 shift2_c2 = INIT_FLOAT3v3(shift_x, shift_y, 1.0);\n"; -+ c += " MAT_MUL_3x3(t0, t1, t2, t0, t1, t2, " -+ " shift2_c0, shift2_c1, shift2_c2);\n"; -+ c += " FLT4 r0 = INIT_FLT4v4(t0.x, t1.x, 0.0f, t2.x);\n"; -+ c += " FLT4 r1 = INIT_FLT4v4(t0.y, t1.y, 0.0f, t2.y);\n"; -+ c += " FLT4 r2 = INIT_FLT4v4(t0.z, t1.z, t2.z, 0.0f);\n"; -+ c += " FLT4 r3 = INIT_FLT4v4(0.0f, 0.0f, 0.0f, 1.0f);\n"; -+ c += " args.dst_tensor.Write(r0, 0, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r1, 1, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r2, 2, 0, 0);\n"; -+ c += " args.dst_tensor.Write(r3, 3, 0, 0);\n"; -+ c += "}\n"; -+ return c; -+} -+ -+} // namespace -+ -+absl::Status CreateLandmarksToTransformMatrixFromNode( -+ const OperationDef& op_def, const Node& node, -+ std::unique_ptr* gpu_op) { -+ auto* attr_v1 = absl::any_cast( -+ &node.operation.attributes); -+ if (attr_v1) { -+ GPUOperation operation = -+ CreateLandmarksToTransformMatrixV1(op_def, *attr_v1); -+ *gpu_op = absl::make_unique(std::move(operation)); -+ return absl::OkStatus(); -+ } -+ auto* attr_v2 = absl::any_cast( -+ &node.operation.attributes); -+ if (attr_v2) { -+ GPUOperation operation = -+ CreateLandmarksToTransformMatrixV2(op_def, *attr_v2); -+ *gpu_op = absl::make_unique(std::move(operation)); -+ return absl::OkStatus(); -+ } -+ return absl::InvalidArgumentError( -+ "Landmarks To Transform Matrix operation supports only version 1 or " -+ "2."); -+} -+ -+GPUOperation CreateLandmarksToTransformMatrixV1( -+ const OperationDef& definition, -+ const LandmarksToTransformMatrixV1Attributes& attr) { -+ std::vector data(attr.subset.size() * 2); -+ for (int i = 0; i < attr.subset.size(); ++i) { -+ data[i * 2 + 0] = attr.subset[i].x; -+ data[i * 2 + 1] = attr.subset[i].y; -+ } -+ -+ BufferDescriptor desc; -+ desc.element_type = DataType::INT32; -+ desc.element_size = 2; -+ desc.memory_type = MemoryType::GLOBAL; -+ desc.size = attr.subset.size() * sizeof(int32_t) * 2; -+ desc.data.resize(desc.size); -+ memcpy(desc.data.data(), data.data(), desc.size); -+ -+ GPUOperation result(definition); -+ result.AddSrcTensor("src_tensor", definition.src_tensors[0]); -+ result.AddDstTensor("dst_tensor", definition.dst_tensors[0]); -+ result.args_.AddFloat("l_range", attr.landmarks_range); -+ result.args_.AddFloat("bbox_size_multiplier", attr.bbox_size_multiplier); -+ result.args_.AddInt("rotations_idx_x", attr.left_rotation_idx); -+ result.args_.AddInt("rotations_idx_y", attr.right_rotation_idx); -+ result.args_.AddFloat("input_size_x", attr.input_hw.w); -+ result.args_.AddFloat("input_size_y", attr.input_hw.h); -+ result.args_.AddFloat("output_size_x", attr.output_hw.w); -+ result.args_.AddFloat("output_size_y", attr.output_hw.h); -+ result.args_.AddInt("subset_size", attr.subset.size()); -+ result.args_.AddObject("subset", -+ absl::make_unique(std::move(desc))); -+ result.code_ = GetLandmarksToTransformMatrixV1KernelCode(definition, attr); -+ result.work_group_size_ = int3(1, 1, 1); -+ result.tensor_to_grid_ = TensorToGrid::kBToX_YIs1_ZIs1; -+ -+ return result; -+} -+ -+GPUOperation CreateLandmarksToTransformMatrixV2( -+ const OperationDef& definition, -+ const LandmarksToTransformMatrixV2Attributes& attr) { -+ std::vector data(attr.subset_idxs.size() * 2); -+ for 
(int i = 0; i < attr.subset_idxs.size(); ++i) {
-+    data[i * 2 + 0] = attr.subset_idxs[i].x;
-+    data[i * 2 + 1] = attr.subset_idxs[i].y;
-+  }
-+
-+  BufferDescriptor desc;
-+  desc.element_type = DataType::INT32;
-+  desc.element_size = 2;
-+  desc.memory_type = MemoryType::GLOBAL;
-+  desc.size = attr.subset_idxs.size() * sizeof(int32_t) * 2;
-+  desc.data.resize(desc.size);
-+  memcpy(desc.data.data(), data.data(), desc.size);
-+
-+  GPUOperation result(definition);
-+  result.AddSrcTensor("src_tensor", definition.src_tensors[0]);
-+  result.AddDstTensor("dst_tensor", definition.dst_tensors[0]);
-+
-+  result.args_.AddInt("left_rotation_idx", attr.left_rotation_idx);
-+  result.args_.AddInt("right_rotation_idx", attr.right_rotation_idx);
-+  result.args_.AddFloat("target_rotation_radians",
-+                        attr.target_rotation_radians);
-+  result.args_.AddFloat("output_height", attr.output_height);
-+  result.args_.AddFloat("output_width", attr.output_width);
-+  result.args_.AddFloat("scale_x", attr.scale_x);
-+  result.args_.AddFloat("scale_y", attr.scale_y);
-+  result.args_.AddFloat("multiplier", attr.multiplier);
-+
-+  result.args_.AddInt("subset_idxs_size", attr.subset_idxs.size());
-+  result.args_.AddObject("subset_idxs",
-+                         absl::make_unique<BufferDescriptor>(std::move(desc)));
-+  result.code_ = GetLandmarksToTransformMatrixV2KernelCode(definition, attr);
-+  result.work_group_size_ = int3(1, 1, 1);
-+  result.tensor_to_grid_ = TensorToGrid::kBToX_YIs1_ZIs1;
-+  return result;
-+}
-+
-+}  // namespace gpu
-+}  // namespace tflite
-diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.h b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.h
-new file mode 100644
-index 00000000000..2fd523df7c7
---- /dev/null
-+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/landmarks_to_transform_matrix.h
-@@ -0,0 +1,26 @@
-+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPELANDMARKS_TO_TRANSFORM_MATRIX_H_
-+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPELANDMARKS_TO_TRANSFORM_MATRIX_H_
-+
-+#include "tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h"
-+#include "tensorflow/lite/delegates/gpu/common/status.h"
-+#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h"
-+
-+namespace tflite {
-+namespace gpu {
-+
-+absl::Status CreateLandmarksToTransformMatrixFromNode(
-+    const OperationDef& op_def, const Node& node,
-+    std::unique_ptr<GPUOperation>* gpu_op);
-+
-+GPUOperation CreateLandmarksToTransformMatrixV1(
-+    const OperationDef& definition,
-+    const LandmarksToTransformMatrixV1Attributes& attr);
-+
-+GPUOperation CreateLandmarksToTransformMatrixV2(
-+    const OperationDef& definition,
-+    const LandmarksToTransformMatrixV2Attributes& attr);
-+
-+}  // namespace gpu
-+}  // namespace tflite
-+
-+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPELANDMARKS_TO_TRANSFORM_MATRIX_H_
-diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.cc b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.cc
-new file mode 100644
-index 00000000000..999917a9251
---- /dev/null
-+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.cc
-@@ -0,0 +1,116 @@
-+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.h"
-+
-+#include
-+
-+#include "tensorflow/lite/delegates/gpu/common/status.h"
-+#include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h"
-+
-+namespace tflite {
-+namespace gpu {
-+namespace {
-+
-+std::string GetTransformLandmarksKernelCode(const OperationDef& op_def, -+ int dimension, float scale) { -+ std::string c; -+ c += "MAIN_FUNCTION($0) {\n"; -+ if (op_def.IsBatchSupported()) { -+ c += " int linear_id = GLOBAL_ID_0;\n"; -+ c += " int X = linear_id / args.dst_tensor.Batch();\n"; -+ c += " int B = linear_id % args.dst_tensor.Batch();\n"; -+ c += " args.dst_tensor.SetBatchRef(B);\n"; -+ c += " args.matrix_transform.SetBatchRef(B);\n"; -+ c += " args.src_tensor.SetBatchRef(B);\n"; -+ } else { -+ c += " int X = GLOBAL_ID_0;\n"; -+ } -+ c += " int Y = GLOBAL_ID_1;\n"; -+ c += " int Z = GLOBAL_ID_2;\n"; -+ c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " -+ "Z >= args.dst_tensor.Slices()) " -+ "return;\n"; -+ c += " float4 x_transform = args.matrix_transform.Read(0, 0, 0);\n"; -+ c += " float4 y_transform = args.matrix_transform.Read(1, 0, 0);\n"; -+ if (scale != 1.0) { -+ c += " x_transform.w *= args.scale;\n"; -+ c += " y_transform.w *= args.scale;\n"; -+ } -+ c += " float4 landmks = args.src_tensor.Read(X, Y, Z);\n"; -+ c += " float4 result = INIT_FLOAT4(0.0f);\n"; -+ if (dimension == 2) { -+ c += " float4 l_pair1_ = INIT_FLOAT4v4(landmks.x, landmks.y, 0.0f, " -+ "1.0f);\n"; -+ c += " float4 l_pair2_ = INIT_FLOAT4v4(landmks.z, landmks.w, 0.0f, " -+ "1.0f);\n"; -+ c += " result.x = dot(x_transform, l_pair1_);\n"; -+ c += " result.y = dot(y_transform, l_pair1_);\n"; -+ c += " result.z = dot(x_transform, l_pair2_);\n"; -+ c += " result.w = dot(y_transform, l_pair2_);\n"; -+ } else if (dimension == 3) { -+ c += " int reminder = (Z * 4) % 3;\n"; -+ c += " if (reminder == 0) { // 0, 3, 6\n"; -+ c += " // x y z x\n"; -+ c += " float4 landmks_next = args.src_tensor.Read(X, Y, Z+1);\n"; -+ c += " float4 l_= landmks;\n"; -+ c += " l_.z = 0.0f;\n"; -+ c += " l_.w = 1.0f;\n"; -+ c += " result.x = dot(x_transform, l_);\n"; -+ c += " result.y = dot(y_transform, l_);\n"; -+ c += " result.z = landmks.z;\n"; -+ c += " result.w = dot(x_transform, INIT_FLOAT4v4(landmks.w, " -+ "landmks_next.x, " -+ "0.0f, 1.0f));\n"; -+ c += " } else if (reminder == 1) { // 1, 4, 7\n"; -+ c += " // y z x y\n"; -+ c += " float4 landmks_prev = args.src_tensor.Read(X, Y, Z-1);\n"; -+ c += " float4 l_ = INIT_FLOAT4v4(landmks.z, landmks.w, 0.0f, 1.0f);\n"; -+ c += " result.x = dot(y_transform, INIT_FLOAT4v4(landmks_prev.w, " -+ "landmks.x, " -+ "0.0f, 1.0f));\n"; -+ c += " result.y = landmks.y;\n"; -+ c += " result.z = dot(x_transform, l_);\n"; -+ c += " result.w = dot(y_transform, l_);\n"; -+ c += " } else { // reminder == 2; // 2, 5, 8\n"; -+ c += " // z, x, y, z\n"; -+ c += " float4 l_ = INIT_FLOAT4v4(landmks.y, landmks.z, 0.0f, 1.0f);\n"; -+ c += " result.x = landmks.x;\n"; -+ c += " result.y = dot(x_transform, l_);\n"; -+ c += " result.z = dot(y_transform, l_);\n"; -+ c += " result.w = landmks.w;\n"; -+ c += " }\n"; -+ } -+ c += " FLT4 res = TO_FLT4(result);\n"; -+ c += " args.dst_tensor.Write(res, X, Y, Z);\n"; -+ c += "}\n"; -+ return c; -+} -+} // namespace -+ -+absl::Status CreateTransformLandmarksFromNode( -+ const OperationDef& op_def, const Node& node, -+ std::unique_ptr* gpu_op) { -+ auto attr = -+ absl::any_cast(node.operation.attributes); -+ if (attr.version != 1) { -+ return absl::InvalidArgumentError( -+ "Transform Landmarks operation supports only version 1."); -+ } -+ GPUOperation operation = CreateTransformLandmarks(op_def, attr); -+ *gpu_op = absl::make_unique(std::move(operation)); -+ return absl::OkStatus(); -+} -+ -+GPUOperation CreateTransformLandmarks( -+ const 
OperationDef& definition, const TransformLandmarksAttributes& attr) { -+ GPUOperation op(definition); -+ op.AddSrcTensor("src_tensor", definition.src_tensors[0]); -+ op.AddSrcTensor("matrix_transform", definition.src_tensors[1]); -+ op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); -+ op.args_.AddFloat("scale", attr.scale); -+ op.code_ = -+ GetTransformLandmarksKernelCode(definition, attr.dimensions, attr.scale); -+ op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; -+ return op; -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.h b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.h -new file mode 100644 -index 00000000000..5c0be19033a ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_landmarks.h -@@ -0,0 +1,21 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_LANDMARKS_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_LANDMARKS_H_ -+ -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h" -+ -+namespace tflite { -+namespace gpu { -+ -+absl::Status CreateTransformLandmarksFromNode( -+ const OperationDef& op_def, const Node& node, -+ std::unique_ptr* gpu_op); -+ -+GPUOperation CreateTransformLandmarks(const OperationDef& definition, -+ const TransformLandmarksAttributes& attr); -+ -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_LANDMARKS_H_ -diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.cc b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.cc -new file mode 100644 -index 00000000000..2723216f324 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.cc -@@ -0,0 +1,123 @@ -+#include "tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.h" -+ -+#include -+ -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/task/work_group_picking.h" -+ -+namespace tflite { -+namespace gpu { -+namespace { -+ -+std::string AlignCornersCorrection(bool align_corners) { -+ // Align corners correction: T -> S * ( T * A ), where T is a -+ // transformation matrix, and subtruction and addition matrices are: -+ // S A -+ // 1 0 0 -0.5 1 0 0 0.5 -+ // 0 1 0 -0.5 0 1 0 0.5 -+ // 0 0 1 0 0 0 1 0 -+ // 0 0 0 1 0 0 0 1 -+ // Transformation matrix column 3 and rows 3, 4 are identity, which makes -+ // the final formula pretty simple and easy to get if doing a manual -+ // multiuplication. -+ return align_corners ? 
R"( -+ first_line.w += first_line.x * 0.5 + first_line.y * 0.5 - 0.5; -+ second_line.w += second_line.x * 0.5 + second_line.y * 0.5 - 0.5; -+ )" -+ : ""; -+} -+ -+std::string GetTransformTensorBilinearKernelCode(const OperationDef& op_def, -+ bool align_corners) { -+ std::string c; -+ c += "MAIN_FUNCTION($0) {\n"; -+ c += " int Y = GLOBAL_ID_1;\n"; -+ c += " int Z = GLOBAL_ID_2;\n"; -+ if (op_def.IsBatchSupported()) { -+ c += " int linear_id = GLOBAL_ID_0;\n"; -+ c += " int X = linear_id / args.dst_tensor.Batch();\n"; -+ c += " int B = linear_id % args.dst_tensor.Batch();\n"; -+ c += " args.dst_tensor.SetBatchRef(B);\n"; -+ c += " args.matrix_transform.SetBatchRef(B);\n"; -+ c += " args.src_tensor.SetBatchRef(B);\n"; -+ } else { -+ c += " int X = GLOBAL_ID_0;\n"; -+ } -+ c += " if (X >= args.dst_tensor.Width() || Y >= args.dst_tensor.Height() || " -+ "Z >= args.dst_tensor.Slices()) " -+ "return;\n"; -+ c += " float4 first_line = args.matrix_transform.Read(0, 0, 0);\n"; -+ c += " float4 second_line = args.matrix_transform.Read(1, 0, 0);\n"; -+ c += AlignCornersCorrection(align_corners); -+ c += " float4 before_transform_coord_2d = INIT_FLOAT4v4(INIT_FLOAT(X), " -+ "INIT_FLOAT(Y), " -+ "0.0f, 1.0f);\n"; -+ c += " // Get transformed coordinates\n"; -+ c += -+ " float2 xy = INIT_FLOAT2v2(dot(first_line, before_transform_coord_2d), " -+ "dot(second_line, before_transform_coord_2d));\n"; -+ c += " float2 xy_floor = floor(xy);\n"; -+ c += " int4 st;\n"; -+ c += " st.xy = INIT_INT2v2(xy_floor.x, xy_floor.y);\n"; -+ c += " st.zw = INIT_INT2v2(xy_floor.x, xy_floor.y) + INIT_INT2v2(1, 1);\n"; -+ c += " // Apply interpolation if coordinate is in bounds.\n"; -+ c += " float4 result = INIT_FLOAT4(0.0f);\n"; -+ c += " float2 t = xy - xy_floor;\n"; -+ c += " if(xy.x >= 0.0 && xy.x <= INIT_FLOAT(args.src_tensor.Width() - 1) && " -+ "xy.y >= 0.0 && " -+ "xy.y <= INIT_FLOAT(args.src_tensor.Height() - 1)) {\n"; -+ c += " float4 p0 = INIT_FLOAT4(0.0f);\n"; -+ c += " float4 p1 = INIT_FLOAT4(0.0f);\n"; -+ c += " float4 p2 = INIT_FLOAT4(0.0f);\n"; -+ c += " float4 p3 = INIT_FLOAT4(0.0f);\n"; -+ auto read_src = [&](const std::string& result, const std::string& xc, -+ const std::string& yc, const std::string& zc) { -+ c += " if(" + xc + " >= 0 && " + yc + " >= 0 && " + xc + -+ " < args.src_tensor.Width() && " + yc + -+ " < args.src_tensor.Height()) {\n"; -+ c += " " + result + " = args.src_tensor.Read(" + xc + ", " + -+ yc + ", " + zc + ");\n"; -+ c += " }\n"; -+ }; -+ read_src("p0", "st.x", "st.y", "Z"); -+ read_src("p1", "st.z", "st.y", "Z"); -+ read_src("p2", "st.x", "st.w", "Z"); -+ read_src("p3", "st.z", "st.w", "Z"); -+ c += " result = mix(mix(p0, p1, t.x), mix(p2, p3, t.x), t.y);\n"; -+ c += " }\n"; -+ c += " FLT4 res = TO_FLT4(result);\n"; -+ c += " args.dst_tensor.Write(res, X, Y, Z);\n"; -+ c += "}\n"; -+ return c; -+} -+} // namespace -+ -+absl::Status CreateTransformTensorBilinearFromNode( -+ const OperationDef& op_def, const Node& node, -+ std::unique_ptr* gpu_op) { -+ auto attr = absl::any_cast( -+ node.operation.attributes); -+ if (attr.version != 1) { -+ return absl::InvalidArgumentError( -+ "Transform Tensor Bilinear operation supports only version 1."); -+ } -+ GPUOperation operation = CreateTransformTensorBilinear(op_def, attr); -+ *gpu_op = absl::make_unique(std::move(operation)); -+ return absl::OkStatus(); -+} -+ -+GPUOperation CreateTransformTensorBilinear( -+ const OperationDef& definition, -+ const TransformTensorBilinearAttributes& attr) { -+ GPUOperation op(definition); -+ 
op.AddSrcTensor("src_tensor", definition.src_tensors[0]); -+ op.AddSrcTensor("matrix_transform", definition.src_tensors[1]); -+ op.AddDstTensor("dst_tensor", definition.dst_tensors[0]); -+ op.code_ = -+ GetTransformTensorBilinearKernelCode(definition, attr.align_corners); -+ op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ; -+ return op; -+} -+ -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.h b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.h -new file mode 100644 -index 00000000000..0251265cdf4 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/common/tasks/mediapipe/transform_tensor_bilinear.h -@@ -0,0 +1,22 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_TENSOR_BILINEAR_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_TENSOR_BILINEAR_H_ -+ -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/task/gpu_operation.h" -+ -+namespace tflite { -+namespace gpu { -+ -+absl::Status CreateTransformTensorBilinearFromNode( -+ const OperationDef& op_def, const Node& node, -+ std::unique_ptr* gpu_op); -+ -+GPUOperation CreateTransformTensorBilinear( -+ const OperationDef& definition, -+ const TransformTensorBilinearAttributes& attr); -+ -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_TASKS_MEDIAPIPETRANSFORM_TENSOR_BILINEAR_H_ -diff --git a/tensorflow/lite/delegates/gpu/common/transformations/BUILD b/tensorflow/lite/delegates/gpu/common/transformations/BUILD -index d26b4f807de..9596dbab7e6 100644 ---- a/tensorflow/lite/delegates/gpu/common/transformations/BUILD -+++ b/tensorflow/lite/delegates/gpu/common/transformations/BUILD -@@ -287,7 +287,7 @@ cc_library( - ":merge_padding_with", - ":remove_noop", - "//tensorflow/lite/delegates/gpu/common:model_transformer", -- ] + tf_platform_alias("custom_transformations", "//tensorflow/lite/delegates/gpu/common/"), -+ ] + ["//tensorflow/lite/delegates/gpu/common/mediapipe:custom_transformations"], - ) - - cc_library( -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD -index b7860b44ede..30cc160d32c 100644 ---- a/tensorflow/lite/delegates/gpu/gl/kernels/BUILD -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/BUILD -@@ -153,10 +153,11 @@ cc_test( - - cc_library( - name = "custom_registry", -- srcs = ["custom_registry.cc"], -+ srcs = ["//tensorflow/lite/delegates/gpu/gl/kernels/mediapipe:registry.cc"], - hdrs = ["custom_registry.h"], - deps = [ - "//tensorflow/lite/delegates/gpu/gl:node_shader", -+ "//tensorflow/lite/delegates/gpu/gl/kernels/mediapipe:all_custom_ops", - "@com_google_absl//absl/container:flat_hash_map", - ], - ) -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/BUILD b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/BUILD -new file mode 100644 -index 00000000000..f5e696d0859 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/BUILD -@@ -0,0 +1,85 @@ -+load("//tensorflow/lite:special_rules.bzl", "tflite_portable_test_suite") -+ -+package( -+ default_visibility = ["//visibility:public"], -+ licenses = ["notice"], -+) -+ -+exports_files([ -+ "registry.cc", -+ "landmarks_to_transform_matrix.h", -+ "transform_landmarks.h", -+ "transform_tensor_bilinear.h", -+]) -+ -+cc_library( -+ name = 
"all_custom_ops", -+ hdrs = [ -+ "landmarks_to_transform_matrix.h", -+ "transform_landmarks.h", -+ "transform_tensor_bilinear.h", -+ ], -+ deps = [ -+ ":landmarks_to_transform_matrix", -+ ":transform_landmarks", -+ ":transform_tensor_bilinear", -+ "//tensorflow/lite/delegates/gpu/common:operations", -+ "//tensorflow/lite/delegates/gpu/gl:node_shader", -+ ], -+) -+ -+cc_library( -+ name = "landmarks_to_transform_matrix", -+ srcs = ["landmarks_to_transform_matrix.cc"], -+ hdrs = ["landmarks_to_transform_matrix.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:operations", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:types", -+ "//tensorflow/lite/delegates/gpu/common:util", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:landmarks_to_transform_matrix", -+ "//tensorflow/lite/delegates/gpu/gl:node_shader", -+ "@com_google_absl//absl/memory", -+ "@com_google_absl//absl/strings", -+ "@com_google_absl//absl/types:any", -+ ], -+) -+ -+cc_library( -+ name = "transform_tensor_bilinear", -+ srcs = ["transform_tensor_bilinear.cc"], -+ hdrs = ["transform_tensor_bilinear.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:operations", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:types", -+ "//tensorflow/lite/delegates/gpu/common:util", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:transform_tensor_bilinear", -+ "//tensorflow/lite/delegates/gpu/gl:node_shader", -+ "@com_google_absl//absl/memory", -+ "@com_google_absl//absl/strings", -+ "@com_google_absl//absl/types:any", -+ ], -+) -+ -+cc_library( -+ name = "transform_landmarks", -+ srcs = ["transform_landmarks.cc"], -+ hdrs = ["transform_landmarks.h"], -+ deps = [ -+ "//tensorflow/lite/delegates/gpu/common:operations", -+ "//tensorflow/lite/delegates/gpu/common:shape", -+ "//tensorflow/lite/delegates/gpu/common:status", -+ "//tensorflow/lite/delegates/gpu/common:types", -+ "//tensorflow/lite/delegates/gpu/common:util", -+ "//tensorflow/lite/delegates/gpu/common/mediapipe:transform_landmarks", -+ "//tensorflow/lite/delegates/gpu/gl:node_shader", -+ "@com_google_absl//absl/memory", -+ "@com_google_absl//absl/strings", -+ "@com_google_absl//absl/types:any", -+ ], -+) -+ -+tflite_portable_test_suite() -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc -new file mode 100644 -index 00000000000..de75dd7df2e ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc -@@ -0,0 +1,356 @@ -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "absl/memory/memory.h" -+#include "absl/strings/substitute.h" -+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/landmarks_to_transform_matrix.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/types.h" -+#include "tensorflow/lite/delegates/gpu/common/util.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+namespace { -+ -+namespace v1 { -+ -+std::string ReadLandmark(const std::string& landmark, const std::string& idx) { -+ std::string source = 
R"( -+ vec4 )" + landmark + -+ R"(; -+ { -+ int z_coord = )" + -+ idx + -+ R"( * $dimensions$ / 4; -+ vec4 result = $input_data_0[0, 0, z_coord]$; -+ int rest = )" + idx + -+ R"( * $dimensions$ % 4; -+ if (rest != 0) { -+ if (rest == 1) { -+ result.x = result.y; -+ result.y = result.z; -+ } -+ if (rest == 2) { -+ result.x = result.z; -+ result.y = result.w; -+ } -+ if (rest == 3) { -+ vec4 next_after_result = $input_data_0[0, 0, z_coord + 1]$; -+ result.x = result.w; -+ result.y = next_after_result.x; -+ } -+ } -+ )" + landmark + R"( = result; -+ } -+ )"; -+ return source; -+} -+ -+bool IsSupported(const LandmarksToTransformMatrixV1Attributes& attr) { -+ return attr.dimensions == 3; -+} -+ -+absl::Status GenerateCode(const LandmarksToTransformMatrixV1Attributes& attr, -+ const NodeShader::GenerationContext& ctx, -+ GeneratedCode* generated_code) { -+ if (!IsSupported(attr)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by LandmarksToTransformMatrix v1"); -+ } -+ -+ std::vector params = { -+ {"dimensions", static_cast(attr.dimensions)}, -+ {"landmarks_range", static_cast(attr.landmarks_range)}, -+ {"left_rotation_idx", static_cast(attr.left_rotation_idx)}, -+ {"right_rotation_idx", static_cast(attr.right_rotation_idx)}, -+ {"bbox_size_multiplier", static_cast(attr.bbox_size_multiplier)}, -+ {"input_h", static_cast(attr.input_hw.h)}, -+ {"input_w", static_cast(attr.input_hw.w)}, -+ {"output_h", static_cast(attr.output_hw.h)}, -+ {"output_w", static_cast(attr.output_hw.w)}, -+ {"subset", attr.subset}, -+ {"subset_size", static_cast(attr.subset.size())}, -+ }; -+ -+ std::string source = R"( -+ )" + ReadLandmark("left_landmark", "$left_rotation_idx$") + -+ R"( -+ -+ )" + ReadLandmark("right_landmark", "$right_rotation_idx$") + -+ R"( -+ -+ float alpha = -atan(right_landmark.y - left_landmark.y, -+ right_landmark.x - left_landmark.x); -+ -+ vec4 max_value = vec4(-100000, -100000, 0.0, 0.0); -+ vec4 min_value = vec4(100000, 100000, 0.0, 0.0); -+ for (int i = 0; i < $subset_size$; i++) { -+ for (int j = 0; j < 2; j++) { -+ )" + ReadLandmark("landmark_current", "$subset$[i][j]") + -+ R"( -+ -+ vec4 rotated = vec4(landmark_current.x * cos(alpha) - -+ landmark_current.y * sin(alpha), -+ landmark_current.x * sin(alpha) + -+ landmark_current.y * cos(alpha), -+ 0.0, 0.0); -+ // both by x and y -+ max_value = vec4(max(max_value.x, rotated.x), -+ max(max_value.y, rotated.y), -+ 0.0, 0.0); -+ min_value = vec4(min(min_value.x, rotated.x), -+ min(min_value.y, rotated.y), -+ 0.0, 0.0); -+ } -+ } -+ -+ vec4 bbox_size = max_value - min_value; -+ bbox_size *= $bbox_size_multiplier$; -+ -+ mat3 scale_matrix = -+ mat3(bbox_size.x / float($landmarks_range$), 0.0, 0.0, // first column -+ 0.0, bbox_size.y / float($landmarks_range$), 0.0, // second column -+ 0.0, 0.0, 1.0); // third column -+ -+ vec4 middle = (max_value + min_value) / 2.0; -+ -+ vec4 rotated_middle = -+ vec4(middle.x * cos(-alpha) - middle.y * sin(-alpha), -+ middle.x * sin(-alpha) + middle.y * cos(-alpha), 0.0, 0.0); -+ -+ mat3 rotation_matrix = -+ mat3(cos(-alpha), sin(-alpha), 0, // first column -+ -sin(-alpha), cos(-alpha), 0, // second column -+ // third column -+ (rotated_middle.x / float($landmarks_range$)) * 2.0 - 1.0, -+ (rotated_middle.y / float($landmarks_range$)) * 2.0 - 1.0, 1); -+ -+ mat3 to_relative = -+ mat3(2.0 / (float($output_w$) - 1.0), 0.0, 0.0, // first column -+ 0.0, 2.0 / (float($output_h$) - 1.0), 0.0, // second column -+ -1.0, -1.0, 1.0); // third column -+ -+ mat3 to_absolute = -+ 
mat3((float($input_w$) - 1.0) / 2.0, 0.0, 0.0, // first column -+ 0.0, (float($input_h$) - 1.0) / 2.0, 0.0, // second column -+ // third column -+ (float($input_w$) - 1.0) / 2.0, (float($input_h$) - 1.0)/2.0, 1.0); -+ -+ // Transformstion Matrix -+ mat3 tm = to_absolute * rotation_matrix * scale_matrix * to_relative; -+ -+ // Inverse Transformation Matrix -+ $output_data_0[0, 0, 0] = vec4(tm[0][0], tm[1][0], 0.0, tm[2][0])$; -+ $output_data_0[1, 0, 0] = vec4(tm[0][1], tm[1][1], 0.0, tm[2][1])$; -+ $output_data_0[2, 0, 0] = vec4(tm[0][2], tm[1][2], tm[2][2], 0.0)$; -+ $output_data_0[3, 0, 0] = vec4( 0, 0, 0, 1.0)$; -+ )"; -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(1, 1, 1), -+ /*workgroup=*/uint3(1, 1, 1), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ /*output=*/IOStructure::ONLY_DEFINITIONS, -+ }; -+ return absl::OkStatus(); -+} -+ -+} // namespace v1 -+ -+namespace v2 { -+ -+std::string ReadLandmark(const std::string& landmark, const std::string& idx) { -+ std::string source = R"( -+ vec4 )" + landmark + -+ R"(; -+ { -+ int z_coord = )" + -+ idx + -+ R"( * $dimensions$ / 4; -+ vec4 result = $input_data_0[0, 0, z_coord]$; -+ int rest = )" + idx + -+ R"( * $dimensions$ % 4; -+ if (rest != 0) { -+ if (rest == 1) { -+ result.x = result.y; -+ result.y = result.z; -+ } -+ if (rest == 2) { -+ result.x = result.z; -+ result.y = result.w; -+ } -+ if (rest == 3) { -+ vec4 next_after_result = $input_data_0[0, 0, z_coord + 1]$; -+ result.x = result.w; -+ result.y = next_after_result.x; -+ } -+ } -+ result *= $multiplier$; -+ )" + landmark + R"( = result; -+ } )"; -+ return source; -+} -+ -+static bool IsSupported(const NodeShader::GenerationContext& ctx) { -+ return ctx.input_shapes.size() == 1 && ctx.input_shapes[0][1] == 1 && -+ ctx.input_shapes[0][2] == 1 && ctx.input_shapes[0][3] % 3 == 0; -+} -+ -+absl::Status GenerateCode(const LandmarksToTransformMatrixV2Attributes& attr, -+ const NodeShader::GenerationContext& ctx, -+ GeneratedCode* generated_code) { -+ if (!IsSupported(ctx)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by LandmarksToTransformMatrixV2"); -+ } -+ -+ std::vector params = { -+ {"dimensions", static_cast(3)}, -+ {"scale_x", static_cast(attr.scale_x)}, -+ {"scale_y", static_cast(attr.scale_y)}, -+ {"left_rotation_idx", static_cast(attr.left_rotation_idx)}, -+ {"right_rotation_idx", static_cast(attr.right_rotation_idx)}, -+ {"target_rotation_radians", -+ static_cast(attr.target_rotation_radians)}, -+ {"output_width", static_cast(attr.output_width)}, -+ {"output_height", static_cast(attr.output_height)}, -+ {"subset_idxs", attr.subset_idxs}, -+ {"subset_idxs_size", static_cast(attr.subset_idxs.size())}, -+ {"multiplier", static_cast(attr.multiplier)}, -+ }; -+ -+ std::string source = R"( -+ )" + ReadLandmark("left_landmark", "$left_rotation_idx$") + -+ R"( -+ )" + ReadLandmark("right_landmark", "$right_rotation_idx$") + -+ R"( -+ -+ float diff_y = right_landmark.y - left_landmark.y; -+ float diff_x = right_landmark.x - left_landmark.x; -+ float rotation = 0.0; -+ if (diff_y != 0.0 && diff_x != 0.0) rotation = atan(diff_y, diff_x); -+ float r = $target_rotation_radians$ - rotation; -+ -+ vec4 max_value = vec4(-100000, -100000, 0.0, 0.0); -+ vec4 min_value = vec4(100000, 100000, 0.0, 0.0); -+ for (int i = 0; i < $subset_idxs_size$; i++) { -+ for (int j = 0; j < 2; j++) { -+ )" + ReadLandmark("landmark_current", "$subset_idxs$[i][j]") + -+ R"( 
-+ vec4 rotated = vec4(landmark_current.x * cos(r) - -+ landmark_current.y * sin(r), -+ landmark_current.x * sin(r) + -+ landmark_current.y * cos(r), -+ 0.0, 0.0); -+ // both by x and y -+ max_value = vec4(max(max_value.x, rotated.x), -+ max(max_value.y, rotated.y), -+ 0.0, 0.0); -+ min_value = vec4(min(min_value.x, rotated.x), -+ min(min_value.y, rotated.y), -+ 0.0, 0.0); -+ } -+ } -+ -+ float crop_width = max_value.x - min_value.x; -+ float crop_height = max_value.y - min_value.y; -+ -+ vec4 crop_xy1 = (max_value + min_value) / vec4(2.0); -+ -+ float crop_x = cos(-r) * crop_xy1.x - sin(-r) * crop_xy1.y; -+ float crop_y = sin(-r) * crop_xy1.x + cos(-r) * crop_xy1.y; -+ -+ -+ mat4 t = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ -+ mat4 t_shift = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ crop_x, crop_y, 0.0, 1.0); // forth column -+ t *= t_shift; -+ -+ r = -r; -+ -+ mat4 t_rotation = mat4(cos(r), sin(r), 0.0, 0.0, // first column -+ -sin(r), cos(r), 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ -+ t *= t_rotation; -+ // cropped scale for x and y -+ float cs_x = $scale_x$ * crop_width / $output_width$; -+ float cs_y = $scale_y$ * crop_height / $output_height$; -+ mat4 t_scale = mat4(cs_x, 0.0, 0.0, 0.0, // first column -+ 0.0, cs_y, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ t *= t_scale; -+ float shift_x = -1.0 * ($output_width$ / 2.0); -+ float shift_y = -1.0 * ($output_height$ / 2.0); -+ mat4 t_shift2 = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ shift_x, shift_y, 0.0, 1.0); // forth column -+ t *= t_shift2; -+ // Inverse Transformation Matrix -+ $output_data_0[0, 0, 0] = vec4(t[0][0], t[1][0], t[2][0], t[3][0])$; -+ $output_data_0[1, 0, 0] = vec4(t[0][1], t[1][1], t[2][1], t[3][1])$; -+ $output_data_0[2, 0, 0] = vec4(t[0][2], t[1][2], t[2][2], t[3][2])$; -+ $output_data_0[3, 0, 0] = vec4(t[0][3], t[1][3], t[2][3], t[3][3])$; -+ )"; -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(1, 1, 1), -+ /*workgroup=*/uint3(1, 1, 1), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ /*output=*/IOStructure::ONLY_DEFINITIONS, -+ }; -+ return absl::OkStatus(); -+} -+ -+} // namespace v2 -+ -+class LandmarksToTransformMatrix : public NodeShader { -+ public: -+ absl::Status GenerateCode(const GenerationContext& ctx, -+ GeneratedCode* generated_code) const final { -+ auto* attr_v1 = -+ absl::any_cast(&ctx.op_attr); -+ if (attr_v1) return v1::GenerateCode(*attr_v1, ctx, generated_code); -+ -+ auto* attr_v2 = -+ absl::any_cast(&ctx.op_attr); -+ if (attr_v2) return v2::GenerateCode(*attr_v2, ctx, generated_code); -+ -+ return absl::InvalidArgumentError("Incorrect attributes' type."); -+ } -+}; -+ -+} // namespace -+ -+std::unique_ptr NewLandmarksToTransformMatrixNodeShader() { -+ return absl::make_unique(); -+} -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc.orig b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc.orig -new file mode 100644 -index 
00000000000..3e884b643a5 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.cc.orig -@@ -0,0 +1,356 @@ -+#include "mediapipe/util/tflite/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "third_party/absl/memory/memory.h" -+#include "third_party/absl/strings/substitute.h" -+#include "third_party/absl/types/any.h" -+#include "mediapipe/util/tflite/gpu/common/mediapipe/landmarks_to_transform_matrix.h" -+#include "third_party/tensorflow/lite/delegates/gpu/common/shape.h" -+#include "third_party/tensorflow/lite/delegates/gpu/common/status.h" -+#include "third_party/tensorflow/lite/delegates/gpu/common/types.h" -+#include "third_party/tensorflow/lite/delegates/gpu/common/util.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+namespace { -+ -+namespace v1 { -+ -+std::string ReadLandmark(const std::string& landmark, const std::string& idx) { -+ std::string source = R"( -+ vec4 )" + landmark + -+ R"(; -+ { -+ int z_coord = )" + -+ idx + -+ R"( * $dimensions$ / 4; -+ vec4 result = $input_data_0[0, 0, z_coord]$; -+ int rest = )" + idx + -+ R"( * $dimensions$ % 4; -+ if (rest != 0) { -+ if (rest == 1) { -+ result.x = result.y; -+ result.y = result.z; -+ } -+ if (rest == 2) { -+ result.x = result.z; -+ result.y = result.w; -+ } -+ if (rest == 3) { -+ vec4 next_after_result = $input_data_0[0, 0, z_coord + 1]$; -+ result.x = result.w; -+ result.y = next_after_result.x; -+ } -+ } -+ )" + landmark + R"( = result; -+ } -+ )"; -+ return source; -+} -+ -+bool IsSupported(const LandmarksToTransformMatrixV1Attributes& attr) { -+ return attr.dimensions == 3; -+} -+ -+absl::Status GenerateCode(const LandmarksToTransformMatrixV1Attributes& attr, -+ const NodeShader::GenerationContext& ctx, -+ GeneratedCode* generated_code) { -+ if (!IsSupported(attr)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by LandmarksToTransformMatrix v1"); -+ } -+ -+ std::vector params = { -+ {"dimensions", static_cast(attr.dimensions)}, -+ {"landmarks_range", static_cast(attr.landmarks_range)}, -+ {"left_rotation_idx", static_cast(attr.left_rotation_idx)}, -+ {"right_rotation_idx", static_cast(attr.right_rotation_idx)}, -+ {"bbox_size_multiplier", static_cast(attr.bbox_size_multiplier)}, -+ {"input_h", static_cast(attr.input_hw.h)}, -+ {"input_w", static_cast(attr.input_hw.w)}, -+ {"output_h", static_cast(attr.output_hw.h)}, -+ {"output_w", static_cast(attr.output_hw.w)}, -+ {"subset", attr.subset}, -+ {"subset_size", static_cast(attr.subset.size())}, -+ }; -+ -+ std::string source = R"( -+ )" + ReadLandmark("left_landmark", "$left_rotation_idx$") + -+ R"( -+ -+ )" + ReadLandmark("right_landmark", "$right_rotation_idx$") + -+ R"( -+ -+ float alpha = -atan(right_landmark.y - left_landmark.y, -+ right_landmark.x - left_landmark.x); -+ -+ vec4 max_value = vec4(-100000, -100000, 0.0, 0.0); -+ vec4 min_value = vec4(100000, 100000, 0.0, 0.0); -+ for (int i = 0; i < $subset_size$; i++) { -+ for (int j = 0; j < 2; j++) { -+ )" + ReadLandmark("landmark_current", "$subset$[i][j]") + -+ R"( -+ -+ vec4 rotated = vec4(landmark_current.x * cos(alpha) - -+ landmark_current.y * sin(alpha), -+ landmark_current.x * sin(alpha) + -+ landmark_current.y * cos(alpha), -+ 0.0, 0.0); -+ // both by x and y -+ max_value = vec4(max(max_value.x, rotated.x), -+ max(max_value.y, rotated.y), -+ 0.0, 0.0); -+ min_value = vec4(min(min_value.x, rotated.x), -+ min(min_value.y, rotated.y), -+ 0.0, 0.0); 
-+ } -+ } -+ -+ vec4 bbox_size = max_value - min_value; -+ bbox_size *= $bbox_size_multiplier$; -+ -+ mat3 scale_matrix = -+ mat3(bbox_size.x / float($landmarks_range$), 0.0, 0.0, // first column -+ 0.0, bbox_size.y / float($landmarks_range$), 0.0, // second column -+ 0.0, 0.0, 1.0); // third column -+ -+ vec4 middle = (max_value + min_value) / 2.0; -+ -+ vec4 rotated_middle = -+ vec4(middle.x * cos(-alpha) - middle.y * sin(-alpha), -+ middle.x * sin(-alpha) + middle.y * cos(-alpha), 0.0, 0.0); -+ -+ mat3 rotation_matrix = -+ mat3(cos(-alpha), sin(-alpha), 0, // first column -+ -sin(-alpha), cos(-alpha), 0, // second column -+ // third column -+ (rotated_middle.x / float($landmarks_range$)) * 2.0 - 1.0, -+ (rotated_middle.y / float($landmarks_range$)) * 2.0 - 1.0, 1); -+ -+ mat3 to_relative = -+ mat3(2.0 / (float($output_w$) - 1.0), 0.0, 0.0, // first column -+ 0.0, 2.0 / (float($output_h$) - 1.0), 0.0, // second column -+ -1.0, -1.0, 1.0); // third column -+ -+ mat3 to_absolute = -+ mat3((float($input_w$) - 1.0) / 2.0, 0.0, 0.0, // first column -+ 0.0, (float($input_h$) - 1.0) / 2.0, 0.0, // second column -+ // third column -+ (float($input_w$) - 1.0) / 2.0, (float($input_h$) - 1.0)/2.0, 1.0); -+ -+ // Transformstion Matrix -+ mat3 tm = to_absolute * rotation_matrix * scale_matrix * to_relative; -+ -+ // Inverse Transformation Matrix -+ $output_data_0[0, 0, 0] = vec4(tm[0][0], tm[1][0], 0.0, tm[2][0])$; -+ $output_data_0[1, 0, 0] = vec4(tm[0][1], tm[1][1], 0.0, tm[2][1])$; -+ $output_data_0[2, 0, 0] = vec4(tm[0][2], tm[1][2], tm[2][2], 0.0)$; -+ $output_data_0[3, 0, 0] = vec4( 0, 0, 0, 1.0)$; -+ )"; -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(1, 1, 1), -+ /*workgroup=*/uint3(1, 1, 1), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ /*output=*/IOStructure::ONLY_DEFINITIONS, -+ }; -+ return absl::OkStatus(); -+} -+ -+} // namespace v1 -+ -+namespace v2 { -+ -+std::string ReadLandmark(const std::string& landmark, const std::string& idx) { -+ std::string source = R"( -+ vec4 )" + landmark + -+ R"(; -+ { -+ int z_coord = )" + -+ idx + -+ R"( * $dimensions$ / 4; -+ vec4 result = $input_data_0[0, 0, z_coord]$; -+ int rest = )" + idx + -+ R"( * $dimensions$ % 4; -+ if (rest != 0) { -+ if (rest == 1) { -+ result.x = result.y; -+ result.y = result.z; -+ } -+ if (rest == 2) { -+ result.x = result.z; -+ result.y = result.w; -+ } -+ if (rest == 3) { -+ vec4 next_after_result = $input_data_0[0, 0, z_coord + 1]$; -+ result.x = result.w; -+ result.y = next_after_result.x; -+ } -+ } -+ result *= $multiplier$; -+ )" + landmark + R"( = result; -+ } )"; -+ return source; -+} -+ -+static bool IsSupported(const NodeShader::GenerationContext& ctx) { -+ return ctx.input_shapes.size() == 1 && ctx.input_shapes[0][1] == 1 && -+ ctx.input_shapes[0][2] == 1 && ctx.input_shapes[0][3] % 3 == 0; -+} -+ -+absl::Status GenerateCode(const LandmarksToTransformMatrixV2Attributes& attr, -+ const NodeShader::GenerationContext& ctx, -+ GeneratedCode* generated_code) { -+ if (!IsSupported(ctx)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by LandmarksToTransformMatrixV2"); -+ } -+ -+ std::vector params = { -+ {"dimensions", static_cast(3)}, -+ {"scale_x", static_cast(attr.scale_x)}, -+ {"scale_y", static_cast(attr.scale_y)}, -+ {"left_rotation_idx", static_cast(attr.left_rotation_idx)}, -+ {"right_rotation_idx", static_cast(attr.right_rotation_idx)}, -+ {"target_rotation_radians", -+ 
static_cast(attr.target_rotation_radians)}, -+ {"output_width", static_cast(attr.output_width)}, -+ {"output_height", static_cast(attr.output_height)}, -+ {"subset_idxs", attr.subset_idxs}, -+ {"subset_idxs_size", static_cast(attr.subset_idxs.size())}, -+ {"multiplier", static_cast(attr.multiplier)}, -+ }; -+ -+ std::string source = R"( -+ )" + ReadLandmark("left_landmark", "$left_rotation_idx$") + -+ R"( -+ )" + ReadLandmark("right_landmark", "$right_rotation_idx$") + -+ R"( -+ -+ float diff_y = right_landmark.y - left_landmark.y; -+ float diff_x = right_landmark.x - left_landmark.x; -+ float rotation = 0.0; -+ if (diff_y != 0.0 && diff_x != 0.0) rotation = atan(diff_y, diff_x); -+ float r = $target_rotation_radians$ - rotation; -+ -+ vec4 max_value = vec4(-100000, -100000, 0.0, 0.0); -+ vec4 min_value = vec4(100000, 100000, 0.0, 0.0); -+ for (int i = 0; i < $subset_idxs_size$; i++) { -+ for (int j = 0; j < 2; j++) { -+ )" + ReadLandmark("landmark_current", "$subset_idxs$[i][j]") + -+ R"( -+ vec4 rotated = vec4(landmark_current.x * cos(r) - -+ landmark_current.y * sin(r), -+ landmark_current.x * sin(r) + -+ landmark_current.y * cos(r), -+ 0.0, 0.0); -+ // both by x and y -+ max_value = vec4(max(max_value.x, rotated.x), -+ max(max_value.y, rotated.y), -+ 0.0, 0.0); -+ min_value = vec4(min(min_value.x, rotated.x), -+ min(min_value.y, rotated.y), -+ 0.0, 0.0); -+ } -+ } -+ -+ float crop_width = max_value.x - min_value.x; -+ float crop_height = max_value.y - min_value.y; -+ -+ vec4 crop_xy1 = (max_value + min_value) / vec4(2.0); -+ -+ float crop_x = cos(-r) * crop_xy1.x - sin(-r) * crop_xy1.y; -+ float crop_y = sin(-r) * crop_xy1.x + cos(-r) * crop_xy1.y; -+ -+ -+ mat4 t = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ -+ mat4 t_shift = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ crop_x, crop_y, 0.0, 1.0); // forth column -+ t *= t_shift; -+ -+ r = -r; -+ -+ mat4 t_rotation = mat4(cos(r), sin(r), 0.0, 0.0, // first column -+ -sin(r), cos(r), 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ -+ t *= t_rotation; -+ // cropped scale for x and y -+ float cs_x = $scale_x$ * crop_width / $output_width$; -+ float cs_y = $scale_y$ * crop_height / $output_height$; -+ mat4 t_scale = mat4(cs_x, 0.0, 0.0, 0.0, // first column -+ 0.0, cs_y, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ 0.0, 0.0, 0.0, 1.0); // forth column -+ t *= t_scale; -+ float shift_x = -1.0 * ($output_width$ / 2.0); -+ float shift_y = -1.0 * ($output_height$ / 2.0); -+ mat4 t_shift2 = mat4(1.0, 0.0, 0.0, 0.0, // first column -+ 0.0, 1.0, 0.0, 0.0, // second column -+ 0.0, 0.0, 1.0, 0.0, // third column -+ shift_x, shift_y, 0.0, 1.0); // forth column -+ t *= t_shift2; -+ // Inverse Transformation Matrix -+ $output_data_0[0, 0, 0] = vec4(t[0][0], t[1][0], t[2][0], t[3][0])$; -+ $output_data_0[1, 0, 0] = vec4(t[0][1], t[1][1], t[2][1], t[3][1])$; -+ $output_data_0[2, 0, 0] = vec4(t[0][2], t[1][2], t[2][2], t[3][2])$; -+ $output_data_0[3, 0, 0] = vec4(t[0][3], t[1][3], t[2][3], t[3][3])$; -+ )"; -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(1, 1, 1), -+ /*workgroup=*/uint3(1, 1, 1), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ 
/*output=*/IOStructure::ONLY_DEFINITIONS, -+ }; -+ return absl::OkStatus(); -+} -+ -+} // namespace v2 -+ -+class LandmarksToTransformMatrix : public NodeShader { -+ public: -+ absl::Status GenerateCode(const GenerationContext& ctx, -+ GeneratedCode* generated_code) const final { -+ auto* attr_v1 = -+ absl::any_cast(&ctx.op_attr); -+ if (attr_v1) return v1::GenerateCode(*attr_v1, ctx, generated_code); -+ -+ auto* attr_v2 = -+ absl::any_cast(&ctx.op_attr); -+ if (attr_v2) return v2::GenerateCode(*attr_v2, ctx, generated_code); -+ -+ return absl::InvalidArgumentError("Incorrect attributes' type."); -+ } -+}; -+ -+} // namespace -+ -+std::unique_ptr NewLandmarksToTransformMatrixNodeShader() { -+ return absl::make_unique(); -+} -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h -new file mode 100644 -index 00000000000..d3949050578 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h -@@ -0,0 +1,19 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -+ -+#include -+ -+#include "tensorflow/lite/delegates/gpu/common/operations.h" -+#include "tensorflow/lite/delegates/gpu/gl/node_shader.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+ -+std::unique_ptr NewLandmarksToTransformMatrixNodeShader(); -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_LANDMARKS_TO_TRANSFORM_MATRIX_H_ -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/registry.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/registry.cc -new file mode 100644 -index 00000000000..3ef02a248c3 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/registry.cc -@@ -0,0 +1,28 @@ -+#include -+#include -+#include -+ -+#include "absl/container/flat_hash_map.h" -+#include "tensorflow/lite/delegates/gpu/gl/kernels/custom_registry.h" -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/landmarks_to_transform_matrix.h" -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+ -+void RegisterCustomOps( -+ absl::flat_hash_map>>* -+ shaders) { -+ (*shaders)["landmarks_to_transform_matrix"].push_back( -+ NewLandmarksToTransformMatrixNodeShader()); -+ (*shaders)["transform_landmarks"].push_back( -+ NewTransformLandmarksNodeShader()); -+ (*shaders)["transform_tensor_bilinear"].push_back( -+ NewTransformTensorBilinearNodeShader()); -+} -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.cc -new file mode 100644 -index 00000000000..980e2aa99e6 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.cc -@@ -0,0 +1,123 @@ -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "absl/memory/memory.h" -+#include "absl/strings/substitute.h" 
-+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_landmarks.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/types.h" -+#include "tensorflow/lite/delegates/gpu/common/util.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+namespace { -+ -+class TransformLandmarks : public NodeShader { -+ public: -+ absl::Status GenerateCode(const GenerationContext& ctx, -+ GeneratedCode* generated_code) const final { -+ if (!IsSupported(ctx)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by TransformLandmarks"); -+ } -+ -+ const auto& attr = -+ absl::any_cast(ctx.op_attr); -+ -+ // For transformlandmarks v2 scale parameter is set to 1 when operation is -+ // parsed. -+ std::vector params; -+ if (attr.scale != 1) { -+ params.push_back({"scale", static_cast(attr.scale)}); -+ } -+ std::string source = R"( -+ vec4 x_transform = $input_data_1[0, 0, 0]$; -+ vec4 y_transform = $input_data_1[1, 0, 0]$; )"; -+ if (attr.scale != 1) { -+ source += R"( -+ x_transform.w *= $scale$; -+ y_transform.w *= $scale$; -+ )"; -+ } -+ source += R"( -+ vec4 landmks = $input_data_0[gid.x, gid.y, gid.z]$; -+ vec4 transformed = vec4(0.0); -+ )"; -+ switch (attr.dimensions) { -+ case 2: -+ source += R"( -+ // x y x y -+ vec4 l_pair1_ = vec4(landmks.x, landmks.y, 0.0, 1.0); -+ vec4 l_pair2_ = vec4(landmks.z, landmks.w, 0.0, 1.0); -+ transformed = vec4(dot(x_transform, l_pair1_), dot(y_transform, l_pair1_), -+ dot(x_transform, l_pair2_), dot(y_transform, l_pair2_)); -+ -+ value_0 = transformed; -+ )"; -+ break; -+ case 3: -+ source += R"( -+ if ((gid.z * 4) % 3 == 0) { // 0, 3, 6 -+ // x y z x -+ vec4 landmks_next = $input_data_0[gid.x, gid.y, gid.z + 1]$; -+ vec4 l_= landmks; -+ l_.z = 0.0; -+ l_.w = 1.0; -+ transformed = vec4(dot(x_transform, l_), -+ dot(y_transform, l_), -+ landmks.z, dot(x_transform, vec4(landmks.w, landmks_next.x, 0.0, 1.0))); -+ } else if ((gid.z * 4) % 3 == 1) { // 1, 4, 7 -+ // y z x y -+ vec4 landmks_prev = $input_data_0[gid.x, gid.y, gid.z - 1]$; -+ vec4 l_ = vec4(landmks.z, landmks.w, 0.0, 1.0); -+ transformed = vec4(dot(y_transform, vec4(landmks_prev.w, landmks.x, 0.0, 1.0)), landmks.y, -+ dot(x_transform, l_), dot(y_transform, l_)); -+ } else if ((gid.z * 4) % 3 == 2) { // 2, 5, 8 -+ // z, x, y, z -+ vec4 l_ = vec4(landmks.y, landmks.z, 0.0, 1.0); -+ transformed = vec4(landmks.x, dot(x_transform, l_), -+ dot(y_transform, l_), landmks.w); -+ } -+ value_0 = transformed; -+ )"; -+ break; -+ } -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(), -+ /*workgroup=*/uint3(), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ /*output=*/IOStructure::AUTO, -+ }; -+ return absl::OkStatus(); -+ } -+ -+ private: -+ static bool IsSupported(const GenerationContext& ctx) { -+ const auto& attr = -+ absl::any_cast(ctx.op_attr); -+ return (attr.dimensions == 2 || attr.dimensions == 3) && attr.version == 1; -+ } -+}; -+ -+} // namespace -+ -+std::unique_ptr NewTransformLandmarksNodeShader() { -+ return absl::make_unique(); -+} -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.h b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.h -new file mode 100644 -index 00000000000..cfb656675e4 ---- 
/dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_landmarks.h -@@ -0,0 +1,19 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -+ -+#include -+ -+#include "tensorflow/lite/delegates/gpu/common/operations.h" -+#include "tensorflow/lite/delegates/gpu/gl/node_shader.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+ -+std::unique_ptr NewTransformLandmarksNodeShader(); -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_LANDMARKS_H_ -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.cc b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.cc -new file mode 100644 -index 00000000000..8013b9b3505 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.cc -@@ -0,0 +1,169 @@ -+#include "tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.h" -+ -+#include -+#include -+#include -+#include -+#include -+ -+#include "absl/memory/memory.h" -+#include "absl/strings/substitute.h" -+#include "absl/types/any.h" -+#include "tensorflow/lite/delegates/gpu/common/mediapipe/transform_tensor_bilinear.h" -+#include "tensorflow/lite/delegates/gpu/common/shape.h" -+#include "tensorflow/lite/delegates/gpu/common/status.h" -+#include "tensorflow/lite/delegates/gpu/common/types.h" -+#include "tensorflow/lite/delegates/gpu/common/util.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+namespace { -+ -+class TransformTensorBilinear : public NodeShader { -+ public: -+ absl::Status GenerateCode(const GenerationContext& ctx, -+ GeneratedCode* generated_code) const final { -+ if (!IsSupported(ctx)) { -+ return absl::InvalidArgumentError( -+ "This case is not supported by TransformTensorBilinear."); -+ } -+ -+ std::vector params = { -+ {"input_data_0_h", static_cast(ctx.input_shapes[0][1])}, -+ {"input_data_0_w", static_cast(ctx.input_shapes[0][2])}}; -+ -+ // Only bilinear transformation is supported right now. -+ std::string source = R"( -+ vec4 first_line = $input_data_1[0, 0, 0]$; -+ vec4 second_line = $input_data_1[1, 0, 0]$; -+ )" + AlignCornersCorrection(ctx) + -+ R"( -+ vec4 before_transform_coord_2d = vec4(gid.x, gid.y, 0.0, 1.0); -+ -+ // Get transformed coordinates -+ vec2 xy = vec2(dot(first_line, before_transform_coord_2d), -+ dot(second_line, before_transform_coord_2d)); -+ -+ // Get coordinates of corners to interpolate from. -+ int x1 = int(floor(xy.x)); // x2 is x1 + 1 -+ int y1 = int(floor(xy.y)); // y2 is y1 + 1 -+ -+ // Apply interpolation if coordinate is in bounds. 
-+ vec4 result = vec4(0.0); -+ -+ if(xy.x >= 0.0 && xy.x <= float($input_data_0_w$ -1) && -+ xy.y >= 0.0 && xy.y <= float($input_data_0_h$ -1)) { -+ -+ // Corners position: -+ // q_11 --- q_21 -+ // ---- ---- -+ // q_12 --- q_22 -+)"; -+ source += SampleFromInput0("q_11", "x1", "y1") + -+ SampleFromInput0("q_12", "x1", "y1 + 1") + -+ SampleFromInput0("q_21", "x1 + 1", "y1") + -+ SampleFromInput0("q_22", "x1 + 1", "y1 + 1") + R"( -+ -+ float right_contrib = xy.x - float(x1); -+ float lower_contrib = xy.y - float(y1); -+ -+ vec4 upper = (1.0 - right_contrib) * q_11 + right_contrib * q_21; -+ vec4 lower = (1.0 - right_contrib) * q_12 + right_contrib * q_22; -+ -+ result = lower_contrib * lower + (1.0 - lower_contrib) * upper; -+ -+ } -+ value_0 = result; -+ )"; -+ -+ *generated_code = { -+ /*parameters=*/params, -+ /*objects=*/{}, -+ /*shared_variables=*/{}, -+ /*workload=*/uint3(), -+ /*workgroup=*/uint3(), -+ /*source_code=*/std::move(source), -+ /*input=*/IOStructure::ONLY_DEFINITIONS, -+ /*output=*/IOStructure::AUTO, -+ }; -+ return absl::OkStatus(); -+ } -+ -+ private: -+ std::string SampleFromInput0(absl::string_view variable, -+ absl::string_view x_coord, -+ absl::string_view y_coord) const { -+ // This function generates code, which samples data from the first input -+ // tensor and checks the coordinates' bounds: -+ // -+ // vec4 q = vec4(0.0); -+ // [0, H) -+ // if (x >= 0 && x < $input_data_0_w$ && y >= 0 && y < $input_data_0_h$) { -+ // q = $input_data_0[x, y, gid.z]$; -+ // } -+ -+ // Create zero initialized variable on stack -+ std::string result = -+ absl::Substitute(" vec4 $0 = vec4(0.0);\n", variable); -+ // If coordinates are not out of scope, load value from input_data_0 -+ absl::SubstituteAndAppend( -+ &result, -+ " if ($0 >= 0 && $1 < $$input_data_0_w$$ && " -+ "$2 >= 0 && $3 < $$input_data_0_h$$) {\n", -+ x_coord, x_coord, y_coord, y_coord); -+ absl::SubstituteAndAppend( -+ &result, -+ " $0 = $$input_data_0[$1, $2, gid.z]$$;\n }\n\n", -+ variable, x_coord, y_coord); -+ return result; -+ } -+ -+ std::string AlignCornersCorrection(const GenerationContext& ctx) const { -+ const auto& attr = -+ absl::any_cast(ctx.op_attr); -+ // Align corners correction: T -> S * ( T * A ), where T is a -+ // transformation matrix, and subtruction and addition matrices are: -+ // S A -+ // 1 0 0 -0.5 1 0 0 0.5 -+ // 0 1 0 -0.5 0 1 0 0.5 -+ // 0 0 1 0 0 0 1 0 -+ // 0 0 0 1 0 0 0 1 -+ // Transformation matrix column 3 and rows 3, 4 are identity, which makes -+ // the final formula pretty simple and easy to get if doing a manual -+ // multiuplication. -+ if (attr.align_corners) { -+ return R"( -+ first_line.w += first_line.x * 0.5 + first_line.y * 0.5 - 0.5; -+ second_line.w += second_line.x * 0.5 + second_line.y * 0.5 - 0.5; -+ )"; -+ } else { -+ return ""; -+ } -+ } -+ -+ static bool IsSupported(const GenerationContext& ctx) { -+ // if version 2 - align corners is turned on. 
-+ // both versions expect transformation matrix as 1x1x1x16 -+ if (ctx.input_shapes.size() != 2) return false; -+ -+ if (ctx.input_shapes[1][0] != 1 || ctx.input_shapes[1][1] != 1 || -+ ctx.input_shapes[1][2] != 4 || ctx.input_shapes[1][3] != 4) -+ return false; -+ -+ const auto& attr = -+ absl::any_cast(ctx.op_attr); -+ return attr.output_size.h > 0 && attr.output_size.w > 0 && -+ attr.version == 1; -+ } -+}; -+ -+} // namespace -+ -+std::unique_ptr NewTransformTensorBilinearNodeShader() { -+ return absl::make_unique(); -+} -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -diff --git a/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.h b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.h -new file mode 100644 -index 00000000000..c62387a4b96 ---- /dev/null -+++ b/tensorflow/lite/delegates/gpu/gl/kernels/mediapipe/transform_tensor_bilinear.h -@@ -0,0 +1,19 @@ -+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ -+#define TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ -+ -+#include -+ -+#include "tensorflow/lite/delegates/gpu/common/operations.h" -+#include "tensorflow/lite/delegates/gpu/gl/node_shader.h" -+ -+namespace tflite { -+namespace gpu { -+namespace gl { -+ -+std::unique_ptr NewTransformTensorBilinearNodeShader(); -+ -+} // namespace gl -+} // namespace gpu -+} // namespace tflite -+ -+#endif // TENSORFLOW_LITE_DELEGATES_GPU_GL_KERNELS_MEDIAPIPE_TRANSFORM_TENSOR_BILINEAR_H_ diff --git a/ports/tensorflow-lite/portfile.cmake b/ports/tensorflow-lite/portfile.cmake index 52910aba..673e43b9 100644 --- a/ports/tensorflow-lite/portfile.cmake +++ b/ports/tensorflow-lite/portfile.cmake @@ -2,31 +2,20 @@ if(NOT VCPKG_TARGET_IS_IOS) vcpkg_check_linkage(ONLY_DYNAMIC_LIBRARY) endif() -# vcpkg_download_distfile(MP_PATCH_1 -# URLS "https://raw.githubusercontent.com/google/mediapipe/v0.9.2.1/third_party/org_tensorflow_compatibility_fixes.diff" -# FILENAME org_tensorflow_compatibility_fixes.diff -# SHA512 4f30038f78e2cc8991a7ec173f6b081ba8bd151163569e840fa34d091ece0ec61eeebde18210a2f11b9bc21a5d8a0bde29a9c0a3638a4d7936b99de8781b7df1 -# ) -# vcpkg_download_distfile(MP_PATCH_2 -# URLS "https://raw.githubusercontent.com/google/mediapipe/v0.9.2.1/third_party/org_tensorflow_custom_ops.diff" -# FILENAME org_tensorflow_custom_ops.diff -# SHA512 11fb8f48e39ef30328af0a216c3ea6bcbbbf68980dbbb5b6a9e4a1f11586f5f7836caf8ab6357785c624c3c6d10f516b185a504ea1bbcdaa69ce84522c8df60a -# ) - vcpkg_from_github( OUT_SOURCE_PATH SOURCE_PATH REPO tensorflow/tensorflow - REF v2.11.1 - SHA512 2ca39d005efa129b5bebd3729f2550d8de659acd57b797f501307c28eb3d0d482703abe4d7364d5572fa600287505f4ed3b4f78eaae6867dc85b4a7d53d4b60b + REF v2.14.0 + SHA512 ae39fd8049f9cd3118c1f10285d5272531380bbe0506dc7fb14c8e9da34a578284af486795abdae0f82ef0b84d14896564386595669ef303a1b8dbfa06b88f7a PATCHES - fix-cmake.patch - fix-source.patch - fix-absl.patch - fix-opencl-extension.patch - org_tensorflow_compatibility_fixes.diff # ${MP_PATCH_1} - org_tensorflow_custom_ops.diff # ${MP_PATCH_2} + fix-cmake-use-vcpkg.patch # use packages from vcpkg + fix-cmake-c-api.patch # includ C API sources + fix-cmake-gpu.patch # build settings for GPU features + fix-cmake-nnapi.patch # Android NNAPI + fix-source-abseil.patch # replace std:: to absl:: + fix-source-cpp20.patch # use C++17 syntax + fix-source-gpu.patch # source changes for GPU features ) - file(REMOVE_RECURSE "${SOURCE_PATH}/third_party/eigen3") file(COPY 
"${CURRENT_INSTALLED_DIR}/include/eigen3" DESTINATION "${SOURCE_PATH}/third_party") @@ -34,39 +23,49 @@ find_program(FLATC NAMES flatc PATHS "${CURRENT_HOST_INSTALLED_DIR}/tools/flatbuffers" REQUIRED NO_DEFAULT_PATH NO_CMAKE_PATH ) +# see https://flatbuffers.dev/flatbuffers_guide_using_schema_compiler.html message(STATUS "Using flatc: ${FLATC}") find_program(PROTOC NAMES protoc PATHS "${CURRENT_HOST_INSTALLED_DIR}/tools/protobuf" REQUIRED NO_DEFAULT_PATH NO_CMAKE_PATH ) +# see https://protobuf.dev/overview/#syntax message(STATUS "Using protoc: ${PROTOC}") # Run codegen with existing .fbs, .proto files set(TENSORFLOW_SOURCE_DIR "${SOURCE_PATH}") set(TFLITE_SOURCE_DIR "${SOURCE_PATH}/tensorflow/lite") -set(EXPERIMANTAL_ACC_CONFIG_PATH "${TFLITE_SOURCE_DIR}/experimental/acceleration/configuration") +set(ACCELERATION_CONFIGURATION_PATH "${TFLITE_SOURCE_DIR}/acceleration/configuration") vcpkg_execute_required_process( COMMAND ${FLATC} --proto configuration.proto LOGNAME codegen-flatc-configuration - WORKING_DIRECTORY "${EXPERIMANTAL_ACC_CONFIG_PATH}" + WORKING_DIRECTORY "${ACCELERATION_CONFIGURATION_PATH}" ) +# see ${ACCELERATION_CONFIGURATION_PATH}/BUILD +vcpkg_replace_string("${ACCELERATION_CONFIGURATION_PATH}/configuration.fbs" "tflite.proto" "tflite") + vcpkg_execute_required_process( - COMMAND ${PROTOC} --cpp_out=. configuration.proto + COMMAND ${PROTOC} --cpp_out . configuration.proto LOGNAME codegen-protoc-configuration - WORKING_DIRECTORY "${EXPERIMANTAL_ACC_CONFIG_PATH}" + WORKING_DIRECTORY "${ACCELERATION_CONFIGURATION_PATH}" ) vcpkg_execute_required_process( - COMMAND ${FLATC} --cpp --scoped-enums configuration.fbs + COMMAND ${FLATC} --cpp --gen-compare configuration.fbs LOGNAME codegen-flatc-cpp-configuration - WORKING_DIRECTORY "${EXPERIMANTAL_ACC_CONFIG_PATH}" + WORKING_DIRECTORY "${ACCELERATION_CONFIGURATION_PATH}" ) set(SCHEMA_PATH "${TFLITE_SOURCE_DIR}/schema") vcpkg_execute_required_process( - COMMAND ${FLATC} -c --gen-object-api --gen-mutable schema.fbs - LOGNAME codegen-flatc-c-schema + COMMAND ${FLATC} --cpp --gen-mutable --gen-object-api schema.fbs + LOGNAME codegen-flatc-schema + WORKING_DIRECTORY "${SCHEMA_PATH}" +) +vcpkg_execute_required_process( + COMMAND ${FLATC} --cpp conversion_metadata.fbs + LOGNAME codegen-flatc-conversion_metadata WORKING_DIRECTORY "${SCHEMA_PATH}" ) @@ -94,17 +93,17 @@ if(VCPKG_TARGET_IS_OSX OR VCPKG_TARGET_IS_IOS) else() set(DELEGATES_GPU_GL_PATH "${TFLITE_SOURCE_DIR}/delegates/gpu/gl") vcpkg_execute_required_process( - COMMAND ${FLATC} --cpp --scoped-enums common.fbs + COMMAND ${FLATC} --cpp common.fbs LOGNAME codegen-flatc-cpp-gl-common WORKING_DIRECTORY "${DELEGATES_GPU_GL_PATH}" ) vcpkg_execute_required_process( - COMMAND ${FLATC} --cpp --scoped-enums metadata.fbs + COMMAND ${FLATC} --cpp metadata.fbs LOGNAME codegen-flatc-cpp-gl-metadata WORKING_DIRECTORY "${DELEGATES_GPU_GL_PATH}" ) vcpkg_execute_required_process( - COMMAND ${FLATC} --cpp --scoped-enums workgroups.fbs + COMMAND ${FLATC} --cpp workgroups.fbs LOGNAME codegen-flatc-cpp-gl-workgroups WORKING_DIRECTORY "${DELEGATES_GPU_GL_PATH}" ) @@ -131,14 +130,21 @@ vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS FEATURES gpu TFLITE_ENABLE_GPU gpu TFLITE_ENABLE_METAL - mmap TFLITE_ENABLE_MMAP - mediapipe WITH_MEDIAPIPE ) +if(VCPKG_TARGET_IS_LINUX OR VCPKG_TARGET_IS_ANDROID) + list(APPEND FEATURE_OPTIONS -DTFLITE_ENABLE_MMAP=ON) +endif() +if(VCPKG_TARGET_IS_OSX OR VCPKG_TARGET_IS_IOS) + list(APPEND GENERATOR_OPTIONS GENERATOR Xcode) +endif() + vcpkg_cmake_configure( SOURCE_PATH 
"${SOURCE_PATH}/tensorflow/lite" + ${GENERATOR_OPTIONS} OPTIONS ${FEATURE_OPTIONS} + -DSYSTEM_PTHREADPOOL=ON -DTFLITE_ENABLE_RESOURCE=ON -DTFLITE_ENABLE_RUY=ON -DTFLITE_ENABLE_XNNPACK=ON @@ -146,15 +152,17 @@ vcpkg_cmake_configure( -DTFLITE_ENABLE_EXTERNAL_DELEGATE=ON -DTFLITE_ENABLE_INSTALL=ON -DTENSORFLOW_SOURCE_DIR:PATH="${SOURCE_PATH}" + -DFLATBUFFERS_FLATC_EXECUTABLE:FILEPATH="${FLATC}" OPTIONS_DEBUG -DTFLITE_ENABLE_NNAPI_VERBOSE_VALIDATION=${VCPKG_TARGET_IS_ANDROID} + MAYBE_UNUSED_VARIABLES + FLATBUFFERS_FLATC_EXECUTABLE ) vcpkg_cmake_install() vcpkg_copy_pdbs() vcpkg_cmake_config_fixup(CONFIG_PATH "lib/cmake/${PORT}") file(INSTALL "${SOURCE_PATH}/tensorflow/core/public/version.h" DESTINATION "${CURRENT_PACKAGES_DIR}/include/tensorflow/core/public") -file(INSTALL "${SOURCE_PATH}/LICENSE" DESTINATION "${CURRENT_PACKAGES_DIR}/share/${PORT}" RENAME copyright) file(REMOVE_RECURSE "${CURRENT_PACKAGES_DIR}/debug/include" "${CURRENT_PACKAGES_DIR}/debug/share" @@ -176,3 +184,5 @@ else() ) endif() endif() + +vcpkg_install_copyright(FILE_LIST "${SOURCE_PATH}/LICENSE") diff --git a/ports/tensorflow-lite/vcpkg.json b/ports/tensorflow-lite/vcpkg.json index 3e9cdc2a..79109256 100644 --- a/ports/tensorflow-lite/vcpkg.json +++ b/ports/tensorflow-lite/vcpkg.json @@ -1,7 +1,6 @@ { "name": "tensorflow-lite", - "version-semver": "2.11.1", - "port-version": 1, + "version-semver": "2.14.0", "description": "Open standard for machine learning interoperability", "homepage": "https://www.tensorflow.org/", "license": "Apache-2.0", @@ -16,6 +15,7 @@ "host": true }, "gemmlowp", + "ml-dtypes", "neon2sse", { "name": "nsync", @@ -60,9 +60,6 @@ }, "vulkan-headers" ] - }, - "mediapipe": { - "description": "Build with sources of Google/MediaPipe" } } }