From 163aa8f30abea7d25cce8ce6e03570546688fec8 Mon Sep 17 00:00:00 2001 From: Gin <> Date: Thu, 19 Dec 2024 17:27:20 -0800 Subject: [PATCH] fix neon bug --- ...naryImage_BasicFilters_Core_64x8_arm64_NEON.cpp | 3 +++ .../Kernels_BinaryImage_BasicFilters_arm64_NEON.h | 14 +++++++------- .../Waterfill/Kernels_Waterfill_Session.tpp | 6 +++--- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp index 716259ebe..52d718340 100644 --- a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp +++ b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_Core_64x8_arm64_NEON.cpp @@ -10,6 +10,7 @@ #include "Kernels_BinaryImage_BasicFilters_Routines.h" #include "Kernels_BinaryImage_BasicFilters_arm64_NEON.h" + namespace PokemonAutomation{ namespace Kernels{ @@ -24,6 +25,8 @@ void filter_by_mask_64x8_arm64_NEON( } + + void compress_rgb32_to_binary_range_64x8_arm64_NEON( const uint32_t* image, size_t bytes_per_row, PackedBinaryMatrix_IB& matrix0, uint32_t mins0, uint32_t maxs0 diff --git a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h index 848e1d49d..514908197 100644 --- a/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h +++ b/SerialPrograms/Source/Kernels/BinaryImageFilters/Kernels_BinaryImage_BasicFilters_arm64_NEON.h @@ -9,9 +9,9 @@ #include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h" -#include -using std::cout; -using std::endl; +// #include +// using std::cout; +// using std::endl; namespace PokemonAutomation{ namespace Kernels{ @@ -149,8 +149,6 @@ class FilterByMask_arm64_NEON{ const uint32x4_t m_zeros; }; - - // Compress given pixels buffer (of up to 64-pixel long) into bit map and store in one uint64_t. class Compressor_RgbRange_arm64_NEON{ public: @@ -178,13 +176,14 @@ class Compressor_RgbRange_arm64_NEON{ bits |= convert16(pixels + c) << c; c += 16; } - for(; c < count; c += 4){ + + count %= 16; + for(size_t i = 0; i < count / 4; i++, c+=4){ const uint8x16_t pixel = vld1q_u8((const uint8_t*)(pixels + c)); bits |= convert4(pixel) << c; } count %= 4; if (count){ - c -= 4; PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t)); const uint8x16_t pixel = loader.load(pixels + c); const uint64_t mask = ((uint64_t)1 << count) - 1; @@ -273,6 +272,7 @@ class Compressor_RgbEuclidean_arm64_NEON{ for(size_t i = 0; i < count / 4; i++, c+=4){ bits |= convert4(pixels + c) << c; } + count %= 4; if (count){ PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t)); const uint8x16_t pixel = loader.load(pixels + c); diff --git a/SerialPrograms/Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp b/SerialPrograms/Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp index b6d7db850..880128c9a 100644 --- a/SerialPrograms/Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp +++ b/SerialPrograms/Source/Kernels/Waterfill/Kernels_Waterfill_Session.tpp @@ -16,9 +16,9 @@ #include "Kernels/BinaryMatrix/Kernels_SparseBinaryMatrixCore.h" #include "Kernels_Waterfill_Session.h" -//#include -//using std::cout; -//using std::endl; +// #include +// using std::cout; +// using std::endl; namespace PokemonAutomation{ namespace Kernels{