Skip to content

Commit

Permalink
fix neon bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Gin committed Dec 20, 2024
1 parent 67a0212 commit 163aa8f
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "Kernels_BinaryImage_BasicFilters_Routines.h"
#include "Kernels_BinaryImage_BasicFilters_arm64_NEON.h"


namespace PokemonAutomation{
namespace Kernels{

Expand All @@ -24,6 +25,8 @@ void filter_by_mask_64x8_arm64_NEON(
}




void compress_rgb32_to_binary_range_64x8_arm64_NEON(
const uint32_t* image, size_t bytes_per_row,
PackedBinaryMatrix_IB& matrix0, uint32_t mins0, uint32_t maxs0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@

#include "Kernels/PartialWordAccess/Kernels_PartialWordAccess_arm64_NEON.h"

#include <iostream>
using std::cout;
using std::endl;
// #include <iostream>
// using std::cout;
// using std::endl;

namespace PokemonAutomation{
namespace Kernels{
Expand Down Expand Up @@ -149,8 +149,6 @@ class FilterByMask_arm64_NEON{
const uint32x4_t m_zeros;
};



// Compress the given pixel buffer (up to 64 pixels long) into a bitmap and store it in one uint64_t.
class Compressor_RgbRange_arm64_NEON{
public:
Expand Down Expand Up @@ -178,13 +176,14 @@ class Compressor_RgbRange_arm64_NEON{
bits |= convert16(pixels + c) << c;
c += 16;
}
for(; c < count; c += 4){

count %= 16;
for(size_t i = 0; i < count / 4; i++, c+=4){
const uint8x16_t pixel = vld1q_u8((const uint8_t*)(pixels + c));
bits |= convert4(pixel) << c;
}
count %= 4;
if (count){
c -= 4;
PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t));
const uint8x16_t pixel = loader.load(pixels + c);
const uint64_t mask = ((uint64_t)1 << count) - 1;
Expand Down Expand Up @@ -273,6 +272,7 @@ class Compressor_RgbEuclidean_arm64_NEON{
for(size_t i = 0; i < count / 4; i++, c+=4){
bits |= convert4(pixels + c) << c;
}
count %= 4;
if (count){
PartialWordAccess_arm64_NEON loader(count * sizeof(uint32_t));
const uint8x16_t pixel = loader.load(pixels + c);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,9 @@
#include "Kernels/BinaryMatrix/Kernels_SparseBinaryMatrixCore.h"
#include "Kernels_Waterfill_Session.h"

//#include <iostream>
//using std::cout;
//using std::endl;
// #include <iostream>
// using std::cout;
// using std::endl;

namespace PokemonAutomation{
namespace Kernels{
Expand Down

0 comments on commit 163aa8f

Please sign in to comment.