Skip to content

Commit

Permalink
Add black white filter NEON impl.
Browse files Browse the repository at this point in the history
  • Loading branch information
Gin committed Nov 23, 2023
1 parent 8e0bfa1 commit 7e32047
Show file tree
Hide file tree
Showing 5 changed files with 171 additions and 37 deletions.
13 changes: 12 additions & 1 deletion SerialPrograms/Source/CommonFramework/ImageTools/ImageFilter.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ ImageRGB32 filter_rgb32_euclidean(
// If `replace_color_within_range` is true, replace the colors within (<=) `max_euclidean_distance` of the
// `expected_color` with `replacement_color`.
// If `replace_color_within_range` is false, replace the color outside of the distance with the color `replacement_color`.
// Returns the # of pixels inside the distance.
// Returns the # of pixels inside the distance as `pixels_in_range`.
// Note: the alpha channel of `image` and `expected_color` are ignored during computation.
ImageRGB32 filter_rgb32_euclidean(
size_t& pixels_in_range,
Expand All @@ -83,10 +83,16 @@ ImageRGB32 filter_rgb32_euclidean(
// Convert the image to black and white.
// Inside [mins, maxs] is white, otherwise it's black.
// Set "in_range_black" to true to invert the colors.
// Both white and black colors have alpha=255.
ImageRGB32 to_blackwhite_rgb32_range(
const ImageViewRGB32& image,
uint32_t mins, uint32_t maxs, bool in_range_black
);
// Convert the image to black and white.
// Inside [mins, maxs] is white, otherwise it's black.
// Set "in_range_black" to true to invert the colors.
// Both white and black colors have alpha=255.
// Returns the # of pixels inside the [mins, maxs] range as `pixels_in_range`.
ImageRGB32 to_blackwhite_rgb32_range(
size_t& pixels_in_range,
const ImageViewRGB32& image,
Expand All @@ -97,6 +103,11 @@ ImageRGB32 to_blackwhite_rgb32_range(

// Run multiple filters at once. This is more memory efficient than making
// multiple calls to one filter at a time.
// For each filter:
// If `in_range_black` is true, replace the color range [mins, maxs] with color black while the rest white.
// If `in_range_black` is false, replace the color range [mins, maxs] with color white while the rest black.
// Both white and black colors have alpha=255.
// For each filter, return the filtered image and the # of pixels inside the [mins, maxs] range of the filter.
struct BlackWhiteRgb32Range{
uint32_t mins;
uint32_t maxs;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,6 @@
namespace PokemonAutomation{
namespace Kernels{

//TODO: impl this file
size_t to_blackwhite_rgb32_range_Default(
const uint32_t* in, size_t in_bytes_per_row, size_t width, size_t height,
uint32_t* out, size_t out_bytes_per_row,
uint32_t mins, uint32_t maxs, bool in_range_black
);

void to_blackwhite_rgb32_range_Default(
const uint32_t* image, size_t bytes_per_row, size_t width, size_t height,
ToBlackWhiteRgb32RangeFilter* filter, size_t filter_count
);

class ImageFilter_RgbRange_arm64_NEON{
public:
static const size_t VECTOR_SIZE = 4;
Expand Down Expand Up @@ -66,14 +54,14 @@ class ImageFilter_RgbRange_arm64_NEON{
// We achieve +=1 by subtracting 0xFFFFFFFF
m_count_u32 = vsubq_u32(m_count_u32, cmp_u32);
// select replacement color or in_u8 based on cmp_u32:
uint32x4_t out_u8;
uint32x4_t out_u32;
if (m_replace_color_within_range){
// vbslq_u32(a, b, c) for 1 bits in a, choose b; for 0 bits in a, choose c
out_u8 = vbslq_u32(cmp_u32, m_replacement_color_u32, vreinterpretq_u32_u8(in_u8));
out_u32 = vbslq_u32(cmp_u32, m_replacement_color_u32, vreinterpretq_u32_u8(in_u8));
} else{
out_u8 = vbslq_u32(cmp_u32, vreinterpretq_u32_u8(in_u8), m_replacement_color_u32);
out_u32 = vbslq_u32(cmp_u32, vreinterpretq_u32_u8(in_u8), m_replacement_color_u32);
}
vst1q_u32(out, out_u8);
vst1q_u32(out, out_u32);
}
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, size_t left){
uint32_t buffer_in[4], buffer_out[4];
Expand Down Expand Up @@ -161,14 +149,14 @@ class ImageFilter_RgbEuclidean_arm64_NEON{
// We achieve +=1 by subtracting 0xFFFFFFFF
m_count_u32 = vsubq_u32(m_count_u32, cmp_u32);
// select replacement color or in_u8 based on cmp_u32:
uint32x4_t out_u8;
uint32x4_t out_u32;
if (m_replace_color_within_range){
// vbslq_u32(a, b, c) for 1 bits in a, choose b; for 0 bits in a, choose c
out_u8 = vbslq_u32(cmp_u32, m_replacement_color_u32, in_u32);
out_u32 = vbslq_u32(cmp_u32, m_replacement_color_u32, in_u32);
} else{
out_u8 = vbslq_u32(cmp_u32, in_u32, m_replacement_color_u32);
out_u32 = vbslq_u32(cmp_u32, in_u32, m_replacement_color_u32);
}
vst1q_u32(out, out_u8);
vst1q_u32(out, out_u32);
}
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, size_t left){
uint32_t buffer_in[4], buffer_out[4];
Expand Down Expand Up @@ -211,23 +199,55 @@ class ToBlackWhite_RgbRange_arm64_NEON{

public:
// Build a black/white range filter.
//  - mins / maxs: packed 32-bit pixel values holding the inclusive lower / upper
//    bound of every 8-bit channel; each is broadcast to all four vector lanes.
//  - in_range_black: when true, in-range pixels are written as 0xFF000000 and all
//    other pixels as 0xFFFFFFFF; when false the two output colors are swapped.
// The four per-lane in-range counters start at zero.
ToBlackWhite_RgbRange_arm64_NEON(uint32_t mins, uint32_t maxs, bool in_range_black)
    : m_mins_u8(vreinterpretq_u8_u32(vdupq_n_u32(mins)))
    , m_maxs_u8(vreinterpretq_u8_u32(vdupq_n_u32(maxs)))
    // m_zeros_u8 is declared as uint8x16_t, so build it as a u8 vector directly
    // rather than relying on an implicit uint32x4_t -> uint8x16_t conversion.
    , m_zeros_u8(vdupq_n_u8(0))
    , m_in_range_color_u32(vdupq_n_u32(in_range_black ? 0xFF000000 : 0xFFFFFFFF))
    , m_out_of_range_color_u32(vdupq_n_u32(in_range_black ? 0xFFFFFFFF : 0xFF000000))
    , m_count_u32(vdupq_n_u32(0))
{}

// Total number of in-range pixels counted so far.
// The four 32-bit per-lane counters are pairwise-added into two 64-bit lanes and
// those two lanes are then summed.
// (The leftover stub `return 0;` that made this reduction unreachable is removed.)
PA_FORCE_INLINE size_t count() const{
    // vpaddlq_u32 widens while adding: {c0 + c1, c2 + c3} as uint64 lanes.
    uint64x2_t sum_u64 = vpaddlq_u32(m_count_u32);
    return sum_u64[0] + sum_u64[1];
}

// Convert four pixels at once to black/white.
// Reads 4 uint32 pixels from `in`, writes 4 uint32 pixels to `out`.
// A pixel is "in range" when every one of its four bytes (alpha included) lies
// within the corresponding [mins, maxs] byte. In-range pixels are written as
// m_in_range_color_u32, all others as m_out_of_range_color_u32, and the per-lane
// counter of every in-range pixel is incremented.
PA_FORCE_INLINE void process_full(uint32_t* out, const uint32_t* in){
    uint8x16_t in_u8 = vreinterpretq_u8_u32(vld1q_u32(in));

    // Check if mins > pixel per color channel
    uint8x16_t cmp0 = vcgtq_u8(m_mins_u8, in_u8);
    // Check if pixel > maxs per color channel
    uint8x16_t cmp1 = vcgtq_u8(in_u8, m_maxs_u8);
    // cmp_u8: non-zero byte wherever a channel is outside [mins, maxs]
    uint8x16_t cmp_u8 = vorrq_u8(cmp0, cmp1);
    // cmp_u32: whether each pixel is within the range.
    // If a pixel is within [mins, maxs], its uint32_t in `cmp_u32` is all 1 bits, otherwise all 0 bits.
    // m_zeros_u8 is a uint8x16_t, so reinterpret it explicitly for the 32-bit compare.
    uint32x4_t cmp_u32 = vceqq_u32(vreinterpretq_u32_u8(cmp_u8), vreinterpretq_u32_u8(m_zeros_u8));
    // Increase count for each pixel in range. Each uint32 lane is counted separately.
    // We achieve +=1 by subtracting 0xFFFFFFFF
    m_count_u32 = vsubq_u32(m_count_u32, cmp_u32);
    // Select the in-range or out-of-range color based on cmp_u32:
    // vbslq_u32(a, b, c) for 1 bits in a, choose b; for 0 bits in a, choose c
    uint32x4_t out_u32 = vbslq_u32(cmp_u32, m_in_range_color_u32, m_out_of_range_color_u32);

    vst1q_u32(out, out_u32);
}
// Process the last `left` pixels of a row when fewer than a full vector remain.
// The pixels are staged through 4-element stack buffers so that process_full()
// can operate on a whole vector; only the first `left` results are copied to `out`.
// NOTE(review): assumes 0 < left < 4 (the staging buffers hold 4 pixels) — confirm against the caller.
PA_FORCE_INLINE void process_partial(uint32_t* out, const uint32_t* in, size_t left){
    // Zero the staging buffer so the padding lanes never hold indeterminate values.
    uint32_t buffer_in[4] = {0, 0, 0, 0};
    uint32_t buffer_out[4];
    memcpy(buffer_in, in, sizeof(uint32_t) * left);

    // process_full() increments the counter of every in-range lane, padding lanes
    // included. Remember the counters so the padding lanes can be rolled back;
    // otherwise padding that happens to fall inside [mins, maxs] inflates count().
    const uint32x4_t count_before = m_count_u32;
    process_full(buffer_out, buffer_in);

    // Lane i carries a real pixel iff i < left.
    const uint32_t lane_index[4] = {0, 1, 2, 3};
    const uint32x4_t valid_u32 = vcltq_u32(vld1q_u32(lane_index), vdupq_n_u32(static_cast<uint32_t>(left)));
    // Keep the updated counter on valid lanes, restore the old one on padding lanes.
    m_count_u32 = vbslq_u32(valid_u32, m_count_u32, count_before);

    memcpy(out, buffer_out, sizeof(uint32_t) * left);
}

private:
// Placeholder: ignores `pixel` and always returns 0.
// NOTE(review): no caller is visible in this class — looks like a leftover stub
// from before the NEON implementation; confirm before removing.
PA_FORCE_INLINE int process_word(int pixel){
return 0;
}

private:
uint8x16_t m_mins_u8;
uint8x16_t m_maxs_u8;
uint8x16_t m_zeros_u8;
uint32x4_t m_in_range_color_u32;
uint32x4_t m_out_of_range_color_u32;
uint32x4_t m_count_u32;
};


Expand All @@ -237,21 +257,19 @@ size_t to_blackwhite_rgb32_range_arm64_NEON(
uint32_t* out, size_t out_bytes_per_row,
uint32_t mins, uint32_t maxs, bool in_range_black
){
return to_blackwhite_rgb32_range_Default(in, in_bytes_per_row, width, height,
out, out_bytes_per_row, mins, maxs, in_range_black);
// ToBlackWhite_RgbRange_arm64_NEON filter(mins, maxs, in_range_black);
// filter_per_pixel(in, in_bytes_per_row, width, height, filter, out, out_bytes_per_row);
// return filter.count();
// return to_blackwhite_rgb32_range_Default(in, in_bytes_per_row, width, height,
// out, out_bytes_per_row, mins, maxs, in_range_black);
ToBlackWhite_RgbRange_arm64_NEON filter(mins, maxs, in_range_black);
filter_per_pixel(in, in_bytes_per_row, width, height, filter, out, out_bytes_per_row);
return filter.count();
}
// Multi-filter black/white conversion, arm64 NEON implementation.
// Forwards the image description (`image`, `bytes_per_row`, `width`, `height`) and
// the `filter_count` entries of `filter` to to_blackwhite_rbg32<>, instantiated
// with the NEON per-pixel filter class.
// The previous fallback to to_blackwhite_rgb32_range_Default() is removed: it
// returned before the NEON path could run.
void to_blackwhite_rgb32_range_arm64_NEON(
    const uint32_t* image, size_t bytes_per_row, size_t width, size_t height,
    ToBlackWhiteRgb32RangeFilter* filter, size_t filter_count
){
    to_blackwhite_rbg32<ToBlackWhite_RgbRange_arm64_NEON>(
        image, bytes_per_row, width, height, filter, filter_count
    );
}


Expand Down
101 changes: 101 additions & 0 deletions SerialPrograms/Source/Tests/Kernels_Tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,107 @@ int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image){
return 0;
}

int test_kernels_ToBlackWhiteRGB32Range(const ImageViewRGB32& image){
const size_t width = image.width();
const size_t height = image.height();
cout << "Testing to_black_white_rgb32_range(), image size " << width << " x " << height << endl;

Color min_color(0, 0, 0);
Color max_color(63, 63, 63);
// Color max_color(238, 24, 42);

const uint32_t mins = uint32_t(min_color);
const uint32_t maxs = uint32_t(max_color);

ImageRGB32 image_out(image.width(), image.height());
ImageRGB32 image_out_2(image.width(), image.height());
size_t pixels_in_range = 0;

const bool in_range_black = true;
auto time_start = current_time();
// auto new_image = filter_rgb32_range(image, mins, maxs, COLOR_WHITE, replace_color_within_range);
pixels_in_range = Kernels::to_blackwhite_rgb32_range(
image.data(), image.bytes_per_row(), image.width(), image.height(),
image_out.data(), image_out.bytes_per_row(), mins, maxs, in_range_black
);
auto time_end = current_time();
auto ns = std::chrono::duration_cast<std::chrono::nanoseconds>(time_end - time_start).count();
auto ms = ns / 1000000.;
cout << "One filter time: " << ms << " ms" << endl;

size_t pixels_in_range_2 = Kernels::to_blackwhite_rgb32_range(
image.data(), image.bytes_per_row(), image.width(), image.height(),
image_out_2.data(), image_out_2.bytes_per_row(), mins, maxs, !in_range_black
);

TEST_RESULT_EQUAL(pixels_in_range, pixels_in_range_2);

size_t actual_num_pixels_in_range = 0;
size_t error_count = 0;
for (size_t r = 0; r < height; r++){
for (size_t c = 0; c < width; c++){
const Color color(image.pixel(c, r));
const Color new_color(image_out.pixel(c, r));
const Color new_color_2(image_out_2.pixel(c, r));
bool in_range = (min_color.alpha() <= color.alpha() && color.alpha() <= max_color.alpha());
in_range = in_range && (min_color.red() <= color.red() && color.red() <= max_color.red());
in_range = in_range && (min_color.green() <= color.green() && color.green() <= max_color.green());
in_range = in_range && (min_color.blue() <= color.blue() && color.blue() <= max_color.blue());
actual_num_pixels_in_range += in_range;
if (error_count < 10){
// Print first 10 errors:
if (in_range && new_color != COLOR_BLACK){
cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = "
<< c << ", " << r << ", should be black due to in range but not so" << endl;
++error_count;
}
else if (in_range == false && new_color != COLOR_WHITE){
cout << "Error: wrong filter result: old color " << color.to_string() << ", (x,y) = "
<< c << ", " << r << ", should be white due to out of range but not so" << endl;
++error_count;
}

if (in_range && new_color_2 != COLOR_WHITE){
cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = "
<< c << ", " << r << ", should be white due to in range but not so" << endl;
++error_count;
}
else if (in_range == false && new_color_2 != COLOR_BLACK){
cout << "Error: wrong inverse filter result: old color " << color.to_string() << ", (x,y) = "
<< c << ", " << r << ", should be black due to out of range but not so" << endl;
++error_count;
}
}
}
}
cout << "Found " << actual_num_pixels_in_range << " pixels in range" << endl;
if (pixels_in_range != actual_num_pixels_in_range){
cout << "Error: wrong pixels in range: " << pixels_in_range << " actual: " << actual_num_pixels_in_range << endl;
return 1;
}

if (error_count){
return 1;
}

// We try to wait for three seconds:
const size_t num_iters = size_t(3000 / ms);
time_start = current_time();
for(size_t i = 0; i < num_iters; i++){
Kernels::to_blackwhite_rgb32_range(
image.data(), image.bytes_per_row(), image.width(), image.height(),
image_out.data(), image_out.bytes_per_row(), mins, maxs, in_range_black
);
}
time_end = current_time();
ms = (double)std::chrono::duration_cast<Milliseconds>(time_end - time_start).count();
cout << "Running " << num_iters << " iters, avg filter time: " << ms / num_iters << " ms" << endl;

return 0;
}



int test_kernels_Waterfill(const ImageViewRGB32& image){

ImagePixelBox box(0, 0, image.width(), image.height());
Expand Down
3 changes: 3 additions & 0 deletions SerialPrograms/Source/Tests/Kernels_Tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ int test_kernels_FilterRGB32Range(const ImageViewRGB32& image);

int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image);

int test_kernels_FilterRGB32Euclidean(const ImageViewRGB32& image);

int test_kernels_ToBlackWhiteRGB32Range(const ImageViewRGB32& image);

int test_kernels_Waterfill(const ImageViewRGB32& image);

Expand Down
1 change: 1 addition & 0 deletions SerialPrograms/Source/Tests/TestMap.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,7 @@ const std::map<std::string, TestFunction> TEST_MAP = {
{"Kernels_BinaryMatrix", std::bind(image_void_detector_helper, test_kernels_BinaryMatrix, _1)},
{"Kernels_FilterRGB32Range", std::bind(image_void_detector_helper, test_kernels_FilterRGB32Range, _1)},
{"Kernels_FilterRGB32Euclidean", std::bind(image_void_detector_helper, test_kernels_FilterRGB32Euclidean, _1)},
{"Kernels_ToBlackWhiteRGB32Range", std::bind(image_void_detector_helper, test_kernels_ToBlackWhiteRGB32Range, _1)},
{"Kernels_Waterfill", std::bind(image_void_detector_helper, test_kernels_Waterfill, _1)},
{"CommonFramework_BlackBorderDetector", std::bind(image_bool_detector_helper, test_CommonFramework_BlackBorderDetector, _1)},
{"NintendoSwitch_UpdateMenuDetector", std::bind(image_bool_detector_helper, test_NintendoSwitch_UpdateMenuDetector, _1)},
Expand Down

0 comments on commit 7e32047

Please sign in to comment.