diff --git a/setup.py b/setup.py index 8a24f139..f954cba1 100644 --- a/setup.py +++ b/setup.py @@ -1,11 +1,12 @@ import platform -import sys from setuptools import setup, Extension import setuptools_scm # noqa Ensure it’s installed -if platform.machine() == "x86_64" or platform.machine() == "AMD64": - DEFINE_MACROS = [("USE_SSE2", None)] +if platform.machine() == "AMD64": + # Macro is defined by default for clang and GCC on relevant targets, but + # not by MSVC. + DEFINE_MACROS = [("__SSE2__", 1)] else: DEFINE_MACROS = [] diff --git a/src/dnaio/_core.pyx b/src/dnaio/_core.pyx index ff9d091f..1cbcb09f 100644 --- a/src/dnaio/_core.pyx +++ b/src/dnaio/_core.pyx @@ -15,14 +15,7 @@ cdef extern from "Python.h": bint PyUnicode_IS_COMPACT_ASCII(object o) object PyUnicode_New(Py_ssize_t size, Py_UCS4 maxchar) -cdef extern from *: - """ - #if defined(USE_SSE2) - #include "ascii_check_sse2.h" - #else - #include "ascii_check.h" - #endif - """ +cdef extern from "ascii_check.h": int string_is_ascii(char *string, size_t length) cdef extern from "_conversions.h": diff --git a/src/dnaio/ascii_check.h b/src/dnaio/ascii_check.h index 45922590..e35f4354 100644 --- a/src/dnaio/ascii_check.h +++ b/src/dnaio/ascii_check.h @@ -1,34 +1,48 @@ -#define ASCII_MASK_8BYTE 0x8080808080808080ULL -#define ASCII_MASK_1BYTE 0x80 - #include #include +#ifdef __SSE2__ +#include "emmintrin.h" +#endif +#define ASCII_MASK_8BYTE 0x8080808080808080ULL +#define ASCII_MASK_1BYTE 0x80 + +/** + * @brief Check if a string of given length only contains ASCII characters. + * + * @param string A char pointer to the start of the string. + * @param length The length of the string. This funtion does not check for + * terminating NULL bytes. + * @returns 1 if the string is ASCII-only, 0 otherwise. + */ static int -string_is_ascii(char * string, size_t length) { - size_t n = length; +string_is_ascii(const char * string, size_t length) { + // By performing bitwise OR on all characters in 8-byte chunks (16-byte + // with SSE2) we can + // determine ASCII status in a non-branching (except the loops) fashion. uint64_t all_chars = 0; - char * char_ptr = string; - // The first loop aligns the memory address. Char_ptr is cast to a size_t - // to return the memory address. Uint64_t is 8 bytes long, and the processor - // handles this better when its address is a multiplier of 8. This loops - // handles the first few bytes that are not on such a multiplier boundary. - while ((size_t)char_ptr % sizeof(uint64_t) && n != 0) { - all_chars |= *char_ptr; - char_ptr += 1; - n -= 1; + const char *cursor = string; + const char *string_end_ptr = string + length; + const char *string_8b_end_ptr = string_end_ptr - sizeof(uint64_t); + int non_ascii_in_vec = 0; + #ifdef __SSE2__ + const char *string_16b_end_ptr = string_end_ptr - sizeof(__m128i); + __m128i vec_all_chars = _mm_setzero_si128(); + while (cursor < string_16b_end_ptr) { + __m128i loaded_chars = _mm_loadu_si128((__m128i *)cursor); + vec_all_chars = _mm_or_si128(loaded_chars, vec_all_chars); + cursor += sizeof(__m128i); } - uint64_t *longword_ptr = (uint64_t *)char_ptr; - while (n >= sizeof(uint64_t)) { - all_chars |= *longword_ptr; - longword_ptr += 1; - n -= sizeof(uint64_t); + non_ascii_in_vec = _mm_movemask_epi8(vec_all_chars); + #endif + + while (cursor < string_8b_end_ptr) { + all_chars |= *(uint64_t *)cursor; + cursor += sizeof(uint64_t); } - char_ptr = (char *)longword_ptr; - while (n != 0) { - all_chars |= *char_ptr; - char_ptr += 1; - n -= 1; + while (cursor < string_end_ptr) { + all_chars |= *cursor; + cursor += 1; } - return !(all_chars & ASCII_MASK_8BYTE); + return !(non_ascii_in_vec + (all_chars & ASCII_MASK_8BYTE)); } diff --git a/src/dnaio/ascii_check_sse2.h b/src/dnaio/ascii_check_sse2.h deleted file mode 100644 index 113ebe2d..00000000 --- a/src/dnaio/ascii_check_sse2.h +++ /dev/null @@ -1,67 +0,0 @@ -// Copyright (c) 2022 Leiden University Medical Center - -// Permission is hereby granted, free of charge, to any person obtaining a copy -// of this software and associated documentation files (the "Software"), to -// deal in the Software without restriction, including without limitation the -// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -// sell copies of the Software, and to permit persons to whom the Software is -// furnished to do so, subject to the following conditions: - -// The above copyright notice and this permission notice shall be included in -// all copies or substantial portions of the Software. - -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -// IN THE SOFTWARE. - -// This file is maintained and tested at -// https://github.com/rhpvorderman/ascii-check -// Please report bugs and feature requests there. - -#include -#include -#include - -#define ASCII_MASK_1BYTE 0x80 - -/** - * @brief Check if a string of given length only contains ASCII characters. - * - * @param string A char pointer to the start of the string. - * @param length The length of the string. This funtion does not check for - * terminating NULL bytes. - * @returns 1 if the string is ASCII-only, 0 otherwise. - */ -static int -string_is_ascii(const char * string, size_t length) { - size_t n = length; - const char * char_ptr = string; - typedef __m128i longword; - char all_chars = 0; - longword all_words = _mm_setzero_si128(); - - // First align the memory adress - while ((size_t)char_ptr % sizeof(longword) && n != 0) { - all_chars |= *char_ptr; - char_ptr += 1; - n -= 1; - } - const longword * longword_ptr = (longword *)char_ptr; - while (n >= sizeof(longword)) { - all_words = _mm_or_si128(all_words, *longword_ptr); - longword_ptr += 1; - n -= sizeof(longword); - } - char_ptr = (char *)longword_ptr; - while (n != 0) { - all_chars |= *char_ptr; - char_ptr += 1; - n -= 1; - } - // Check the most significant bits in the accumulated words and chars. - return !(_mm_movemask_epi8(all_words) || (all_chars & ASCII_MASK_1BYTE)); -}