From 7775fdd744d702956003072c567408eae01c877e Mon Sep 17 00:00:00 2001 From: Freddie Witherden Date: Fri, 13 Dec 2024 07:24:12 -0600 Subject: [PATCH] Add support for array-based bulk insert (#340) * Reduce overhead. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Reduce overhead. * Add support for array-based bulk insertion. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- rtree/core.py | 17 ++++++++++++++ rtree/index.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 77 insertions(+), 2 deletions(-) diff --git a/rtree/core.py b/rtree/core.py index 1bd2b80d..e9e7a355 100644 --- a/rtree/core.py +++ b/rtree/core.py @@ -125,6 +125,23 @@ def free_error_msg_ptr(result, func, cargs): rt.Index_CreateWithStream.restype = ctypes.c_void_p rt.Index_CreateWithStream.errcheck = check_void # type: ignore +try: + rt.Index_CreateWithArray.argtypes = [ + ctypes.c_void_p, + ctypes.c_uint64, + ctypes.c_uint32, + ctypes.c_uint64, + ctypes.c_uint64, + ctypes.c_uint64, + ctypes.c_void_p, + ctypes.c_void_p, + ctypes.c_void_p, + ] + rt.Index_CreateWithArray.restype = ctypes.c_void_p + rt.Index_CreateWithArray.errcheck = check_void # type: ignore +except AttributeError: + pass + rt.Index_Destroy.argtypes = [ctypes.c_void_p] rt.Index_Destroy.restype = None rt.Index_Destroy.errcheck = check_void_done # type: ignore diff --git a/rtree/index.py b/rtree/index.py index 90e53a54..f363107f 100644 --- a/rtree/index.py +++ b/rtree/index.py @@ -207,20 +207,26 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.interleaved = bool(kwargs.get("interleaved", True)) stream = None + arrays = None basename = None storage = None if args: if isinstance(args[0], str) or isinstance(args[0], bytes): # they sent in a filename basename = args[0] - # they sent in a filename, stream + # they sent in a filename, stream or filename, buffers if len(args) > 1: - stream = args[1] + if isinstance(args[1], tuple): + arrays = args[1] + else: + stream = args[1] elif isinstance(args[0], ICustomStorage): storage = args[0] # they sent in a storage, stream if len(args) > 1: stream = args[1] + elif isinstance(args[0], tuple): + arrays = args[0] else: stream = args[0] @@ -272,6 +278,18 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: if stream and self.properties.type == RT_RTree: self._exception = None self.handle = self._create_idx_from_stream(stream) + if self._exception: + raise self._exception + elif arrays and self.properties.type == RT_RTree: + self._exception = None + + try: + self.handle = self._create_idx_from_array(*arrays) + except NameError: + raise NotImplementedError( + "libspatialindex >= 2.1 needed for bulk insert" + ) + if self._exception: raise self._exception else: @@ -279,6 +297,8 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: if stream: # Bulk insert not supported, so add one by one for item in stream: self.insert(*item) + elif arrays: + raise NotImplementedError("Bulk insert only supported for RTrees") def get_size(self) -> int: warnings.warn( @@ -1250,6 +1270,36 @@ def py_next_item(p_id, p_mins, p_maxs, p_dimension, p_data, p_length): stream = core.NEXTFUNC(py_next_item) return IndexStreamHandle(self.properties.handle, stream) + def _create_idx_from_array(self, ibuf, minbuf, maxbuf): + assert len(ibuf) == len(minbuf) + assert len(ibuf) == len(maxbuf) + assert minbuf.strides == maxbuf.strides + + # Cast + ibuf = ibuf.astype(int) + minbuf = minbuf.astype(float) + maxbuf = maxbuf.astype(float) + + # Extract counts + n, d = minbuf.shape + + # Compute strides + i_stri = ibuf.strides[0] // 8 + d_i_stri = minbuf.strides[0] // 8 + d_j_stri = minbuf.strides[1] // 8 + + return IndexArrayHandle( + self.properties.handle, + n, + d, + i_stri, + d_i_stri, + d_j_stri, + ibuf.ctypes.data, + minbuf.ctypes.data, + maxbuf.ctypes.data, + ) + def leaves(self): leaf_node_count = ctypes.c_uint32() p_leafsizes = ctypes.pointer(ctypes.c_uint32()) @@ -1431,6 +1481,14 @@ class IndexStreamHandle(IndexHandle): _create = core.rt.Index_CreateWithStream +try: + + class IndexArrayHandle(IndexHandle): + _create = core.rt.Index_CreateWithArray +except AttributeError: + pass + + class PropertyHandle(Handle): _create = core.rt.IndexProperty_Create _destroy = core.rt.IndexProperty_Destroy