Skip to content

Commit

Permalink
working on c-code
Browse files Browse the repository at this point in the history
  • Loading branch information
knutdrand committed Oct 20, 2023
1 parent 63b8af9 commit f67c7d5
Show file tree
Hide file tree
Showing 10 changed files with 127 additions and 41 deletions.
48 changes: 48 additions & 0 deletions npstructures/c_code/copying.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
static PyObject *
example_wrapper(PyObject *dummy, PyObject *args)
{
PyObject *arg1=NULL, *arg2=NULL, *out=NULL;
PyObject *arr1=NULL, *arr2=NULL, *oarr=NULL;

if (!PyArg_ParseTuple(args, "OOO!", &arg1, &arg2,
&PyArray_Type, &out)) return NULL;

arr1 = PyArray_FROM_OTF(arg1, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
if (arr1 == NULL) return NULL;
arr2 = PyArray_FROM_OTF(arg2, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY);
if (arr2 == NULL) goto fail;
#if NPY_API_VERSION >= 0x0000000c
oarr = PyArray_FROM_OTF(out, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY2);
#else
oarr = PyArray_FROM_OTF(out, NPY_DOUBLE, NPY_ARRAY_INOUT_ARRAY);
#endif
if (oarr == NULL) goto fail;

/* code that makes use of arguments */
/* You will probably need at least
nd = PyArray_NDIM(<..>) -- number of dimensions
dims = PyArray_DIMS(<..>) -- npy_intp array of length nd
showing length in each dim.
dptr = (double *)PyArray_DATA(<..>) -- pointer to data.
If an error occurs goto fail.
*/

Py_DECREF(arr1);
Py_DECREF(arr2);
#if NPY_API_VERSION >= 0x0000000c
PyArray_ResolveWritebackIfCopy(oarr);
#endif
Py_DECREF(oarr);
Py_INCREF(Py_None);
return Py_None;

fail:
Py_XDECREF(arr1);
Py_XDECREF(arr2);
#if NPY_API_VERSION >= 0x0000000c
PyArray_DiscardWritebackIfCopy(oarr);
#endif
Py_XDECREF(oarr);
return NULL;
}
23 changes: 13 additions & 10 deletions npstructures/copy_segment.pyx
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
# cython: infer_types=True
# import numpy as np
cimport cython
cimport numpy as np
from numpy import int8

ctypedef fused my_type:
int
double
long long
np.uint8_t
np.int8_t
np.uint16_t
np.int16_t

char
unsigned char
short
unsigned short

ctypedef fused const_type:
const int
const long long
const char
const unsigned char
const short
const unsigned short


@cython.boundscheck(False)
@cython.wraparound(False)
def compute(my_type[:] input_array, my_type[:] output_array, Py_ssize_t[:] starts, Py_ssize_t[:] ends):
def compute(const_type[:] input_array, my_type[:] output_array, Py_ssize_t[:] starts, Py_ssize_t[:] ends, Py_ssize_t step):
cdef Py_ssize_t segment = starts.shape[0]
cdef Py_ssize_t input_i, output_i, slice
assert tuple(starts.shape) == tuple(ends.shape)
Expand All @@ -27,6 +30,6 @@ def compute(my_type[:] input_array, my_type[:] output_array, Py_ssize_t[:] start
cdef Py_ssize_t x, y
output_i = 0
for segment in range(segment):
for input_i in range(starts[segment], ends[segment]):
for input_i in range(starts[segment], ends[segment], step):
output_array[output_i] = input_array[input_i]
output_i += 1
25 changes: 16 additions & 9 deletions npstructures/raggedarray/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from functools import lru_cache

from ..util import np
import numpy.typing as npt
from ..raggedshape import RaggedShape, RaggedView2, RaggedView, native_extract_segments, c_extract_segments
Expand All @@ -16,42 +18,47 @@ def __init__(self, data, shape):
self.is_contigous = False
else:
self.is_contigous = True
self._size = None

@property
def size(self) -> int:
if self._size is None:
self._size = int(np.sum(self._shape.lengths))
return self._size
return self.__data.size

@property
def dtype(self) -> npt.DTypeLike:
return self.__data.dtype

@property
def _cls(self):
return self.__class__

def _change_view(self, new_view):
ret = self.__class__(self.__data, new_view)
#ret.is_contigous = False
ret = self._cls(self.__data, new_view)
return ret

def _flatten_myself(self):
assert not self.is_contigous
if not isinstance(self._shape, (RaggedView2, RaggedView)):
return
if isinstance(self._shape, RaggedView2) and self._shape.n_rows>0 and any(self.dtype==d for d in (np.uint8, np.int32, np.int64, np.uint64, np.float64)):
if False and isinstance(self._shape, RaggedView2) and self._shape.n_rows>0 and any(self.dtype==d for d in (np.uint8, np.int32, np.int64, np.uint64, np.float64)):
shape = self._shape.get_shape()
if self.__data.size == 0:
pass
elif self._shape.col_step == 1:
elif self._shape.col_step == 1 and isinstance(self.__data, np.ndarray) and np.issubdtype(self.__data.dtype, np.integer):
self.__data = c_extract_segments(self.__data, self._shape, shape, self._shape.col_step)
else:
self.__data = native_extract_segments(self.__data, self._shape, shape, self._shape.col_step)
self._shape = shape
assert isinstance(shape, RaggedShape)
self.is_contigous = True
return

idx, shape = self._shape.get_flat_indices()
#if np.issubdtype(idx.dtype, bool):
# self.__data = self.__data[:idx.size]
self.__data = self.__data[idx]
self._shape = shape
self.is_contigous = True
# else:
# #assert False, self._shape

def ravel(self) -> npt.ArrayLike:
"""Return a flattened view of the data.
Expand Down
1 change: 1 addition & 0 deletions npstructures/raggedarray/raggedslice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

def ragged_slice(array, starts=None, ends=None):
if isinstance(array, RaggedArray):
array.ravel() #This is a MESS
base_starts = array._shape.starts
base_ends = array._shape.ends
else:
Expand Down
14 changes: 9 additions & 5 deletions npstructures/raggedshape.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from numbers import Number
from dataclasses import dataclass

from .copy_segment import compute
#try:
# from .copy_segment import compute
#except ImportError:
compute = None
from .util import np


def build_indices(view, to_shape, step):
step = 1 if step is None else step
index_builder = np.full(int(to_shape.size + 1), step, dtype=view._dtype)
Expand All @@ -29,12 +32,13 @@ def native_extract_segments(input_array, view, to_shape, step):


def c_extract_segments(input_array, view, to_shape, step):
assert step == 1
if compute is None:
return native_extract_segments(input_array, view, to_shape, step)
new_array = np.empty_like(input_array, shape=(to_shape.size,))
print(input_array.dtype, view.starts.dtype, view.ends.dtype)
compute(input_array, new_array, view.starts, view.ends)
compute(input_array, new_array, view.starts, view.ends, step)
return new_array


class ViewBase:
_dtype = np.int64

Expand Down
22 changes: 11 additions & 11 deletions npstructures/runlengtharray.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def _step_subset(self, step, indices, values):
indices = (indices+step_size-1)//step_size
indices, values = self.remove_empty_intervals(indices, values)
return indices, values

def _getitem_tuple(self, raw_idx):
if len(raw_idx) == 1:
return self[raw_idx[0]]
Expand All @@ -74,7 +74,7 @@ def _getitem_tuple(self, raw_idx):
idx = (slice(None),) + idx
elif len(idx) == 1 and raw_idx[1] is Ellipsis:
idx = idx + (slice(None),)

if len(idx) == 2:
row_idx, col_idx = idx
rows = self[row_idx]
Expand All @@ -99,7 +99,7 @@ def _getitem_tuple(self, raw_idx):
if (start is not None and stop is not None):
if (is_reverse and (start <= stop) and start > 0) or (not is_reverse and (start >= stop) and stop>0):
return self.__class__(np.zeros((len(rows), 1), dtype=int), np.empty((len(rows), 0), dtype=int))

if stop is not None:
if stop >= 0:
stop = np.minimum(rows.shape[-1], stop)[:, np.newaxis]
Expand Down Expand Up @@ -300,7 +300,7 @@ def __from_intervals(cls, starts: ArrayLike, ends: ArrayLike, size: int , values
-------
'RunLengthArray'
"""

assert np.all(ends > starts)
assert np.all(starts[1:] > ends[:-1])
prefix = [0] if (len(starts) == 0 or starts[0] != 0) else []
Expand Down Expand Up @@ -354,7 +354,7 @@ def __array_function__(self, func: callable, types: List, args: List, kwargs: Di

def __array_ufunc__(self, ufunc: callable, method: str, *inputs, **kwargs):
"""Handle numpy unfuncs called on the runlength array
Currently only handles '__call__' modes and unary and binary functions
Parameters
Expand Down Expand Up @@ -528,7 +528,7 @@ def _step_subset(self, step: int):
i=[0,1,2,3,4,5], v=[0, 1, 2, 3, 4]
/3[0,0,0,1,1,1]
->[0,1,1,1,2,2] -> (i+(step-1)
i=[0, 1, 2], v=[0, 3]
[0, 0, 0], step=2
Expand Down Expand Up @@ -598,7 +598,7 @@ def __getitem__(self, idx: Union[Tuple, List[int], Number, ArrayLike, slice]):


class RunLength2dArray(IndexableMixin, np.lib.mixins.NDArrayOperatorsMixin):
''' Multiple RunLengthArrays of the same size. Behaves like a 2d numpy array'''
''' Multiple RunLengthArrays of the same size. Behaves like a 2d numpy array'''
def __init__(self, indices: RaggedArray, values: RaggedArray, row_len: int=None):
self._values = values
self._indices = indices
Expand All @@ -624,15 +624,15 @@ def dtype(self) -> DTypeLike:
return self._values.dtype()

def to_array(self) -> ArrayLike:
''' Convert to a normal 2d numpy array '''
''' Convert to a normal 2d numpy array '''
return np.array([row.to_array() for row in self])

def __len__(self) -> int:
return len(self._indices)

def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
"""Handle numpy unfuncs called on the runlength array
Currently only handles '__call__' modes and unary and binary functions
Parameters
Expand Down Expand Up @@ -794,7 +794,7 @@ class RunLengthRaggedArray(RunLength2dArray, IndexableMixin):
def shape(self) -> Tuple[int]:
if self._row_len is None:
return (len(self._indices), self._indices[..., -1])
return (len(self._indices), self._row_len)
return (len(self._indices), self._row_len)


@staticmethod
Expand All @@ -820,7 +820,7 @@ def remove_empty_intervals(events: ArrayLike, values: ArrayLike) -> Tuple[ArrayL
mask2[..., 1:] = mask
values = values[mask]
events = events[mask2]

return RaggedArray(events, mask2.sum(axis=-1)), RaggedArray(values, mask.sum(axis=-1))

def __get_col_reverse(self, indices, values):
Expand Down
23 changes: 23 additions & 0 deletions profiling/copying.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np

from npstructures.raggedshape import native_extract_segments, c_extract_segments, RaggedView2
from time import time
data_size = 100000000
data = np.empty(data_size, dtype=np.uint8)
segment_size = 100
n_segments = data_size//(segment_size*2)
starts = np.arange(n_segments)*(segment_size*2)
lens = np.full(n_segments, segment_size)
view = RaggedView2(starts, lens, 1)
to_shape = view.get_shape()
print('-----------------------')
t = time()
native_extract_segments(data, view, to_shape, 1)
print('native_extract_segments', time()-t)
t = time()
c_extract_segments(data, view, to_shape, 1)
print('c_extract_segments', time()-t)
print('-----------------------')



6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

"""The setup script."""

from setuptools import setup, find_packages, Extension
from setuptools import setup, find_packages # , Extension

module = Extension('npstructures.copy_segment', sources=['npstructures/copy_segment.pyx'])
#module = Extension('npstructures.copy_segment', sources=['npstructures/copy_segment.pyx'])
with open('README.rst') as readme_file:
readme = readme_file.read()

Expand Down Expand Up @@ -42,7 +42,7 @@
url='https://github.com/knutdrand/npstructures',
version='0.2.10',
zip_safe=False,
ext_modules=[module],
# ext_modules=[module],
)

# python -m build
Expand Down
4 changes: 2 additions & 2 deletions tests/property_tests/test_runlengtharray.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,8 @@ def test_run_length_indexing(data):
[0, 1]], dtype=int16), (slice(None, None, None), slice(-1, 0, None))))
@example(data=(array([[0, 0]], dtype=int8),
(slice(None, None, None), slice(0, -1, None))))
@example(data=(array([[0, 1, 1]], dtype=int8),
(slice(None, None, None), slice(2, -1, -1))))
@example(data=(array([[0, 1, 1]], dtype=int8), (slice(None, None, None), slice(2, -1, -1))))
@example(data=(array([[0, 0, 0, 0]], dtype=int8),(slice(None, None, None), slice(3, -1, -1))))
# @settings(max_examples=500)
def test_run_lengthragged_indexing(data):
matrix, idx = data
Expand Down
2 changes: 1 addition & 1 deletion tests/test_c_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ def test_copy():
array_2 = np.zeros_like(array_1, shape=(5,))
starts = np.array([1, 5])
ends = np.array([3, 8])
compute(array_1, array_2,starts, ends)
compute(array_1, array_2,starts, ends, 1)
assert_array_equal(array_2, np.array([2, 3, 6, 7, 8]))

0 comments on commit f67c7d5

Please sign in to comment.