Skip to content

Commit

Permalink
Merge pull request #101 from higlass/flekschas/simplify-npvector
Browse files Browse the repository at this point in the history
Calculate the np.nan array on the fly if not available
  • Loading branch information
flekschas authored Jul 5, 2019
2 parents 50174e6 + 76abbce commit 598c673
Showing 1 changed file with 17 additions and 30 deletions.
47 changes: 17 additions & 30 deletions clodius/tiles/npvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def tiles_wrapper(array, tile_ids, not_nan_array=None):

ret_array = tiles(array, z, x, not_nan_array).reshape((-1))

tile_values += [(tile_id,
ctf.format_dense_tile(ret_array))]
tile_values += [(tile_id, ctf.format_dense_tile(ret_array))]

return tile_values

Expand Down Expand Up @@ -105,54 +104,42 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
The x tile position
not_nan_array: np.array
An array storing the number of values which are not nan
in the original array
in the original array. Can be precalculated for speed.
bin_size: int
The number of values per bin
'''
# print("max_dim", max_dim)
max_zoom, x_start, x_end = max_zoom_and_data_bounds(array, z, x, bin_size)
data = array[x_start:x_end]

# print("tile_width", tile_width)
num_to_sum = 2 ** (max_zoom - z)
# print("num_to_sum", num_to_sum)
# print("data:", data)

# add some data so that the data can be divided into squares
divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum)
divisible_x_pad = divisible_x_width - data.shape[0]
# print("data.shape", data.shape)

# print("divisible_x_pad:", divisible_x_pad)
# print("len(data)", len(data))

a = np.pad(data, ((0, divisible_x_pad),), 'constant',
constant_values=(np.nan,))

ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1)

# print('ret_array:', len(ret_array))

# ret_array[ret_array == 0.] = np.nan
# print('ret_array:', ret_array)

# print("sum:", np.nansum(ret_array))

if not_nan_array is not None:
# print("normalizing")
# we want to calculate the means of the data points
if not_nan_array is None:
not_nan_data = ~np.isnan(array[x_start:x_end])
else:
not_nan_data = not_nan_array[x_start:x_end]
na = np.pad(not_nan_data, ((0, divisible_x_pad)), 'constant',
constant_values=(np.nan,))
norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
# print("len:", len(na), len(ret_array), len(norm_array))

ret_array = ret_array / (norm_array + 1)
# we want to calculate the means of the data points
na = np.pad(
not_nan_data,
((0, divisible_x_pad)),
'constant',
constant_values=(np.nan,)
)
norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
ret_array = ret_array / (norm_array + 1)

# determine how much to pad the array
x_pad = bin_size - ret_array.shape[0]

# print("ret_array:", ret_array.shape)
# print("x_pad:", x_pad)

return np.pad(ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, ))
return np.pad(
ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, )
)

0 comments on commit 598c673

Please sign in to comment.