Skip to content

Commit

Permalink
Calculate the np.nan array on the fly if not available
Browse files Browse the repository at this point in the history
1. It's fast enough
2. Otherwise one is forced to provide `not_nan_array` to get normalization, which is confusing because the docstring doesn't mention that, and returning unnormalized values by default seems odd
  • Loading branch information
flekschas committed Jul 5, 2019
1 parent 50174e6 commit 47c50e5
Showing 1 changed file with 17 additions and 30 deletions.
47 changes: 17 additions & 30 deletions clodius/tiles/npvector.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,7 @@ def tiles_wrapper(array, tile_ids, not_nan_array=None):

ret_array = tiles(array, z, x, not_nan_array).reshape((-1))

tile_values += [(tile_id,
ctf.format_dense_tile(ret_array))]
tile_values += [(tile_id, ctf.format_dense_tile(ret_array))]

return tile_values

Expand Down Expand Up @@ -105,54 +104,42 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
The x tile position
not_nan_array: np.array
An array storing the number of values which are not nan
in the original array
in the original array. Can be precalculated for speed.
bin_size: int
The number of values per bin
'''
# print("max_dim", max_dim)
max_zoom, x_start, x_end = max_zoom_and_data_bounds(array, z, x, bin_size)
data = array[x_start:x_end]

# print("tile_width", tile_width)
num_to_sum = 2 ** (max_zoom - z)
# print("num_to_sum", num_to_sum)
# print("data:", data)

# add some data so that the data can be divided into squares
divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum)
divisible_x_pad = divisible_x_width - data.shape[0]
# print("data.shape", data.shape)

# print("divisible_x_pad:", divisible_x_pad)
# print("len(data)", len(data))

a = np.pad(data, ((0, divisible_x_pad),), 'constant',
constant_values=(np.nan,))

ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1)

# print('ret_array:', len(ret_array))

# ret_array[ret_array == 0.] = np.nan
# print('ret_array:', ret_array)

# print("sum:", np.nansum(ret_array))

if not_nan_array is not None:
# print("normalizing")
# we want to calculate the means of the data points
if not_nan_array is None:
not_nan_data = ~np.isnan(ret_array[x_start:x_end])
else:
not_nan_data = not_nan_array[x_start:x_end]
na = np.pad(not_nan_data, ((0, divisible_x_pad)), 'constant',
constant_values=(np.nan,))
norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
# print("len:", len(na), len(ret_array), len(norm_array))

ret_array = ret_array / (norm_array + 1)
# we want to calculate the means of the data points
na = np.pad(
not_nan_data,
((0, divisible_x_pad)),
'constant',
constant_values=(np.nan,)
)
norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
ret_array = ret_array / (norm_array + 1)

# determine how much to pad the array
x_pad = bin_size - ret_array.shape[0]

# print("ret_array:", ret_array.shape)
# print("x_pad:", x_pad)

return np.pad(ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, ))
return np.pad(
ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, )
)

0 comments on commit 47c50e5

Please sign in to comment.