From 47c50e556e6a49e15da67560131c45ee73b4f942 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas Date: Fri, 5 Jul 2019 14:49:00 -0400 Subject: [PATCH 1/2] Calculate the np.nan array on the fly if not available 1. It's fast enough 2. Otherwise one is forced to provide `not_nan_array` to get normalization, which is confusing as the doc string doesn't mention that and no normalization seems odd --- clodius/tiles/npvector.py | 47 ++++++++++++++------------------------- 1 file changed, 17 insertions(+), 30 deletions(-) diff --git a/clodius/tiles/npvector.py b/clodius/tiles/npvector.py index 5d878e56..6f9cbf46 100644 --- a/clodius/tiles/npvector.py +++ b/clodius/tiles/npvector.py @@ -17,8 +17,7 @@ def tiles_wrapper(array, tile_ids, not_nan_array=None): ret_array = tiles(array, z, x, not_nan_array).reshape((-1)) - tile_values += [(tile_id, - ctf.format_dense_tile(ret_array))] + tile_values += [(tile_id, ctf.format_dense_tile(ret_array))] return tile_values @@ -105,54 +104,42 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024): The x tile position not_nan_array: np.array An array storing the number of values which are not nan - in the original array + in the original array. Can be precalculated for speed. 
bin_size: int The number of values per bin ''' - # print("max_dim", max_dim) max_zoom, x_start, x_end = max_zoom_and_data_bounds(array, z, x, bin_size) data = array[x_start:x_end] - # print("tile_width", tile_width) num_to_sum = 2 ** (max_zoom - z) - # print("num_to_sum", num_to_sum) - # print("data:", data) # add some data so that the data can be divided into squares divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum) divisible_x_pad = divisible_x_width - data.shape[0] - # print("data.shape", data.shape) - - # print("divisible_x_pad:", divisible_x_pad) - # print("len(data)", len(data)) a = np.pad(data, ((0, divisible_x_pad),), 'constant', constant_values=(np.nan,)) ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1) - # print('ret_array:', len(ret_array)) - - # ret_array[ret_array == 0.] = np.nan - # print('ret_array:', ret_array) - - # print("sum:", np.nansum(ret_array)) - - if not_nan_array is not None: - # print("normalizing") - # we want to calculate the means of the data points + if not_nan_array is None: + not_nan_data = ~np.isnan(ret_array[x_start:x_end]) + else: not_nan_data = not_nan_array[x_start:x_end] - na = np.pad(not_nan_data, ((0, divisible_x_pad)), 'constant', - constant_values=(np.nan,)) - norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1) - # print("len:", len(na), len(ret_array), len(norm_array)) - ret_array = ret_array / (norm_array + 1) + # we want to calculate the means of the data points + na = np.pad( + not_nan_data, + ((0, divisible_x_pad)), + 'constant', + constant_values=(np.nan,) + ) + norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1) + ret_array = ret_array / (norm_array + 1) # determine how much to pad the array x_pad = bin_size - ret_array.shape[0] - # print("ret_array:", ret_array.shape) - # print("x_pad:", x_pad) - - return np.pad(ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, )) + return np.pad( + ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, ) + ) From 
76abbced25f01e5ad2b6d52e9ab9ccba379af459 Mon Sep 17 00:00:00 2001 From: Fritz Lekschas Date: Fri, 5 Jul 2019 15:15:38 -0400 Subject: [PATCH 2/2] Fix typo --- clodius/tiles/npvector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clodius/tiles/npvector.py b/clodius/tiles/npvector.py index 6f9cbf46..6ff4930a 100644 --- a/clodius/tiles/npvector.py +++ b/clodius/tiles/npvector.py @@ -123,7 +123,7 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024): ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1) if not_nan_array is None: - not_nan_data = ~np.isnan(ret_array[x_start:x_end]) + not_nan_data = ~np.isnan(array[x_start:x_end]) else: not_nan_data = not_nan_array[x_start:x_end]