Merge pull request #101 from higlass/flekschas/simplify-npvector

Calculate the not-NaN array on the fly if not available
higlass · Jul 5, 2019 · 598c673 · 598c673
2 parents 50174e6 + 76abbce
commit 598c673
Showing 1 changed file with 17 additions and 30 deletions.
diff --git a/clodius/tiles/npvector.py b/clodius/tiles/npvector.py
@@ -17,8 +17,7 @@ def tiles_wrapper(array, tile_ids, not_nan_array=None):
 
         ret_array = tiles(array, z, x, not_nan_array).reshape((-1))
 
-        tile_values += [(tile_id,
-                         ctf.format_dense_tile(ret_array))]
+        tile_values += [(tile_id, ctf.format_dense_tile(ret_array))]
 
     return tile_values
 
@@ -105,54 +104,42 @@ def tiles(array, z, x, not_nan_array=None, bin_size=1024):
         The x tile position
     not_nan_array: np.array
         An array storing the number of values which are not nan
-            in the original array
+        in the original array. Can be precalculated for speed.
     bin_size: int
         The number of values per bin
     '''
-    # print("max_dim", max_dim)
     max_zoom, x_start, x_end = max_zoom_and_data_bounds(array, z, x, bin_size)
     data = array[x_start:x_end]
 
-    # print("tile_width", tile_width)
     num_to_sum = 2 ** (max_zoom - z)
-    # print("num_to_sum", num_to_sum)
-    # print("data:", data)
 
     # add some data so that the data can be divided into squares
     divisible_x_width = num_to_sum * math.ceil(data.shape[0] / num_to_sum)
     divisible_x_pad = divisible_x_width - data.shape[0]
-    # print("data.shape", data.shape)
-
-    # print("divisible_x_pad:", divisible_x_pad)
-    # print("len(data)", len(data))
 
     a = np.pad(data, ((0, divisible_x_pad),), 'constant',
                constant_values=(np.nan,))
 
     ret_array = np.nansum(a.reshape((-1, num_to_sum)), axis=1)
 
-    # print('ret_array:', len(ret_array))
-
-    # ret_array[ret_array == 0.] = np.nan
-    # print('ret_array:', ret_array)
-
-    # print("sum:", np.nansum(ret_array))
-
-    if not_nan_array is not None:
-        # print("normalizing")
-        # we want to calculate the means of the data points
+    if not_nan_array is None:
+        not_nan_data = ~np.isnan(array[x_start:x_end])
+    else:
         not_nan_data = not_nan_array[x_start:x_end]
-        na = np.pad(not_nan_data, ((0, divisible_x_pad)), 'constant',
-                    constant_values=(np.nan,))
-        norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
-        # print("len:", len(na), len(ret_array), len(norm_array))
 
-        ret_array = ret_array / (norm_array + 1)
+    # we want to calculate the means of the data points
+    na = np.pad(
+        not_nan_data,
+        ((0, divisible_x_pad)),
+        'constant',
+        constant_values=(np.nan,)
+    )
+    norm_array = np.nansum(na.reshape((-1, num_to_sum)), axis=1)
+    ret_array = ret_array / (norm_array + 1)
 
     # determine how much to pad the array
     x_pad = bin_size - ret_array.shape[0]
 
-    # print("ret_array:", ret_array.shape)
-    # print("x_pad:", x_pad)
-
-    return np.pad(ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, ))
+    return np.pad(
+        ret_array, ((0, x_pad)), 'constant', constant_values=(np.nan, )
+    )