Skip to content

Commit

Permalink
Fix count of resulting entries
Browse files (browse the repository at this point in the history)
  • Loading branch information
ferdonline committed Nov 26, 2024
1 parent 3d6d9e6 commit 63ca038
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions tools/rebalance-corenrn-data.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import heapq
import itertools
import logging
import math
import os
import sys

Expand Down Expand Up @@ -113,12 +114,17 @@ def batch(iterable, first=0):
yield group + [CORENRN_SKIP_MARK] * (ranks_per_machine - len(group))
break
yield group
- first, last = last, last + 40
+ first, last = last, last + ranks_per_machine
group = iterable[first:last]

# compute max number of cell groups per rank so we know the n_files in the header
max_len = max(len(m) for m in buckets)
max_groups_rank = math.ceil(max_len / ranks_per_machine)
total_entries = max_groups_rank * ranks_per_machine * len(buckets)

with open(output_file, "w") as out:
print(infos["version"], file=out)
- print(infos["n_files"], file=out)
+ print(total_entries, file=out)

for buckets in itertools.zip_longest(*[batch(m) for m in buckets]):
for entries in buckets:
Expand Down

0 comments on commit 63ca038

Please sign in to comment.