Skip to content

Commit

Permalink
add structure to compute the number of text file lines on the fly, but not implemented yet
Browse files Browse the repository at this point in the history
  • Loading branch information
christophmluscher committed Dec 3, 2024
1 parent 03d7a3e commit 79d96b7
Showing 1 changed file with 7 additions and 4 deletions.
11 changes: 7 additions & 4 deletions text/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ def __init__(
self,
text_file: tk.Path,
num_lines_per_split: int,
num_text_file_lines: int,
num_text_file_lines: Optional[int] = None,
zip_output: bool = True,
):
"""
Expand All @@ -336,9 +336,12 @@ def __init__(
self.num_text_file_lines = num_text_file_lines
self.zip_output = zip_output

self.num_output_files = self.num_text_file_lines // self.num_lines_per_split + int(
bool(self.num_text_file_lines % self.num_lines_per_split)
)
if num_text_file_lines is not None:
self.num_output_files = self.num_text_file_lines // self.num_lines_per_split + int(
bool(self.num_text_file_lines % self.num_lines_per_split)
)
else:
raise NotImplementedError

self.out_split_text_files = {
k: self.output_path(f'split.{k:04}.{"txt.gz" if zip_output else "txt"}')
Expand Down

0 comments on commit 79d96b7

Please sign in to comment.