
Commit

fix(paramserver): file is too large error
pallabpain committed Nov 12, 2024
1 parent f0ebeb3 commit 970382f
Showing 1 changed file with 43 additions and 13 deletions.
56 changes: 43 additions & 13 deletions rapyuta_io/clients/paramserver.py
@@ -3,6 +3,7 @@
 import enum
 import errno
 import hashlib
+import json
 import mimetypes
 import os
 import tempfile
@@ -151,10 +152,16 @@ def process_dir(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             if file_name.endswith('.yaml'):
                 data = parse_yaml(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data)
+                if self.should_upload_as_binary(data, self.yaml_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data)
             elif file_name.endswith('.json'):
                 data = parse_json(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                if self.should_upload_as_binary(data, self.json_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
             else:
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             file_futures[future] = new_tree_path
@@ -167,22 +174,45 @@ def process_folder(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
             if isdir(full_path):
                 future = executor.submit(self.create_folder, new_tree_path)
                 dir_futures[future] = (new_tree_path, level + 1)
             else:
-                file_stat = os.stat(full_path)
-                file_name = os.path.basename(full_path)
-                if file_stat.st_size > self.max_non_binary_size:
-                    continue
-                elif file_name.endswith('.yaml'):
-                    data = parse_yaml(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data)
-                elif file_name.endswith('.json'):
-                    data = parse_json(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
-                file_futures[future] = new_tree_path
+                file_stat = os.stat(full_path)
+                file_name = os.path.basename(full_path)
+                if file_stat.st_size > self.max_non_binary_size:
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                elif file_name.endswith('.yaml'):
+                    data = parse_yaml(full_path)
+                    if self.should_upload_as_binary(data, self.yaml_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data)
+                elif file_name.endswith('.json'):
+                    data = parse_json(full_path)
+                    if self.should_upload_as_binary(data, self.json_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                else:
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                file_futures[future] = new_tree_path
 
         return dir_futures, file_futures
 
+    def should_upload_as_binary(self, filedata, content_type):
+        """Determines if the file should be uploaded as binary based on the file size
+
+        While the file data may be less than the supported limit, the combined size of
+        the API payload is what is stored by paramserver. This method calculates the
+        size of the payload and determines if it exceeds the supported limit. If it does,
+        the file is uploaded as a binary to the blob store.
+
+        We cannot entirely rely on the file stat result since the file data is sent as
+        a string in the API payload and the final size inflates when json.dumps is
+        performed on the final payload.
+        """
+        metadata_size_buffer = 100  # In bytes
+        payload = {'type': _Node.File, 'data': filedata, 'contentType': content_type}
+        return len(json.dumps(payload)) + metadata_size_buffer > self.max_non_binary_size
+
     def upload_configurations(self, rootdir, tree_names, delete_existing_trees, as_folder=False):
         self.validate_args(rootdir, tree_names, delete_existing_trees, as_folder)
         with futures.ThreadPoolExecutor(max_workers=15) as executor:
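Note (not part of the commit): the docstring of should_upload_as_binary argues that the size check must be made on the serialized API payload rather than on the file's on-disk size, because json.dumps inflates the data. The standalone sketch below illustrates that reasoning. The 128 KiB limit and the 'FileNode' type string are illustrative placeholders rather than the SDK's actual values, the 100-byte buffer mirrors the one in the diff, and PyYAML stands in for the SDK's parse_yaml helper.

import json
import os
import tempfile

import yaml  # assumption: PyYAML stands in for the SDK's parse_yaml helper

MAX_NON_BINARY_SIZE = 128 * 1024   # hypothetical limit, in bytes; the real value lives in the SDK
METADATA_SIZE_BUFFER = 100         # headroom for the remaining payload fields, as in the diff


def should_upload_as_binary(filedata, content_type):
    """Return True when the serialized API payload would exceed the size limit."""
    # 'FileNode' is a placeholder for the SDK's _Node.File constant.
    payload = {'type': 'FileNode', 'data': filedata, 'contentType': content_type}
    return len(json.dumps(payload)) + METADATA_SIZE_BUFFER > MAX_NON_BINARY_SIZE


if __name__ == '__main__':
    # Write a YAML file that is just under the limit on disk.
    with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f:
        f.write('key: "' + 'x' * (MAX_NON_BINARY_SIZE - 120) + '"\n')
        path = f.name

    with open(path) as f:
        data = yaml.safe_load(f)

    # The on-disk size passes a naive os.stat() check, but the JSON-encoded payload
    # plus the metadata buffer crosses the limit, so the file should go to the blob store.
    payload = {'type': 'FileNode', 'data': data, 'contentType': 'text/yaml'}
    print('size on disk       :', os.stat(path).st_size)       # below MAX_NON_BINARY_SIZE
    print('serialized payload :', len(json.dumps(payload)))    # larger than the file itself
    print('upload as binary?  :', should_upload_as_binary(data, 'text/yaml'))  # True
    os.unlink(path)

The serialization overhead (quoting, key names, escaping) is why os.stat alone under-reports what paramserver actually stores, which matches the rationale given in the new docstring.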
