fix(paramserver): file is too large error #98

Merged
merged 1 commit on Nov 12, 2024
Changes from all commits
56 changes: 43 additions & 13 deletions rapyuta_io/clients/paramserver.py
@@ -3,6 +3,7 @@
 import enum
 import errno
 import hashlib
+import json
 import mimetypes
 import os
 import tempfile
@@ -151,10 +152,16 @@ def process_dir(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             if file_name.endswith('.yaml'):
                 data = parse_yaml(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data)
+                if self.should_upload_as_binary(data, self.yaml_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data)
             elif file_name.endswith('.json'):
                 data = parse_json(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                if self.should_upload_as_binary(data, self.json_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
             else:
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             file_futures[future] = new_tree_path
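For reference, here is a standalone sketch of the routing rule this hunk (and the matching change to process_folder below) applies to YAML and JSON files. The 128 KiB limit, the 200-byte buffer, the 'File' node-type string, and the content-type values are illustrative stand-ins, not values confirmed by the diff:

    # Illustrative sketch only: mirrors the decision order introduced by this PR.
    import json
    import os

    import yaml  # PyYAML, used here just to parse the sample file

    MAX_NON_BINARY_SIZE = 128 * 1024  # assumed limit, in bytes
    METADATA_SIZE_BUFFER = 200        # assumed metadata allowance, in bytes


    def choose_upload_path(full_path):
        """Return 'binary' or 'inline' for a local file, following the diff's logic."""
        if os.stat(full_path).st_size > MAX_NON_BINARY_SIZE:
            return 'binary'  # the raw file already exceeds the limit
        if full_path.endswith('.yaml') or full_path.endswith('.json'):
            with open(full_path) as f:
                data = yaml.safe_load(f) if full_path.endswith('.yaml') else json.load(f)
            content_type = 'text/yaml' if full_path.endswith('.yaml') else 'application/json'
            payload = {'type': 'File', 'data': data, 'contentType': content_type}
            # The serialized API payload, not the on-disk file, is what paramserver stores.
            if len(json.dumps(payload)) + METADATA_SIZE_BUFFER > MAX_NON_BINARY_SIZE:
                return 'binary'
            return 'inline'
        return 'binary'  # every other extension goes to the blob store as binary

In this sketch, choose_upload_path('params.yaml') returns 'binary' for a file whose parsed payload serializes past the limit even though the file itself fits under it.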
@@ -167,22 +174,45 @@ def process_folder(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
             if isdir(full_path):
                 future = executor.submit(self.create_folder, new_tree_path)
                 dir_futures[future] = (new_tree_path, level + 1)
             else:
                 file_stat = os.stat(full_path)
                 file_name = os.path.basename(full_path)
                 if file_stat.st_size > self.max_non_binary_size:
-                    continue
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
                 elif file_name.endswith('.yaml'):
                     data = parse_yaml(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data)
+                    if self.should_upload_as_binary(data, self.yaml_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data)
                 elif file_name.endswith('.json'):
                     data = parse_json(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                    if self.should_upload_as_binary(data, self.json_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
                 else:
                     future = executor.submit(self.create_binary_file, new_tree_path, full_path)
                 file_futures[future] = new_tree_path

         return dir_futures, file_futures

+    def should_upload_as_binary(self, filedata, content_type):
+        """Determines if the file should be uploaded as binary based on the file size
+
+        While the file data may be less than the supported limit, the combined size of
+        the API payload is what is stored by paramserver. This method calculates the
+        size of the payload and determines if it exceeds the supported limit. If it does,
+        the file is uploaded as a binary to the blob store.
+
+        We cannot entirely rely on the file stat result since the file data is sent as
+        a string in the API payload and the final size inflates when json.dumps is
+        performed on the final payload.
+        """
+        metadata_size_buffer = 200  # In bytes
+        payload = {'type': _Node.File, 'data': filedata, 'contentType': content_type}
+        return len(json.dumps(payload)) + metadata_size_buffer > self.max_non_binary_size
+
     def upload_configurations(self, rootdir, tree_names, delete_existing_trees, as_folder=False):
         self.validate_args(rootdir, tree_names, delete_existing_trees, as_folder)
         with futures.ThreadPoolExecutor(max_workers=15) as executor:
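The docstring above explains why the on-disk size alone is not trusted: json.dumps of the API payload can come out larger than the file itself. The following self-contained snippet (not SDK code) writes a small YAML file and compares its size on disk with the size of the serialized payload; the 'File' node-type string and the 200-byte buffer are placeholders mirroring the diff:

    # Standalone demonstration of payload inflation under json.dumps.
    import json
    import os
    import tempfile

    import yaml  # PyYAML

    # Values with non-ASCII text: json.dumps (ensure_ascii=True by default) escapes
    # each such character to a six-character \uXXXX sequence, while YAML stores UTF-8.
    data = {'greeting_{}'.format(i): 'こんにちは世界' for i in range(200)}

    with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False, encoding='utf-8') as f:
        yaml.safe_dump(data, f, allow_unicode=True)
        path = f.name

    file_size = os.stat(path).st_size
    payload = {'type': 'File', 'data': data, 'contentType': 'text/yaml'}  # placeholder node type
    payload_size = len(json.dumps(payload)) + 200  # 200-byte metadata buffer, as in the diff

    print('on disk:', file_size, 'bytes; serialized payload:', payload_size, 'bytes')
    os.remove(path)

On a typical run the serialized payload is noticeably larger than the file, which is exactly the case should_upload_as_binary now routes to create_binary_file.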
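upload_configurations fans the per-file work out over a thread pool of 15 workers. The sketch below is plain concurrent.futures usage with a stub standing in for the client's create_file / create_binary_file submissions; it shows the pattern, not the SDK's exact code:

    # Generic fan-out sketch; upload_one is a stub for the real upload call.
    from concurrent import futures


    def upload_one(tree_path):
        # Placeholder: the real client performs an HTTP upload here.
        return tree_path


    paths = ['tree/a.yaml', 'tree/b.json', 'tree/model.bin']
    with futures.ThreadPoolExecutor(max_workers=15) as executor:
        file_futures = {executor.submit(upload_one, p): p for p in paths}
        for future in futures.as_completed(file_futures):
            print('uploaded', file_futures[future], '->', future.result())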