fix(paramserver): file is too large error
This commit fixes a specific case where a json/yaml file uploaded to
the paramserver is stored by the backend in etcd. The issue existed
because the SDK performs a stat to check the size of a file and uses
that alone to decide whether to upload the file as a blob. However,
when the file size is close to the upper limit, i.e. 128 KiB, the API
payload may inflate due to additional metadata and string conversion.
In such cases, even though the file on disk is under 128 KiB, the
final data to be stored in etcd exceeds 128 KiB and etcd throws a
`file is too large` error.

Fixes AB#33921
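
To make the failure mode concrete, here is a minimal sketch, not SDK code: the
'FileNode' type tag and 'text/yaml' content type are illustrative stand-ins for
the payload the SDK builds. A file that passes the stat check can still
serialize past the 128 KiB limit:

import json
import os
import tempfile

MAX_NON_BINARY_SIZE = 128 * 1024  # the 128 KiB etcd value-size limit

# A YAML file a few dozen bytes under the limit: the stat check passes.
with tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False) as f:
    f.write('key: "' + 'x' * (MAX_NON_BINARY_SIZE - 50) + '"\n')
    path = f.name
assert os.stat(path).st_size < MAX_NON_BINARY_SIZE

# But etcd stores the JSON-serialized API payload, not the raw file: quoting,
# escaping, and the metadata fields push the stored value past the limit.
with open(path) as f:
    data = f.read()
payload = json.dumps({'type': 'FileNode', 'data': data, 'contentType': 'text/yaml'})
print(len(payload) > MAX_NON_BINARY_SIZE)  # True -> `file is too large`
os.remove(path)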
pallabpain committed Nov 12, 2024
1 parent f0ebeb3 commit 99e6826
Showing 1 changed file with 43 additions and 13 deletions.

rapyuta_io/clients/paramserver.py
@@ -3,6 +3,7 @@
 import enum
 import errno
 import hashlib
+import json
 import mimetypes
 import os
 import tempfile
@@ -151,10 +152,16 @@ def process_dir(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             if file_name.endswith('.yaml'):
                 data = parse_yaml(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data)
+                if self.should_upload_as_binary(data, self.yaml_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data)
             elif file_name.endswith('.json'):
                 data = parse_json(full_path)
-                future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                if self.should_upload_as_binary(data, self.json_content_type):
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                else:
+                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
             else:
                 future = executor.submit(self.create_binary_file, new_tree_path, full_path)
             file_futures[future] = new_tree_path
@@ -167,22 +174,45 @@ def process_folder(self, executor, rootdir, tree_path, level, dir_futures, file_futures):
             if isdir(full_path):
                 future = executor.submit(self.create_folder, new_tree_path)
                 dir_futures[future] = (new_tree_path, level + 1)
             else:
                 file_stat = os.stat(full_path)
                 file_name = os.path.basename(full_path)
                 if file_stat.st_size > self.max_non_binary_size:
-                    continue
+                    future = executor.submit(self.create_binary_file, new_tree_path, full_path)
                 elif file_name.endswith('.yaml'):
                     data = parse_yaml(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data)
+                    if self.should_upload_as_binary(data, self.yaml_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data)
                 elif file_name.endswith('.json'):
                     data = parse_json(full_path)
-                    future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
+                    if self.should_upload_as_binary(data, self.json_content_type):
+                        future = executor.submit(self.create_binary_file, new_tree_path, full_path)
+                    else:
+                        future = executor.submit(self.create_file, new_tree_path, data, content_type=self.json_content_type)
                 else:
                     future = executor.submit(self.create_binary_file, new_tree_path, full_path)
                 file_futures[future] = new_tree_path
 
         return dir_futures, file_futures
 
+    def should_upload_as_binary(self, filedata, content_type):
+        """Determines if the file should be uploaded as binary based on the file size.
+
+        While the file data may be less than the supported limit, the combined size of
+        the API payload is what is stored by the paramserver. This method calculates
+        the size of the payload and determines if it exceeds the supported limit. If it
+        does, the file is uploaded as a binary to the blob store.
+
+        We cannot entirely rely on the file stat result since the file data is sent as
+        a string in the API payload and the final size inflates when json.dumps is
+        performed on the final payload.
+        """
+        metadata_size_buffer = 100  # In bytes
+        payload = {'type': _Node.File, 'data': filedata, 'contentType': content_type}
+        return len(json.dumps(payload)) + metadata_size_buffer > self.max_non_binary_size
+
     def upload_configurations(self, rootdir, tree_names, delete_existing_trees, as_folder=False):
         self.validate_args(rootdir, tree_names, delete_existing_trees, as_folder)
         with futures.ThreadPoolExecutor(max_workers=15) as executor:
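
The new check can be exercised in isolation. The sketch below re-implements it
outside the SDK under stated assumptions: 'FileNode' stands in for the internal
_Node.File tag, and the 100-byte buffer and 128 KiB limit follow the diff above.

import json

MAX_NON_BINARY_SIZE = 128 * 1024  # mirrors the SDK's max_non_binary_size
METADATA_SIZE_BUFFER = 100        # headroom for metadata outside this payload

def should_upload_as_binary(filedata, content_type):
    # Size the serialized payload, not the on-disk file.
    payload = {'type': 'FileNode', 'data': filedata, 'contentType': content_type}
    return len(json.dumps(payload)) + METADATA_SIZE_BUFFER > MAX_NON_BINARY_SIZE

print(should_upload_as_binary('a' * 1024, 'text/yaml'))                        # False: stored inline
print(should_upload_as_binary('a' * (MAX_NON_BINARY_SIZE - 60), 'text/yaml'))  # True: sent to the blob store

Files that fail this check take the same create_binary_file path as files whose
on-disk size already exceeds the limit.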
