From 362652b86ad9099509f8380c5fb2807d9202eb55 Mon Sep 17 00:00:00 2001 From: artur Date: Fri, 25 Oct 2024 22:11:08 +0200 Subject: [PATCH] Fixed upload scripts Updated README.md --- README.md | 4 ++-- utils/fileupload.py | 14 +++++++------- utils/fileupload.sh | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 6eb7969..fbe0c85 100644 --- a/README.md +++ b/README.md @@ -41,12 +41,12 @@ docker-compose -f docker-compose.dev.yaml up --build File uploading using bash: ``` -bash ./utils/fileupload.sh http://localhost:8082 ~/rnd/data/govdocs_fits/govdocs1/000/ +bash ./utils/fileupload.sh http://localhost:8082 ~/path/to/files collection_name ``` File uploading using python (pip package requests in necessary): ``` -python ./utils/fileupload.py http://localhost:8082/multipleupload ~/rnd/data/govdocs_fits/govdocs1/000/ 100 3 +python ./utils/fileupload.py http://localhost:8082/multipleupload ~/path/to/files 100 3 collection_name ``` ## Issues diff --git a/utils/fileupload.py b/utils/fileupload.py index 45393a0..781f041 100644 --- a/utils/fileupload.py +++ b/utils/fileupload.py @@ -19,7 +19,7 @@ def upload_chunk(url, chunk_files, chunk_count): return end_time - start_time -def upload_files_in_chunks_parallel(url, folder_path, chunk_size=100, num_parallel_requests=10): +def upload_files_in_chunks_parallel(url, folder_path, chunk_size=100, num_parallel_requests=10, collection_name="dataset"): headers = { 'accept': '*/*', } @@ -37,7 +37,7 @@ def upload_files_in_chunks_parallel(url, folder_path, chunk_size=100, num_parall if len(chunk_files) == chunk_size: with ThreadPoolExecutor(max_workers=num_parallel_requests) as executor: - future = executor.submit(upload_chunk, url, chunk_files, (chunk_count+1)*chunk_size) + future = executor.submit(upload_chunk, url + f"?datasetName={collection_name}", chunk_files, (chunk_count+1)*chunk_size) duration = future.result() total_duration += duration @@ -47,25 +47,25 @@ def upload_files_in_chunks_parallel(url, folder_path, chunk_size=100, num_parall if chunk_files: with ThreadPoolExecutor(max_workers=num_parallel_requests) as executor: - future = executor.submit(upload_chunk, url, chunk_files, chunk_count) + future = executor.submit(upload_chunk, url + f"?datasetName={collection_name}", chunk_files, chunk_count) duration = future.result() total_duration += duration return total_duration if __name__ == "__main__": - if len(sys.argv) != 5: - print("Usage: python script.py ") + if len(sys.argv) != 6: + print("Usage: python script.py ") sys.exit(1) upload_url = sys.argv[1] folder_to_upload = sys.argv[2] chunk_size = int(sys.argv[3]) num_parallel_requests = int(sys.argv[4]) - + collection_name = sys.argv[5] start_script_time = time.time() - total_duration = upload_files_in_chunks_parallel(upload_url, folder_to_upload, chunk_size, num_parallel_requests) + total_duration = upload_files_in_chunks_parallel(upload_url, folder_to_upload, chunk_size, num_parallel_requests, collection_name) end_script_time = time.time() script_duration = end_script_time - start_script_time diff --git a/utils/fileupload.sh b/utils/fileupload.sh index 022ad26..efa00aa 100644 --- a/utils/fileupload.sh +++ b/utils/fileupload.sh @@ -9,7 +9,7 @@ do (( COUNTER++ )) (( t=t%PARALLEL_TASKS )); ((t++==0)) && wait curl --silent --output /dev/null --show-error -X "POST" \ - "${1}/upload" \ + "${1}/upload?datasetName=${3}" \ -H "accept: */*" \ -H "Content-Type: multipart/form-data" \ -F "file=@${i}" &