From d7b155246b665528a2c7b43fcbced8c69888f539 Mon Sep 17 00:00:00 2001 From: Dimitris Rozakis Date: Sun, 13 Nov 2022 22:49:06 +0200 Subject: [PATCH] Optimize _encode_multipart_message (#75) * Optimize _encode_multipart_message By using BytesIO while generating the body instead of continuous string concatenations. This drastically reduces execution time by not recreating the string for each concatenation. In one case, execution time fell from 140 seconds to 0.14; three orders of magnitude! * Fix comment about default chunk_size in MB --- src/dicomweb_client/web.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/dicomweb_client/web.py b/src/dicomweb_client/web.py index 448dd1a..342fc55 100644 --- a/src/dicomweb_client/web.py +++ b/src/dicomweb_client/web.py @@ -233,7 +233,7 @@ def __init__( Maximum number of bytes that should be transferred per data chunk when streaming data from the server using chunked transfer encoding (used by ``iter_*()`` methods as well as the ``store_instances()`` - method); defaults to ``10**6`` bytes (10MB) + method); defaults to ``10**6`` bytes (1MB) Warning ------- @@ -743,13 +743,15 @@ def _encode_multipart_message( raise ValueError( 'No "boundary" parameter in found in content-type field' ) - body = b'' - for part in content: - body += f'\r\n--{boundary}'.encode('utf-8') - body += f'\r\nContent-Type: {content_type}\r\n\r\n'.encode('utf-8') - body += part - body += f'\r\n--{boundary}--'.encode('utf-8') - return body + with BytesIO() as b: + for part in content: + b.write(f'\r\n--{boundary}'.encode('utf-8')) + b.write( + f'\r\nContent-Type: {content_type}\r\n\r\n'.encode('utf-8') + ) + b.write(part) + b.write(f'\r\n--{boundary}--'.encode('utf-8')) + return b.getvalue() @classmethod def _assert_media_type_is_valid(cls, media_type: str):