Skip to content

Commit

Permalink
Remove mtime from gzip.compress, use monkeypatch for tests
Browse files Browse the repository at this point in the history
The mtime parameter of gzip.compress is not available in Python < 3.8
  • Loading branch information
michael-kotliar committed Dec 15, 2020
1 parent 0a80928 commit bfbdc12
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 27 deletions.
9 changes: 3 additions & 6 deletions cwl_airflow/utilities/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def yield_file_content(location):
yield line.strip()


def get_compressed(data_str, reset_position=None, mtime=None):
def get_compressed(data_str, reset_position=None):
"""
Converts character string "data_str" as "utf-8" into bytes ("utf-8"
is default encoding for Python3 string). Encoded bytes are then being
Expand All @@ -43,10 +43,7 @@ def get_compressed(data_str, reset_position=None, mtime=None):
failed to dump it with json, assume that "data_str" was a stream, from
where we read content either as "utf-8" or as bytes, depending on the mode
that the file was opened with. In this case if "reset_position" is true,
reset to the beginning of the file. mtime is an optional numeric timestamp
to be written to the last modification time field in the stream when
compressing. If omitted or None, the current time is used. We need this
argument only for reproducible results in unit tests.
reset to the beginning of the file.
"""

reset_position = True if reset_position is None else reset_position
Expand All @@ -64,7 +61,7 @@ def get_compressed(data_str, reset_position=None, mtime=None):
else: # file was opened in a text mode and need to be "utf-8" encoded
data_str_utf = data_str.read().encode("utf-8")
return base64.b64encode(
gzip.compress(data=data_str_utf, mtime=mtime)
gzip.compress(data=data_str_utf)
).decode("utf-8")


Expand Down
40 changes: 19 additions & 21 deletions tests/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tempfile
import zlib
import binascii
import time

from os import environ, path, listdir

Expand Down Expand Up @@ -62,80 +63,77 @@ def test_get_md5_sum(location, control_md5sum):


@pytest.mark.parametrize(
"raw_data, control_data, mtime",
"raw_data, control_data",
[
(
"hello world",
"H4sIAEi9118C/8tIzcnJVyjPL8pJAQCFEUoNCwAAAA==",
1607974216.711175
"H4sIAEi9118C/8tIzcnJVyjPL8pJAQCFEUoNCwAAAA=="
),
(
{"data": "hello world"},
"H4sIAEi9118C/6tWSkksSVSyUlDKSM3JyVcozy/KSVGqBQAnYva2FwAAAA==",
1607974216.711175
"H4sIAEi9118C/6tWSkksSVSyUlDKSM3JyVcozy/KSVGqBQAnYva2FwAAAA=="
)
]
)
def test_get_compressed(raw_data, control_data, mtime):
compressed_data = get_compressed(raw_data, mtime=mtime)
def test_get_compressed(raw_data, control_data, monkeypatch):
monkeypatch.setattr(time, "time", lambda : 1607974216.711175)
compressed_data = get_compressed(raw_data)
assert control_data == compressed_data, \
"Failed to compress data"


@pytest.mark.parametrize(
"location, control_data, reset_position, mtime",
"location, control_data, reset_position",
[
(
path.join(DATA_FOLDER, "jobs", "bam-bedgraph-bigwig.json"),
"H4sIAEi9118C/62NTQrCQAyF93OKkrVMW3DlAbxGiNNoB+aPJoJQevdOR0Hcm+X3vby3mq\
4e3Cji3QeGS7c20qgLJFIRXA91+oqQHanP6XDW9j6Vp0rv5uWM4zBgpFJ4woVpEluroX1u7\
wKosRwxcHro/JdRTBT5U2j1pb9z4qhNjGYzO393NUbuAAAA",
None,
1607974216.711175
None
),
(
path.join(DATA_FOLDER, "jobs", "bam-bedgraph-bigwig.json"),
"H4sIAEi9118C/5WNSwrCQBBE93OKptcyScBVDuA1mnYymoH5kW5BkNxdkxiCS2tbVe8BbE\
EXWQR7wEuIHk8G9iIWxxpKXjprm5DrQ6Vx43Smrm0pca1+oMnzIPbKCdfnvAHwMyuJos93H\
em2kHt4Hez/pZQ5+S/Q6lN/deJ4VXRmNm9wvXwe2gAAAA==",
False,
1607974216.711175
False
)
]
)
def test_get_compressed_from_text_stream(location, control_data, reset_position, mtime):
def test_get_compressed_from_text_stream(location, control_data, reset_position, monkeypatch):
monkeypatch.setattr(time, "time", lambda : 1607974216.711175)
with open(location, "r") as input_stream:
input_stream.read(20) # change position while reading from file
compressed_data = get_compressed(input_stream, reset_position, mtime)
compressed_data = get_compressed(input_stream, reset_position)
assert control_data == compressed_data, \
"Failed to compress data"


@pytest.mark.parametrize(
"location, control_data, reset_position, mtime",
"location, control_data, reset_position",
[
(
path.join(DATA_FOLDER, "jobs", "bam-bedgraph-bigwig.json"),
"H4sIAEi9118C/62NTQrCQAyF93OKkrVMW3DlAbxGiNNoB+aPJoJQevdOR0Hcm+X3vby3mq\
4e3Cji3QeGS7c20qgLJFIRXA91+oqQHanP6XDW9j6Vp0rv5uWM4zBgpFJ4woVpEluroX1u7\
wKosRwxcHro/JdRTBT5U2j1pb9z4qhNjGYzO393NUbuAAAA",
None,
1607974216.711175
None
),
(
path.join(DATA_FOLDER, "jobs", "bam-bedgraph-bigwig.json"),
"FH4sIAEi9118C/5WNSwrCQBBE93OKptcyScBVDuA1mnYymoH5kW5BkNxdkxiCS2tbVe8Bb\
EEXWQR7wEuIHk8G9iIWxxpKXjprm5DrQ6Vx43Smrm0pca1+oMnzIPbKCdfnvAHwMyuJos93\
Hem2kHt4Hez/pZQ5+S/Q6lN/deJ4VXRmNm9wvXwe2gAAAA==",
False,
1607974216.711175
False
)
]
)
def test_get_compressed_from_binary_stream(location, control_data, reset_position, mtime):
def test_get_compressed_from_binary_stream(location, control_data, reset_position, monkeypatch):
monkeypatch.setattr(time, "time", lambda : 1607974216.711175)
with open(location, "rb") as input_stream:
input_stream.read(20) # change position while reading from file
compressed_data = get_compressed(input_stream, reset_position, mtime)
compressed_data = get_compressed(input_stream, reset_position)
assert control_data == compressed_data, \
"Failed to compress data"

Expand Down

0 comments on commit bfbdc12

Please sign in to comment.