From 5eaa07445049c67a38153844a93ae99f4ef0c6d4 Mon Sep 17 00:00:00 2001 From: Paul Natsuo Kishimoto Date: Thu, 28 Nov 2024 12:24:12 +0100 Subject: [PATCH] Add fuzzed private data for testing .iea.web 2024 edition --- .../data/test/iea/web/2024-07-25/WBIG1.zip | 3 ++ .../data/test/iea/web/2024-07-25/WBIG2.zip | 3 ++ message_ix_models/testing/cli.py | 28 +++++++++++++------ 3 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 message_ix_models/data/test/iea/web/2024-07-25/WBIG1.zip create mode 100644 message_ix_models/data/test/iea/web/2024-07-25/WBIG2.zip diff --git a/message_ix_models/data/test/iea/web/2024-07-25/WBIG1.zip b/message_ix_models/data/test/iea/web/2024-07-25/WBIG1.zip new file mode 100644 index 0000000000..520fdb95a5 --- /dev/null +++ b/message_ix_models/data/test/iea/web/2024-07-25/WBIG1.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fee0dbac438e255d0faddb7bc43705678180f06bd738ba8434d24017c2449047 +size 8334294 diff --git a/message_ix_models/data/test/iea/web/2024-07-25/WBIG2.zip b/message_ix_models/data/test/iea/web/2024-07-25/WBIG2.zip new file mode 100644 index 0000000000..1f28c86434 --- /dev/null +++ b/message_ix_models/data/test/iea/web/2024-07-25/WBIG2.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14ff34fbbde1b4105df603571da15cb04be073ece885ee9db5cb9fcb9990e749 +size 9897875 diff --git a/message_ix_models/testing/cli.py b/message_ix_models/testing/cli.py index c34e9894f9..a69ad04198 100644 --- a/message_ix_models/testing/cli.py +++ b/message_ix_models/testing/cli.py @@ -16,6 +16,8 @@ def cli(): "iea/372f7e29-en.zip", "iea/8624f431-en.zip", "iea/cac5fa90-en.zip", + "iea/web/2024-07-25/WBIG1.zip", + "iea/web/2024-07-25/WBIG2.zip", "shape/gdp_v1p0.mif", "shape/gdp_v1p1.mif", "shape/gdp_v1p2.mif", @@ -38,8 +40,9 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover """Create random data for testing. This command creates data files in message_ix_models/data/test/… based on - corresponding private files in message_data/data/…. This supports testing of code in - message_ix_models that handles these files. + corresponding private files in either message_data/data/… or the local data + directory. This supports testing of code in message_ix_models that handles these + files. The files are identical in structure and layout, except the values are "fuzzed", or replaced with random values. @@ -55,11 +58,11 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover from numpy import char, random from message_ix_models.project.advance.data import NAME - from message_ix_models.util import package_data_path, private_data_path + from message_ix_models.util import package_data_path, path_fallback # Paths p = Path(filename) - path_in = private_data_path(p) + path_in = path_fallback(p, where="private local") path_out = package_data_path("test", p) # Shared arguments for read_csv() and to_csv() @@ -70,21 +73,28 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover sep = ";" # Read the data + zf_member_name = None with TemporaryDirectory() as td: td_path = Path(td) if "advance" in filename: # Manually unpack one member of the multi-member archive `path_in` + zf_member_name = NAME target: Union[IO, Path, str] = zipfile.ZipFile(path_in).extract( - NAME, path=td_path + zf_member_name, path=td_path ) elif "iea" in filename: # Manually unpack so that dask.dataframe.read_csv() can be used - from message_ix_models.tools.iea.web import unpack_zip + from message_ix_models.tools.iea.web import fwf_to_csv, unpack_zip target = unpack_zip(path_in) + zf_member_name = target.name + if target.suffix == ".TXT": + target = fwf_to_csv(target, progress=True) else: target = path_in + print(f"Read {target}") + # - Read the data # - Use dask & pyarrow. # - Prevent values like "NA" being auto-transformed to np.nan. @@ -127,10 +137,12 @@ def fuzz_private_data(filename, frac: float): # pragma: no cover # Write to file, keeping only a few decimal points path_out.parent.mkdir(parents=True, exist_ok=True) - if "advance" in filename: + if path_out.suffix.lower() == ".zip": zf = zipfile.ZipFile(path_out, "w", compression=zipfile.ZIP_BZIP2) - target = zf.open(NAME) + target = zf.open(zf_member_name, "w") + print(f"Write to member {zf_member_name} in {path_out}") else: target = path_out + print(f"Write to {path_out}") df.to_csv(target, float_format="%.2f", index=False, sep=sep)