From 7a99a2c26ef953e720249bb4ec817c5eb21f26f1 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 15:53:05 +0000 Subject: [PATCH 01/10] Convert long_name underscore to space and use title case --- src/sdf_xarray/__init__.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 711fbef..011b384 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -271,7 +271,7 @@ def _process_grid_name(grid_name: str, transform_func) -> str: dim_name, coord, { - "long_name": label, + "long_name": label.replace("_", " "), "units": unit, "point_data": value.is_point_data, "full_name": value.name, @@ -290,11 +290,6 @@ def _process_grid_name(grid_name: str, transform_func) -> str: continue if isinstance(value, Constant) or value.grid is None: - data_attrs = {} - data_attrs["full_name"] = key - if value.units is not None: - data_attrs["units"] = value.units - # We don't have a grid, either because it's just a # scalar, or because it's an array over something # else. We have no more information, so just make up @@ -303,6 +298,12 @@ def _process_grid_name(grid_name: str, transform_func) -> str: dims = [f"dim_{key}_{n}" for n, _ in enumerate(shape)] base_name = _rename_with_underscore(key) + data_attrs = {} + data_attrs["full_name"] = key + data_attrs["long_name"] = base_name.replace("_", " ") + if value.units is not None: + data_attrs["units"] = value.units + data_vars[base_name] = Variable(dims, value.data, attrs=data_attrs) continue @@ -341,13 +342,15 @@ def _process_grid_name(grid_name: str, transform_func) -> str: ] # TODO: error handling here? other attributes? + base_name = _rename_with_underscore(key) + long_name = base_name.replace("_", " ") data_attrs = { "units": value.units, "point_data": value.is_point_data, "full_name": key, + "long_name": long_name, } lazy_data = indexing.LazilyIndexedArray(SDFBackendArray(key, self)) - base_name = _rename_with_underscore(key) data_vars[base_name] = Variable(var_coords, lazy_data, data_attrs) # TODO: might need to decode if mult is set? From 596ef2dbe44d135e4317c5c43286917859af3b6c Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 15:55:30 +0000 Subject: [PATCH 02/10] Add latex_name and tests --- src/sdf_xarray/__init__.py | 12 +++++++++++- tests/test_basic.py | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 011b384..3039235 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -250,6 +250,16 @@ def _process_grid_name(grid_name: str, transform_func) -> str: renamed_name = _rename_with_underscore(transformed_name) return renamed_name + def _process_latex_name(variable_name: str) -> str: + prefixes = ["E", "B", "J", "P"] + suffixes = ["x", "y", "z"] + for prefix in prefixes: + for suffix in suffixes: + affix = f"{prefix}{suffix}" + if affix in variable_name: + return variable_name.replace(affix, f"{prefix}$_{suffix}$") + return variable_name + for key, value in self.ds.grids.items(): if "cpu" in key.lower(): # Had some problems with these variables, so just ignore them for now @@ -343,7 +353,7 @@ def _process_grid_name(grid_name: str, transform_func) -> str: # TODO: error handling here? other attributes? base_name = _rename_with_underscore(key) - long_name = base_name.replace("_", " ") + long_name = _process_latex_name(base_name.replace("_", " ")) data_attrs = { "units": value.units, "point_data": value.is_point_data, diff --git a/tests/test_basic.py b/tests/test_basic.py index 83a5f43..a0590d0 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -115,6 +115,19 @@ def test_time_dim_units(): assert df["time"].full_name == "time" +def test_latex_rename_variables(): + df = xr.open_mfdataset(EXAMPLE_ARRAYS_DIR.glob("*.sdf"), preprocess=SDFPreprocess()) + assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$" + assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$" + assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$" + assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field B$_x$" + assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field B$_y$" + assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field B$_z$" + assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$" + assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$" + assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$" + + def test_arrays_with_no_grids(): with xr.open_dataset(EXAMPLE_ARRAYS_DIR / "0001.sdf") as df: laser_phase = "laser_x_min_phase" From 10ac19194343baea7d7f0c73969db082a532eabd Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 16:02:40 +0000 Subject: [PATCH 03/10] Fix check for processing latex name in only specific conditions --- src/sdf_xarray/__init__.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 3039235..dc3924b 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -255,9 +255,16 @@ def _process_latex_name(variable_name: str) -> str: suffixes = ["x", "y", "z"] for prefix in prefixes: for suffix in suffixes: - affix = f"{prefix}{suffix}" - if affix in variable_name: - return variable_name.replace(affix, f"{prefix}$_{suffix}$") + affix_spaces = f" {prefix}{suffix} " + affix_no_spaces = f" {prefix}{suffix}" + if affix_spaces in variable_name: + return variable_name.replace( + affix_spaces, f" {prefix}$_{suffix} $" + ) + elif affix_no_spaces in variable_name: + return variable_name.replace( + affix_no_spaces, f" {prefix}$_{suffix}$" + ) return variable_name for key, value in self.ds.grids.items(): From 0c7aea68308c5a882491ce9a6850e64a52611738 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 16:04:20 +0000 Subject: [PATCH 04/10] rename variable to affix_no_trailing_space --- src/sdf_xarray/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index dc3924b..5264ddb 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -256,14 +256,14 @@ def _process_latex_name(variable_name: str) -> str: for prefix in prefixes: for suffix in suffixes: affix_spaces = f" {prefix}{suffix} " - affix_no_spaces = f" {prefix}{suffix}" + affix_no_trailing_space = f" {prefix}{suffix}" if affix_spaces in variable_name: return variable_name.replace( affix_spaces, f" {prefix}$_{suffix} $" ) - elif affix_no_spaces in variable_name: + elif affix_no_trailing_space in variable_name: return variable_name.replace( - affix_no_spaces, f" {prefix}$_{suffix}$" + affix_no_trailing_space, f" {prefix}$_{suffix}$" ) return variable_name From 738f06b3bef99c5a9b50bde1c4d35eb0e4362b62 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 16:08:17 +0000 Subject: [PATCH 05/10] Fix incorrect replace for affix_spaces --- src/sdf_xarray/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 5264ddb..f0b07ee 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -259,7 +259,7 @@ def _process_latex_name(variable_name: str) -> str: affix_no_trailing_space = f" {prefix}{suffix}" if affix_spaces in variable_name: return variable_name.replace( - affix_spaces, f" {prefix}$_{suffix} $" + affix_spaces, f" {prefix}$_{suffix}$ " ) elif affix_no_trailing_space in variable_name: return variable_name.replace( From 62ba0597be7c0c9585759705a64920e866164ca8 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Tue, 19 Nov 2024 16:29:54 +0000 Subject: [PATCH 06/10] Add documentation to _process_latex_name --- src/sdf_xarray/__init__.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index f0b07ee..949523e 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -251,6 +251,17 @@ def _process_grid_name(grid_name: str, transform_func) -> str: return renamed_name def _process_latex_name(variable_name: str) -> str: + """Converts variable names to LaTeX format where possible + using the following rules: + - E -> E$_x$ + - E -> E$_y$ + - E -> E$_z$ + + This repeats for B, J and P. It only changes the variable + name if there are spaces around the affix (prefix + suffix) + or if there is no trailing space. This is to avoid changing variable + names that may contain these affixes as part of the variable name itself. + """ prefixes = ["E", "B", "J", "P"] suffixes = ["x", "y", "z"] for prefix in prefixes: From 8e76b567fb2cb71a5f282ed0a8536fdd7e0a2170 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Wed, 20 Nov 2024 10:10:38 +0000 Subject: [PATCH 07/10] Update tests --- tests/test_basic.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/test_basic.py b/tests/test_basic.py index a0590d0..348d692 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -116,7 +116,11 @@ def test_time_dim_units(): def test_latex_rename_variables(): - df = xr.open_mfdataset(EXAMPLE_ARRAYS_DIR.glob("*.sdf"), preprocess=SDFPreprocess()) + df = xr.open_mfdataset( + EXAMPLE_ARRAYS_DIR.glob("*.sdf"), + preprocess=SDFPreprocess(), + keep_particles=True, + ) assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$" assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$" assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$" @@ -126,6 +130,9 @@ def test_latex_rename_variables(): assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$" assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$" assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$" + assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles P$_x$ Electron" + assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles P$_y$ Electron" + assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles P$_z$ Electron" def test_arrays_with_no_grids(): From 14a4b654513d34613ca3e68ae4bb1ba95be9bb04 Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Wed, 20 Nov 2024 13:46:19 +0000 Subject: [PATCH 08/10] refactored _process_latex_name --- src/sdf_xarray/__init__.py | 54 ++++++++++++++++++-------------------- tests/test_basic.py | 29 +++++++++++--------- 2 files changed, 42 insertions(+), 41 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index 949523e..f4d4080 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -1,6 +1,8 @@ import os import pathlib +import re from collections import Counter, defaultdict +from itertools import product from typing import Iterable import numpy as np @@ -21,6 +23,30 @@ def _rename_with_underscore(name: str) -> str: return name.replace("/", "_").replace(" ", "_").replace("-", "_") +def _process_latex_name(variable_name: str) -> str: + """Converts variable names to LaTeX format where possible + using the following rules: + - E -> $E_x$ + - E -> $E_y$ + - E -> $E_z$ + + This repeats for B, J and P. It only changes the variable + name if there are spaces around the affix (prefix + suffix) + or if there is no trailing space. This is to avoid changing variable + names that may contain these affixes as part of the variable name itself. + """ + prefixes = ["E", "B", "J", "P"] + suffixes = ["x", "y", "z"] + for prefix, suffix in product(prefixes, suffixes): + # Match affix with preceding space and trailing space or end of string + # and capture the leading/trailing spaces + affix_pattern = rf"(\s+){prefix}{suffix}(\s*|$)" + # Insert LaTeX format while preserving spaces + replacement = rf"\1${prefix}_{suffix}$\2" + variable_name = re.sub(affix_pattern, replacement, variable_name) + return variable_name + + def combine_datasets(path_glob: Iterable | str, **kwargs) -> xr.Dataset: """Combine all datasets using a single time dimension""" @@ -250,34 +276,6 @@ def _process_grid_name(grid_name: str, transform_func) -> str: renamed_name = _rename_with_underscore(transformed_name) return renamed_name - def _process_latex_name(variable_name: str) -> str: - """Converts variable names to LaTeX format where possible - using the following rules: - - E -> E$_x$ - - E -> E$_y$ - - E -> E$_z$ - - This repeats for B, J and P. It only changes the variable - name if there are spaces around the affix (prefix + suffix) - or if there is no trailing space. This is to avoid changing variable - names that may contain these affixes as part of the variable name itself. - """ - prefixes = ["E", "B", "J", "P"] - suffixes = ["x", "y", "z"] - for prefix in prefixes: - for suffix in suffixes: - affix_spaces = f" {prefix}{suffix} " - affix_no_trailing_space = f" {prefix}{suffix}" - if affix_spaces in variable_name: - return variable_name.replace( - affix_spaces, f" {prefix}$_{suffix}$ " - ) - elif affix_no_trailing_space in variable_name: - return variable_name.replace( - affix_no_trailing_space, f" {prefix}$_{suffix}$" - ) - return variable_name - for key, value in self.ds.grids.items(): if "cpu" in key.lower(): # Had some problems with these variables, so just ignore them for now diff --git a/tests/test_basic.py b/tests/test_basic.py index 348d692..03f22c0 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -3,7 +3,7 @@ import pytest import xarray as xr -from sdf_xarray import SDFPreprocess, open_mfdataset +from sdf_xarray import SDFPreprocess, _process_latex_name, open_mfdataset EXAMPLE_FILES_DIR = pathlib.Path(__file__).parent / "example_files" EXAMPLE_MISMATCHED_FILES_DIR = ( @@ -121,18 +121,21 @@ def test_latex_rename_variables(): preprocess=SDFPreprocess(), keep_particles=True, ) - assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$" - assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$" - assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$" - assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field B$_x$" - assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field B$_y$" - assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field B$_z$" - assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$" - assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$" - assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$" - assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles P$_x$ Electron" - assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles P$_y$ Electron" - assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles P$_z$ Electron" + assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field $E_x$" + assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field $E_y$" + assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field $E_z$" + assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field $B_x$" + assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field $B_y$" + assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field $B_z$" + assert df["Current_Jx"].attrs["long_name"] == "Current $J_x$" + assert df["Current_Jy"].attrs["long_name"] == "Current $J_y$" + assert df["Current_Jz"].attrs["long_name"] == "Current $J_z$" + assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles $P_x$ Electron" + assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles $P_y$ Electron" + assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles $P_z$ Electron" + + assert _process_latex_name("Example") == "Example" + assert _process_latex_name("PxTest") == "PxTest" def test_arrays_with_no_grids(): From 90753839043ac0a4099c8cd116b57fc58e34f49d Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Wed, 20 Nov 2024 14:04:47 +0000 Subject: [PATCH 09/10] Add tests for non latex-ifying certain names --- tests/test_basic.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/test_basic.py b/tests/test_basic.py index 03f22c0..f9eb1db 100644 --- a/tests/test_basic.py +++ b/tests/test_basic.py @@ -137,6 +137,15 @@ def test_latex_rename_variables(): assert _process_latex_name("Example") == "Example" assert _process_latex_name("PxTest") == "PxTest" + assert ( + df["Absorption_Fraction_of_Laser_Energy_Absorbed"].attrs["long_name"] + == "Absorption Fraction of Laser Energy Absorbed" + ) + assert ( + df["Derived_Average_Particle_Energy"].attrs["long_name"] + == "Derived Average Particle Energy" + ) + def test_arrays_with_no_grids(): with xr.open_dataset(EXAMPLE_ARRAYS_DIR / "0001.sdf") as df: From 362e2ab5a38e447e46f8c60e90a9699998c6463f Mon Sep 17 00:00:00 2001 From: JoelLucaAdams Date: Thu, 21 Nov 2024 14:07:29 +0000 Subject: [PATCH 10/10] Simplify regex pattern --- src/sdf_xarray/__init__.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py index f4d4080..09ab200 100644 --- a/src/sdf_xarray/__init__.py +++ b/src/sdf_xarray/__init__.py @@ -39,10 +39,9 @@ def _process_latex_name(variable_name: str) -> str: suffixes = ["x", "y", "z"] for prefix, suffix in product(prefixes, suffixes): # Match affix with preceding space and trailing space or end of string - # and capture the leading/trailing spaces - affix_pattern = rf"(\s+){prefix}{suffix}(\s*|$)" + affix_pattern = rf"\b{prefix}{suffix}\b" # Insert LaTeX format while preserving spaces - replacement = rf"\1${prefix}_{suffix}$\2" + replacement = rf"${prefix}_{suffix}$" variable_name = re.sub(affix_pattern, replacement, variable_name) return variable_name