From 7a99a2c26ef953e720249bb4ec817c5eb21f26f1 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 15:53:05 +0000
Subject: [PATCH 01/10] Convert long_name underscore to space and use title
 case

---
 src/sdf_xarray/__init__.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index 711fbef..011b384 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -271,7 +271,7 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
                     dim_name,
                     coord,
                     {
-                        "long_name": label,
+                        "long_name": label.replace("_", " "),
                         "units": unit,
                         "point_data": value.is_point_data,
                         "full_name": value.name,
@@ -290,11 +290,6 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
                 continue
 
             if isinstance(value, Constant) or value.grid is None:
-                data_attrs = {}
-                data_attrs["full_name"] = key
-                if value.units is not None:
-                    data_attrs["units"] = value.units
-
                 # We don't have a grid, either because it's just a
                 # scalar, or because it's an array over something
                 # else. We have no more information, so just make up
@@ -303,6 +298,12 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
                 dims = [f"dim_{key}_{n}" for n, _ in enumerate(shape)]
                 base_name = _rename_with_underscore(key)
 
+                data_attrs = {}
+                data_attrs["full_name"] = key
+                data_attrs["long_name"] = base_name.replace("_", " ")
+                if value.units is not None:
+                    data_attrs["units"] = value.units
+
                 data_vars[base_name] = Variable(dims, value.data, attrs=data_attrs)
                 continue
 
@@ -341,13 +342,15 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
                 ]
 
             # TODO: error handling here? other attributes?
+            base_name = _rename_with_underscore(key)
+            long_name = base_name.replace("_", " ")
             data_attrs = {
                 "units": value.units,
                 "point_data": value.is_point_data,
                 "full_name": key,
+                "long_name": long_name,
             }
             lazy_data = indexing.LazilyIndexedArray(SDFBackendArray(key, self))
-            base_name = _rename_with_underscore(key)
             data_vars[base_name] = Variable(var_coords, lazy_data, data_attrs)
 
         # TODO: might need to decode if mult is set?

From 596ef2dbe44d135e4317c5c43286917859af3b6c Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 15:55:30 +0000
Subject: [PATCH 02/10] Add latex_name and tests

---
 src/sdf_xarray/__init__.py | 12 +++++++++++-
 tests/test_basic.py        | 13 +++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index 011b384..3039235 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -250,6 +250,16 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
             renamed_name = _rename_with_underscore(transformed_name)
             return renamed_name
 
+        def _process_latex_name(variable_name: str) -> str:
+            prefixes = ["E", "B", "J", "P"]
+            suffixes = ["x", "y", "z"]
+            for prefix in prefixes:
+                for suffix in suffixes:
+                    affix = f"{prefix}{suffix}"
+                    if affix in variable_name:
+                        return variable_name.replace(affix, f"{prefix}$_{suffix}$")
+            return variable_name
+
         for key, value in self.ds.grids.items():
             if "cpu" in key.lower():
                 # Had some problems with these variables, so just ignore them for now
@@ -343,7 +353,7 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
 
             # TODO: error handling here? other attributes?
             base_name = _rename_with_underscore(key)
-            long_name = base_name.replace("_", " ")
+            long_name = _process_latex_name(base_name.replace("_", " "))
             data_attrs = {
                 "units": value.units,
                 "point_data": value.is_point_data,
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 83a5f43..a0590d0 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -115,6 +115,19 @@ def test_time_dim_units():
     assert df["time"].full_name == "time"
 
 
+def test_latex_rename_variables():
+    df = xr.open_mfdataset(EXAMPLE_ARRAYS_DIR.glob("*.sdf"), preprocess=SDFPreprocess())
+    assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$"
+    assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$"
+    assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$"
+    assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field B$_x$"
+    assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field B$_y$"
+    assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field B$_z$"
+    assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$"
+    assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$"
+    assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$"
+
+
 def test_arrays_with_no_grids():
     with xr.open_dataset(EXAMPLE_ARRAYS_DIR / "0001.sdf") as df:
         laser_phase = "laser_x_min_phase"

From 10ac19194343baea7d7f0c73969db082a532eabd Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 16:02:40 +0000
Subject: [PATCH 03/10] Fix check for processing latex name in only specific
 conditions

---
 src/sdf_xarray/__init__.py | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index 3039235..dc3924b 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -255,9 +255,16 @@ def _process_latex_name(variable_name: str) -> str:
             suffixes = ["x", "y", "z"]
             for prefix in prefixes:
                 for suffix in suffixes:
-                    affix = f"{prefix}{suffix}"
-                    if affix in variable_name:
-                        return variable_name.replace(affix, f"{prefix}$_{suffix}$")
+                    affix_spaces = f" {prefix}{suffix} "
+                    affix_no_spaces = f" {prefix}{suffix}"
+                    if affix_spaces in variable_name:
+                        return variable_name.replace(
+                            affix_spaces, f" {prefix}$_{suffix} $"
+                        )
+                    elif affix_no_spaces in variable_name:
+                        return variable_name.replace(
+                            affix_no_spaces, f" {prefix}$_{suffix}$"
+                        )
             return variable_name
 
         for key, value in self.ds.grids.items():

From 0c7aea68308c5a882491ce9a6850e64a52611738 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 16:04:20 +0000
Subject: [PATCH 04/10] rename variable to affix_no_trailing_space

---
 src/sdf_xarray/__init__.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index dc3924b..5264ddb 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -256,14 +256,14 @@ def _process_latex_name(variable_name: str) -> str:
             for prefix in prefixes:
                 for suffix in suffixes:
                     affix_spaces = f" {prefix}{suffix} "
-                    affix_no_spaces = f" {prefix}{suffix}"
+                    affix_no_trailing_space = f" {prefix}{suffix}"
                     if affix_spaces in variable_name:
                         return variable_name.replace(
                             affix_spaces, f" {prefix}$_{suffix} $"
                         )
-                    elif affix_no_spaces in variable_name:
+                    elif affix_no_trailing_space in variable_name:
                         return variable_name.replace(
-                            affix_no_spaces, f" {prefix}$_{suffix}$"
+                            affix_no_trailing_space, f" {prefix}$_{suffix}$"
                         )
             return variable_name
 

From 738f06b3bef99c5a9b50bde1c4d35eb0e4362b62 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 16:08:17 +0000
Subject: [PATCH 05/10] Fix incorrect replace for affix_spaces

---
 src/sdf_xarray/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index 5264ddb..f0b07ee 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -259,7 +259,7 @@ def _process_latex_name(variable_name: str) -> str:
                     affix_no_trailing_space = f" {prefix}{suffix}"
                     if affix_spaces in variable_name:
                         return variable_name.replace(
-                            affix_spaces, f" {prefix}$_{suffix} $"
+                            affix_spaces, f" {prefix}$_{suffix}$ "
                         )
                     elif affix_no_trailing_space in variable_name:
                         return variable_name.replace(

From 62ba0597be7c0c9585759705a64920e866164ca8 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Tue, 19 Nov 2024 16:29:54 +0000
Subject: [PATCH 06/10] Add documentation to _process_latex_name

---
 src/sdf_xarray/__init__.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index f0b07ee..949523e 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -251,6 +251,17 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
             return renamed_name
 
         def _process_latex_name(variable_name: str) -> str:
+            """Converts variable names to LaTeX format where possible
+            using the following rules:
+            - E -> E$_x$
+            - E -> E$_y$
+            - E -> E$_z$
+
+            This repeats for B, J and P. It only changes the variable
+            name if there are spaces around the affix (prefix + suffix)
+            or if there is no trailing space. This is to avoid changing variable
+            names that may contain these affixes as part of the variable name itself.
+            """
             prefixes = ["E", "B", "J", "P"]
             suffixes = ["x", "y", "z"]
             for prefix in prefixes:

From 8e76b567fb2cb71a5f282ed0a8536fdd7e0a2170 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Wed, 20 Nov 2024 10:10:38 +0000
Subject: [PATCH 07/10] Update tests

---
 tests/test_basic.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tests/test_basic.py b/tests/test_basic.py
index a0590d0..348d692 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -116,7 +116,11 @@ def test_time_dim_units():
 
 
 def test_latex_rename_variables():
-    df = xr.open_mfdataset(EXAMPLE_ARRAYS_DIR.glob("*.sdf"), preprocess=SDFPreprocess())
+    df = xr.open_mfdataset(
+        EXAMPLE_ARRAYS_DIR.glob("*.sdf"),
+        preprocess=SDFPreprocess(),
+        keep_particles=True,
+    )
     assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$"
     assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$"
     assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$"
@@ -126,6 +130,9 @@ def test_latex_rename_variables():
     assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$"
     assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$"
     assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$"
+    assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles P$_x$ Electron"
+    assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles P$_y$ Electron"
+    assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles P$_z$ Electron"
 
 
 def test_arrays_with_no_grids():

From 14a4b654513d34613ca3e68ae4bb1ba95be9bb04 Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Wed, 20 Nov 2024 13:46:19 +0000
Subject: [PATCH 08/10] refactored _process_latex_name

---
 src/sdf_xarray/__init__.py | 54 ++++++++++++++++++--------------------
 tests/test_basic.py        | 29 +++++++++++---------
 2 files changed, 42 insertions(+), 41 deletions(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index 949523e..f4d4080 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -1,6 +1,8 @@
 import os
 import pathlib
+import re
 from collections import Counter, defaultdict
+from itertools import product
 from typing import Iterable
 
 import numpy as np
@@ -21,6 +23,30 @@ def _rename_with_underscore(name: str) -> str:
     return name.replace("/", "_").replace(" ", "_").replace("-", "_")
 
 
+def _process_latex_name(variable_name: str) -> str:
+    """Converts variable names to LaTeX format where possible
+    using the following rules:
+    - Ex -> $E_x$
+    - Ey -> $E_y$
+    - Ez -> $E_z$
+
+    This repeats for B, J and P. The name is only changed when the
+    affix (prefix + suffix, e.g. Ex) stands on its own: preceded by a
+    space and followed by either a space or the end of the name. This
+    avoids changing names that merely contain an affix inside a longer word.
+    """
+    prefixes = ["E", "B", "J", "P"]
+    suffixes = ["x", "y", "z"]
+    for prefix, suffix in product(prefixes, suffixes):
+        # Match affix with preceding space and trailing space or end of string
+        # and capture the leading/trailing spaces
+        affix_pattern = rf"(\s+){prefix}{suffix}(\s*|$)"
+        # Insert LaTeX format while preserving spaces
+        replacement = rf"\1${prefix}_{suffix}$\2"
+        variable_name = re.sub(affix_pattern, replacement, variable_name)
+    return variable_name
+
+
 def combine_datasets(path_glob: Iterable | str, **kwargs) -> xr.Dataset:
     """Combine all datasets using a single time dimension"""
 
@@ -250,34 +276,6 @@ def _process_grid_name(grid_name: str, transform_func) -> str:
             renamed_name = _rename_with_underscore(transformed_name)
             return renamed_name
 
-        def _process_latex_name(variable_name: str) -> str:
-            """Converts variable names to LaTeX format where possible
-            using the following rules:
-            - E -> E$_x$
-            - E -> E$_y$
-            - E -> E$_z$
-
-            This repeats for B, J and P. It only changes the variable
-            name if there are spaces around the affix (prefix + suffix)
-            or if there is no trailing space. This is to avoid changing variable
-            names that may contain these affixes as part of the variable name itself.
-            """
-            prefixes = ["E", "B", "J", "P"]
-            suffixes = ["x", "y", "z"]
-            for prefix in prefixes:
-                for suffix in suffixes:
-                    affix_spaces = f" {prefix}{suffix} "
-                    affix_no_trailing_space = f" {prefix}{suffix}"
-                    if affix_spaces in variable_name:
-                        return variable_name.replace(
-                            affix_spaces, f" {prefix}$_{suffix}$ "
-                        )
-                    elif affix_no_trailing_space in variable_name:
-                        return variable_name.replace(
-                            affix_no_trailing_space, f" {prefix}$_{suffix}$"
-                        )
-            return variable_name
-
         for key, value in self.ds.grids.items():
             if "cpu" in key.lower():
                 # Had some problems with these variables, so just ignore them for now
diff --git a/tests/test_basic.py b/tests/test_basic.py
index 348d692..03f22c0 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -3,7 +3,7 @@
 import pytest
 import xarray as xr
 
-from sdf_xarray import SDFPreprocess, open_mfdataset
+from sdf_xarray import SDFPreprocess, _process_latex_name, open_mfdataset
 
 EXAMPLE_FILES_DIR = pathlib.Path(__file__).parent / "example_files"
 EXAMPLE_MISMATCHED_FILES_DIR = (
@@ -121,18 +121,21 @@ def test_latex_rename_variables():
         preprocess=SDFPreprocess(),
         keep_particles=True,
     )
-    assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field E$_x$"
-    assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field E$_y$"
-    assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field E$_z$"
-    assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field B$_x$"
-    assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field B$_y$"
-    assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field B$_z$"
-    assert df["Current_Jx"].attrs["long_name"] == "Current J$_x$"
-    assert df["Current_Jy"].attrs["long_name"] == "Current J$_y$"
-    assert df["Current_Jz"].attrs["long_name"] == "Current J$_z$"
-    assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles P$_x$ Electron"
-    assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles P$_y$ Electron"
-    assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles P$_z$ Electron"
+    assert df["Electric_Field_Ex"].attrs["long_name"] == "Electric Field $E_x$"
+    assert df["Electric_Field_Ey"].attrs["long_name"] == "Electric Field $E_y$"
+    assert df["Electric_Field_Ez"].attrs["long_name"] == "Electric Field $E_z$"
+    assert df["Magnetic_Field_Bx"].attrs["long_name"] == "Magnetic Field $B_x$"
+    assert df["Magnetic_Field_By"].attrs["long_name"] == "Magnetic Field $B_y$"
+    assert df["Magnetic_Field_Bz"].attrs["long_name"] == "Magnetic Field $B_z$"
+    assert df["Current_Jx"].attrs["long_name"] == "Current $J_x$"
+    assert df["Current_Jy"].attrs["long_name"] == "Current $J_y$"
+    assert df["Current_Jz"].attrs["long_name"] == "Current $J_z$"
+    assert df["Particles_Px_Electron"].attrs["long_name"] == "Particles $P_x$ Electron"
+    assert df["Particles_Py_Electron"].attrs["long_name"] == "Particles $P_y$ Electron"
+    assert df["Particles_Pz_Electron"].attrs["long_name"] == "Particles $P_z$ Electron"
+
+    assert _process_latex_name("Example") == "Example"
+    assert _process_latex_name("PxTest") == "PxTest"
 
 
 def test_arrays_with_no_grids():

From 90753839043ac0a4099c8cd116b57fc58e34f49d Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Wed, 20 Nov 2024 14:04:47 +0000
Subject: [PATCH 09/10] Add tests for names that should not be LaTeX-ified

---
 tests/test_basic.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/tests/test_basic.py b/tests/test_basic.py
index 03f22c0..f9eb1db 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -137,6 +137,15 @@ def test_latex_rename_variables():
     assert _process_latex_name("Example") == "Example"
     assert _process_latex_name("PxTest") == "PxTest"
 
+    assert (
+        df["Absorption_Fraction_of_Laser_Energy_Absorbed"].attrs["long_name"]
+        == "Absorption Fraction of Laser Energy Absorbed"
+    )
+    assert (
+        df["Derived_Average_Particle_Energy"].attrs["long_name"]
+        == "Derived Average Particle Energy"
+    )
+
 
 def test_arrays_with_no_grids():
     with xr.open_dataset(EXAMPLE_ARRAYS_DIR / "0001.sdf") as df:

From 362e2ab5a38e447e46f8c60e90a9699998c6463f Mon Sep 17 00:00:00 2001
From: JoelLucaAdams <joelucadams@gmail.com>
Date: Thu, 21 Nov 2024 14:07:29 +0000
Subject: [PATCH 10/10] Simplify regex pattern

---
 src/sdf_xarray/__init__.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/sdf_xarray/__init__.py b/src/sdf_xarray/__init__.py
index f4d4080..09ab200 100644
--- a/src/sdf_xarray/__init__.py
+++ b/src/sdf_xarray/__init__.py
@@ -39,10 +39,9 @@ def _process_latex_name(variable_name: str) -> str:
     suffixes = ["x", "y", "z"]
     for prefix, suffix in product(prefixes, suffixes):
         # Match affix with preceding space and trailing space or end of string
-        # and capture the leading/trailing spaces
-        affix_pattern = rf"(\s+){prefix}{suffix}(\s*|$)"
+        affix_pattern = rf"\b{prefix}{suffix}\b"
         # Insert LaTeX format while preserving spaces
-        replacement = rf"\1${prefix}_{suffix}$\2"
+        replacement = rf"${prefix}_{suffix}$"
         variable_name = re.sub(affix_pattern, replacement, variable_name)
     return variable_name