Optimize summary reading

equinor · Jan 4, 2024 · 50664cb
1 parent 4526a60
commit 50664cb
Showing 1 changed file with 25 additions and 27 deletions.
diff --git a/src/ert/config/_read_summary.py b/src/ert/config/_read_summary.py
@@ -257,12 +257,8 @@ def read_summary(
     filepath: str, fetch_keys: Sequence[str]
 ) -> Tuple[List[str], Sequence[datetime], Any]:
     summary, spec = _get_summary_filenames(filepath)
-    date_index, start_date, date_units, keys, key_indecies = _read_spec(
-        spec, fetch_keys
-    )
-    fetched, time_map = _read_summary(
-        summary, start_date, date_units, key_indecies, date_index
-    )
+    date_index, start_date, date_units, keys, mask = _read_spec(spec, fetch_keys)
+    fetched, time_map = _read_summary(summary, start_date, date_units, mask, date_index)
 
     return (keys, time_map, fetched)
 
@@ -278,7 +274,7 @@ def _key2str(key: Union[bytes, str]) -> str:
 
 def _read_spec(
     spec: str, fetch_keys: Sequence[str]
-) -> Tuple[int, datetime, DateUnit, List[str], List[int]]:
+) -> Tuple[int, datetime, DateUnit, List[str], npt.NDArray[Any]]:
     date = None
     n = None
     nx = None
@@ -287,14 +283,14 @@ def _read_spec(
     arrays: Dict[str, Optional[npt.NDArray[Any]]] = {
         kw: None
         for kw in [
-            "WGNAMES",
-            "NUMS",
+            "WGNAMES ",
+            "NUMS    ",
             "KEYWORDS",
-            "NUMLX",
-            "NUMLY",
-            "NUMLZ",
+            "NUMLX   ",
+            "NUMLY   ",
+            "NUMLZ   ",
             "LGRNAMES",
-            "UNITS",
+            "UNITS   ",
         ]
     }
 
@@ -320,13 +316,13 @@ def _read_spec(
                 )
             ):
                 break
-            kw = _key2str(entry.read_keyword())
+            kw = entry.read_keyword()
             if kw in arrays:
                 vals = entry.read_array()
                 if vals is resfo.MESS or isinstance(vals, resfo.MESS):
                     raise ValueError(f"{kw} in {spec} was MESS")
                 arrays[kw] = vals
-            if kw == "DIMENS":
+            if kw == "DIMENS  ":
                 vals = entry.read_array()
                 if vals is resfo.MESS or isinstance(vals, resfo.MESS):
                     raise ValueError(f"DIMENS in {spec} was MESS")
@@ -355,11 +351,11 @@ def _read_spec(
                     microsecond=microsecond % 10**6,
                 )
     keywords = arrays["KEYWORDS"]
-    wgnames = arrays["WGNAMES"]
-    nums = arrays["NUMS"]
-    numlx = arrays["NUMLX"]
-    numly = arrays["NUMLY"]
-    numlz = arrays["NUMLZ"]
+    wgnames = arrays["WGNAMES "]
+    nums = arrays["NUMS    "]
+    numlx = arrays["NUMLX   "]
+    numly = arrays["NUMLY   "]
+    numlz = arrays["NUMLZ   "]
     lgr_names = arrays["LGRNAMES"]
 
     if date is None:
@@ -407,22 +403,24 @@ def optional_get(arr: Optional[npt.NDArray[Any]], idx: int) -> Any:
                 indices.append(i)
                 keys.append(key)
 
-    units = arrays["UNITS"]
+    mask = np.in1d(np.arange(n), indices)
+
+    units = arrays["UNITS   "]
     if units is None:
         raise ValueError(f"keyword units missing in {spec}")
     if date_index is None:
         raise ValueError(f"KEYWORDS did not contain TIME in {spec}")
     if date_index >= len(units):
         raise ValueError(f"Unit missing for TIME in {spec}")
 
-    return date_index, date, DateUnit[_key2str(units[date_index])], keys, indices
+    return date_index, date, DateUnit[_key2str(units[date_index])], keys, mask
 
 
 def _read_summary(
     summary: str,
     start_date: datetime,
     unit: DateUnit,
-    indices: List[int],
+    mask: npt.NDArray[Any],
     date_index: int,
 ) -> Tuple[npt.NDArray[np.float32], List[datetime]]:
     if summary.lower().endswith("funsmry"):
@@ -442,16 +440,16 @@ def read_params():
             vals = last_params.read_array()
             if vals is resfo.MESS or isinstance(vals, resfo.MESS):
                 raise ValueError(f"PARAMS in {summary} was MESS")
-            values.append(vals[indices])
+            values.append(vals[mask])
             dates.append(start_date + unit.make_delta(float(vals[date_index])))
             last_params = None
 
     with open(summary, mode) as fp:
         for entry in resfo.lazy_read(fp, format):
-            kw = _key2str(entry.read_keyword())
-            if kw == "PARAMS":
+            kw = entry.read_keyword()
+            if kw == "PARAMS  ":
                 last_params = entry
-            if kw == "SEQHDR":
+            if kw == "SEQHDR  ":
                 read_params()
         read_params()
     return np.array(values).T, dates