From 0b686e365edffc9034802a1cfe1e433e18b6ecb2 Mon Sep 17 00:00:00 2001
From: Patrick Shriwise <pshriwise@gmail.com>
Date: Sat, 25 May 2024 14:11:21 -0500
Subject: [PATCH] Correction for histogram interpolation of Tabular
 distributions (#2981)

Co-authored-by: Paul Romano <paul.k.romano@gmail.com>
---
 openmc/data/thermal.py         |  4 +--
 openmc/stats/univariate.py     | 60 ++++++++++++++++++----------------
 tests/unit_tests/test_stats.py | 23 +++++++++++--
 3 files changed, 54 insertions(+), 33 deletions(-)

diff --git a/openmc/data/thermal.py b/openmc/data/thermal.py
index f5841d9c90b..6df171d3a4f 100644
--- a/openmc/data/thermal.py
+++ b/openmc/data/thermal.py
@@ -698,8 +698,8 @@ def from_ace(cls, ace_or_filename, name=None):
                 # add an outgoing energy 0 eV that has a PDF of 0 (and of
                 # course, a CDF of 0 as well).
                 if eout_i.c[0] > 0.:
-                    eout_i.x = np.insert(eout_i.x, 0, 0.)
-                    eout_i.p = np.insert(eout_i.p, 0, 0.)
+                    eout_i._x = np.insert(eout_i.x, 0, 0.)
+                    eout_i._p = np.insert(eout_i.p, 0, 0.)
                     eout_i.c = np.insert(eout_i.c, 0, 0.)
 
                     # For this added outgoing energy (of 0 eV) we add a set of
diff --git a/openmc/stats/univariate.py b/openmc/stats/univariate.py
index a0e86c3f8f2..f44bce67cee 100644
--- a/openmc/stats/univariate.py
+++ b/openmc/stats/univariate.py
@@ -837,10 +837,11 @@ class Tabular(Univariate):
     x : Iterable of float
         Tabulated values of the random variable
     p : Iterable of float
-        Tabulated probabilities
+        Tabulated probabilities. For histogram interpolation, if the length of
+        `p` is the same as `x`, the last value is ignored.
     interpolation : {'histogram', 'linear-linear', 'linear-log', 'log-linear', 'log-log'}, optional
-        Indicate whether the density function is constant between tabulated
-        points or linearly-interpolated. Defaults to 'linear-linear'.
+        Indicates how the density function is interpolated between tabulated
+        points. Defaults to 'linear-linear'.
     ignore_negative : bool
         Ignore negative probabilities
 
@@ -850,9 +851,9 @@ class Tabular(Univariate):
         Tabulated values of the random variable
     p : numpy.ndarray
         Tabulated probabilities
-    interpolation : {'histogram', 'linear-linear', 'linear-log', 'log-linear', 'log-log'}, optional
-        Indicate whether the density function is constant between tabulated
-        points or linearly-interpolated.
+    interpolation : {'histogram', 'linear-linear', 'linear-log', 'log-linear', 'log-log'}
+        Indicates how the density function is interpolated between tabulated
+        points. Defaults to 'linear-linear'.
 
     """
 
@@ -863,35 +864,38 @@ def __init__(
             interpolation: str = 'linear-linear',
             ignore_negative: bool = False
         ):
-        self._ignore_negative = ignore_negative
-        self.x = x
-        self.p = p
         self.interpolation = interpolation
 
+        cv.check_type('tabulated values', x, Iterable, Real)
+        cv.check_type('tabulated probabilities', p, Iterable, Real)
+
+        x = np.array(x, dtype=float)
+        p = np.array(p, dtype=float)
+
+        if p.size > x.size:
+            raise ValueError('Number of probabilities exceeds number of table values.')
+        if self.interpolation != 'histogram' and x.size != p.size:
+            raise ValueError(f'Tabulated values ({x.size}) and probabilities '
+                             f'({p.size}) should have the same length')
+
+        if not ignore_negative:
+            for pk in p:
+                cv.check_greater_than('tabulated probability', pk, 0.0, True)
+
+        self._x = x
+        self._p = p
+
     def __len__(self):
-        return len(self.x)
+        return self.p.size
 
     @property
     def x(self):
         return self._x
 
-    @x.setter
-    def x(self, x):
-        cv.check_type('tabulated values', x, Iterable, Real)
-        self._x = np.array(x, dtype=float)
-
     @property
     def p(self):
         return self._p
 
-    @p.setter
-    def p(self, p):
-        cv.check_type('tabulated probabilities', p, Iterable, Real)
-        if not self._ignore_negative:
-            for pk in p:
-                cv.check_greater_than('tabulated probability', pk, 0.0, True)
-        self._p = np.array(p, dtype=float)
-
     @property
     def interpolation(self):
         return self._interpolation
@@ -907,7 +911,7 @@ def cdf(self):
         p = self.p
 
         if self.interpolation == 'histogram':
-            c[1:] = p[:-1] * np.diff(x)
+            c[1:] = p[:x.size-1] * np.diff(x)
         elif self.interpolation == 'linear-linear':
             c[1:] = 0.5 * (p[:-1] + p[1:]) * np.diff(x)
         else:
@@ -938,7 +942,7 @@ def mean(self):
         elif self.interpolation == 'histogram':
             x_l = self.x[:-1]
             x_r = self.x[1:]
-            p_l = self.p[:-1]
+            p_l = self.p[:self.x.size-1]
             mean = (0.5 * (x_l + x_r) * (x_r - x_l) * p_l).sum()
         else:
             raise NotImplementedError('Can only compute mean for tabular '
@@ -952,7 +956,7 @@ def mean(self):
 
     def normalize(self):
         """Normalize the probabilities stored on the distribution"""
-        self.p /= self.cdf().max()
+        self._p /= self.cdf().max()
 
     def sample(self, n_samples: int = 1, seed: typing.Optional[int] = None):
         rng = np.random.RandomState(seed)
@@ -1070,7 +1074,7 @@ def integral(self):
             Integral of tabular distrbution
         """
         if self.interpolation == 'histogram':
-            return np.sum(np.diff(self.x) * self.p[:-1])
+            return np.sum(np.diff(self.x) * self.p[:self.x.size-1])
         elif self.interpolation == 'linear-linear':
             return trapezoid(self.p, self.x)
         else:
@@ -1346,7 +1350,7 @@ def combine_distributions(
     # Apply probabilites to continuous distributions
     for i in cont_index:
         dist = dist_list[i]
-        dist.p *= probs[i]
+        dist._p *= probs[i]
 
     if discrete_index:
         # Create combined discrete distribution
diff --git a/tests/unit_tests/test_stats.py b/tests/unit_tests/test_stats.py
index 761f26ab3df..f5b467d0380 100644
--- a/tests/unit_tests/test_stats.py
+++ b/tests/unit_tests/test_stats.py
@@ -92,7 +92,7 @@ def test_clip_discrete():
 
     with pytest.raises(ValueError):
         d.clip(-1.)
-    
+
     with pytest.raises(ValueError):
         d.clip(5)
 
@@ -188,8 +188,8 @@ def test_watt():
 
 
 def test_tabular():
-    x = np.array([0.0, 5.0, 7.0])
-    p = np.array([10.0, 20.0, 5.0])
+    x = np.array([0.0, 5.0, 7.0, 10.0])
+    p = np.array([10.0, 20.0, 5.0, 6.0])
     d = openmc.stats.Tabular(x, p, 'linear-linear')
     elem = d.to_xml_element('distribution')
 
@@ -217,6 +217,23 @@ def test_tabular():
     d.normalize()
     assert d.integral() == pytest.approx(1.0)
 
+    # ensure that passing a set of probabilities shorter than x works
+    # for histogram interpolation
+    d = openmc.stats.Tabular(x, p[:-1], interpolation='histogram')
+    d.cdf()
+    d.mean()
+    assert_sample_mean(d.sample(n_samples), d.mean())
+
+    # passing a shorter probability set should raise an error for linear-linear
+    with pytest.raises(ValueError):
+        d = openmc.stats.Tabular(x, p[:-1], interpolation='linear-linear')
+        d.cdf()
+
+    # Use probabilities of correct length for linear-linear interpolation and
+    # call the CDF method
+    d = openmc.stats.Tabular(x, p, interpolation='linear-linear')
+    d.cdf()
+
 
 def test_legendre():
     # Pu239 elastic scattering at 100 keV