Skip to content

Commit

Permalink
add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
tommyod committed Dec 12, 2024
1 parent c7f8431 commit 538777b
Showing 1 changed file with 38 additions and 4 deletions.
42 changes: 38 additions & 4 deletions src/semeio/fmudesign/iman_conover.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ def __init__(
seed=None,
verbose=False,
):
"""
"""Create a PermutationCorrelator instance, which induces correlations
between variables in X by randomly shuffling rows in a given column.
Parameters
----------
Expand All @@ -278,6 +278,15 @@ def __init__(
verbose : bool, optional
Whether or not to print information. The default is False.
Notes
-----
The paper "Correlation control in small-sample Monte Carlo type
simulations I: A simulated annealing approach" by Vořechovský et al.
proposes using simulated annealing. We implement a simple randomized
hill climbing procedure instead, because it is good enough.
- https://www.sciencedirect.com/science/article/pii/S0266892009000113
- https://en.wikipedia.org/wiki/Hill_climbing
Examples
--------
>>> rng = np.random.default_rng(42)
Expand Down Expand Up @@ -372,6 +381,13 @@ def __init__(
def _pearson(self, X):
"""Given a matrix X of shape (m, n), return a matrix of shape (n, n)
with Pearson correlation coefficients."""
# The majority of the runtime is spent computing correlation coefficients,
# so any attempt to speed up this code should focus on that.
# It is possible to compute the difference in the objective function
# without explicitly computing the empirical correlation afresh in
# every iteration. If X has shape (m, n), then this can take the
# runtime from O(m*n*n) to O(n), but it requires Python loops and
# extra bookkeeping.
return np.corrcoef(X, rowvar=False)

def _spearman(self, X):
Expand All @@ -396,10 +412,28 @@ def _error(self, X):
return np.sqrt(np.sum(weighted_residuals_sq))

def hill_climb(self, X):
"""Hill climbing swaps two random rows (observations). If the result
leads to a smaller error, then it is kept. If not we try again."""
"""Hill climbing cycles through columns (variables), and for each
column it swaps two random rows (observations). If the result
leads to a smaller error (correlation closer to target), then it is
kept. If not we try again.
Parameters
----------
X : np.ndarray
A matrix with shape (observations, variables).
Returns
-------
A copy of X where rows within each column are shuffled.
"""
num_obs, num_vars = X.shape
if not (isinstance(X, np.ndarray) and X.ndim == 2):
raise ValueError("`X` must be a 2D numpy array.")
if not num_vars == self.C.shape[0]:
raise ValueError(
"Number of variables in `X` does not match `correlation_matrix`."
)

if self.verbose:
print(f"Running permutation correlator for {self.iters} iterations.")

Expand Down

0 comments on commit 538777b

Please sign in to comment.