Implementing RLS #1645

Open · wants to merge 9 commits into base: main
4 changes: 4 additions & 0 deletions river/linear_model/__init__.py
@@ -5,10 +5,12 @@
from . import base
from .alma import ALMAClassifier
from .bayesian_lin_reg import BayesianLinearRegression
from .incrementalAUC import IncrementalAUC
from .lin_reg import LinearRegression
from .log_reg import LogisticRegression
from .pa import PAClassifier, PARegressor
from .perceptron import Perceptron
from .rls import RLS
from .softmax import SoftmaxRegression

__all__ = [
@@ -21,4 +23,6 @@
"PARegressor",
"Perceptron",
"SoftmaxRegression",
"RLS",
"IncrementalAUC",
]
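
With these exports in place, both new estimators become importable from the package namespace. A minimal sketch (assuming this branch of river is installed):

from river import linear_model

rls = linear_model.RLS(p=2)
auc = linear_model.IncrementalAUC()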
119 changes: 119 additions & 0 deletions river/linear_model/incrementalAUC.py
@@ -0,0 +1,119 @@
from __future__ import annotations

import numpy as np

from river import metrics


class IncrementalAUC(metrics.base.BinaryMetric):
    """Computes AUC incrementally by accumulating positive- and negative-class scores."""

def __init__(self):
super().__init__()
self.positive_scores = []
self.negative_scores = []

def update(self, y_true, y_pred):
"""Updates the metric with the new prediction and true label."""
if y_true == 1:
self.positive_scores.append(y_pred)
else:
self.negative_scores.append(y_pred)
return self

def get(
self, X_train, y_train, X_test, y_test, epochs=900, lr=0.5, n_mc=500, gamma=1e-4, eps=0.01
):
"""
Implements the stochastic gradient ascent method to optimize theta and computes the AUC
based on the accumulated scores.

Parameters:
- X_train: Training feature matrix.
- y_train: Training labels.
- X_test: Test feature matrix.
- y_test: Test labels.
- epochs: Number of training epochs.
- lr: Initial learning rate.
- n_mc: Number of Monte Carlo samples for gradient estimation.
- gamma: Learning rate discount factor.
- eps: Smoothing parameter for the sigmoid function.

Returns:
- auc: Final AUC score based on the accumulated scores.
"""
from sklearn.metrics import roc_auc_score

# Separate the classes
X1 = X_train[y_train == 1]
X0 = X_train[y_train == 0]

# Initialize parameters
np.random.seed(123)
theta = np.random.randn(X_train.shape[1])
current_lr = lr

# Reset accumulated scores
self.positive_scores = []
self.negative_scores = []

        # Optimization loop: one stochastic gradient step per epoch,
        # reseeding the Monte Carlo sampler at every step
        for seed in range(epochs):
            # Decay the learning rate by the discount factor gamma
            current_lr = current_lr / (1 + gamma)

            # Step down the smoothed pairwise loss, which ascends the smoothed AUC
            theta -= current_lr * self.stochastic_gradient(
                theta, X1, X0, N=n_mc, eps=eps, random_state=seed
            )

# After training, compute the scores on the test set
y_scores = np.dot(X_test, theta)

# Update accumulated scores using y_test and y_scores
for y_true_sample, y_score in zip(y_test, y_scores):
self.update(y_true_sample, y_score)

# Compute AUC based on accumulated scores
y_scores_accumulated = self.positive_scores + self.negative_scores
y_true_accumulated = [1] * len(self.positive_scores) + [0] * len(self.negative_scores)

auc = roc_auc_score(y_true_accumulated, y_scores_accumulated)
return auc

    def sigma_eps(self, x, eps=0.01):
        """Sigmoid with temperature eps: a smooth surrogate for the 0/1 step function."""
        z = x / eps
        # Saturate for large |z| to avoid overflow in np.exp
        if z > 35:
            return 1.0
        elif z < -35:
            return 0.0
        else:
            return 1.0 / (1.0 + np.exp(-z))

    def reg_u_statistic(self, y_true, y_probs, eps=0.01):
        """Smoothed U-statistic: mean of sigma_eps(p - q) over all (positive, negative)
        score pairs, a differentiable estimate of the AUC."""
p = y_probs[y_true == 1]
q = y_probs[y_true == 0]

aux = []
for pp in p:
for qq in q:
aux.append(self.sigma_eps(pp - qq, eps=eps))

u = np.array(aux).mean()
return u

    def stochastic_gradient(self, theta, X1, X0, N=1000, eps=0.01, random_state=1):
        """Monte Carlo estimate of the gradient of the smoothed pairwise AUC loss."""
        np.random.seed(random_state)

        # Sample N (positive, negative) pairs with replacement
        indices_1 = np.random.choice(np.arange(X1.shape[0]), size=N)
        indices_0 = np.random.choice(np.arange(X0.shape[0]), size=N)

X1_, X0_ = X1[indices_1], X0[indices_0]

avg = np.zeros_like(theta)
for xi, xj in zip(X1_, X0_):
dx = xj - xi
sig = self.sigma_eps(theta @ dx, eps=eps)
avg = avg + sig * (1 - sig) * dx

return avg / (N * eps)
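
The update() path can also be exercised on its own as a streaming accumulator: the AUC is the fraction of (positive, negative) pairs ranked correctly, which reg_u_statistic approximates by the mean of sigma_eps(p - q) over score pairs. A minimal sketch of the streaming path, with made-up scores standing in for an upstream classifier:

from sklearn.metrics import roc_auc_score
from river import linear_model

metric = linear_model.IncrementalAUC()

# Stream (label, score) pairs, e.g. from a classifier's scoring function
for y, score in [(1, 0.9), (0, 0.2), (1, 0.6), (0, 0.4)]:
    metric.update(y, score)

# Recover the AUC from the accumulated scores
y_scores = metric.positive_scores + metric.negative_scores
y_true = [1] * len(metric.positive_scores) + [0] * len(metric.negative_scores)
print(roc_auc_score(y_true, y_scores))  # 1.0 on this toy stream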
139 changes: 139 additions & 0 deletions river/linear_model/rls.py
@@ -0,0 +1,139 @@
from __future__ import annotations

import numpy as np


class RLS:
"""
Recursive Least Squares (RLS)

The Recursive Least Squares (RLS) algorithm is an adaptive filtering method that adjusts filter coefficients
to minimize the weighted least squares error between the desired and predicted outputs. It is widely used
in signal processing and control systems for applications requiring fast adaptation to changes in input signals.

Parameters
----------
    p : int
        The order of the filter (p + 1 coefficients are estimated).
forgetting_factor : float, optional, default=0.99
        Forgetting factor (0 < λ ≤ 1). Controls how quickly the algorithm forgets past data.
A smaller value makes the algorithm more responsive to recent data.
delta : float, optional, default=1000000
Initial value for the inverse correlation matrix (P(0)). A large value ensures numerical stability at the start.

Attributes
----------
p : int
Filter order.
forgetting_factor : float
Forgetting factor.
delta : float
Initialization value for P(0).
currentStep : int
The current iteration step of the RLS algorithm.
x : numpy.ndarray
Input vector of size (p+1, 1). Stores the most recent inputs.
P : numpy.ndarray
Inverse correlation matrix, initialized to a scaled identity matrix.
estimates : list of numpy.ndarray
List of estimated weight vectors (filter coefficients) at each step.
Pks : list of numpy.ndarray
List of inverse correlation matrices (P) at each step.

Methods
-------
estimate(xn, dn)
Updates the filter coefficients using the current input (`xn`) and desired output (`dn`).


Examples
--------
>>> from river import linear_model
>>> import numpy as np

>>> # Initialize the RLS filter with order 2, forgetting factor 0.98, and delta 1e6
>>> rls = linear_model.RLS(p=2, forgetting_factor=0.98, delta=1e6)

>>> # Simulate some data
>>> np.random.seed(42)
>>> num_samples = 100
>>> x_data = np.sin(np.linspace(0, 10, num_samples)) # Input signal
>>> noise = np.random.normal(0, 0.1, num_samples) # Add some noise
>>> d_data = 0.5 * x_data + 0.3 + noise # Desired output

>>> # Apply RLS algorithm
>>> for xn, dn in zip(x_data, d_data):
... weights = rls.estimate(xn, dn)
>>> print("Final Weights:", rls.estimates[-1].flatten())
Final Weights: [ 3.48065382 -6.15301727 3.3361416 ]
"""

def __init__(self, p: int, forgetting_factor=0.99, delta=1000000):
"""
Initializes the Recursive Least Squares (RLS) filter.

Parameters
----------
        p : int
            Filter order (p + 1 coefficients are estimated).
forgetting_factor : float, optional
Forgetting factor (default is 0.99).
delta : float, optional
Initial value for the inverse correlation matrix (default is 1,000,000).
"""
self.p = p # Filter order
self.forgetting_factor = forgetting_factor # Forgetting factor
self.delta = delta # Value to initialise P(0)

self.currentStep = 0

self.x = np.zeros((p + 1, 1)) # Column vector
self.P = np.identity(p + 1) * self.delta

self.estimates = []
self.estimates.append(np.zeros((p + 1, 1))) # Weight vector initialized to zeros

self.Pks = []
self.Pks.append(self.P)

def estimate(self, xn: float, dn: float):
"""
Performs one iteration of the RLS algorithm to update filter coefficients.

Parameters
----------
xn : float
The current input sample.
dn : float
The desired output corresponding to the current input.

Returns
-------
numpy.ndarray
Updated weight vector (filter coefficients) after the current iteration.
"""
        # Shift the input buffer: oldest sample first, newest sample last
        self.x = np.roll(self.x, -1)
        self.x[-1, 0] = xn

        # Previous weight vector w_{n-1}
        wn_prev = self.estimates[-1]

        # Gain vector: g_n = P_{n-1} x_n / (λ + x_nᵀ P_{n-1} x_n)
        denominator = self.forgetting_factor + self.x.T @ self.Pks[-1] @ self.x
        gn = (self.Pks[-1] @ self.x) / denominator

        # A priori error: α_n = d_n - x_nᵀ w_{n-1}
        alpha = dn - (self.x.T @ wn_prev)

        # Riccati update of the inverse correlation matrix:
        # P_n = (P_{n-1} - g_n x_nᵀ P_{n-1}) / λ
        Pn = (self.Pks[-1] - gn @ self.x.T @ self.Pks[-1]) / self.forgetting_factor
        self.Pks.append(Pn)

        # Weight update: w_n = w_{n-1} + α_n g_n
        wn = wn_prev + gn * alpha
        self.estimates.append(wn)

self.currentStep += 1

return wn
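
A quick sanity check one might run against these update equations (not part of the PR): with forgetting_factor=1.0 and a noise-free FIR target, RLS solves ordinary least squares recursively and should recover the true filter taps. Note the input buffer stores the oldest sample first, so the recovered weights come out oldest-tap first:

import numpy as np
from river import linear_model  # assuming this branch is installed

np.random.seed(0)
x = np.random.randn(500)
h = np.array([0.5, -0.2, 0.1])    # true taps, newest sample first
d = np.convolve(x, h)[: len(x)]   # d[n] = 0.5*x[n] - 0.2*x[n-1] + 0.1*x[n-2]

rls = linear_model.RLS(p=2, forgetting_factor=1.0, delta=1e6)
for xn, dn in zip(x, d):
    w = rls.estimate(xn, dn)

print(w.flatten())  # ≈ [0.1, -0.2, 0.5], i.e. h reversed into oldest-first order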