From 75d7308024297d3fcc35c79f3ece70501a620f23 Mon Sep 17 00:00:00 2001 From: Yi Peng Date: Fri, 19 Mar 2021 12:47:51 +0100 Subject: [PATCH 1/2] #122 error de pca corregidos por raul marticorena --- .../controller/AlgorithmExecuter.java | 126 +++++++++++++++--- 1 file changed, 104 insertions(+), 22 deletions(-) diff --git a/src/main/java/es/ubu/lsi/ubumonitor/clustering/controller/AlgorithmExecuter.java b/src/main/java/es/ubu/lsi/ubumonitor/clustering/controller/AlgorithmExecuter.java index 62044385..66ad9dcc 100644 --- a/src/main/java/es/ubu/lsi/ubumonitor/clustering/controller/AlgorithmExecuter.java +++ b/src/main/java/es/ubu/lsi/ubumonitor/clustering/controller/AlgorithmExecuter.java @@ -6,7 +6,6 @@ import java.util.List; import java.util.Map; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.apache.commons.math3.exception.NumberIsTooSmallException; import org.apache.commons.math3.ml.clustering.Cluster; @@ -28,10 +27,12 @@ * Clase encargada de ejecutar un algoritmo de clustering. * * @author Xing Long Ji + * @author Raúl Marticorena Sánchez * */ public class AlgorithmExecuter { + private static final int MINUS_ONE = -1; private Clusterer clusterer; private Distance distance; private List usersData; @@ -184,25 +185,22 @@ public static List> clustersTo(int dim, List centers = new ArrayList<>(); - for (ClusterWrapper clusterWrapper : clusters) { - double[] center = clusterWrapper.getCenter(); - if (center != null) { - centers.add(center); - } - } - - double[][] matrix = Stream - .concat(clusters.stream().flatMap(ClusterWrapper::stream).map(UserData::getPoint), centers.stream()) + + // Only use the point data, not use centroids (corrected bug). 
+ double[][] matrix = clusters.stream().flatMap(ClusterWrapper::stream).map(UserData::getPoint) .toArray(double[][]::new); - - PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(); - List> points = new ArrayList<>(); - + + List> points = new ArrayList<>(); + // PCA with T-SNE + PrincipalComponentAnalysis pca = new PrincipalComponentAnalysis(); if (matrix[0].length > dim) { matrix = pca.pca(matrix, dim); } + + // Correct problem of twists with PCA projections + // NOTE(review): this call is also reached when the PCA step above is skipped (matrix[0].length <= dim); confirm that pca.getBasisVector tolerates an instance on which pca(...) was never run + changeSignInMatrixProjection(matrix, pca, dim); // fix problem of symmetry with PCA + + // Add points int i = 0; for (List list : clusters) { Map map = new LinkedHashMap<>(); @@ -210,14 +208,98 @@ public static List> clustersTo(int dim, List clusters, final double[][] matrix){ + // number of clusters x number of dimensions after PCA + double[][] centroids = new double[clusters.size()][matrix[0].length]; + + int position = 0; // initial row position in data numerical matrix + int currentNumberOfCentroid = 0; + for (ClusterWrapper cluster : clusters) { + double[] centroid = obtainCentroid(matrix, position, position + cluster.size()); + centroids[currentNumberOfCentroid++] = centroid; // add centroid + position += cluster.size(); // move forward to the next cluster row + } + return centroids; + } + + /** + * Computes the centroid of a run of consecutive rows: the arithmetic mean of each dimension. + * + * @param matrix data numerical points, one row per point + * @param begin index of the first row of the cluster (inclusive) + * @param end index just past the last row of the cluster (exclusive) + * @return centroid coordinates; NaN in every dimension when begin equals end + */ + private static double[] obtainCentroid(double[][] matrix, final int begin, final int end) { + int dimensions = matrix[0].length; + double[] centroid = new double[dimensions]; + // add the x_i values in each dimension... 
+		for (int i = begin; i < end; i++) {
+			for (int j = 0; j < dimensions; j++) {
+				centroid[j] += matrix[i][j];
+			}
+		}
+		// arithmetic mean for added values in each dimension
+		for (int j = 0; j < dimensions; j++) {
+			centroid[j] = centroid[j]/(end-begin);
+		}
+		return centroid;
+	}
+
+	/**
+	 * Corrects the sign ambiguity of a PCA projection, in place.
+	 *
+	 * The PCA algorithm can give solutions that differ only in the sign of the
+	 * principal components. To correct the problem, the sign of the first value
+	 * of each principal component is checked and, when it is negative, the sign
+	 * of the corresponding projected column is changed. In this way the
+	 * projections are always the same (without twists).
+	 *
+	 * NOTE(review): {@code pca} is queried for every component, so it presumably
+	 * must already be fitted, and {@code matrix} needs at least
+	 * {@code numberOfComponents} columns; confirm callers that skip the PCA step.
+	 *
+	 * Thanks to César Ignacio García Osorio and Juan José Rodriguez Diez.
+	 *
+	 * @param matrix projected points, one row per point; modified in place
+	 * @param pca PCA instance whose basis vectors decide the sign of each column
+	 * @param numberOfComponents number of leading components (columns) to check
+	 */
+	private static void changeSignInMatrixProjection(double[][] matrix, final PrincipalComponentAnalysis pca, final int numberOfComponents) {
+		for (int component = 0; component < numberOfComponents; component++) {
+			// The first coordinate of the basis vector fixes the orientation of the axis.
+			if (pca.getBasisVector(component)[0] < 0) {
+				changeSignInColumn(matrix, component); // flip the matching projected column
+			}
+		}
+	}
+
+	/**
+	 * Change sign in all elements in column.
+	 *
+	 * @param matrix matrix whose column is negated in place
+	 * @param column index of the column to negate
+	 */
+	private static void changeSignInColumn(double[][] matrix, final int column) {
+		// Walk the rows directly; each row keeps every value except the chosen column.
+		for (double[] row : matrix) {
+			row[column] *= MINUS_ONE;
+		}
+	}
+ } From 56270ae0674bb4fdec654fe52f17d2dc39669b80 Mon Sep 17 00:00:00 2001 From: Yi Peng Date: Fri, 19 Mar 2021 12:58:10 +0100 Subject: [PATCH 2/2] Cambio de version 2.8.2 --- pom.xml | 2 +- src/main/java/es/ubu/lsi/ubumonitor/AppInfo.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pom.xml b/pom.xml index d652ba11..7f87655d 100644 --- a/pom.xml +++ b/pom.xml @@ -5,7 +5,7 @@ es.ubu.lsi ubumonitor - 2.8.0 + 2.8.2 UBUMonitor diff --git a/src/main/java/es/ubu/lsi/ubumonitor/AppInfo.java b/src/main/java/es/ubu/lsi/ubumonitor/AppInfo.java index be3d78e4..365290a2 100644 --- a/src/main/java/es/ubu/lsi/ubumonitor/AppInfo.java +++ b/src/main/java/es/ubu/lsi/ubumonitor/AppInfo.java @@ -8,7 +8,7 @@ */ public class AppInfo { - public static final String VERSION = "2.8.0"; + public static final String VERSION = "2.8.2"; public static final String APPLICATION_VERSION = "v" + VERSION;