VEuPathDB · d-callan · Apr 4, 2024 · Mar 25, 2024 · Mar 25, 2024 · Mar 28, 2024
diff --git a/src/main/java/org/veupathdb/service/eda/compute/plugins/alphadiv/AlphaDivPlugin.java b/src/main/java/org/veupathdb/service/eda/compute/plugins/alphadiv/AlphaDivPlugin.java
@@ -3,6 +3,7 @@
 import org.gusdb.fgputil.ListBuilder;
 import org.jetbrains.annotations.NotNull;
 import org.veupathdb.service.eda.common.client.spec.StreamSpec;
+import org.veupathdb.service.eda.common.model.CollectionDef;
 import org.veupathdb.service.eda.common.model.EntityDef;
 import org.veupathdb.service.eda.common.model.ReferenceMetadata;
 import org.veupathdb.service.eda.common.model.VariableDef;
@@ -42,6 +43,8 @@ protected void execute() {
     AlphaDivComputeConfig computeConfig = getConfig();
     PluginUtil util = getUtil();
     ReferenceMetadata meta = getContext().getReferenceMetadata();
+    CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
+    String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
     String entityId = computeConfig.getCollectionVariable().getEntityId();
     EntityDef entity = meta.getEntity(entityId).orElseThrow();
     VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
@@ -74,7 +77,7 @@ protected void execute() {
       }
       dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";
 
-      connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA + 
+      connection.voidEval("abundDT <- microbiomeData::AbundanceData(name=" + util.singleQuote(collectionMemberType) + ",data=" + INPUT_DATA + 
                                                                           ",recordIdColumn=" + util.singleQuote(computeEntityIdColName) + 
                                                                           ",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
                                                                           ",imputeZero=TRUE)");

diff --git a/src/main/java/org/veupathdb/service/eda/compute/plugins/betadiv/BetaDivPlugin.java b/src/main/java/org/veupathdb/service/eda/compute/plugins/betadiv/BetaDivPlugin.java
@@ -3,6 +3,7 @@
 import org.gusdb.fgputil.ListBuilder;
 import org.jetbrains.annotations.NotNull;
 import org.veupathdb.service.eda.common.client.spec.StreamSpec;
+import org.veupathdb.service.eda.common.model.CollectionDef;
 import org.veupathdb.service.eda.common.model.EntityDef;
 import org.veupathdb.service.eda.common.model.ReferenceMetadata;
 import org.veupathdb.service.eda.common.model.VariableDef;
@@ -43,6 +44,8 @@ protected void execute() {
     BetaDivComputeConfig computeConfig = getConfig();
     PluginUtil util = getUtil();
     ReferenceMetadata meta = getContext().getReferenceMetadata();
+    CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
+    String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
     String entityId = computeConfig.getCollectionVariable().getEntityId();
     EntityDef entity = meta.getEntity(entityId).orElseThrow();
     VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
@@ -76,7 +79,7 @@ protected void execute() {
       }
       dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";
 
-      connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA + 
+      connection.voidEval("abundDT <- microbiomeData::AbundanceData(name=" + singleQuote(collectionMemberType) + ",data=" + INPUT_DATA + 
                                                                           ",recordIdColumn=" + singleQuote(computeEntityIdColName) +
                                                                           ",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
                                                                           ",imputeZero=TRUE)");

diff --git a/src/main/java/org/veupathdb/service/eda/compute/plugins/correlation/CorrelationPlugin.java b/src/main/java/org/veupathdb/service/eda/compute/plugins/correlation/CorrelationPlugin.java
@@ -186,9 +186,9 @@ protected void execute() {
       // NOTE: getMember tells us the member type, rather than gives us a literal member
       String assayType = collection.getMember() == null ? "unknown" : collection.getMember();
 
-      boolean isEigengene = false;
+      String dataClassRString = "microbiomeData::AbundanceData";
       if (assayType.toLowerCase().contains("eigengene")) {
-        isEigengene = true;
+        dataClassRString = "veupathUtils::CollectionWithMetadata";
       }
 
       // THIS CASE IS ASSAY X METADATA
@@ -208,43 +208,24 @@ protected void execute() {
         connection.voidEval(util.getVoidEvalFreadCommand(INPUT_DATA, metadataInputVars));
         connection.voidEval("sampleMetadata <- " + INPUT_DATA); 
 
-         // Prep data and run correlation
-        if (isEigengene)  {
-          // If we have eigenegene data, we'll use our base correlation function in veupathUtils, so we
-          // only need to make data frames for the assay data and sample metadata.
-          connection.voidEval("eigengeneData <- assayData[order(" + computeEntityIdColName + ")]; " + 
-            "eigengeneData <- eigengeneData[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
-            "eigengeneData <- eigengeneData[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE];");
-
-          connection.voidEval("sampleMetadata <- sampleMetadata[order(" + computeEntityIdColName + ")]; " + 
-            "sampleMetadata <- sampleMetadata[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
-            "sampleMetadata <- sampleMetadata[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE];");
-
-          connection.voidEval("computeResult <- veupathUtils::correlation(data1=eigengeneData, data2=sampleMetadata" +
-                                    ", method=" + singleQuote(method) +
-                                    ", verbose=TRUE)");
-        } else {
-          // If we don't have eigengene data, for now we can assume the data is abundance data.
-          // Abundance data can go through our microbiomeComputations pipeline.
-          connection.voidEval("sampleMetadata <- microbiomeData::SampleMetadata(data = sampleMetadata" +
+        connection.voidEval("sampleMetadata <- microbiomeData::SampleMetadata(data = sampleMetadata" +
                                     ", recordIdColumn=" + singleQuote(computeEntityIdColName) +
                                     ", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + "))");
 
-          connection.voidEval("abundanceData <- microbiomeData::AbundanceData(data=assayData" + 
+        connection.voidEval("abundanceData <- " + dataClassRString + "(name= " + singleQuote(assayType) + ",data=assayData" + 
                                     ", sampleMetadata=sampleMetadata" +
                                     ", recordIdColumn=" + singleQuote(computeEntityIdColName) +
                                     ", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
                                     ", imputeZero=TRUE)");       
 
-          // Run correlation!
-          connection.voidEval("computeResult <- veupathUtils::correlation(data1=abundanceData" +
+        // Correlate the abundanceData object built above, forwarding the proportionNonZero/variance/stdDev threshold params.
+        connection.voidEval("computeResult <- veupathUtils::correlation(data1=abundanceData" +
                                                               ", method=" + singleQuote(method) +
                                                               proportionNonZeroThresholdRParam +
                                                               varianceThresholdRParam +
                                                               stdDevThresholdRParam +
                                                               ", verbose=TRUE)");
-        }
-    // THIS CASE IS ASSAY X ASSAY
+      // THIS CASE IS ASSAY X ASSAY
       } else {
         // Get the second assay collection
         CollectionSpec assay2 = computeConfig.getCollectionVariable2();
@@ -282,50 +263,25 @@ protected void execute() {
         String dotNotatedEntity2IdColumnsString = util.listToRVector(dotNotatedEntity2IdColumns);
 
         String collection2MemberType = collection2.getMember() == null ? "unknown" : collection2.getMember();
-        // If either collection is an eigengene, we'll use our base correlation function in veupathUtils,
-        // so we want to set the isEigengene flag to true.
-        if (collection2MemberType.toLowerCase().contains("eigengene")) {
-          isEigengene = true;
-        }
+        String data2ClassRString = collection2MemberType.toLowerCase().contains("eigengene") ? "veupathUtils::CollectionWithMetadata" : "microbiomeData::AbundanceData";
 
-        // Prep data and run correlation
-        if (isEigengene) {
-          // If we have eigenegene data, we'll use our base correlation function in veupathUtils, so we
-          // only need to make data frames for the assay data and sample metadata.
-          connection.voidEval("data1 <- assayData; " + 
-            "data1 <- data1[order(" + revisedComputeEntityIdColName + ")]; " + 
-            "data1 <- data1[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
-            "data1 <- data1[, -" + singleQuote(revisedComputeEntityIdColName) + ", with=FALSE]");
-
-          connection.voidEval("data2 <- assay2Data; " +
-            "data2 <- data2[order(" + revisedComputeEntityIdColName + ")]; " + 
-            "data2 <- data2[, -as.character(" + dotNotatedEntity2IdColumnsString + "), with=FALSE];" +
-            "data2 <- data2[, -" + singleQuote(revisedComputeEntityIdColName) + ", with=FALSE]");
-
-          connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
-                                                              ", method=" + singleQuote(method) +
-                                                              ", verbose=TRUE)");
-        } else {
-          // If we don't have eigengene data, for now we can assume the data is abundance data.
-          // Abundance data can go through our microbiomeComputations pipeline.
-          connection.voidEval("data1 <- microbiomeData::AbundanceData(data=assayData" + 
+        connection.voidEval("data1 <- " + dataClassRString + "(name= " + singleQuote(assayType) + ",data=assayData" + 
                                     ", recordIdColumn=" + singleQuote(revisedComputeEntityIdColName) +
                                     ", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
                                     ", imputeZero=TRUE)");
 
-          connection.voidEval("data2 <- microbiomeData::AbundanceData(data = assay2Data" +
+        connection.voidEval("data2 <- " + data2ClassRString + "(name= " + singleQuote(collection2MemberType) + ",data = assay2Data" +
                                     ", recordIdColumn=" + singleQuote(revisedComputeEntityIdColName) +
                                     ", ancestorIdColumns=as.character(" + dotNotatedEntity2IdColumnsString + ")" +
                                     ", imputeZero=TRUE)");
 
-          // Run correlation!
-          connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
+        // Correlate data1 against data2 (built above), forwarding the proportionNonZero/variance/stdDev threshold params.
+        connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
                                                               ", method=" + singleQuote(method) +
                                                               proportionNonZeroThresholdRParam +
                                                               varianceThresholdRParam +
                                                               stdDevThresholdRParam +
                                                               ", verbose=TRUE)");
-        }
       }
 
       // Write results
@@ -334,4 +290,4 @@ protected void execute() {
       getWorkspace().writeStatisticsResult(connection, statsCmd);
     });
   }
-}
+}
diff --git a/...pathdb/service/eda/compute/plugins/differentialabundance/DifferentialAbundancePlugin.java b/...pathdb/service/eda/compute/plugins/differentialabundance/DifferentialAbundancePlugin.java
@@ -3,6 +3,7 @@
 import org.gusdb.fgputil.ListBuilder;
 import org.jetbrains.annotations.NotNull;
 import org.veupathdb.service.eda.common.client.spec.StreamSpec;
+import org.veupathdb.service.eda.common.model.CollectionDef;
 import org.veupathdb.service.eda.common.model.EntityDef;
 import org.veupathdb.service.eda.common.model.ReferenceMetadata;
 import org.veupathdb.service.eda.common.model.VariableDef;
@@ -48,6 +49,8 @@ protected void execute() {
     ReferenceMetadata meta = getContext().getReferenceMetadata();
 
     CollectionSpec collectionSpec = computeConfig.getCollectionVariable();
+    CollectionDef collection = meta.getCollection(collectionSpec).orElseThrow();
+    String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
     String entityId = collectionSpec.getEntityId();
     EntityDef entity = meta.getEntity(entityId).orElseThrow();
     VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
@@ -125,7 +128,8 @@ protected void execute() {
       {
         abundanceDataClass = "AbsoluteAbundanceData";
       }
-      connection.voidEval("inputData <- microbiomeData::" + abundanceDataClass + "(data=abundanceData" + 
+      connection.voidEval("inputData <- microbiomeData::" + abundanceDataClass + "(name=" + singleQuote(collectionMemberType) + 
+                                                                          ", data=abundanceData" + 
                                                                           ", sampleMetadata=sampleMetadata" +
                                                                           ", recordIdColumn=" + singleQuote(computeEntityIdColName) +
                                                                           ", ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +

diff --git a/...java/org/veupathdb/service/eda/compute/plugins/rankedabundance/RankedAbundancePlugin.java b/...java/org/veupathdb/service/eda/compute/plugins/rankedabundance/RankedAbundancePlugin.java
@@ -3,6 +3,7 @@
 import org.gusdb.fgputil.ListBuilder;
 import org.jetbrains.annotations.NotNull;
 import org.veupathdb.service.eda.common.client.spec.StreamSpec;
+import org.veupathdb.service.eda.common.model.CollectionDef;
 import org.veupathdb.service.eda.common.model.EntityDef;
 import org.veupathdb.service.eda.common.model.ReferenceMetadata;
 import org.veupathdb.service.eda.common.model.VariableDef;
@@ -42,6 +43,8 @@ protected void execute() {
     RankedAbundanceComputeConfig computeConfig = getConfig();
     PluginUtil util = getUtil();
     ReferenceMetadata meta = getContext().getReferenceMetadata();
+    CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
+    String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
     String entityId = computeConfig.getCollectionVariable().getEntityId();
     EntityDef entity = meta.getEntity(entityId).orElseThrow();
     VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
@@ -75,7 +78,7 @@ protected void execute() {
       }
       dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";
 
-      connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA + 
+      connection.voidEval("abundDT <- microbiomeData::AbundanceData(name= " + util.singleQuote(collectionMemberType) + ",data=" + INPUT_DATA + 
                                                                           ",recordIdColumn=" + util.singleQuote(computeEntityIdColName) + 
                                                                           ",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
                                                                           ",imputeZero=TRUE)");

diff --git a/...java/org/veupathdb/service/eda/compute/plugins/selfcorrelation/SelfCorrelationPlugin.java b/...java/org/veupathdb/service/eda/compute/plugins/selfcorrelation/SelfCorrelationPlugin.java
@@ -110,37 +110,23 @@ protected void execute() {
       // i think we cross that bridge when we get there and know more.. 
       // NOTE: getMember tells us the member type, rather than gives us a literal member
       String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
-      boolean isEigengene = false;
+      String dataClassRString = "microbiomeData::AbundanceData";
       if (collectionMemberType.toLowerCase().contains("eigengene")) {
-        isEigengene = true;
+        dataClassRString = "veupathUtils::CollectionWithMetadata";
       }
 
-      // Prep data and run correlation
-      if (isEigengene)  {
-        // If we have eigenegene data, we'll use our base correlation function in veupathUtils, so we
-        // only need to make data frames for the assay data and sample metadata.
-        connection.voidEval("assayData <- assayData[order(" + computeEntityIdColName + ")]; " + 
-          "assayData <- assayData[, -as.character(" + dotNotatedEntityIdColumnsString +"), with=FALSE];" +
-          "assayData <- assayData[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE]");
-
-        connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=assayData" +
-                                  ", method=" + singleQuote(method) +
-                                  ", verbose=TRUE)");
-      } else {
-        // If we don't have eigengene data, for now we can assume the data is abundance data.
-        // Abundance data can go through our microbiomeComputations pipeline.
-        connection.voidEval("data <- microbiomeData::AbundanceData(data=assayData" + 
+      connection.voidEval("data <- " + dataClassRString + "(name=" + singleQuote(collectionMemberType) + ",data=assayData" + 
                                   ", recordIdColumn=" + singleQuote(computeEntityIdColName) +
                                   ", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
                                   ", imputeZero=TRUE)");
-        // Run correlation!
-        connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=data" +
+
+      // Self-correlate the data object built above, forwarding the proportionNonZero/variance/stdDev threshold params.
+      connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=data" +
                                                             ", method=" + singleQuote(method) +
                                                             proportionNonZeroThresholdRParam +
                                                             varianceThresholdRParam +
                                                             stdDevThresholdRParam +
                                                             ", verbose=TRUE)");
-      }
 
       String statsCmd = "writeStatistics(computeResult, NULL, TRUE)";