Skip to content
This repository has been archived by the owner on Aug 27, 2024. It is now read-only.

prefilters for eigengenes #91

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.gusdb.fgputil.ListBuilder;
import org.jetbrains.annotations.NotNull;
import org.veupathdb.service.eda.common.client.spec.StreamSpec;
import org.veupathdb.service.eda.common.model.CollectionDef;
import org.veupathdb.service.eda.common.model.EntityDef;
import org.veupathdb.service.eda.common.model.ReferenceMetadata;
import org.veupathdb.service.eda.common.model.VariableDef;
Expand Down Expand Up @@ -42,6 +43,8 @@ protected void execute() {
AlphaDivComputeConfig computeConfig = getConfig();
PluginUtil util = getUtil();
ReferenceMetadata meta = getContext().getReferenceMetadata();
CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
String entityId = computeConfig.getCollectionVariable().getEntityId();
EntityDef entity = meta.getEntity(entityId).orElseThrow();
VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
Expand Down Expand Up @@ -74,7 +77,7 @@ protected void execute() {
}
dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";

connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA +
connection.voidEval("abundDT <- microbiomeData::AbundanceData(name=" + util.singleQuote(collectionMemberType) + ",data=" + INPUT_DATA +
",recordIdColumn=" + util.singleQuote(computeEntityIdColName) +
",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
",imputeZero=TRUE)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.gusdb.fgputil.ListBuilder;
import org.jetbrains.annotations.NotNull;
import org.veupathdb.service.eda.common.client.spec.StreamSpec;
import org.veupathdb.service.eda.common.model.CollectionDef;
import org.veupathdb.service.eda.common.model.EntityDef;
import org.veupathdb.service.eda.common.model.ReferenceMetadata;
import org.veupathdb.service.eda.common.model.VariableDef;
Expand Down Expand Up @@ -43,6 +44,8 @@ protected void execute() {
BetaDivComputeConfig computeConfig = getConfig();
PluginUtil util = getUtil();
ReferenceMetadata meta = getContext().getReferenceMetadata();
CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
String entityId = computeConfig.getCollectionVariable().getEntityId();
EntityDef entity = meta.getEntity(entityId).orElseThrow();
VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
Expand Down Expand Up @@ -76,7 +79,7 @@ protected void execute() {
}
dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";

connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA +
connection.voidEval("abundDT <- microbiomeData::AbundanceData(name=" + singleQuote(collectionMemberType) + ",data=" + INPUT_DATA +
",recordIdColumn=" + singleQuote(computeEntityIdColName) +
",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
",imputeZero=TRUE)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,9 @@ protected void execute() {
// NOTE: getMember tells us the member type, rather than giving us a literal member
String assayType = collection.getMember() == null ? "unknown" : collection.getMember();

boolean isEigengene = false;
String dataClassRString = "microbiomeData::AbundanceData";
if (assayType.toLowerCase().contains("eigengene")) {
isEigengene = true;
dataClassRString = "veupathUtils::CollectionWithMetadata";
}

// THIS CASE IS ASSAY X METADATA
Expand All @@ -208,43 +208,24 @@ protected void execute() {
connection.voidEval(util.getVoidEvalFreadCommand(INPUT_DATA, metadataInputVars));
connection.voidEval("sampleMetadata <- " + INPUT_DATA);

// Prep data and run correlation
if (isEigengene) {
// If we have eigengene data, we'll use our base correlation function in veupathUtils, so we
// only need to make data frames for the assay data and sample metadata.
connection.voidEval("eigengeneData <- assayData[order(" + computeEntityIdColName + ")]; " +
"eigengeneData <- eigengeneData[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
"eigengeneData <- eigengeneData[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE];");

connection.voidEval("sampleMetadata <- sampleMetadata[order(" + computeEntityIdColName + ")]; " +
"sampleMetadata <- sampleMetadata[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
"sampleMetadata <- sampleMetadata[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE];");

connection.voidEval("computeResult <- veupathUtils::correlation(data1=eigengeneData, data2=sampleMetadata" +
", method=" + singleQuote(method) +
", verbose=TRUE)");
} else {
// If we don't have eigengene data, for now we can assume the data is abundance data.
// Abundance data can go through our microbiomeComputations pipeline.
connection.voidEval("sampleMetadata <- microbiomeData::SampleMetadata(data = sampleMetadata" +
connection.voidEval("sampleMetadata <- microbiomeData::SampleMetadata(data = sampleMetadata" +
", recordIdColumn=" + singleQuote(computeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + "))");

connection.voidEval("abundanceData <- microbiomeData::AbundanceData(data=assayData" +
connection.voidEval("abundanceData <- " + dataClassRString + "(name= " + singleQuote(assayType) + ",data=assayData" +
", sampleMetadata=sampleMetadata" +
", recordIdColumn=" + singleQuote(computeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
", imputeZero=TRUE)");

// Run correlation!
connection.voidEval("computeResult <- veupathUtils::correlation(data1=abundanceData" +
// Run correlation!
connection.voidEval("computeResult <- veupathUtils::correlation(data1=abundanceData" +
", method=" + singleQuote(method) +
proportionNonZeroThresholdRParam +
varianceThresholdRParam +
stdDevThresholdRParam +
", verbose=TRUE)");
}
// THIS CASE IS ASSAY X ASSAY
// THIS CASE IS ASSAY X ASSAY
} else {
// Get the second assay collection
CollectionSpec assay2 = computeConfig.getCollectionVariable2();
Expand Down Expand Up @@ -282,50 +263,25 @@ protected void execute() {
String dotNotatedEntity2IdColumnsString = util.listToRVector(dotNotatedEntity2IdColumns);

String collection2MemberType = collection2.getMember() == null ? "unknown" : collection2.getMember();
// If either collection is an eigengene, we'll use our base correlation function in veupathUtils,
// so we want to set the isEigengene flag to true.
if (collection2MemberType.toLowerCase().contains("eigengene")) {
isEigengene = true;
}
String data2ClassRString = collection2MemberType.toLowerCase().contains("eigengene") ? "veupathUtils::CollectionWithMetadata" : "microbiomeData::AbundanceData";

// Prep data and run correlation
if (isEigengene) {
// If we have eigengene data, we'll use our base correlation function in veupathUtils, so we
// only need to make data frames for the assay data and sample metadata.
connection.voidEval("data1 <- assayData; " +
"data1 <- data1[order(" + revisedComputeEntityIdColName + ")]; " +
"data1 <- data1[, -as.character(" + dotNotatedEntityIdColumnsString + "), with=FALSE];" +
"data1 <- data1[, -" + singleQuote(revisedComputeEntityIdColName) + ", with=FALSE]");

connection.voidEval("data2 <- assay2Data; " +
"data2 <- data2[order(" + revisedComputeEntityIdColName + ")]; " +
"data2 <- data2[, -as.character(" + dotNotatedEntity2IdColumnsString + "), with=FALSE];" +
"data2 <- data2[, -" + singleQuote(revisedComputeEntityIdColName) + ", with=FALSE]");

connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
", method=" + singleQuote(method) +
", verbose=TRUE)");
} else {
// If we don't have eigengene data, for now we can assume the data is abundance data.
// Abundance data can go through our microbiomeComputations pipeline.
connection.voidEval("data1 <- microbiomeData::AbundanceData(data=assayData" +
connection.voidEval("data1 <- " + dataClassRString + "(name= " + singleQuote(assayType) + ",data=assayData" +
", recordIdColumn=" + singleQuote(revisedComputeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
", imputeZero=TRUE)");

connection.voidEval("data2 <- microbiomeData::AbundanceData(data = assay2Data" +
connection.voidEval("data2 <- " + data2ClassRString + "(name= " + singleQuote(collection2MemberType) + ",data = assay2Data" +
", recordIdColumn=" + singleQuote(revisedComputeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedEntity2IdColumnsString + ")" +
", imputeZero=TRUE)");

// Run correlation!
connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
// Run correlation!
connection.voidEval("computeResult <- veupathUtils::correlation(data1=data1, data2=data2" +
", method=" + singleQuote(method) +
proportionNonZeroThresholdRParam +
varianceThresholdRParam +
stdDevThresholdRParam +
", verbose=TRUE)");
}
}

// Write results
Expand All @@ -334,4 +290,4 @@ protected void execute() {
getWorkspace().writeStatisticsResult(connection, statsCmd);
});
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.gusdb.fgputil.ListBuilder;
import org.jetbrains.annotations.NotNull;
import org.veupathdb.service.eda.common.client.spec.StreamSpec;
import org.veupathdb.service.eda.common.model.CollectionDef;
import org.veupathdb.service.eda.common.model.EntityDef;
import org.veupathdb.service.eda.common.model.ReferenceMetadata;
import org.veupathdb.service.eda.common.model.VariableDef;
Expand Down Expand Up @@ -48,6 +49,8 @@ protected void execute() {
ReferenceMetadata meta = getContext().getReferenceMetadata();

CollectionSpec collectionSpec = computeConfig.getCollectionVariable();
CollectionDef collection = meta.getCollection(collectionSpec).orElseThrow();
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
String entityId = collectionSpec.getEntityId();
EntityDef entity = meta.getEntity(entityId).orElseThrow();
VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
Expand Down Expand Up @@ -125,7 +128,8 @@ protected void execute() {
{
abundanceDataClass = "AbsoluteAbundanceData";
}
connection.voidEval("inputData <- microbiomeData::" + abundanceDataClass + "(data=abundanceData" +
connection.voidEval("inputData <- microbiomeData::" + abundanceDataClass + "(name=" + singleQuote(collectionMemberType) +
", data=abundanceData" +
", sampleMetadata=sampleMetadata" +
", recordIdColumn=" + singleQuote(computeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import org.gusdb.fgputil.ListBuilder;
import org.jetbrains.annotations.NotNull;
import org.veupathdb.service.eda.common.client.spec.StreamSpec;
import org.veupathdb.service.eda.common.model.CollectionDef;
import org.veupathdb.service.eda.common.model.EntityDef;
import org.veupathdb.service.eda.common.model.ReferenceMetadata;
import org.veupathdb.service.eda.common.model.VariableDef;
Expand Down Expand Up @@ -42,6 +43,8 @@ protected void execute() {
RankedAbundanceComputeConfig computeConfig = getConfig();
PluginUtil util = getUtil();
ReferenceMetadata meta = getContext().getReferenceMetadata();
CollectionDef collection = meta.getCollection(computeConfig.getCollectionVariable()).orElseThrow();
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
String entityId = computeConfig.getCollectionVariable().getEntityId();
EntityDef entity = meta.getEntity(entityId).orElseThrow();
VariableDef computeEntityIdVarSpec = util.getEntityIdVarSpec(entityId);
Expand Down Expand Up @@ -75,7 +78,7 @@ protected void execute() {
}
dotNotatedIdColumnsString = dotNotatedIdColumnsString + ")";

connection.voidEval("abundDT <- microbiomeData::AbundanceData(data=" + INPUT_DATA +
connection.voidEval("abundDT <- microbiomeData::AbundanceData(name= " + util.singleQuote(collectionMemberType) + ",data=" + INPUT_DATA +
",recordIdColumn=" + util.singleQuote(computeEntityIdColName) +
",ancestorIdColumns=as.character(" + dotNotatedIdColumnsString + ")" +
",imputeZero=TRUE)");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,37 +110,23 @@ protected void execute() {
// i think we cross that bridge when we get there and know more..
// NOTE: getMember tells us the member type, rather than giving us a literal member
String collectionMemberType = collection.getMember() == null ? "unknown" : collection.getMember();
boolean isEigengene = false;
String dataClassRString = "microbiomeData::AbundanceData";
if (collectionMemberType.toLowerCase().contains("eigengene")) {
isEigengene = true;
dataClassRString = "veupathUtils::CollectionWithMetadata";
}

// Prep data and run correlation
if (isEigengene) {
// If we have eigengene data, we'll use our base correlation function in veupathUtils, so we
// only need to make data frames for the assay data and sample metadata.
connection.voidEval("assayData <- assayData[order(" + computeEntityIdColName + ")]; " +
"assayData <- assayData[, -as.character(" + dotNotatedEntityIdColumnsString +"), with=FALSE];" +
"assayData <- assayData[, -" + singleQuote(computeEntityIdColName) + ", with=FALSE]");

connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=assayData" +
", method=" + singleQuote(method) +
", verbose=TRUE)");
} else {
// If we don't have eigengene data, for now we can assume the data is abundance data.
// Abundance data can go through our microbiomeComputations pipeline.
connection.voidEval("data <- microbiomeData::AbundanceData(data=assayData" +
connection.voidEval("data <- " + dataClassRString + "(name=" + singleQuote(collectionMemberType) + ",data=assayData" +
", recordIdColumn=" + singleQuote(computeEntityIdColName) +
", ancestorIdColumns=as.character(" + dotNotatedEntityIdColumnsString + ")" +
", imputeZero=TRUE)");
// Run correlation!
connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=data" +

// Run correlation!
connection.voidEval("computeResult <- veupathUtils::selfCorrelation(data=data" +
", method=" + singleQuote(method) +
proportionNonZeroThresholdRParam +
varianceThresholdRParam +
stdDevThresholdRParam +
", verbose=TRUE)");
}

String statsCmd = "writeStatistics(computeResult, NULL, TRUE)";

Expand Down