Skip to content

Commit

Permalink
Add correlation refactor migration plugin (#89)
Browse files Browse the repository at this point in the history
  • Loading branch information
dmgaldi authored Apr 26, 2024
1 parent cbd4618 commit f3f30ee
Show file tree
Hide file tree
Showing 2 changed files with 254 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.apache.log4j.Logger;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Analysis-table migration plugin that rewrites the two legacy correlation
 * computation types ({@code correlationassaymetadata} and
 * {@code correlationassayassay}) into the unified {@code correlation} type.
 *
 * <p>For each migrated computation the legacy {@code collectionVariable*}
 * entries of its configuration are moved under {@code data1} / {@code data2},
 * each tagged with a {@code dataType}. Computations of any other type are left
 * exactly as they were.
 */
public class CorrelationRefactorPlugin extends AbstractAnalysisUpdater {
  private static final Logger LOG = Logger.getLogger(CorrelationRefactorPlugin.class);

  public static final String COLLECTION_SPEC_KEY = "collectionSpec";
  public static final String DATA_1_KEY = "data1";
  public static final String DATA_2_KEY = "data2";
  public static final String DATA_TYPE_KEY = "dataType";
  public static final String COLLECTION_TYPE = "collection";

  private static final String COMPUTATIONS_KEY = "computations";
  private static final String DESCRIPTOR_KEY = "descriptor";
  private static final String TYPE_KEY = "type";
  private static final String CONFIGURATION_KEY = "configuration";

  private static final String METADATA_TYPE = "metadata";
  private static final String MIGRATED_COMPUTE_TYPE = "correlation";
  private static final String ASSAY_METADATA_COMPUTE_TYPE = "correlationassaymetadata";
  private static final String ASSAY_ASSAY_COMPUTE_TYPE = "correlationassayassay";

  /** Legacy computation types this plugin knows how to migrate. */
  private static final List<String> MIGRATABLE_COMPUTE_TYPES =
      List.of(ASSAY_METADATA_COMPUTE_TYPE, ASSAY_ASSAY_COMPUTE_TYPE);

  /**
   * Delegates to the superclass; this plugin takes no plugin-specific arguments.
   *
   * @param wdkModel       model passed through to the superclass
   * @param additionalArgs arguments passed through to the superclass
   * @throws Exception if the superclass configuration fails
   */
  @Override
  public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
    super.configure(wdkModel, additionalArgs);
  }

  /**
   * Migrates every legacy correlation computation found in the row's descriptor.
   *
   * <p>Rows without a {@code computations} array, or without any migratable
   * computation, are returned with {@code shouldWrite} set to {@code false}.
   * Otherwise the matching computations are rewritten in place inside the row's
   * descriptor and {@code shouldWrite} is set from {@code _writeToDb}.
   *
   * @param nextRow row whose descriptor is inspected and possibly modified
   * @return result wrapping {@code nextRow}
   * @throws Exception if a migratable computation has no {@code configuration}
   *                   object (surfaced as the JSON library's exception)
   */
  @Override
  public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
    JSONObject descriptor = nextRow.getDescriptor();
    JSONArray computations = descriptor.optJSONArray(COMPUTATIONS_KEY);

    if (!containsMigratableComputation(computations)) {
      // Short-circuit to avoid doing migration work (and logging) for untouched rows.
      return new TableRowInterfaces.RowResult<>(nextRow)
          .setShouldWrite(false);
    }

    LOG.info("Descriptor before migration: " + descriptor);

    // Work on the live JSONObjects rather than round-tripping the array through
    // toList()/new JSONObject(Map): that round trip rebuilds unrelated computations
    // and drops object members whose value is JSON null.
    for (int i = 0; i < computations.length(); i++) {
      JSONObject computation = computations.optJSONObject(i);
      if (computation != null && needsUpdate(computation)) {
        migrateComputation(computation);
      }
    }

    LOG.info("Descriptor after migration: " + descriptor);

    return new TableRowInterfaces.RowResult<>(nextRow)
        .setShouldWrite(_writeToDb);
  }

  /**
   * Tells whether any element of the array is a computation this plugin migrates.
   *
   * @param computations computations array of a descriptor; may be {@code null}
   * @return {@code true} if at least one element needs migration
   */
  private static boolean containsMigratableComputation(JSONArray computations) {
    if (computations == null) {
      return false;
    }
    for (int i = 0; i < computations.length(); i++) {
      JSONObject computation = computations.optJSONObject(i);
      if (computation != null && needsUpdate(computation)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Tells whether a computation is of a legacy correlation type. A computation
   * without a {@code descriptor} object or a {@code type} is treated as not
   * migratable instead of failing the whole row.
   *
   * @param computation computation to inspect
   * @return {@code true} if its descriptor type is one of the migratable types
   */
  private static boolean needsUpdate(JSONObject computation) {
    JSONObject descriptor = computation.optJSONObject(DESCRIPTOR_KEY);
    return descriptor != null
        && MIGRATABLE_COMPUTE_TYPES.contains(descriptor.optString(TYPE_KEY));
  }

  /**
   * Migrates a computation, mutating it in-place and returning the migrated object.
   *
   * Only migrates computations of type:
   * - correlationassaymetadata
   * - correlationassayassay
   *
   * Any other computation is returned unchanged.
   *
   * @param computation Specification of the computation to migrate.
   * @return Migrated computation (the same instance that was passed in).
   */
  private JSONObject migrateComputation(JSONObject computation) {
    JSONObject descriptor = computation.getJSONObject(DESCRIPTOR_KEY);
    String computationType = descriptor.getString(TYPE_KEY);
    if (!MIGRATABLE_COMPUTE_TYPES.contains(computationType)) {
      return computation;
    }

    // Resolve the configuration before touching anything so that a computation
    // without one fails before it has been partially rewritten.
    JSONObject configuration = descriptor.getJSONObject(CONFIGURATION_KEY);

    JSONObject data1;
    JSONObject data2;
    if (ASSAY_METADATA_COMPUTE_TYPE.equals(computationType)) {
      data1 = extractCollectionData(configuration, "collectionVariable");
      data2 = new JSONObject().put(DATA_TYPE_KEY, METADATA_TYPE);
    } else {
      data1 = extractCollectionData(configuration, "collectionVariable1");
      data2 = extractCollectionData(configuration, "collectionVariable2");
    }

    configuration.put(DATA_1_KEY, data1);
    configuration.put(DATA_2_KEY, data2);
    descriptor.put(TYPE_KEY, MIGRATED_COMPUTE_TYPE);
    return computation;
  }

  /**
   * Builds a {@code collection}-typed data entry from a legacy collection key and
   * removes that key from the configuration.
   *
   * <p>If the legacy key is absent or not a JSON object, the entry carries only
   * its {@code dataType}.
   * NOTE(review): assumes an absent legacy key means the collection was never
   * chosen and that the new format accepts an entry without a spec - confirm.
   *
   * @param configuration configuration object being migrated; modified in place
   * @param legacyKey     legacy key holding the collection specification
   * @return new data entry for the migrated configuration
   */
  private static JSONObject extractCollectionData(JSONObject configuration, String legacyKey) {
    JSONObject data = new JSONObject().put(DATA_TYPE_KEY, COLLECTION_TYPE);
    JSONObject collectionSpec = configuration.optJSONObject(legacyKey);
    if (collectionSpec != null) {
      data.put(COLLECTION_SPEC_KEY, collectionSpec);
    }
    configuration.remove(legacyKey);
    return data;
  }

  /** Intentionally empty: this plugin does not collect any statistics. */
  @Override
  public void dumpStatistics() {

  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@link CorrelationRefactorPlugin}, run against a descriptor that holds
 * one legacy {@code correlationassaymetadata} computation followed by two
 * computations of unrelated types.
 */
public class CorrelationRefactorPluginTest {

  /**
   * Checks the structure of the migrated descriptor: the legacy computation is
   * retyped to {@code correlation}, its {@code collectionVariable} is moved under
   * {@code data1.collectionSpec}, {@code data2} is tagged as metadata, and the
   * other computations keep their type and configuration.
   */
  @Test
  public void test() throws Exception {
    CorrelationRefactorPlugin plugin = new CorrelationRefactorPlugin();
    TableRowInterfaces.RowResult<AnalysisRow> result = plugin.processRecord(new AnalysisRow("id", "id", new JSONObject(testConfig()), 3, 3, 3));
    JSONObject migrated = result.getRow().getDescriptor();

    // No computation is added, dropped or reordered.
    Assert.assertEquals(3, migrated.getJSONArray("computations").length());

    JSONObject correlation = migrated.getJSONArray("computations").getJSONObject(0).getJSONObject("descriptor");
    Assert.assertEquals("correlation", correlation.getString("type"));

    JSONObject configuration = correlation.getJSONObject("configuration");
    Assert.assertFalse(configuration.has("collectionVariable"));
    Assert.assertEquals("spearman", configuration.getString("correlationMethod"));
    Assert.assertTrue(configuration.has("prefilterThresholds"));

    JSONObject data1 = configuration.getJSONObject("data1");
    Assert.assertEquals("collection", data1.getString("dataType"));
    Assert.assertEquals("EUPATH_0000808", data1.getJSONObject("collectionSpec").getString("entityId"));
    Assert.assertEquals("EUPATH_0009256", data1.getJSONObject("collectionSpec").getString("collectionId"));

    JSONObject data2 = configuration.getJSONObject("data2");
    Assert.assertEquals("metadata", data2.getString("dataType"));
    Assert.assertFalse(data2.has("collectionSpec"));

    // Computations of other types must come through untouched.
    JSONObject abundance = migrated.getJSONArray("computations").getJSONObject(1).getJSONObject("descriptor");
    Assert.assertEquals("abundance", abundance.getString("type"));
    Assert.assertTrue(abundance.getJSONObject("configuration").has("collectionVariable"));
    Assert.assertFalse(abundance.getJSONObject("configuration").has("data1"));

    JSONObject alphaDiv = migrated.getJSONArray("computations").getJSONObject(2).getJSONObject("descriptor");
    Assert.assertEquals("alphadiv", alphaDiv.getString("type"));
    Assert.assertTrue(alphaDiv.getJSONObject("configuration").has("collectionVariable"));
    Assert.assertFalse(alphaDiv.getJSONObject("configuration").has("data1"));
  }

  /**
   * Returns the analysis descriptor fixture as JSON text: three computations
   * (correlationassaymetadata, abundance, alphadiv) plus subset, starred
   * variables and table configuration sections.
   */
  private static String testConfig() {
    return "{\n" +
        " \"subset\": {\n" +
        " \"descriptor\": [],\n" +
        " \"uiSettings\": {}\n" +
        " },\n" +
        " \"computations\": [\n" +
        " {\n" +
        " \"computationId\": \"iopsx\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"correlationassaymetadata\",\n" +
        " \"configuration\": {\n" +
        " \"prefilterThresholds\": {\n" +
        " \"proportionNonZero\": 0.05,\n" +
        " \"variance\": 0,\n" +
        " \"standardDeviation\": 0\n" +
        " },\n" +
        " \"collectionVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000808\",\n" +
        " \"collectionId\": \"EUPATH_0009256\"\n" +
        " },\n" +
        " \"correlationMethod\": \"spearman\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"e7411924-0ecb-48c9-9a8f-f8ea3ed73468\",\n" +
        " \"displayName\": \"Taxa correlated with age\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"bipartitenetwork\",\n" +
        " \"configuration\": {\n" +
        " \"correlationCoefThreshold\": 0.5,\n" +
        " \"significanceThreshold\": 0.05\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " },\n" +
        " {\n" +
        " \"computationId\": \"jwdb9\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"abundance\",\n" +
        " \"configuration\": {\n" +
        " \"collectionVariable\": {\n" +
        " \"collectionId\": \"EUPATH_0009256\",\n" +
        " \"entityId\": \"EUPATH_0000808\"\n" +
        " },\n" +
        " \"rankingMethod\": \"variance\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"6e6afcbf-fbf0-4d3c-bfa3-ab7bbc92dda9\",\n" +
        " \"displayName\": \"Top taxa by age (genus); faceted by case/control status\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"boxplot\",\n" +
        " \"configuration\": {\n" +
        " \"dependentAxisValueSpec\": \"Full\",\n" +
        " \"overlayVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0051008\"\n" +
        " },\n" +
        " \"facetVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " }\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " },\n" +
        " {\n" +
        " \"computationId\": \"pia0q\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"alphadiv\",\n" +
        " \"configuration\": {\n" +
        " \"collectionVariable\": {\n" +
        " \"collectionId\": \"EUPATH_0009256\",\n" +
        " \"entityId\": \"EUPATH_0000808\"\n" +
        " },\n" +
        " \"alphaDivMethod\": \"shannon\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"e7d48cfb-aeb4-4f2a-9c68-699a27ebefa6\",\n" +
        " \"displayName\": \"Alpha div (genus) by age w/ case/control status as overlay\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"boxplot\",\n" +
        " \"configuration\": {\n" +
        " \"dependentAxisValueSpec\": \"Full\",\n" +
        " \"xAxisVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0051008\"\n" +
        " },\n" +
        " \"overlayVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " }\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " }\n" +
        " ],\n" +
        " \"starredVariables\": [\n" +
        " {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " },\n" +
        " {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010367\"\n" +
        " }\n" +
        " ],\n" +
        " \"dataTableConfig\": {},\n" +
        " \"derivedVariables\": []\n" +
        "}";
  }
}

0 comments on commit f3f30ee

Please sign in to comment.