Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add correlation refactor migration plugin #89

Merged
merged 4 commits into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.apache.log4j.Logger;
import org.gusdb.wdk.model.WdkModel;
import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONArray;
import org.json.JSONObject;

import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class CorrelationRefactorPlugin extends AbstractAnalysisUpdater {
private static final Logger LOG = Logger.getLogger(CorrelationRefactorPlugin.class);

/**
 * Configures this plugin by delegating straight to the superclass implementation;
 * this class adds no configuration handling of its own.
 *
 * @param wdkModel model passed through to the superclass.
 * @param additionalArgs arguments passed through to the superclass.
 * @throws Exception if the superclass configuration fails.
 */
@Override
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception {
super.configure(wdkModel, additionalArgs);
}

/**
 * Migrates the legacy correlation computations of a single analysis row.
 *
 * The computations are inspected and migrated as {@link JSONObject}s in place. They are
 * deliberately NOT round-tripped through {@code JSONArray.toList()} and
 * {@code new JSONObject(Map)}: that conversion silently drops every object property whose
 * value is JSON null, which would alter computations (and their visualizations) that this
 * plugin has no business touching.
 *
 * @param nextRow row whose descriptor holds a "computations" array.
 * @return result wrapping the same row; flagged for writing only when at least one
 *         computation was migrated and writing to the DB is enabled.
 * @throws Exception if the descriptor does not have the expected structure.
 */
@Override
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception {
  JSONObject descriptor = nextRow.getDescriptor();
  JSONArray computations = descriptor.getJSONArray("computations");

  boolean updateNeeded = false;
  for (int i = 0; i < computations.length() && !updateNeeded; i++) {
    updateNeeded = needsUpdate(computations.getJSONObject(i));
  }

  if (!updateNeeded) {
    // Short-circuit to avoid doing migration work (and to avoid logging untouched rows).
    return new TableRowInterfaces.RowResult<>(nextRow)
        .setShouldWrite(false);
  }

  LOG.info("Descriptor before migration: " + descriptor);

  for (int i = 0; i < computations.length(); i++) {
    JSONObject computation = computations.getJSONObject(i);
    if (needsUpdate(computation)) {
      // Store the returned object back so this works whether the migration mutates
      // in place or hands back a replacement.
      computations.put(i, migrateComputation(computation));
    }
  }

  LOG.info("Descriptor after migration: " + descriptor);

  return new TableRowInterfaces.RowResult<>(nextRow)
      .setShouldWrite(_writeToDb);
}

/** Legacy computation types that this plugin rewrites. */
private static final List<String> MIGRATABLE_COMPUTE_TYPES =
    List.of("correlationassaymetadata", "correlationassayassay");

/**
 * Tells whether a computation is of one of the legacy types handled by this plugin.
 *
 * @param computation a single element of the descriptor's "computations" array.
 * @return true if the computation's descriptor type is a migratable legacy type.
 */
private static boolean needsUpdate(JSONObject computation) {
  String type = computation.getJSONObject("descriptor").getString("type");
  return MIGRATABLE_COMPUTE_TYPES.contains(type);
}

/**
* Migrates a computation, mutating it in-place and returning the migrated object.
*
* Only migrates computations of type:
* - correlationassaymetadata
* - correlationassayassay
* @param computation Specification of the computation to migrate.
* @return Migrated computation.
*/
private JSONObject migrateComputation(JSONObject computation) {
String computationType = computation.getJSONObject("descriptor").getString("type");
JSONObject descriptor = computation.getJSONObject("descriptor");
if (computationType.equals("correlationassaymetadata")) {
JSONObject configuration = descriptor.getJSONObject("configuration");
descriptor.put("type", "correlation");

configuration.put("data1", configuration.getJSONObject("collectionVariable"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't match Danielle's description. Should be

data1: {
    dataType: "collection",
    collectionSpec: <the value previously in collectionVariable>
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Careless mistake :( thanks!

configuration.getJSONObject("data1").put("dataType", "collection");
configuration.remove("collectionVariable");

configuration.put("data2", new JSONObject());
configuration.getJSONObject("data2").put("dataType", "metadata");
return computation;
} else if (computationType.equals("correlationassayassay")) {
JSONObject configuration = descriptor.getJSONObject("configuration");
descriptor.put("type", "correlation");

configuration.put("data1", configuration.getJSONObject("collectionVariable1"));
configuration.getJSONObject("data1").put("dataType", "collection");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same problem as above. The collection spec needs to be nested a layer down in "collectionSpec" under "data1"

configuration.remove("collectionVariable1");

configuration.put("data2", configuration.getJSONObject("collectionVariable2"));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same issue. Needs "collectionSpec" subprop.

configuration.getJSONObject("data2").put("dataType", "collection");
configuration.remove("collectionVariable2");
return computation;
}
return computation;
}

/**
 * Intentionally a no-op: this method body is empty, so nothing is reported here.
 */
@Override
public void dumpStatistics() {

}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins;

import org.gusdb.wdk.model.fix.table.TableRowInterfaces;
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow;
import org.json.JSONObject;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

/**
 * Exercises {@link CorrelationRefactorPlugin#processRecord} against a descriptor holding one
 * legacy correlation computation and two computations of other types.
 */
public class CorrelationRefactorPluginTest {

  /**
   * Verifies that the legacy correlation computation is rewritten and that computations of
   * other types are left as they were.
   */
  @Test
  public void test() throws Exception {
    CorrelationRefactorPlugin plugin = new CorrelationRefactorPlugin();
    TableRowInterfaces.RowResult<AnalysisRow> result = plugin.processRecord(new AnalysisRow("id", "id", new JSONObject(testConfig()), 3, 3, 3));
    JSONObject descriptor = result.getRow().getDescriptor();
    String descriptorAsString = descriptor.toString();
    Assert.assertTrue(descriptorAsString.contains("data1"));
    Assert.assertTrue(descriptorAsString.contains("data2"));

    // First computation is the legacy "correlationassaymetadata" one: it must be migrated.
    JSONObject migrated = descriptor.getJSONArray("computations").getJSONObject(0).getJSONObject("descriptor");
    Assert.assertEquals("correlation", migrated.getString("type"));
    JSONObject migratedConfig = migrated.getJSONObject("configuration");
    Assert.assertFalse(migratedConfig.has("collectionVariable"));
    Assert.assertEquals("collection", migratedConfig.getJSONObject("data1").getString("dataType"));
    Assert.assertEquals("metadata", migratedConfig.getJSONObject("data2").getString("dataType"));
    // Settings unrelated to the migration must survive.
    Assert.assertEquals("spearman", migratedConfig.getString("correlationMethod"));
    Assert.assertTrue(migratedConfig.has("prefilterThresholds"));

    // Second computation is of a different type and also has a "collectionVariable":
    // it must not be touched.
    JSONObject untouched = descriptor.getJSONArray("computations").getJSONObject(1).getJSONObject("descriptor");
    Assert.assertEquals("abundance", untouched.getString("type"));
    JSONObject untouchedConfig = untouched.getJSONObject("configuration");
    Assert.assertTrue(untouchedConfig.has("collectionVariable"));
    Assert.assertFalse(untouchedConfig.has("data1"));
    Assert.assertFalse(untouchedConfig.has("data2"));

    Assert.assertEquals(3, descriptor.getJSONArray("computations").length());
  }

  /** Returns the analysis descriptor JSON used as the test fixture. */
  private static String testConfig() {
    return "{\n" +
        " \"subset\": {\n" +
        " \"descriptor\": [],\n" +
        " \"uiSettings\": {}\n" +
        " },\n" +
        " \"computations\": [\n" +
        " {\n" +
        " \"computationId\": \"iopsx\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"correlationassaymetadata\",\n" +
        " \"configuration\": {\n" +
        " \"prefilterThresholds\": {\n" +
        " \"proportionNonZero\": 0.05,\n" +
        " \"variance\": 0,\n" +
        " \"standardDeviation\": 0\n" +
        " },\n" +
        " \"collectionVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000808\",\n" +
        " \"collectionId\": \"EUPATH_0009256\"\n" +
        " },\n" +
        " \"correlationMethod\": \"spearman\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"e7411924-0ecb-48c9-9a8f-f8ea3ed73468\",\n" +
        " \"displayName\": \"Taxa correlated with age\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"bipartitenetwork\",\n" +
        " \"configuration\": {\n" +
        " \"correlationCoefThreshold\": 0.5,\n" +
        " \"significanceThreshold\": 0.05\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " },\n" +
        " {\n" +
        " \"computationId\": \"jwdb9\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"abundance\",\n" +
        " \"configuration\": {\n" +
        " \"collectionVariable\": {\n" +
        " \"collectionId\": \"EUPATH_0009256\",\n" +
        " \"entityId\": \"EUPATH_0000808\"\n" +
        " },\n" +
        " \"rankingMethod\": \"variance\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"6e6afcbf-fbf0-4d3c-bfa3-ab7bbc92dda9\",\n" +
        " \"displayName\": \"Top taxa by age (genus); faceted by case/control status\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"boxplot\",\n" +
        " \"configuration\": {\n" +
        " \"dependentAxisValueSpec\": \"Full\",\n" +
        " \"overlayVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0051008\"\n" +
        " },\n" +
        " \"facetVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " }\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " },\n" +
        " {\n" +
        " \"computationId\": \"pia0q\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"alphadiv\",\n" +
        " \"configuration\": {\n" +
        " \"collectionVariable\": {\n" +
        " \"collectionId\": \"EUPATH_0009256\",\n" +
        " \"entityId\": \"EUPATH_0000808\"\n" +
        " },\n" +
        " \"alphaDivMethod\": \"shannon\"\n" +
        " }\n" +
        " },\n" +
        " \"visualizations\": [\n" +
        " {\n" +
        " \"visualizationId\": \"e7d48cfb-aeb4-4f2a-9c68-699a27ebefa6\",\n" +
        " \"displayName\": \"Alpha div (genus) by age w/ case/control status as overlay\",\n" +
        " \"descriptor\": {\n" +
        " \"type\": \"boxplot\",\n" +
        " \"configuration\": {\n" +
        " \"dependentAxisValueSpec\": \"Full\",\n" +
        " \"xAxisVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0051008\"\n" +
        " },\n" +
        " \"overlayVariable\": {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " }\n" +
        " },\n" +
        " \"currentPlotFilters\": []\n" +
        " }\n" +
        " }\n" +
        " ]\n" +
        " }\n" +
        " ],\n" +
        " \"starredVariables\": [\n" +
        " {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010375\"\n" +
        " },\n" +
        " {\n" +
        " \"entityId\": \"EUPATH_0000096\",\n" +
        " \"variableId\": \"EUPATH_0010367\"\n" +
        " }\n" +
        " ],\n" +
        " \"dataTableConfig\": {},\n" +
        " \"derivedVariables\": []\n" +
        "}";
  }
}
Loading