-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add correlation refactor migration plugin (#89)
- Loading branch information
Showing
2 changed files
with
254 additions
and
0 deletions.
There are no files selected for viewing
111 changes: 111 additions & 0 deletions
111
...ain/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/CorrelationRefactorPlugin.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
public class CorrelationRefactorPlugin extends AbstractAnalysisUpdater { | ||
private static final Logger LOG = Logger.getLogger(CorrelationRefactorPlugin.class); | ||
public static final String COLLECTION_SPEC_KEY = "collectionSpec"; | ||
public static final String DATA_1_KEY = "data1"; | ||
public static final String DATA_2_KEY = "data2"; | ||
public static final String DATA_TYPE_KEY = "dataType"; | ||
public static final String COLLECTION_TYPE = "collection"; | ||
|
||
@Override | ||
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception { | ||
super.configure(wdkModel, additionalArgs); | ||
} | ||
|
||
@Override | ||
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception { | ||
JSONObject descriptor = nextRow.getDescriptor(); | ||
JSONArray computations = descriptor.getJSONArray("computations"); | ||
|
||
LOG.info("Descriptor before migration: " + descriptor); | ||
|
||
boolean updateNeeded = computations.toList().stream() | ||
.anyMatch(CorrelationRefactorPlugin::needsUpdate); | ||
|
||
if (!updateNeeded) { | ||
// Short-circuit to avoid doing migration work. | ||
return new TableRowInterfaces.RowResult<>(nextRow) | ||
.setShouldWrite(false); | ||
} | ||
|
||
List<JSONObject> updatedComputations = computations.toList().stream() | ||
.map(computation -> migrateComputation(new JSONObject((Map<?, ?>) computation))) | ||
.collect(Collectors.toList()); | ||
|
||
descriptor.put("computations", updatedComputations); | ||
|
||
LOG.info("Descriptor after migration: " + descriptor); | ||
|
||
return new TableRowInterfaces.RowResult<>(nextRow) | ||
.setShouldWrite(_writeToDb); | ||
} | ||
|
||
private static boolean needsUpdate(Object computation) { | ||
List<String> migratableComputeTypes = List.of("correlationassaymetadata", "correlationassayassay"); | ||
return migratableComputeTypes.contains(new JSONObject((Map<?, ?>) computation).getJSONObject("descriptor").getString("type")); | ||
} | ||
|
||
/** | ||
* Migrates a computation, mutating it in-place and returning the migrated object. | ||
* | ||
* Only migrates computations of type: | ||
* - correlationassaymetadata | ||
* - correlationassayassay | ||
* @param computation Specification of the computation to migrate. | ||
* @return Migrated computation. | ||
*/ | ||
private JSONObject migrateComputation(JSONObject computation) { | ||
String computationType = computation.getJSONObject("descriptor").getString("type"); | ||
JSONObject descriptor = computation.getJSONObject("descriptor"); | ||
if (computationType.equals("correlationassaymetadata")) { | ||
JSONObject configuration = descriptor.getJSONObject("configuration"); | ||
descriptor.put("type", "correlation"); | ||
|
||
JSONObject data1 = new JSONObject(); | ||
configuration.put(DATA_1_KEY, data1); | ||
data1.put(DATA_TYPE_KEY, COLLECTION_TYPE); | ||
data1.put(COLLECTION_SPEC_KEY, configuration.getJSONObject("collectionVariable")); | ||
configuration.remove("collectionVariable"); | ||
|
||
JSONObject data2 = new JSONObject(); | ||
configuration.put(DATA_2_KEY, data2); | ||
data2.put(DATA_TYPE_KEY, "metadata"); | ||
return computation; | ||
} else if (computationType.equals("correlationassayassay")) { | ||
JSONObject configuration = descriptor.getJSONObject("configuration"); | ||
descriptor.put("type", "correlation"); | ||
|
||
JSONObject data1 = new JSONObject(); | ||
configuration.put(DATA_1_KEY, data1); | ||
data1.put(COLLECTION_SPEC_KEY, configuration.getJSONObject("collectionVariable1")); | ||
data1.put(DATA_TYPE_KEY, COLLECTION_TYPE); | ||
configuration.remove("collectionVariable1"); | ||
|
||
JSONObject data2 = new JSONObject(); | ||
configuration.put(DATA_2_KEY, data2); | ||
data2.put(COLLECTION_SPEC_KEY, configuration.getJSONObject("collectionVariable2")); | ||
data2.put(DATA_TYPE_KEY, COLLECTION_TYPE); | ||
configuration.remove("collectionVariable2"); | ||
return computation; | ||
} | ||
return computation; | ||
} | ||
|
||
@Override | ||
public void dumpStatistics() { | ||
|
||
} | ||
} |
143 changes: 143 additions & 0 deletions
143
...java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/CorrelationRefactorPluginTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONObject; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
public class CorrelationRefactorPluginTest { | ||
|
||
@Test | ||
public void test() throws Exception { | ||
CorrelationRefactorPlugin plugin = new CorrelationRefactorPlugin(); | ||
TableRowInterfaces.RowResult<AnalysisRow> result = plugin.processRecord(new AnalysisRow("id", "id", new JSONObject(testConfig()), 3, 3, 3)); | ||
String descriptorAsString = result.getRow().getDescriptor().toString(); | ||
Assert.assertTrue(descriptorAsString.contains("data1")); | ||
Assert.assertTrue(descriptorAsString.contains("data2")); | ||
} | ||
|
||
private static String testConfig() { | ||
return "{\n" + | ||
" \"subset\": {\n" + | ||
" \"descriptor\": [],\n" + | ||
" \"uiSettings\": {}\n" + | ||
" },\n" + | ||
" \"computations\": [\n" + | ||
" {\n" + | ||
" \"computationId\": \"iopsx\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"correlationassaymetadata\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"prefilterThresholds\": {\n" + | ||
" \"proportionNonZero\": 0.05,\n" + | ||
" \"variance\": 0,\n" + | ||
" \"standardDeviation\": 0\n" + | ||
" },\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000808\",\n" + | ||
" \"collectionId\": \"EUPATH_0009256\"\n" + | ||
" },\n" + | ||
" \"correlationMethod\": \"spearman\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"e7411924-0ecb-48c9-9a8f-f8ea3ed73468\",\n" + | ||
" \"displayName\": \"Taxa correlated with age\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"bipartitenetwork\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"correlationCoefThreshold\": 0.5,\n" + | ||
" \"significanceThreshold\": 0.05\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"computationId\": \"jwdb9\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"abundance\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"collectionId\": \"EUPATH_0009256\",\n" + | ||
" \"entityId\": \"EUPATH_0000808\"\n" + | ||
" },\n" + | ||
" \"rankingMethod\": \"variance\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"6e6afcbf-fbf0-4d3c-bfa3-ab7bbc92dda9\",\n" + | ||
" \"displayName\": \"Top taxa by age (genus); faceted by case/control status\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"boxplot\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"dependentAxisValueSpec\": \"Full\",\n" + | ||
" \"overlayVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0051008\"\n" + | ||
" },\n" + | ||
" \"facetVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"computationId\": \"pia0q\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"alphadiv\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"collectionId\": \"EUPATH_0009256\",\n" + | ||
" \"entityId\": \"EUPATH_0000808\"\n" + | ||
" },\n" + | ||
" \"alphaDivMethod\": \"shannon\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"e7d48cfb-aeb4-4f2a-9c68-699a27ebefa6\",\n" + | ||
" \"displayName\": \"Alpha div (genus) by age w/ case/control status as overlay\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"boxplot\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"dependentAxisValueSpec\": \"Full\",\n" + | ||
" \"xAxisVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0051008\"\n" + | ||
" },\n" + | ||
" \"overlayVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" }\n" + | ||
" ],\n" + | ||
" \"starredVariables\": [\n" + | ||
" {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010367\"\n" + | ||
" }\n" + | ||
" ],\n" + | ||
" \"dataTableConfig\": {},\n" + | ||
" \"derivedVariables\": []\n" + | ||
"}"; | ||
} | ||
} |