-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add correlation refactor migration plugin #89
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.apache.log4j.Logger; | ||
import org.gusdb.wdk.model.WdkModel; | ||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONArray; | ||
import org.json.JSONObject; | ||
|
||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.stream.Collectors; | ||
|
||
public class CorrelationRefactorPlugin extends AbstractAnalysisUpdater { | ||
private static final Logger LOG = Logger.getLogger(CorrelationRefactorPlugin.class); | ||
|
||
@Override | ||
public void configure(WdkModel wdkModel, List<String> additionalArgs) throws Exception { | ||
super.configure(wdkModel, additionalArgs); | ||
} | ||
|
||
@Override | ||
public TableRowInterfaces.RowResult<AnalysisRow> processRecord(AnalysisRow nextRow) throws Exception { | ||
JSONObject descriptor = nextRow.getDescriptor(); | ||
JSONArray computations = descriptor.getJSONArray("computations"); | ||
|
||
LOG.info("Descriptor before migration: " + descriptor); | ||
|
||
boolean updateNeeded = computations.toList().stream() | ||
.anyMatch(CorrelationRefactorPlugin::needsUpdate); | ||
|
||
if (!updateNeeded) { | ||
// Short-circuit to avoid doing migration work. | ||
return new TableRowInterfaces.RowResult<>(nextRow) | ||
.setShouldWrite(false); | ||
} | ||
|
||
List<JSONObject> updatedComputations = computations.toList().stream() | ||
.map(computation -> migrateComputation(new JSONObject((Map<?, ?>) computation))) | ||
.collect(Collectors.toList()); | ||
|
||
descriptor.put("computations", updatedComputations); | ||
|
||
LOG.info("Descriptor after migration: " + descriptor); | ||
|
||
return new TableRowInterfaces.RowResult<>(nextRow) | ||
.setShouldWrite(_writeToDb); | ||
} | ||
|
||
private static boolean needsUpdate(Object computation) { | ||
List<String> migratableComputeTypes = List.of("correlationassaymetadata", "correlationassayassay"); | ||
return migratableComputeTypes.contains(new JSONObject((Map<?, ?>) computation).getJSONObject("descriptor").getString("type")); | ||
} | ||
|
||
/** | ||
* Migrates a computation, mutating it in-place and returning the migrated object. | ||
* | ||
* Only migrates computations of type: | ||
* - correlationassaymetadata | ||
* - correlationassayassay | ||
* @param computation Specification of the computation to migrate. | ||
* @return Migrated computation. | ||
*/ | ||
private JSONObject migrateComputation(JSONObject computation) { | ||
String computationType = computation.getJSONObject("descriptor").getString("type"); | ||
JSONObject descriptor = computation.getJSONObject("descriptor"); | ||
if (computationType.equals("correlationassaymetadata")) { | ||
JSONObject configuration = descriptor.getJSONObject("configuration"); | ||
descriptor.put("type", "correlation"); | ||
|
||
configuration.put("data1", configuration.getJSONObject("collectionVariable")); | ||
configuration.getJSONObject("data1").put("dataType", "collection"); | ||
configuration.remove("collectionVariable"); | ||
|
||
configuration.put("data2", new JSONObject()); | ||
configuration.getJSONObject("data2").put("dataType", "metadata"); | ||
return computation; | ||
} else if (computationType.equals("correlationassayassay")) { | ||
JSONObject configuration = descriptor.getJSONObject("configuration"); | ||
descriptor.put("type", "correlation"); | ||
|
||
configuration.put("data1", configuration.getJSONObject("collectionVariable1")); | ||
configuration.getJSONObject("data1").put("dataType", "collection"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same problem as above. The collection spec needs to be nested a layer down in "collectionSpec" under "data1" |
||
configuration.remove("collectionVariable1"); | ||
|
||
configuration.put("data2", configuration.getJSONObject("collectionVariable2")); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same issue. Needs "collectionSpec" subprop. |
||
configuration.getJSONObject("data2").put("dataType", "collection"); | ||
configuration.remove("collectionVariable2"); | ||
return computation; | ||
} | ||
return computation; | ||
} | ||
|
||
@Override | ||
public void dumpStatistics() { | ||
|
||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,143 @@ | ||
package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; | ||
|
||
import org.gusdb.wdk.model.fix.table.TableRowInterfaces; | ||
import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; | ||
import org.json.JSONObject; | ||
import org.junit.Assert; | ||
import org.junit.Before; | ||
import org.junit.Test; | ||
|
||
public class CorrelationRefactorPluginTest { | ||
|
||
@Test | ||
public void test() throws Exception { | ||
CorrelationRefactorPlugin plugin = new CorrelationRefactorPlugin(); | ||
TableRowInterfaces.RowResult<AnalysisRow> result = plugin.processRecord(new AnalysisRow("id", "id", new JSONObject(testConfig()), 3, 3, 3)); | ||
String descriptorAsString = result.getRow().getDescriptor().toString(); | ||
Assert.assertTrue(descriptorAsString.contains("data1")); | ||
Assert.assertTrue(descriptorAsString.contains("data2")); | ||
} | ||
|
||
private static String testConfig() { | ||
return "{\n" + | ||
" \"subset\": {\n" + | ||
" \"descriptor\": [],\n" + | ||
" \"uiSettings\": {}\n" + | ||
" },\n" + | ||
" \"computations\": [\n" + | ||
" {\n" + | ||
" \"computationId\": \"iopsx\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"correlationassaymetadata\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"prefilterThresholds\": {\n" + | ||
" \"proportionNonZero\": 0.05,\n" + | ||
" \"variance\": 0,\n" + | ||
" \"standardDeviation\": 0\n" + | ||
" },\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000808\",\n" + | ||
" \"collectionId\": \"EUPATH_0009256\"\n" + | ||
" },\n" + | ||
" \"correlationMethod\": \"spearman\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"e7411924-0ecb-48c9-9a8f-f8ea3ed73468\",\n" + | ||
" \"displayName\": \"Taxa correlated with age\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"bipartitenetwork\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"correlationCoefThreshold\": 0.5,\n" + | ||
" \"significanceThreshold\": 0.05\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"computationId\": \"jwdb9\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"abundance\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"collectionId\": \"EUPATH_0009256\",\n" + | ||
" \"entityId\": \"EUPATH_0000808\"\n" + | ||
" },\n" + | ||
" \"rankingMethod\": \"variance\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"6e6afcbf-fbf0-4d3c-bfa3-ab7bbc92dda9\",\n" + | ||
" \"displayName\": \"Top taxa by age (genus); faceted by case/control status\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"boxplot\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"dependentAxisValueSpec\": \"Full\",\n" + | ||
" \"overlayVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0051008\"\n" + | ||
" },\n" + | ||
" \"facetVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"computationId\": \"pia0q\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"alphadiv\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"collectionVariable\": {\n" + | ||
" \"collectionId\": \"EUPATH_0009256\",\n" + | ||
" \"entityId\": \"EUPATH_0000808\"\n" + | ||
" },\n" + | ||
" \"alphaDivMethod\": \"shannon\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"visualizations\": [\n" + | ||
" {\n" + | ||
" \"visualizationId\": \"e7d48cfb-aeb4-4f2a-9c68-699a27ebefa6\",\n" + | ||
" \"displayName\": \"Alpha div (genus) by age w/ case/control status as overlay\",\n" + | ||
" \"descriptor\": {\n" + | ||
" \"type\": \"boxplot\",\n" + | ||
" \"configuration\": {\n" + | ||
" \"dependentAxisValueSpec\": \"Full\",\n" + | ||
" \"xAxisVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0051008\"\n" + | ||
" },\n" + | ||
" \"overlayVariable\": {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" }\n" + | ||
" },\n" + | ||
" \"currentPlotFilters\": []\n" + | ||
" }\n" + | ||
" }\n" + | ||
" ]\n" + | ||
" }\n" + | ||
" ],\n" + | ||
" \"starredVariables\": [\n" + | ||
" {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010375\"\n" + | ||
" },\n" + | ||
" {\n" + | ||
" \"entityId\": \"EUPATH_0000096\",\n" + | ||
" \"variableId\": \"EUPATH_0010367\"\n" + | ||
" }\n" + | ||
" ],\n" + | ||
" \"dataTableConfig\": {},\n" + | ||
" \"derivedVariables\": []\n" + | ||
"}"; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't match Danielle's description. Should be
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Careless mistake :( thanks!