From e8ed6cc51da063c36252066b801725bb1cec62fa Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 27 Mar 2024 12:45:09 -0400 Subject: [PATCH 01/16] Implement analysis migration for VDI --- Model/pom.xml | 6 + .../fix/table/edaanalysis/AnalysisRow.java | 25 ++++ .../plugins/VDIMigrationPlugin.java | 124 ++++++++++++++++++ .../plugins/VDIMigrationPluginTest.java | 41 ++++++ .../test/resources/migration-unit-test-1.json | 11 ++ 5 files changed, 207 insertions(+) create mode 100644 Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java create mode 100644 Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java create mode 100644 Model/src/test/resources/migration-unit-test-1.json diff --git a/Model/pom.xml b/Model/pom.xml index 14c53f624..5bbab219b 100644 --- a/Model/pom.xml +++ b/Model/pom.xml @@ -161,6 +161,12 @@ test + + org.mockito + mockito-core + test + + org.irods.jargon jargon-core diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/AnalysisRow.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/AnalysisRow.java index 0201971b2..8d6c2879e 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/AnalysisRow.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/AnalysisRow.java @@ -33,6 +33,15 @@ public AnalysisRow(ResultSet rs, DBPlatform platform) throws SQLException { _numVisualizations = rs.getInt("num_visualizations"); } + public AnalysisRow(String analysisId, String datasetId, JSONObject descriptor, int numFilters, int numComputations, int numVisualizations) { + _analysisId = analysisId; + _datasetId = datasetId; + _descriptor = descriptor; + _numFilters = numFilters; + _numComputations = numComputations; + _numVisualizations = numVisualizations; + } + public Object[] toOrderedValues() { return new Object[] { _datasetId, _descriptor.toString(), _numFilters, _numComputations, _numVisualizations, _analysisId @@ -61,6 +70,22 @@ public JSONObject getDescriptor() { return _descriptor; } + public String getAnalysisId() { + return _analysisId; + } + + public int getNumFilters() { + return _numFilters; + } + + public int getNumComputations() { + return _numComputations; + } + + public int getNumVisualizations() { + return _numVisualizations; + } + /** * Sets a new descriptor and refreshes the stats (number of filters, computations, * and visualizations) on this analysis. diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java new file mode 100644 index 000000000..109e18b85 --- /dev/null +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -0,0 +1,124 @@ +package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; + +import com.fasterxml.jackson.databind.JsonNode; +import org.apache.log4j.Logger; +import org.gusdb.fgputil.json.JsonUtil; +import org.gusdb.wdk.model.WdkModel; +import org.gusdb.wdk.model.fix.table.TableRowInterfaces; +import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; +import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; + +import java.io.File; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class VDIMigrationPlugin extends AbstractAnalysisUpdater { + private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class); + public static final String UD_DATASET_ID_PREFIX = "EDAUD_"; + + private Map legacyIdToVdiId; + private int missingFromVdiCount = 0; + + @Override + public TableRowInterfaces.RowResult processRecord(AnalysisRow nextRow) throws Exception { + final String legacyDatasetId = nextRow.getDatasetId(); + final String legacyUdId = legacyDatasetId.replace(UD_DATASET_ID_PREFIX, ""); + final String vdiId = legacyIdToVdiId.get(legacyUdId); + + if (vdiId == null) { + LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file."); + missingFromVdiCount++; + return new TableRowInterfaces.RowResult<>(nextRow); + } + + // Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study + // ID, which is the currency of EDA. + final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; + + // Create a copy with just the dataset ID updated to VDI counterpart. + AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, nextRow.getDescriptor(), + nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); + + return new TableRowInterfaces.RowResult<>(out); + } + + @Override + public void dumpStatistics() { + if (missingFromVdiCount > 0) { + LOG.warn("Failed to migrate " + missingFromVdiCount + " datasets, they were not found in the provided tinydb file."); + } + } + + @Override + public void configure(WdkModel wdkModel, List additionalArgs) throws Exception { + // Parse args in the format --= + final Map args = additionalArgs.stream() + .map(arg -> Arrays.stream(arg.split("=")) + .map(String::trim) // Trim whitespace from args + .collect(Collectors.toList())) + .collect(Collectors.toMap( + pair -> pair.get(0), + pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. + + // Validate required arg. + if (!args.containsKey("--tinyDb")) { + throw new IllegalArgumentException("Missing required flag --tinyDb"); + } + final File tinyDbFile = new File(args.get("--tinyDb")); + + this.legacyIdToVdiId = readLegacyStudyIdToVdiId(tinyDbFile); + + // Default to dryrun to avoid incidental migrations when testing. + this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); + } + + /** + * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. + * + * Example file format: + * + * { + * "_default": { + * "1": { + * "type": "owner", + * "udId": 1234, + * "vdiId": "123XyZ", + * "msg": null, + * "time": "Fri Mar 26 00:00:00 2024" + * } + * } + * + * @param tinyDbFile TinyDB file, output of the migration script run to migrate legacy UDs into VDI. + * @return Map of legacy UD Ids to VDI Ids. + */ + private Map readLegacyStudyIdToVdiId(File tinyDbFile) { + try { + JsonNode root = JsonUtil.Jackson.readTree(tinyDbFile); + JsonNode dbRoot = root.get("_default"); + + Map mapping = new HashMap<>(); + Iterator> fieldIterator = dbRoot.fields(); + + // Iterate through each field in the "_default" node. + // Ignore the numeric index keys and extract the udId and vdiId fields to create mapping. + while (fieldIterator.hasNext()) { + Map.Entry entry = fieldIterator.next(); + mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText()); + } + + LOG.info("Extracted a mapping of " + mapping.size() + " legacy to VDI identifiers."); + return mapping; + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private enum CliArg { + + } +} diff --git a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java new file mode 100644 index 000000000..8f637705e --- /dev/null +++ b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java @@ -0,0 +1,41 @@ +package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; + +import org.gusdb.wdk.model.WdkModel; +import org.gusdb.wdk.model.fix.table.TableRowInterfaces; +import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; +import org.json.JSONObject; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mockito; + +import java.io.File; +import java.util.List; +import java.util.Objects; + +public class VDIMigrationPluginTest { + private WdkModel mockedModel; + private ClassLoader classLoader; + + @Before + public void setup() { + classLoader = getClass().getClassLoader(); + mockedModel = Mockito.mock(WdkModel.class); + } + + @Test + public void test() throws Exception { + final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); + final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); + final List args = List.of("--tinyDb=" + file.getPath()); + migrationPlugin.configure(mockedModel, args); + TableRowInterfaces.RowResult result = migrationPlugin.processRecord( + new AnalysisRow("x", + "EDAUD_1234", + new JSONObject(), + 3, + 4, + 5)); + Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); + } +} diff --git a/Model/src/test/resources/migration-unit-test-1.json b/Model/src/test/resources/migration-unit-test-1.json new file mode 100644 index 000000000..b6f52f60e --- /dev/null +++ b/Model/src/test/resources/migration-unit-test-1.json @@ -0,0 +1,11 @@ +{ + "_default": { + "1": { + "type": "owner", + "udId": 1234, + "vdiId": "123XyZ", + "msg": null, + "time": "Fri Mar 26 00:00:00 2024" + } + } +} From 25c706d1e0431c3dde9d7bfbe8dfa5897b1758cc Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Tue, 2 Apr 2024 11:22:03 -0400 Subject: [PATCH 02/16] Updates to support IDs --- Model/pom.xml | 5 + .../plugins/VDIEntityIdRetriever.java | 24 +++++ .../plugins/VDIMigrationPlugin.java | 100 ++++++++++++------ .../plugins/VDIMigrationPluginTest.java | 15 ++- .../test/resources/analysis-unit-test-1.json | 61 +++++++++++ 5 files changed, 172 insertions(+), 33 deletions(-) create mode 100644 Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java create mode 100644 Model/src/test/resources/analysis-unit-test-1.json diff --git a/Model/pom.xml b/Model/pom.xml index 5bbab219b..847794e4b 100644 --- a/Model/pom.xml +++ b/Model/pom.xml @@ -110,6 +110,11 @@ commons-lang3 + + commons-codec + commons-codec + + com.fasterxml.jackson.core jackson-databind diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java new file mode 100644 index 000000000..506c01832 --- /dev/null +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java @@ -0,0 +1,24 @@ +package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; + +import org.gusdb.fgputil.db.runner.SQLRunner; + +import javax.sql.DataSource; + +public class VDIEntityIdRetriever { + private DataSource eda; + + public VDIEntityIdRetriever(DataSource eda) { + this.eda = eda; + } + + public String queryEntityId(String vdiStableId) { + final String sql = "SELECT internal_abbrev FROM userstudydatasetid u" + + "JOIN vdi_datasets_dev_s.entitytypegraph etg" + + "ON u.study_stable_id = etg.study_stable_id" + + "WHERE dataset_stable_id = ?"; + return new SQLRunner(eda, sql).executeQuery(new Object[] { vdiStableId }, rs -> { + rs.next(); + return rs.getString("internal_abbrev"); + }); + } +} diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 109e18b85..a63dbde9d 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -1,34 +1,73 @@ package org.gusdb.wdk.model.fix.table.edaanalysis.plugins; import com.fasterxml.jackson.databind.JsonNode; +import org.apache.commons.codec.binary.Hex; +import org.apache.commons.codec.digest.DigestUtils; import org.apache.log4j.Logger; import org.gusdb.fgputil.json.JsonUtil; import org.gusdb.wdk.model.WdkModel; import org.gusdb.wdk.model.fix.table.TableRowInterfaces; import org.gusdb.wdk.model.fix.table.edaanalysis.AbstractAnalysisUpdater; import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; +import org.json.JSONObject; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; import java.util.stream.Collectors; public class VDIMigrationPlugin extends AbstractAnalysisUpdater { private static final Logger LOG = Logger.getLogger(VDIMigrationPlugin.class); - public static final String UD_DATASET_ID_PREFIX = "EDAUD_"; + private static final String UD_DATASET_ID_PREFIX = "EDAUD_"; + private static final Pattern VAR_ID_PATTERN = Pattern.compile("variableId\":\\s*\"([a-zA-Z0-9_-]+)"); + private static final Pattern ENTITY_ID_PATTERN = Pattern.compile("entityId\":\\s*\"([a-zA-Z0-9_-]+)"); - private Map legacyIdToVdiId; + private Map _legacyIdToVdiId; + private VDIEntityIdRetriever _vdiEntityIdRetriever; private int missingFromVdiCount = 0; + @Override + public void configure(WdkModel wdkModel, List additionalArgs) throws Exception { + configure(wdkModel, additionalArgs, new VDIEntityIdRetriever(wdkModel.getAppDb().getDataSource())); + } + + // Visible for testing. + void configure(WdkModel wdkModel, List additionalArgs, VDIEntityIdRetriever entityIdRetriever) { + // Parse args in the format --= + final Map args = additionalArgs.stream() + .map(arg -> Arrays.stream(arg.split("=")) + .map(String::trim) // Trim whitespace from args + .collect(Collectors.toList())) + .collect(Collectors.toMap( + pair -> pair.get(0), + pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. + + // Validate required arg. + if (!args.containsKey("--tinyDb")) { + throw new IllegalArgumentException("Missing required flag --tinyDb"); + } + final File tinyDbFile = new File(args.get("--tinyDb")); + + _legacyIdToVdiId = readLegacyStudyIdToVdiId(tinyDbFile); + + // Default to dryrun to avoid incidental migrations when testing. + _writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); + _wdkModel = wdkModel; + _vdiEntityIdRetriever = entityIdRetriever; + } + @Override public TableRowInterfaces.RowResult processRecord(AnalysisRow nextRow) throws Exception { final String legacyDatasetId = nextRow.getDatasetId(); final String legacyUdId = legacyDatasetId.replace(UD_DATASET_ID_PREFIX, ""); - final String vdiId = legacyIdToVdiId.get(legacyUdId); + final String vdiId = _legacyIdToVdiId.get(legacyUdId); if (vdiId == null) { LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file."); @@ -39,14 +78,40 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR // Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study // ID, which is the currency of EDA. final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; + final String vdiEntityId = _vdiEntityIdRetriever.queryEntityId(vdiDatasetId); + + String descriptor = nextRow.getDescriptor().toString(); + + // Find all variable IDs. + final Set legacyVariableIds = VAR_ID_PATTERN.matcher(descriptor).results() + .map(match -> match.group(1)) + .collect(Collectors.toSet()); + + final String entityId = ENTITY_ID_PATTERN.matcher(descriptor).results() + .findAny() + .map(m -> m.group(1)) + .orElse(null); + + if (entityId != null) { + descriptor = descriptor.replaceAll(entityId, vdiEntityId); + } + + for (String legacyVariableId: legacyVariableIds) { + descriptor = descriptor.replaceAll(legacyVariableId, convertToVdiId(legacyVariableId)); + } // Create a copy with just the dataset ID updated to VDI counterpart. - AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, nextRow.getDescriptor(), + AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); return new TableRowInterfaces.RowResult<>(out); } + private String convertToVdiId(String legacyVariableId) { + byte[] encodedId = DigestUtils.digest(DigestUtils.getSha1Digest(), legacyVariableId.getBytes(StandardCharsets.UTF_8)); + return "VAR_" + Hex.encodeHexString(encodedId).substring(0, 16); + } + @Override public void dumpStatistics() { if (missingFromVdiCount > 0) { @@ -54,29 +119,6 @@ public void dumpStatistics() { } } - @Override - public void configure(WdkModel wdkModel, List additionalArgs) throws Exception { - // Parse args in the format --= - final Map args = additionalArgs.stream() - .map(arg -> Arrays.stream(arg.split("=")) - .map(String::trim) // Trim whitespace from args - .collect(Collectors.toList())) - .collect(Collectors.toMap( - pair -> pair.get(0), - pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. - - // Validate required arg. - if (!args.containsKey("--tinyDb")) { - throw new IllegalArgumentException("Missing required flag --tinyDb"); - } - final File tinyDbFile = new File(args.get("--tinyDb")); - - this.legacyIdToVdiId = readLegacyStudyIdToVdiId(tinyDbFile); - - // Default to dryrun to avoid incidental migrations when testing. - this._writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); - } - /** * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. * @@ -117,8 +159,4 @@ private Map readLegacyStudyIdToVdiId(File tinyDbFile) { throw new RuntimeException(e); } } - - private enum CliArg { - - } } diff --git a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java index 8f637705e..4842ab345 100644 --- a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java +++ b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java @@ -3,39 +3,50 @@ import org.gusdb.wdk.model.WdkModel; import org.gusdb.wdk.model.fix.table.TableRowInterfaces; import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; +import org.hamcrest.MatcherAssert; import org.json.JSONObject; import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.mockito.Matchers; import org.mockito.Mockito; import java.io.File; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; import java.util.List; import java.util.Objects; public class VDIMigrationPluginTest { private WdkModel mockedModel; private ClassLoader classLoader; + private VDIEntityIdRetriever retriever; @Before public void setup() { classLoader = getClass().getClassLoader(); mockedModel = Mockito.mock(WdkModel.class); + retriever = Mockito.mock(VDIEntityIdRetriever.class); } @Test public void test() throws Exception { + File analysisFile = new File(Objects.requireNonNull(classLoader.getResource("analysis-unit-test-1.json")).getFile()); + JSONObject descriptor = new JSONObject(Files.readString(Path.of(analysisFile.getPath()))); final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); final List args = List.of("--tinyDb=" + file.getPath()); - migrationPlugin.configure(mockedModel, args); + Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn("EDAUD_Migrated_ID"); + migrationPlugin.configure(mockedModel, args, retriever); TableRowInterfaces.RowResult result = migrationPlugin.processRecord( new AnalysisRow("x", "EDAUD_1234", - new JSONObject(), + descriptor, 3, 4, 5)); Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); + Assert.assertTrue(result.getRow().getDescriptor().toString().contains("VAR_c73e53adb951e2fe")); } } diff --git a/Model/src/test/resources/analysis-unit-test-1.json b/Model/src/test/resources/analysis-unit-test-1.json new file mode 100644 index 000000000..a973aeeb2 --- /dev/null +++ b/Model/src/test/resources/analysis-unit-test-1.json @@ -0,0 +1,61 @@ +{ + "descriptor": { + "subset": { + "descriptor": [], + "uiSettings": {} + }, + "computations": [ + { + "computationId": "pass-through", + "displayName": "", + "descriptor": { + "type": "pass" + }, + "visualizations": [ + { + "visualizationId": "b28b42de-4a74-4a47-8232-b5c0ce7eb36d", + "displayName": "File vs. Database Subsetting Duration", + "descriptor": { + "type": "scatterplot", + "configuration": { + "valueSpecConfig": "Best fit line with raw", + "independentAxisLogScale": false, + "dependentAxisLogScale": false, + "xAxisVariable": { + "entityId": "TEMP_diy_performance_numericoutput", + "variableId": "TEMP_DB_DURATION" + }, + "yAxisVariable": { + "entityId": "TEMP_diy_performance_numericoutput", + "variableId": "TEMP_MAP_REDUCE_DURATION" + }, + "overlayVariable": { + "entityId": "TEMP_diy_performance_numericoutput", + "variableId": "TEMP_NUM_FILTER_VARS" + } + }, + "currentPlotFilters": [], + "thumbnail": "" + } + }, + { + "visualizationId": "15bb242d-b7d6-439c-86ee-98b5334775ed", + "displayName": "Unnamed visualization", + "descriptor": { + "type": "histogram", + "configuration": { + "dependentAxisLogScale": false, + "valueSpec": "count" + }, + "currentPlotFilters": [], + "thumbnail": "" + } + } + ] + } + ], + "starredVariables": [], + "dataTableConfig": {}, + "derivedVariables": [] + } +} \ No newline at end of file From 11b5e8492c023a87d9de9af024939e2e3230b0e8 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 10 Apr 2024 12:56:17 -0400 Subject: [PATCH 03/16] Add comments --- .../edaanalysis/plugins/VDIEntityIdRetriever.java | 5 +++-- .../edaanalysis/plugins/VDIMigrationPlugin.java | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java index 506c01832..1443411e4 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java @@ -3,6 +3,7 @@ import org.gusdb.fgputil.db.runner.SQLRunner; import javax.sql.DataSource; +import java.util.Optional; public class VDIEntityIdRetriever { private DataSource eda; @@ -11,14 +12,14 @@ public VDIEntityIdRetriever(DataSource eda) { this.eda = eda; } - public String queryEntityId(String vdiStableId) { + public Optional queryEntityId(String vdiStableId) { final String sql = "SELECT internal_abbrev FROM userstudydatasetid u" + "JOIN vdi_datasets_dev_s.entitytypegraph etg" + "ON u.study_stable_id = etg.study_stable_id" + "WHERE dataset_stable_id = ?"; return new SQLRunner(eda, sql).executeQuery(new Object[] { vdiStableId }, rs -> { rs.next(); - return rs.getString("internal_abbrev"); + return Optional.ofNullable(rs.getString("internal_abbrev")); }); } } diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index a63dbde9d..20e60cb56 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -19,6 +19,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -72,13 +73,19 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR if (vdiId == null) { LOG.warn("Unable to find legacy ID " + legacyUdId + " in the tinydb file."); missingFromVdiCount++; - return new TableRowInterfaces.RowResult<>(nextRow); + return new TableRowInterfaces.RowResult<>(nextRow) + .setShouldWrite(false); } // Append UD prefix to VDI ID. The prefix is prepended in the view that maps stable VDI IDs to the unstable study // ID, which is the currency of EDA. final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; - final String vdiEntityId = _vdiEntityIdRetriever.queryEntityId(vdiDatasetId); + final Optional vdiEntityId = _vdiEntityIdRetriever.queryEntityId(vdiDatasetId); + if (!vdiEntityId.isPresent()) { + LOG.warn("Unable to find entity ID in appdb for VDI dataset ID: " + vdiDatasetId); + return new TableRowInterfaces.RowResult<>(nextRow) + .setShouldWrite(false); + } String descriptor = nextRow.getDescriptor().toString(); @@ -92,10 +99,12 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR .map(m -> m.group(1)) .orElse(null); + // Replace all entityID with entityID looked up from database. if (entityId != null) { - descriptor = descriptor.replaceAll(entityId, vdiEntityId); + descriptor = descriptor.replaceAll(entityId, vdiEntityId.get()); } + // Replace all variable IDs with value converted from legacy variable ID. for (String legacyVariableId: legacyVariableIds) { descriptor = descriptor.replaceAll(legacyVariableId, convertToVdiId(legacyVariableId)); } From f6b6aa6b5f5259e342f5057de6d48d944b0a35a3 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 10 Apr 2024 12:58:04 -0400 Subject: [PATCH 04/16] Read dry-run property --- .../fix/table/edaanalysis/plugins/VDIMigrationPlugin.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 20e60cb56..a085e527a 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -113,7 +113,8 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); - return new TableRowInterfaces.RowResult<>(out); + return new TableRowInterfaces.RowResult<>(out) + .setShouldWrite(_writeToDb); } private String convertToVdiId(String legacyVariableId) { From 93a6ac10fb4f4b22a079fa5dedd6bf8ec15e58a5 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 10 Apr 2024 13:10:04 -0400 Subject: [PATCH 05/16] Fix test --- .../fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java index 4842ab345..69f1826df 100644 --- a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java +++ b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java @@ -17,6 +17,7 @@ import java.nio.file.Path; import java.util.List; import java.util.Objects; +import java.util.Optional; public class VDIMigrationPluginTest { private WdkModel mockedModel; @@ -37,7 +38,7 @@ public void test() throws Exception { final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); final List args = List.of("--tinyDb=" + file.getPath()); - Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn("EDAUD_Migrated_ID"); + Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn(Optional.of("EDAUD_Migrated_ID")); migrationPlugin.configure(mockedModel, args, retriever); TableRowInterfaces.RowResult result = migrationPlugin.processRecord( new AnalysisRow("x", From d339e5a9bb2e6b971833380c712feb9a86d45cbb Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 10 Apr 2024 15:00:39 -0400 Subject: [PATCH 06/16] Fixes for dry run --- .../plugins/VDIEntityIdRetriever.java | 10 ++++--- .../plugins/VDIMigrationPlugin.java | 30 ++++++++++++------- .../plugins/VDIMigrationPluginTest.java | 4 +-- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java index 1443411e4..e2248d33f 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java @@ -7,16 +7,18 @@ public class VDIEntityIdRetriever { private DataSource eda; + private String schema; - public VDIEntityIdRetriever(DataSource eda) { + public VDIEntityIdRetriever(DataSource eda, String schema) { this.eda = eda; + this.schema = schema; } public Optional queryEntityId(String vdiStableId) { - final String sql = "SELECT internal_abbrev FROM userstudydatasetid u" + - "JOIN vdi_datasets_dev_s.entitytypegraph etg" + + final String sql = String.format("SELECT internal_abbrev FROM userstudydatasetid u" + + "JOIN %s.entitytypegraph etg" + "ON u.study_stable_id = etg.study_stable_id" + - "WHERE dataset_stable_id = ?"; + "WHERE dataset_stable_id = ?", schema); return new SQLRunner(eda, sql).executeQuery(new Object[] { vdiStableId }, rs -> { rs.next(); return Optional.ofNullable(rs.getString("internal_abbrev")); diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index a085e527a..099d78671 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -36,11 +36,6 @@ public class VDIMigrationPlugin extends AbstractAnalysisUpdater { @Override public void configure(WdkModel wdkModel, List additionalArgs) throws Exception { - configure(wdkModel, additionalArgs, new VDIEntityIdRetriever(wdkModel.getAppDb().getDataSource())); - } - - // Visible for testing. - void configure(WdkModel wdkModel, List additionalArgs, VDIEntityIdRetriever entityIdRetriever) { // Parse args in the format --= final Map args = additionalArgs.stream() .map(arg -> Arrays.stream(arg.split("=")) @@ -50,20 +45,35 @@ void configure(WdkModel wdkModel, List additionalArgs, VDIEntityIdRetrie pair -> pair.get(0), pair -> pair.size() > 1 ? pair.get(1) : "true")); // A flag without an "=" is a boolean. Set true if present. - // Validate required arg. + // Validate required args. if (!args.containsKey("--tinyDb")) { throw new IllegalArgumentException("Missing required flag --tinyDb"); } - final File tinyDbFile = new File(args.get("--tinyDb")); + if (!args.containsKey(("--schema"))) { + throw new IllegalArgumentException("Missing required argument --schema"); + } + + final String schema = args.get("--schema"); + setEntityIdRetriever(new VDIEntityIdRetriever(wdkModel.getAppDb().getDataSource(), schema)); - _legacyIdToVdiId = readLegacyStudyIdToVdiId(tinyDbFile); + final File tinyDbFile = new File(args.get("--tinyDb")); + readVdiMappingFile(tinyDbFile); // Default to dryrun to avoid incidental migrations when testing. - _writeToDb = Boolean.parseBoolean(args.getOrDefault("--liveRun", "false")); + _writeToDb = Boolean.parseBoolean(args.getOrDefault("-write", "false")); _wdkModel = wdkModel; + } + + // Visible for testing. + void setEntityIdRetriever(VDIEntityIdRetriever entityIdRetriever) { _vdiEntityIdRetriever = entityIdRetriever; } + // Visible for testing + void readVdiMappingFile(File mappingFile) { + _legacyIdToVdiId = readLegacyStudyIdToVdiId(mappingFile); + } + @Override public TableRowInterfaces.RowResult processRecord(AnalysisRow nextRow) throws Exception { final String legacyDatasetId = nextRow.getDatasetId(); @@ -81,7 +91,7 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR // ID, which is the currency of EDA. final String vdiDatasetId = UD_DATASET_ID_PREFIX + vdiId; final Optional vdiEntityId = _vdiEntityIdRetriever.queryEntityId(vdiDatasetId); - if (!vdiEntityId.isPresent()) { + if (vdiEntityId.isEmpty()) { LOG.warn("Unable to find entity ID in appdb for VDI dataset ID: " + vdiDatasetId); return new TableRowInterfaces.RowResult<>(nextRow) .setShouldWrite(false); diff --git a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java index 69f1826df..150931baa 100644 --- a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java +++ b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java @@ -37,9 +37,9 @@ public void test() throws Exception { JSONObject descriptor = new JSONObject(Files.readString(Path.of(analysisFile.getPath()))); final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); - final List args = List.of("--tinyDb=" + file.getPath()); + migrationPlugin.readVdiMappingFile(file); Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn(Optional.of("EDAUD_Migrated_ID")); - migrationPlugin.configure(mockedModel, args, retriever); + migrationPlugin.setEntityIdRetriever(retriever); TableRowInterfaces.RowResult result = migrationPlugin.processRecord( new AnalysisRow("x", "EDAUD_1234", From 3ca62b1f47389b5328540a332d307161dbe2b52d Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Wed, 10 Apr 2024 15:27:47 -0400 Subject: [PATCH 07/16] Add file reader --- .../wdk/model/fix/VdiMigrationFileReader.java | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java b/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java new file mode 100644 index 000000000..230458212 --- /dev/null +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java @@ -0,0 +1,57 @@ +package org.gusdb.wdk.model.fix; + +import com.fasterxml.jackson.databind.JsonNode; +import org.gusdb.fgputil.json.JsonUtil; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +public class VdiMigrationFileReader { + private File file; + + public VdiMigrationFileReader(File file) { + this.file = file; + } + + /** + * Parse the tinydb file into a map of legacy UD identifiers to VDI identifiers. + * + * Example file format: + * + * { + * "_default": { + * "1": { + * "type": "owner", + * "udId": 1234, + * "vdiId": "123XyZ", + * "msg": null, + * "time": "Fri Mar 26 00:00:00 2024" + * } + * } + * + * @return Map of legacy UD Ids to VDI Ids. + */ + public Map readLegacyStudyIdToVdiId() { + try { + JsonNode root = JsonUtil.Jackson.readTree(file); + JsonNode dbRoot = root.get("_default"); + + Map mapping = new HashMap<>(); + Iterator> fieldIterator = dbRoot.fields(); + + // Iterate through each field in the "_default" node. + // Ignore the numeric index keys and extract the udId and vdiId fields to create mapping. + while (fieldIterator.hasNext()) { + Map.Entry entry = fieldIterator.next(); + mapping.put(entry.getValue().get("udId").asText(), entry.getValue().get("vdiId").asText()); + } + + return mapping; + } catch (IOException e) { + throw new RuntimeException(e); + } + } +} \ No newline at end of file From 36128305f30fb8e7ce885eab4e913225a7203146 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Thu, 11 Apr 2024 12:54:24 -0400 Subject: [PATCH 08/16] Fix merge conflicts --- .../wdk/model/fix/VdiMigrationFileReader.java | 4 -- .../plugins/VDIMigrationPluginTest.java | 51 ++++--------------- 2 files changed, 9 insertions(+), 46 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java b/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java index 8dcec6e61..d4578d59f 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/VdiMigrationFileReader.java @@ -54,8 +54,4 @@ public Map readLegacyStudyIdToVdiId() { throw new RuntimeException(e); } } -<<<<<<< HEAD } -======= -} ->>>>>>> master diff --git a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java index dddec078f..0a039a90d 100644 --- a/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java +++ b/Model/src/test/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPluginTest.java @@ -3,101 +3,68 @@ import org.gusdb.wdk.model.WdkModel; import org.gusdb.wdk.model.fix.table.TableRowInterfaces; import org.gusdb.wdk.model.fix.table.edaanalysis.AnalysisRow; -<<<<<<< HEAD -import org.hamcrest.MatcherAssert; -======= ->>>>>>> master import org.json.JSONObject; import org.junit.Assert; import org.junit.Before; import org.junit.Test; -<<<<<<< HEAD -import org.mockito.Matchers; import org.mockito.Mockito; import java.io.File; -import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; -import java.util.List; import java.util.Objects; import java.util.Optional; -======= -import org.mockito.Mockito; - -import java.io.File; -import java.util.List; -import java.util.Objects; ->>>>>>> master public class VDIMigrationPluginTest { private WdkModel mockedModel; private ClassLoader classLoader; -<<<<<<< HEAD private VDIEntityIdRetriever retriever; -======= ->>>>>>> master @Before public void setup() { classLoader = getClass().getClassLoader(); mockedModel = Mockito.mock(WdkModel.class); -<<<<<<< HEAD retriever = Mockito.mock(VDIEntityIdRetriever.class); } @Test - public void test() throws Exception { + public void testUpdateEnabled() throws Exception { File analysisFile = new File(Objects.requireNonNull(classLoader.getResource("analysis-unit-test-1.json")).getFile()); JSONObject descriptor = new JSONObject(Files.readString(Path.of(analysisFile.getPath()))); final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); + Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn(Optional.of("asdf")); migrationPlugin.readVdiMappingFile(file); - Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn(Optional.of("EDAUD_Migrated_ID")); migrationPlugin.setEntityIdRetriever(retriever); TableRowInterfaces.RowResult result = migrationPlugin.processRecord( new AnalysisRow("x", "EDAUD_1234", descriptor, -======= - } - - @Test - public void testUpdateEnabled() throws Exception { - final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); - final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); - final List args = List.of("--tinyDb=" + file.getPath()); - migrationPlugin.configure(mockedModel, args); - TableRowInterfaces.RowResult result = migrationPlugin.processRecord( - new AnalysisRow("x", - "EDAUD_1234", - new JSONObject(), ->>>>>>> master 3, 4, 5)); Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); -<<<<<<< HEAD Assert.assertTrue(result.getRow().getDescriptor().toString().contains("VAR_c73e53adb951e2fe")); -======= Assert.assertFalse(result.shouldWrite()); } @Test public void testUpdateDisabled() throws Exception { + File analysisFile = new File(Objects.requireNonNull(classLoader.getResource("analysis-unit-test-1.json")).getFile()); + JSONObject descriptor = new JSONObject(Files.readString(Path.of(analysisFile.getPath()))); final File file = new File(Objects.requireNonNull(classLoader.getResource("migration-unit-test-1.json")).getFile()); final VDIMigrationPlugin migrationPlugin = new VDIMigrationPlugin(); - final List args = List.of("--tinyDb=" + file.getPath(), "--liveRun"); - migrationPlugin.configure(mockedModel, args); + Mockito.when(retriever.queryEntityId("EDAUD_123XyZ")).thenReturn(Optional.of("asdf")); + migrationPlugin.readVdiMappingFile(file); + migrationPlugin.setEntityIdRetriever(retriever); TableRowInterfaces.RowResult result = migrationPlugin.processRecord( new AnalysisRow("x", "EDAUD_1234", - new JSONObject(), + descriptor, 3, 4, 5)); Assert.assertEquals("EDAUD_123XyZ", result.getRow().getDatasetId()); - Assert.assertTrue(result.shouldWrite()); ->>>>>>> master + Assert.assertFalse(result.shouldWrite()); } } From 9eba1c8f2332ede86c6702036252e5ba6cf9a51a Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Thu, 11 Apr 2024 13:06:09 -0400 Subject: [PATCH 09/16] Add logging --- .../fix/table/edaanalysis/plugins/VDIMigrationPlugin.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 97155335b..36588ca3a 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -102,7 +102,8 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR return new TableRowInterfaces.RowResult<>(nextRow) .setShouldWrite(false); } - + + LOG.info("Analysis descriptor before migration: " + nextRow.getDescriptor()); String descriptor = nextRow.getDescriptor().toString(); // Find all variable IDs. @@ -129,6 +130,8 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); + LOG.info("Analysis descriptor after migration: " + out.getDescriptor()); + return new TableRowInterfaces.RowResult<>(out) .setShouldWrite(_writeToDb); } From b6cabcff6267b51fcd6bfed3fca686d03c982192 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Tue, 16 Apr 2024 15:16:26 -0400 Subject: [PATCH 10/16] Fix plugin --- .../table/edaanalysis/plugins/VDIEntityIdRetriever.java | 8 ++++---- .../fix/table/edaanalysis/plugins/VDIMigrationPlugin.java | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java index e2248d33f..7aab37d0c 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java @@ -15,10 +15,10 @@ public VDIEntityIdRetriever(DataSource eda, String schema) { } public Optional queryEntityId(String vdiStableId) { - final String sql = String.format("SELECT internal_abbrev FROM userstudydatasetid u" + - "JOIN %s.entitytypegraph etg" + - "ON u.study_stable_id = etg.study_stable_id" + - "WHERE dataset_stable_id = ?", schema); + final String sql = String.format("SELECT internal_abbrev FROM %s.userstudydatasetid u" + + " JOIN %s.entitytypegraph etg" + + " ON u.study_stable_id = etg.study_stable_id" + + " WHERE dataset_stable_id = ?", schema, schema); return new SQLRunner(eda, sql).executeQuery(new Object[] { vdiStableId }, rs -> { rs.next(); return Optional.ofNullable(rs.getString("internal_abbrev")); diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 36588ca3a..43684c4e2 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -102,7 +102,7 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR return new TableRowInterfaces.RowResult<>(nextRow) .setShouldWrite(false); } - + LOG.info("Analysis descriptor before migration: " + nextRow.getDescriptor()); String descriptor = nextRow.getDescriptor().toString(); From 20646302290a20d2c22edbc8a5b0fe4e192779d6 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Tue, 16 Apr 2024 16:10:39 -0400 Subject: [PATCH 11/16] Fix NPE --- .../fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java index 7aab37d0c..7ae0e375f 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIEntityIdRetriever.java @@ -20,7 +20,10 @@ public Optional queryEntityId(String vdiStableId) { " ON u.study_stable_id = etg.study_stable_id" + " WHERE dataset_stable_id = ?", schema, schema); return new SQLRunner(eda, sql).executeQuery(new Object[] { vdiStableId }, rs -> { - rs.next(); + boolean hasNext = rs.next(); + if (!hasNext) { + return Optional.empty(); + } return Optional.ofNullable(rs.getString("internal_abbrev")); }); } From b714293b937329dd1f0dd45090c02236dcd33e91 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Thu, 18 Apr 2024 18:38:15 -0400 Subject: [PATCH 12/16] Test without thumbnail --- .../fix/table/edaanalysis/plugins/VDIMigrationPlugin.java | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 43684c4e2..ff57a6aaa 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -130,7 +130,12 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); - LOG.info("Analysis descriptor after migration: " + out.getDescriptor()); + // TODO REMOVE BELOW + JSONObject descriptorWithoutThumbnail = out.getDescriptor(); + descriptorWithoutThumbnail.remove("thumbnail"); + // TODO REMOVE ABOVE + + LOG.info("Analysis descriptor after migration: " + descriptorWithoutThumbnail); return new TableRowInterfaces.RowResult<>(out) .setShouldWrite(_writeToDb); From 33b371f262a8a5ae405a0486cdf1b1560be91bff Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Thu, 18 Apr 2024 18:47:00 -0400 Subject: [PATCH 13/16] Remove extra logging --- .../table/edaanalysis/plugins/VDIMigrationPlugin.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index ff57a6aaa..841140524 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -129,13 +129,8 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR // Create a copy with just the dataset ID updated to VDI counterpart. AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); - - // TODO REMOVE BELOW - JSONObject descriptorWithoutThumbnail = out.getDescriptor(); - descriptorWithoutThumbnail.remove("thumbnail"); - // TODO REMOVE ABOVE - - LOG.info("Analysis descriptor after migration: " + descriptorWithoutThumbnail); + + LOG.info("Analysis descriptor after migration: " + out); return new TableRowInterfaces.RowResult<>(out) .setShouldWrite(_writeToDb); From 9a51814ed6546a4c7cc76e688b0aaa32a4781921 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Mon, 22 Apr 2024 13:24:36 -0400 Subject: [PATCH 14/16] Address comments --- .../table/edaanalysis/plugins/VDIMigrationPlugin.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 841140524..3860e55bd 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -127,12 +127,12 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR } // Create a copy with just the dataset ID updated to VDI counterpart. - AnalysisRow out = new AnalysisRow(nextRow.getAnalysisId(), vdiDatasetId, new JSONObject(descriptor), - nextRow.getNumFilters(), nextRow.getNumComputations(), nextRow.getNumVisualizations()); - - LOG.info("Analysis descriptor after migration: " + out); + nextRow.setDescriptor(new JSONObject(descriptor)); + nextRow.setDatasetId(vdiDatasetId); - return new TableRowInterfaces.RowResult<>(out) + LOG.info("Analysis descriptor after migration: " + descriptor); + + return new TableRowInterfaces.RowResult<>(nextRow) .setShouldWrite(_writeToDb); } From 6fcf69ca4b044058bca17f3c1b2bcf3146706700 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Mon, 22 Apr 2024 13:25:34 -0400 Subject: [PATCH 15/16] Consistent arg formatting --- .../model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 3860e55bd..4f0fc47bc 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -61,7 +61,7 @@ public void configure(WdkModel wdkModel, List additionalArgs) throws Exc readVdiMappingFile(tinyDbFile); // Default to dryrun to avoid incidental migrations when testing. - _writeToDb = Boolean.parseBoolean(args.getOrDefault("-write", "false")); + _writeToDb = Boolean.parseBoolean(args.getOrDefault("--write", "false")); _wdkModel = wdkModel; } From 2d99a781fa5b0aee69b2fed5b4a256224f5a40d3 Mon Sep 17 00:00:00 2001 From: Dan Galdi Date: Tue, 23 Apr 2024 13:26:59 -0400 Subject: [PATCH 16/16] Fix migration plugin to use manual ID overrides --- .../edaanalysis/plugins/VDIMigrationPlugin.java | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java index 4f0fc47bc..18442dd77 100644 --- a/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java +++ b/Model/src/main/java/org/gusdb/wdk/model/fix/table/edaanalysis/plugins/VDIMigrationPlugin.java @@ -30,6 +30,15 @@ public class VDIMigrationPlugin extends AbstractAnalysisUpdater { private static final String UD_DATASET_ID_PREFIX = "EDAUD_"; private static final Pattern VAR_ID_PATTERN = Pattern.compile("variableId\":\\s*\"([a-zA-Z0-9_-]+)"); private static final Pattern ENTITY_ID_PATTERN = Pattern.compile("entityId\":\\s*\"([a-zA-Z0-9_-]+)"); + private static final Map VAR_ID_MAPPING_OVERRIDE = Map.of( + // Latitude mappings + "VAR_13DCE851F0DDECBE", "OBI_0001620", + "VAR_4A934C04C995BF7B", "OBI_0001620", + "VAR_F3074604E6180BE6", "OBI_0001620", + // Longitude mappings + "VAR_44452C5F22B37BB", "OBI_0001621", + "VAR_86723E25E8EE8FD8", "OBI_0001621" + ); private Map _legacyIdToVdiId; private VDIEntityIdRetriever _vdiEntityIdRetriever; @@ -137,6 +146,9 @@ public TableRowInterfaces.RowResult processRecord(AnalysisRow nextR } private String convertToVdiId(String legacyVariableId) { + if (VAR_ID_MAPPING_OVERRIDE.containsKey(legacyVariableId)) { + return VAR_ID_MAPPING_OVERRIDE.get(legacyVariableId); + } byte[] encodedId = DigestUtils.digest(DigestUtils.getSha1Digest(), legacyVariableId.getBytes(StandardCharsets.UTF_8)); return "VAR_" + Hex.encodeHexString(encodedId).substring(0, 16); }