Skip to content

Commit

Permalink
adding harvesting feature to handle missing controlled values
Browse files Browse the repository at this point in the history
  • Loading branch information
stevenwinship committed Feb 15, 2024
1 parent f456e51 commit d82c730
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

The `AllowHarvestingMissingCVV` setting controls whether datasets can be harvested when they contain Controlled Vocabulary Values that exist in the originating Dataverse Project but not in the harvesting Dataverse Project.
The default value of this setting is false/no, which causes the harvesting of such a dataset to fail.
When this feature is activated (true/yes), the value in question is removed from the list of values and the dataset is harvested without the missing value.

`curl http://localhost:8080/api/admin/settings/:AllowHarvestingMissingCVV -X PUT -d yes`
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,12 @@ Whether Harvesting (OAI) service is enabled
* When ingesting tabular data files, store the generated tab-delimited
* files *with* the variable names line up top.
*/
StoreIngestedTabularFilesWithVarHeaders
StoreIngestedTabularFilesWithVarHeaders,

/**
* Whether missing controlled vocabulary values should be ignored when harvesting.
*/
AllowHarvestingMissingCVV
;

@Override
Expand Down
7 changes: 7 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/util/SystemConfig.java
Original file line number Diff line number Diff line change
Expand Up @@ -1181,4 +1181,11 @@ public Long getTestStorageQuotaLimit() {
public boolean isStoringIngestedFilesWithHeaders() {
    // Look up the StoreIngestedTabularFilesWithVarHeaders setting, passing
    // false as the second argument to the settings service.
    final SettingsServiceBean.Key headerSetting =
            SettingsServiceBean.Key.StoreIngestedTabularFilesWithVarHeaders;
    return settingsService.isTrueForKey(headerSetting, false);
}

/**
 * Reports whether the {@code AllowHarvestingMissingCVV} setting is turned on,
 * i.e. whether missing controlled vocabulary values should be ignored when
 * harvesting.
 *
 * @return the settings service's answer for that key, with {@code false}
 *         passed as the second argument of the lookup
 */
public boolean allowHarvestingMissingCVV() {
    final SettingsServiceBean.Key missingCvvSetting =
            SettingsServiceBean.Key.AllowHarvestingMissingCVV;
    return settingsService.isTrueForKey(missingCvvSetting, false);
}
}
34 changes: 19 additions & 15 deletions src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -69,7 +68,8 @@ public class JsonParser {
MetadataBlockServiceBean blockService;
SettingsServiceBean settingsService;
LicenseServiceBean licenseService;
HarvestingClient harvestingClient = null;
HarvestingClient harvestingClient = null;
boolean allowHarvestingMissingCVV = false;

/**
* if lenient, we will accept alternate spellings for controlled vocabulary values
Expand All @@ -93,6 +93,7 @@ public JsonParser(DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceB
this.settingsService = settingsService;
this.licenseService = licenseService;
this.harvestingClient = harvestingClient;
this.allowHarvestingMissingCVV = (harvestingClient != null && settingsService.isTrueForKey(SettingsServiceBean.Key.AllowHarvestingMissingCVV, false));
}

public JsonParser() {
Expand Down Expand Up @@ -931,30 +932,30 @@ private String jsonValueToString(JsonValue jv) {
}

public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFieldType cvvType, JsonObject json) throws JsonParseException {
List<ControlledVocabularyValue> vals = new LinkedList<>();
try {
if (cvvType.isAllowMultiples()) {
try {
json.getJsonArray("value").getValuesAs(JsonObject.class);
} catch (ClassCastException cce) {
throw new JsonParseException("Invalid values submitted for " + cvvType.getName() + ". It should be an array of values.");
}
List<ControlledVocabularyValue> vals = new LinkedList<>();
}
for (JsonString strVal : json.getJsonArray("value").getValuesAs(JsonString.class)) {
String strValue = strVal.getString();
ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient);
if (cvv == null) {
if (cvv == null && !allowHarvestingMissingCVV) {
throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
}
// Only add value to the list if it is not a duplicate
if (strValue.equals("Other")) {
System.out.println("vals = " + vals + ", contains: " + vals.contains(cvv));
}
if (!vals.contains(cvv)) {
vals.add(cvv);
if (cvv != null) {
// Only add value to the list if it is not a duplicate
if (strValue.equals("Other")) {
System.out.println("vals = " + vals + ", contains: " + vals.contains(cvv));
}
if (!vals.contains(cvv)) {
vals.add(cvv);
}
}
}
return vals;

} else {
try {
json.getString("value");
Expand All @@ -963,11 +964,14 @@ public List<ControlledVocabularyValue> parseControlledVocabularyValue(DatasetFie
}
String strValue = json.getString("value", "");
ControlledVocabularyValue cvv = datasetFieldSvc.findControlledVocabularyValueByDatasetFieldTypeAndStrValue(cvvType, strValue, lenient);
if (cvv == null) {
if (cvv == null && !allowHarvestingMissingCVV) {
throw new ControlledVocabularyException("Value '" + strValue + "' does not exist in type '" + cvvType.getName() + "'", cvvType, strValue);
}
return Collections.singletonList(cvv);
if (cvv != null) {
vals.add(cvv);
}
}
return vals;
} catch (ClassCastException cce) {
throw new JsonParseException("Invalid values submitted for " + cvvType.getName());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import java.util.logging.Logger;

import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.Test;

import io.restassured.RestAssured;
Expand Down Expand Up @@ -37,8 +39,8 @@ public class HarvestingClientsIT {
private static final String ARCHIVE_URL = "https://demo.dataverse.org";
private static final String HARVEST_METADATA_FORMAT = "oai_dc";
private static final String ARCHIVE_DESCRIPTION = "RestAssured harvesting client test";
private static final String CONTROL_OAI_SET = "controlTestSet";
private static final int DATASETS_IN_CONTROL_SET = 7;
private static final String CONTROL_OAI_SET = "controlTestSet2";
private static final int DATASETS_IN_CONTROL_SET = 8;
private static String normalUserAPIKey;
private static String adminUserAPIKey;
private static String harvestCollectionAlias;
Expand All @@ -54,6 +56,10 @@ public static void setUpClass() {
setupCollection();

}
/**
 * Deletes the {@code AllowHarvestingMissingCVV} setting after each test.
 */
@AfterEach
public void cleanup() {
    // Presumably keeps a value set by one test from leaking into the next.
    final SettingsServiceBean.Key missingCvvSetting =
            SettingsServiceBean.Key.AllowHarvestingMissingCVV;
    UtilIT.deleteSetting(missingCvvSetting);
}

private static void setupUsers() {
Response cu0 = UtilIT.createRandomUser();
Expand Down Expand Up @@ -157,9 +163,24 @@ public void testCreateEditDeleteClient() throws InterruptedException {
logger.info("rDelete.getStatusCode(): " + rDelete.getStatusCode());
assertEquals(OK.getStatusCode(), rDelete.getStatusCode());
}


/**
 * Runs the shared harvesting scenario with the
 * {@code AllowHarvestingMissingCVV} flag argument set to {@code true}.
 *
 * @throws InterruptedException propagated from the shared harvesting run
 */
@Test
public void testHarvestingClientRun_AllowHarvestingMissingCVV_True() throws InterruptedException {
    final boolean allowMissingValues = true;
    harvestingClientRun(allowMissingValues);
}
@Test
public void testHarvestingClientRun() throws InterruptedException {
public void testHarvestingClientRun_AllowHarvestingMissingCVV_False() throws InterruptedException {
harvestingClientRun(false);
}

private void harvestingClientRun(boolean allowHarvestingMissingCVV) throws InterruptedException {
int expectedNumberOfSetsHarvested = allowHarvestingMissingCVV ? DATASETS_IN_CONTROL_SET : DATASETS_IN_CONTROL_SET - 1;
if (allowHarvestingMissingCVV) {
UtilIT.enableSetting(SettingsServiceBean.Key.AllowHarvestingMissingCVV);
} else {
UtilIT.deleteSetting(SettingsServiceBean.Key.AllowHarvestingMissingCVV);
}

// This test will create a client and attempt to perform an actual
// harvest and validate the resulting harvested content.

Expand Down Expand Up @@ -242,7 +263,7 @@ public void testHarvestingClientRun() throws InterruptedException {
assertEquals(harvestTimeStamp, responseJsonPath.getString("data.lastNonEmpty"));

// d) Confirm that the correct number of datasets have been harvested:
assertEquals(DATASETS_IN_CONTROL_SET, responseJsonPath.getInt("data.lastDatasetsHarvested"));
assertEquals(expectedNumberOfSetsHarvested, responseJsonPath.getInt("data.lastDatasetsHarvested"));

// ok, it looks like the harvest has completed successfully.
break;
Expand Down

0 comments on commit d82c730

Please sign in to comment.