Skip to content

Commit

Permalink
Merged in task/dspace-cris-2023_02_x/IPL-2 (pull request DSpace#2590)
Browse files Browse the repository at this point in the history
[IPL-2][IPL-3] refactoring code to support import-plus

* [IPL-2][IPL-3] Refactor for importplus

* [IPL-2][IPL-3] Refactor for importplus

* [IPL-2][IPL-3] Refactor for importplus

* [IPL-2][IPL-3] minor fix

* [IPL-2][IPL-3] additional fixes and improvements for import plus

* [IPL-2][IPL-3] additional fixes and improvements for import plus

* [IPL-2][IPL-3] disable DTD validator for Pubmed due to error 404
  • Loading branch information
steph-ieffam committed Aug 13, 2024
1 parent c676600 commit 6065c69
Show file tree
Hide file tree
Showing 12 changed files with 183 additions and 46 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ public void internalRun() throws Exception {

for (Item researcher : researchers) {

oairePublicationLoader.importAuthorRecords(context, researcher);
oairePublicationLoader.importRecords(context, researcher);
}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.SolrServerException;
import org.dspace.content.Item;
import org.dspace.content.service.ItemService;
import org.dspace.core.Context;
import org.dspace.external.model.ExternalDataObject;
Expand Down Expand Up @@ -137,4 +138,31 @@ public void flagRelatedSuggestionsAsProcessed(Context context, ExternalDataObjec
*/
protected abstract boolean isExternalDataObjectPotentiallySuggested(Context context,
ExternalDataObject externalDataObject);

/**
* Import the records related to the given item and save them into Solr.
* How the records are retrieved and translated is left to the concrete provider;
* a provider may also reject the call with an UnsupportedOperationException.
*
* @param context the DSpace Context
* @param item a DSpace Item (the visible implementations receive a researcher/profile item)
* @throws Exception if the import fails; the declared type is the generic Exception, so
* implementations are free to raise e.g. a SolrServerException or an IOException
*/
public abstract void importRecords(Context context, Item item) throws Exception;

/**
* Import the records matching the given query and save them into Solr.
* Providers that cannot import by query throw an UnsupportedOperationException;
* see {@link #isSupportingQuery()}.
*
* @param context the DSpace Context
* @param query the query to be run
* @throws Exception if the import fails; the declared type is the generic Exception, so
* implementations are free to raise e.g. a SolrServerException or an IOException
*/
public abstract void importRecords(Context context, String query) throws Exception;

/**
* Tell whether this provider supports the query based import.
* This base implementation always answers {@code false}.
* NOTE(review): presumably providers that really implement
* {@link #importRecords(Context, String)} override this to return true - confirm.
*
* @return false, unless overridden
*/
public boolean isSupportingQuery() {
return false;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -188,4 +188,21 @@ Suggestion findUnprocessedSuggestion(Context context, String source, UUID target
* @throws IOException
*/
SuggestionTarget findTarget(Context context, String source, UUID target) throws SolrServerException, IOException;

/**
* Find all the unprocessed suggestions that are related to the given source and
* have a score greater than or equal to the given score.
*
* @param context the DSpace Context
* @param source the source name
* @param score the minimum score (inclusive), given as a string
* @param pageSize the page size
* @param offset the page offset
* @param ascending true to retrieve the suggestions ordered by score
* ascending, false to have them ordered by score descending
* @return the found suggestions
* @throws SolrServerException if the Solr server reports an error
* @throws IOException if the communication with Solr fails
*/
List<Suggestion> findAllUnprocessedSuggestionsBySourceAndScore(Context context, String source, String score,
int pageSize, long offset, boolean ascending) throws SolrServerException, IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import com.google.gson.Gson;
import com.google.gson.reflect.TypeToken;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
Expand Down Expand Up @@ -53,6 +54,9 @@ public class SolrSuggestionStorageServiceImpl implements SolrSuggestionStorageSe

protected SolrClient solrSuggestionClient;

private static final Logger log =
org.apache.logging.log4j.LogManager.getLogger(SolrSuggestionStorageServiceImpl.class);

@Autowired
private ItemService itemService;

Expand All @@ -73,29 +77,67 @@ protected SolrClient getSolr() {
/**
* Store the given suggestion as a Solr document, unless it is already there and
* force is false.
*
* @param suggestion the suggestion to store
* @param force true to add the document without checking exist(suggestion) first
* @param commit true to commit right after the document has been added
* @throws SolrServerException declared by the signature
* @throws IOException declared by the signature
*/
@Override
public void addSuggestion(Suggestion suggestion, boolean force, boolean commit)
throws SolrServerException, IOException {
// NOTE(review): this span appears to be a diff view - the statements up to the "try"
// look like the previous body, the try/catch wrapped copy like the new one - confirm
if (force || !exist(suggestion)) {
Gson gson = new Gson();
SolrInputDocument document = new SolrInputDocument();
document.addField(SOURCE, suggestion.getSource());
String suggestionFullID = suggestion.getID();
document.addField(SUGGESTION_FULLID, suggestionFullID);
document.addField(SUGGESTION_ID, suggestionFullID.split(":", 3)[2]);
document.addField(TARGET_ID, suggestion.getTarget().getID().toString());
document.addField(DISPLAY, suggestion.getDisplay());
document.addField(TITLE, getFirstValue(suggestion, "dc", "title", null));
document.addField(DATE, getFirstValue(suggestion, "dc", "date", "issued"));
document.addField(CONTRIBUTORS, getAllValues(suggestion, "dc", "contributor", "author"));
document.addField(ABSTRACT, getFirstValue(suggestion, "dc", "description", "abstract"));
document.addField(CATEGORY, getAllValues(suggestion, "dc", "source", null));
document.addField(EXTERNAL_URI, suggestion.getExternalSourceUri());
document.addField(SCORE, suggestion.getScore());
document.addField(PROCESSED, false);
document.addField(EVIDENCES, gson.toJson(suggestion.getEvidences()));
getSolr().add(document);
if (commit) {
getSolr().commit();
try {
// skip suggestions that are already stored, unless the caller forces the add
if (force || !exist(suggestion)) {
Gson gson = new Gson();
SolrInputDocument document = new SolrInputDocument();
document.addField(SOURCE, suggestion.getSource());
String suggestionFullID = suggestion.getID();
document.addField(SUGGESTION_FULLID, suggestionFullID);
// the suggestion id is the third ':' separated segment of the full id;
// a full id with fewer than three segments makes the [2] access fail
document.addField(SUGGESTION_ID, suggestionFullID.split(":", 3)[2]);
document.addField(TARGET_ID, suggestion.getTarget().getID().toString());
document.addField(DISPLAY, suggestion.getDisplay());
document.addField(TITLE, getFirstValue(suggestion, "dc", "title", null));
document.addField(DATE, getFirstValue(suggestion, "dc", "date", "issued"));
document.addField(CONTRIBUTORS, getAllValues(suggestion, "dc", "contributor", "author"));
document.addField(ABSTRACT, getFirstValue(suggestion, "dc", "description", "abstract"));
document.addField(CATEGORY, getAllValues(suggestion, "dc", "source", null));
document.addField(EXTERNAL_URI, suggestion.getExternalSourceUri());
document.addField(SCORE, suggestion.getScore());
// a freshly stored suggestion is always flagged as not processed
document.addField(PROCESSED, false);
// the evidences are serialized to a JSON string
document.addField(EVIDENCES, gson.toJson(suggestion.getEvidences()));
getSolr().add(document);
if (commit) {
getSolr().commit();
}
}
} catch (Exception e) {
// every failure raised inside the try block is only logged here: despite the
// throws clause, nothing is propagated to the caller from that block
log.error(e);
}
}

/**
 * Find the unprocessed suggestions of the given source whose score is greater
 * than or equal to the given one.
 * Results are sorted by the "trust" field (direction chosen by the caller), then
 * by date descending, suggestion id ascending and title ascending.
 *
 * @param context   the DSpace Context
 * @param source    the source name; it is escaped before being placed in the filter
 *                  query so that Solr special characters cannot alter the query
 * @param score     the inclusive lower bound of the score range, placed as-is in the
 *                  range filter (NOTE(review): assumed to be a numeric string - confirm
 *                  against the callers)
 * @param pageSize  the maximum number of suggestions to return
 * @param offset    the index of the first suggestion to return
 * @param ascending true to sort by "trust" ascending, false for descending
 * @return the suggestions found; documents that convertSolrDoc turns into null are skipped
 * @throws SolrServerException if the Solr server reports an error
 * @throws IOException         if the communication with Solr fails
 */
@Override
public List<Suggestion> findAllUnprocessedSuggestionsBySourceAndScore(Context context, String source, String score,
    int pageSize, long offset, boolean ascending) throws SolrServerException, IOException {

    // the source ends up inside a Solr query string: escape it instead of trusting the caller
    String escapedSource = org.apache.solr.client.solrj.util.ClientUtils.escapeQueryChars(source);

    SolrQuery solrQuery = new SolrQuery();
    solrQuery.setRows(pageSize);
    // SolrQuery only accepts an int start, hence the narrowing cast
    solrQuery.setStart((int) offset);
    solrQuery.setQuery("*:*");
    solrQuery.addFilterQuery(
        SOURCE + ":" + escapedSource,
        SCORE + ":[ " + score + " TO * ]",
        PROCESSED + ":false");

    // primary sort, direction chosen by the caller
    solrQuery.addSort(ascending ? SortClause.asc("trust") : SortClause.desc("trust"));

    // secondary sorts, to keep the paging order stable
    solrQuery.addSort(SortClause.desc("date"));
    solrQuery.addSort(SortClause.asc("suggestion_id"));
    solrQuery.addSort(SortClause.asc("title"));

    QueryResponse response = getSolr().query(solrQuery);
    List<Suggestion> suggestions = new ArrayList<>();
    for (SolrDocument solrDoc : response.getResults()) {
        // the original (unescaped) source is what the converted suggestion must carry
        Suggestion suggestion = convertSolrDoc(context, solrDoc, source);
        if (suggestion != null) {
            suggestions.add(suggestion);
        }
    }
    return suggestions;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,16 +35,16 @@
*/
public class OAIREPublicationLoader extends SolrSuggestionProvider {

private List<String> names;
protected List<String> names;

private ExternalDataProvider primaryProvider;
protected ExternalDataProvider primaryProvider;

private List<ExternalDataProvider> otherProviders;
protected List<ExternalDataProvider> otherProviders;

@Autowired
private ConfigurationService configurationService;
protected ConfigurationService configurationService;

private List<EvidenceScorer> pipeline;
protected List<EvidenceScorer> pipeline;

public void setPrimaryProvider(ExternalDataProvider primaryProvider) {
this.primaryProvider = primaryProvider;
Expand Down Expand Up @@ -103,8 +103,9 @@ public List<Suggestion> reduceAndTransform(Item researcher, List<ExternalDataObj
* @throws SolrServerException
* @throws IOException
*/
public void importAuthorRecords(Context context, Item researcher)
throws SolrServerException, IOException {
@Override
public void importRecords(Context context, Item researcher)
throws Exception {
List<ExternalDataObject> metadata = getImportRecords(researcher);
List<Suggestion> records = reduceAndTransform(researcher, metadata);
for (Suggestion record : records) {
Expand Down Expand Up @@ -214,7 +215,7 @@ private boolean isDuplicate(ExternalDataObject dto, List<ExternalDataObject> imp
* @param researcher DSpace item
* @return list of metadata values
*/
private List<String> searchMetadataValues(Item researcher) {
public List<String> searchMetadataValues(Item researcher) {
List<String> authors = new ArrayList<String>();
for (String name : names) {
String value = itemService.getMetadata(researcher, name);
Expand All @@ -237,4 +238,9 @@ protected boolean isExternalDataObjectPotentiallySuggested(Context context, Exte
}
}

/**
* Query based import is not available for this loader.
*
* @param context the DSpace Context (not used)
* @param query the query to be run (not used)
* @throws UnsupportedOperationException always
*/
@Override
public void importRecords(Context context, String query) throws Exception {
throw new UnsupportedOperationException("This operation is not supported by OAIRE loader");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,11 @@ public void importWorks(Context context, Item profile, String orcid) throws Solr
solrSuggestionStorageService.commit();
}

/**
* Import the works of the given researcher by delegating to importWorks, using the
* value that itemService returns for the researcher's person.identifier.orcid metadata.
*
* @param context the DSpace Context
* @param researcher the item whose person.identifier.orcid metadata is read
* @throws Exception whatever importWorks raises
*/
@Override
public void importRecords(Context context, Item researcher) throws Exception {
importWorks(context, researcher, itemService.getMetadata(researcher, "person.identifier.orcid"));
}

private List<Suggestion> convertToSuggestions(Item profile, List<ExternalDataObject> externalDataObjects) {
return externalDataObjects.stream()
.map(externalDataObject -> convertToSuggestion(profile, externalDataObject))
Expand Down Expand Up @@ -124,4 +129,9 @@ public void setProvider(ExternalDataProvider provider) {
this.provider = provider;
}

/**
* Query based import is not available for this loader.
*
* @param context the DSpace Context (not used)
* @param query the query to be run (not used)
* @throws UnsupportedOperationException always
*/
@Override
public void importRecords(Context context, String query) throws Exception {
throw new UnsupportedOperationException("This operation is not supported by orcid loader");
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.dspace.authority.service.AuthorityValueService;
import org.dspace.authority.service.ItemReferenceResolver;
import org.dspace.authority.service.ItemSearcher;
Expand Down Expand Up @@ -125,7 +126,8 @@ public void resolveReferences(Context context, Item item) {

@SuppressWarnings("rawtypes")
private Item performSearchByMetadata(Context context, String searchParam) throws SearchServiceException {
String query = metadata + ":" + searchParam;
String query = metadata + ":" +
ClientUtils.escapeQueryChars(searchParam);
DiscoverQuery discoverQuery = new DiscoverQuery();
discoverQuery.addDSpaceObjectFilter(IndexableItem.TYPE);
discoverQuery.addDSpaceObjectFilter(IndexableWorkspaceItem.TYPE);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import java.util.LinkedList;
import java.util.List;

import org.dspace.content.DCPersonName;
import org.dspace.importer.external.metadatamapping.MetadataFieldConfig;
import org.dspace.importer.external.metadatamapping.MetadataFieldMapping;
import org.dspace.importer.external.metadatamapping.MetadatumDTO;
Expand Down Expand Up @@ -80,19 +81,15 @@ public Collection<MetadatumDTO> contributeMetadata(T t) {
LinkedList<MetadatumDTO> metadatums = (LinkedList<MetadatumDTO>) metadatumContributor.contributeMetadata(t);
metadatumLists.add(metadatums);
}

for (int i = 0; i < metadatumLists.getFirst().size(); i++) {

StringBuilder value = new StringBuilder();

for (LinkedList<MetadatumDTO> metadatums : metadatumLists) {
value.append(metadatums.get(i).getValue());

if (!metadatums.equals(metadatumLists.getLast())) {
value.append(separator);
}
}
values.add(metadataFieldMapping.toDCValue(field, value.toString()));
MetadatumDTO[] firstList = new MetadatumDTO[metadatumLists.getFirst().size()];
firstList = metadatumLists.getFirst().toArray(firstList);
MetadatumDTO[] secondList = new MetadatumDTO[firstList.length];
secondList = metadatumLists.getLast().toArray(secondList);

for (int i = 0; i < firstList.length; i++) {
DCPersonName name =
new DCPersonName(firstList[i].getValue(), secondList[i].getValue());
values.add(metadataFieldMapping.toDCValue(field, name.toString()));
}

return values;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.xml.sax.InputSource;

/**
* Implements a data source for querying PubMed Central
Expand Down Expand Up @@ -356,6 +357,8 @@ private List<Element> splitToRecords(String recordsSrc) {
saxBuilder.setFeature("http://xml.org/sax/features/external-general-entities", false);
saxBuilder.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
saxBuilder.setExpandEntities(false);
saxBuilder.setEntityResolver((publicId, systemId) -> new InputSource(new StringReader("")));

Document document = saxBuilder.build(new StringReader(recordsSrc));
Element root = document.getRootElement();

Expand All @@ -365,7 +368,7 @@ private List<Element> splitToRecords(String recordsSrc) {
List<Element> recordsList = xpath.evaluate(root);
return recordsList;
} catch (JDOMException | IOException e) {
return null;
return List.of();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ public Integer call() throws Exception {
return null;
}
}
return null;
throw new RuntimeException("APIKEY for Scopus is not set");
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
package org.dspace.app.suggestion;

import org.apache.commons.lang3.StringUtils;
import org.dspace.content.Item;
import org.dspace.core.Context;
import org.dspace.external.model.ExternalDataObject;

Expand All @@ -17,4 +18,14 @@ public class MockSolrSuggestionProvider extends SolrSuggestionProvider {
protected boolean isExternalDataObjectPotentiallySuggested(Context context, ExternalDataObject externalDataObject) {
return StringUtils.equals(MockSuggestionExternalDataSource.NAME, externalDataObject.getSource());
}

/**
* Item based import is not implemented by this mock provider.
*
* @throws UnsupportedOperationException always
*/
@Override
public void importRecords(Context context, Item item) throws Exception {
throw new UnsupportedOperationException();
}

/**
* Query based import is not implemented by this mock provider.
*
* @throws UnsupportedOperationException always
*/
@Override
public void importRecords(Context context, String query) throws Exception {
throw new UnsupportedOperationException();
}
}
21 changes: 21 additions & 0 deletions dspace/config/registries/cris-types.xml
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,27 @@
<scope_note></scope_note>
</dc-type>

<dc-type>
<schema>cris</schema>
<element>lastimport</element>
<qualifier>loader-pubmed</qualifier>
<scope_note></scope_note>
</dc-type>

<dc-type>
<schema>cris</schema>
<element>lastimport</element>
<qualifier>loader-pubmedeu</qualifier>
<scope_note></scope_note>
</dc-type>

<dc-type>
<schema>cris</schema>
<element>lastimport</element>
<qualifier>loader-scopus</qualifier>
<scope_note></scope_note>
</dc-type>

<dc-type>
<schema>cris</schema>
<element>entity</element>
Expand Down

0 comments on commit 6065c69

Please sign in to comment.