From 57f94edb5e89d4a8afb156e4194bdbdec85930f4 Mon Sep 17 00:00:00 2001 From: LeonardoGonzales Date: Fri, 27 Oct 2023 16:27:20 +0100 Subject: [PATCH 1/4] TRM-29836: Create an UniProtKB result column to show publication DOI ids --- .../parser/tsv/uniprot/EntryReferenceMap.java | 20 ++-- .../tsv/uniprot/EntryReferenceMapTest.java | 110 ++++++++++++++++++ 2 files changed, 120 insertions(+), 10 deletions(-) create mode 100644 core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java diff --git a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java index 9dd6218ae..ee5d68b93 100644 --- a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java +++ b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java @@ -2,6 +2,7 @@ import java.util.*; import java.util.stream.Collectors; +import java.util.stream.Stream; import org.uniprot.core.CrossReference; import org.uniprot.core.citation.CitationDatabase; @@ -9,7 +10,7 @@ import org.uniprot.core.uniprotkb.UniProtKBReference; public class EntryReferenceMap implements NamedValueMap { - static final List FIELDS = Arrays.asList("lit_pubmed_id"); + static final List FIELDS = Arrays.asList("lit_pubmed_id", "lit_doi_id"); private final List references; public EntryReferenceMap(List references) { @@ -26,16 +27,15 @@ public Map attributeValues() { return Collections.emptyMap(); } - String result = - references.stream() - .map(UniProtKBReference::getCitation) - .filter(val -> val.getCitationCrossReferences() != null) - .flatMap(val -> val.getCitationCrossReferences().stream()) - .filter(val -> val.getDatabase().equals(CitationDatabase.PUBMED)) - .map(CrossReference::getId) - .collect(Collectors.joining("; ")); + Map idMaps = references.stream() + .map(UniProtKBReference::getCitation) + .filter(val -> val.getCitationCrossReferences() != null) + .flatMap(val -> val.getCitationCrossReferences().stream()) + .collect(Collectors.groupingBy(CrossReference::getDatabase, Collectors.mapping(CrossReference::getId, Collectors.joining("; ")))); + Map map = new HashMap<>(); - map.put(FIELDS.get(0), result); + map.put(FIELDS.get(0), idMaps.getOrDefault(CitationDatabase.PUBMED, "")); + map.put(FIELDS.get(1), idMaps.getOrDefault(CitationDatabase.DOI, "")); return map; } diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java new file mode 100644 index 000000000..664828f78 --- /dev/null +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java @@ -0,0 +1,110 @@ +package org.uniprot.core.parser.tsv.uniprot; + +import org.junit.jupiter.api.Test; +import org.uniprot.core.citation.Citation; +import org.uniprot.core.citation.CitationDatabase; +import org.uniprot.core.citation.impl.ElectronicArticleBuilder; +import org.uniprot.core.impl.CrossReferenceBuilder; +import org.uniprot.core.uniprotkb.UniProtKBReference; +import org.uniprot.core.uniprotkb.impl.UniProtKBReferenceBuilder; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +import static org.junit.jupiter.api.Assertions.*; + +class EntryReferenceMapTest { + + private static final String FIELD_SEPARATOR = "; "; + + @Test + void testFields() { + List fields = EntryReferenceMap.FIELDS; + List expected = Arrays.asList("lit_pubmed_id", "lit_doi_id"); + assertEquals(expected, fields); + } + + @Test + void testLitDoiId() { + String doiId = "doiId1"; + List references = getUniProtKBReferences(CitationDatabase.DOI, doiId); + EntryReferenceMap mapper = new EntryReferenceMap(references); + Map result = mapper.attributeValues(); + + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId); + } + + @Test + void testMultipleLitDoiId() { + String doiId = "doiId1"; + String doiId2 = "doiId2"; + List references = getUniProtKBReferences(CitationDatabase.DOI, doiId, doiId2); + EntryReferenceMap mapper = new EntryReferenceMap(references); + Map result = mapper.attributeValues(); + + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId + FIELD_SEPARATOR + doiId2); + } + + @Test + void testLitPubmedId() { + String pubmedId = "pubmedId1"; + List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId); + EntryReferenceMap mapper = new EntryReferenceMap(references); + Map result = mapper.attributeValues(); + + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId); + } + + @Test + void testMultipleLitPubmedId() { + String pubmedId = "pubmedId1"; + String pubmedId2 = "pubmedId2"; + List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); + + EntryReferenceMap mapper = new EntryReferenceMap(references); + Map result = mapper.attributeValues(); + + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); + } + + @Test + void testMixedIds() { + String pubmedId = "pubmedId1"; + String pubmedId2 = "pubmedId2"; + String doiId = "doiId1"; + String doiId2 = "doiId2"; + List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); + references.addAll(getUniProtKBReferences(CitationDatabase.DOI, doiId, doiId2)); + + EntryReferenceMap mapper = new EntryReferenceMap(references); + Map result = mapper.attributeValues(); + + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId + FIELD_SEPARATOR + doiId2); + } + + private List getUniProtKBReferences(CitationDatabase database, String ... ids) { + List result = new ArrayList<>(); + for (String id: ids) { + Citation citation = new ElectronicArticleBuilder() + .citationCrossReferencesAdd(new CrossReferenceBuilder() + .database(database) + .id(id) + .build()) + .build(); + UniProtKBReference reference = new UniProtKBReferenceBuilder() + .citation(citation) + .build(); + result.add(reference); + } + return result; + } + + private void verifyFieldValue(Map result, String fieldName, String expectedValue) { + String resultValue = result.get(fieldName); + assertNotNull(resultValue); + assertEquals(expectedValue, resultValue); + } +} \ No newline at end of file From fd67fde9597ba81d0a589245d92640b5612681c9 Mon Sep 17 00:00:00 2001 From: automated changes Date: Fri, 27 Oct 2023 15:29:17 +0000 Subject: [PATCH 2/4] code format with spotless automatic --- .../parser/tsv/uniprot/EntryReferenceMap.java | 16 +++-- .../tsv/uniprot/EntryReferenceMapTest.java | 63 +++++++++++-------- 2 files changed, 46 insertions(+), 33 deletions(-) diff --git a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java index ee5d68b93..d047b99b9 100644 --- a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java +++ b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java @@ -2,7 +2,6 @@ import java.util.*; import java.util.stream.Collectors; -import java.util.stream.Stream; import org.uniprot.core.CrossReference; import org.uniprot.core.citation.CitationDatabase; @@ -27,11 +26,16 @@ public Map attributeValues() { return Collections.emptyMap(); } - Map idMaps = references.stream() - .map(UniProtKBReference::getCitation) - .filter(val -> val.getCitationCrossReferences() != null) - .flatMap(val -> val.getCitationCrossReferences().stream()) - .collect(Collectors.groupingBy(CrossReference::getDatabase, Collectors.mapping(CrossReference::getId, Collectors.joining("; ")))); + Map idMaps = + references.stream() + .map(UniProtKBReference::getCitation) + .filter(val -> val.getCitationCrossReferences() != null) + .flatMap(val -> val.getCitationCrossReferences().stream()) + .collect( + Collectors.groupingBy( + CrossReference::getDatabase, + Collectors.mapping( + CrossReference::getId, Collectors.joining("; ")))); Map map = new HashMap<>(); map.put(FIELDS.get(0), idMaps.getOrDefault(CitationDatabase.PUBMED, "")); diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java index 664828f78..be8fd58a7 100644 --- a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java @@ -1,5 +1,12 @@ package org.uniprot.core.parser.tsv.uniprot; +import static org.junit.jupiter.api.Assertions.*; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + import org.junit.jupiter.api.Test; import org.uniprot.core.citation.Citation; import org.uniprot.core.citation.CitationDatabase; @@ -8,13 +15,6 @@ import org.uniprot.core.uniprotkb.UniProtKBReference; import org.uniprot.core.uniprotkb.impl.UniProtKBReferenceBuilder; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; -import java.util.Map; - -import static org.junit.jupiter.api.Assertions.*; - class EntryReferenceMapTest { private static final String FIELD_SEPARATOR = "; "; @@ -33,14 +33,15 @@ void testLitDoiId() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId); + verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId); } @Test void testMultipleLitDoiId() { String doiId = "doiId1"; String doiId2 = "doiId2"; - List references = getUniProtKBReferences(CitationDatabase.DOI, doiId, doiId2); + List references = + getUniProtKBReferences(CitationDatabase.DOI, doiId, doiId2); EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); @@ -50,7 +51,8 @@ void testMultipleLitDoiId() { @Test void testLitPubmedId() { String pubmedId = "pubmedId1"; - List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId); + List references = + getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId); EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); @@ -61,12 +63,14 @@ void testLitPubmedId() { void testMultipleLitPubmedId() { String pubmedId = "pubmedId1"; String pubmedId2 = "pubmedId2"; - List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); + List references = + getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue( + result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); } @Test @@ -75,36 +79,41 @@ void testMixedIds() { String pubmedId2 = "pubmedId2"; String doiId = "doiId1"; String doiId2 = "doiId2"; - List references = getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); + List references = + getUniProtKBReferences(CitationDatabase.PUBMED, pubmedId, pubmedId2); references.addAll(getUniProtKBReferences(CitationDatabase.DOI, doiId, doiId2)); EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue( + result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId + FIELD_SEPARATOR + doiId2); } - private List getUniProtKBReferences(CitationDatabase database, String ... ids) { + private List getUniProtKBReferences( + CitationDatabase database, String... ids) { List result = new ArrayList<>(); - for (String id: ids) { - Citation citation = new ElectronicArticleBuilder() - .citationCrossReferencesAdd(new CrossReferenceBuilder() - .database(database) - .id(id) - .build()) - .build(); - UniProtKBReference reference = new UniProtKBReferenceBuilder() - .citation(citation) - .build(); + for (String id : ids) { + Citation citation = + new ElectronicArticleBuilder() + .citationCrossReferencesAdd( + new CrossReferenceBuilder() + .database(database) + .id(id) + .build()) + .build(); + UniProtKBReference reference = + new UniProtKBReferenceBuilder().citation(citation).build(); result.add(reference); } return result; } - private void verifyFieldValue(Map result, String fieldName, String expectedValue) { + private void verifyFieldValue( + Map result, String fieldName, String expectedValue) { String resultValue = result.get(fieldName); assertNotNull(resultValue); assertEquals(expectedValue, resultValue); } -} \ No newline at end of file +} From 2ed8b8aea2261e0c414ab8959ccb023a54d33873 Mon Sep 17 00:00:00 2001 From: LeonardoGonzales Date: Fri, 3 Nov 2023 15:44:27 +0000 Subject: [PATCH 3/4] Code review --- .../parser/tsv/uniprot/EntryReferenceMap.java | 8 +++++--- .../tsv/uniprot/EntryReferenceMapTest.java | 18 +++++++++--------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java index d047b99b9..4870bf73e 100644 --- a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java +++ b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMap.java @@ -9,7 +9,9 @@ import org.uniprot.core.uniprotkb.UniProtKBReference; public class EntryReferenceMap implements NamedValueMap { - static final List FIELDS = Arrays.asList("lit_pubmed_id", "lit_doi_id"); + static final String LIT_PUBMED_ID = "lit_pubmed_id"; + static final String LIT_DOI_ID = "lit_doi_id"; + static final List FIELDS = Arrays.asList(LIT_PUBMED_ID, LIT_DOI_ID); private final List references; public EntryReferenceMap(List references) { @@ -38,8 +40,8 @@ public Map attributeValues() { CrossReference::getId, Collectors.joining("; ")))); Map map = new HashMap<>(); - map.put(FIELDS.get(0), idMaps.getOrDefault(CitationDatabase.PUBMED, "")); - map.put(FIELDS.get(1), idMaps.getOrDefault(CitationDatabase.DOI, "")); + map.put(LIT_PUBMED_ID, idMaps.getOrDefault(CitationDatabase.PUBMED, "")); + map.put(LIT_DOI_ID, idMaps.getOrDefault(CitationDatabase.DOI, "")); return map; } diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java index be8fd58a7..e95cb1d75 100644 --- a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java @@ -1,6 +1,7 @@ package org.uniprot.core.parser.tsv.uniprot; import static org.junit.jupiter.api.Assertions.*; +import static org.uniprot.core.parser.tsv.uniprot.EntryReferenceMap.*; import java.util.ArrayList; import java.util.Arrays; @@ -21,9 +22,8 @@ class EntryReferenceMapTest { @Test void testFields() { - List fields = EntryReferenceMap.FIELDS; - List expected = Arrays.asList("lit_pubmed_id", "lit_doi_id"); - assertEquals(expected, fields); + List expected = Arrays.asList(LIT_PUBMED_ID, LIT_DOI_ID); + assertEquals(expected, FIELDS); } @Test @@ -33,7 +33,7 @@ void testLitDoiId() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId); + verifyFieldValue(result, LIT_DOI_ID, doiId); } @Test @@ -45,7 +45,7 @@ void testMultipleLitDoiId() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId + FIELD_SEPARATOR + doiId2); + verifyFieldValue(result, LIT_DOI_ID, doiId + FIELD_SEPARATOR + doiId2); } @Test @@ -56,7 +56,7 @@ void testLitPubmedId() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(0), pubmedId); + verifyFieldValue(result, LIT_PUBMED_ID, pubmedId); } @Test @@ -70,7 +70,7 @@ void testMultipleLitPubmedId() { Map result = mapper.attributeValues(); verifyFieldValue( - result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); + result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); } @Test @@ -87,8 +87,8 @@ void testMixedIds() { Map result = mapper.attributeValues(); verifyFieldValue( - result, EntryReferenceMap.FIELDS.get(0), pubmedId + FIELD_SEPARATOR + pubmedId2); - verifyFieldValue(result, EntryReferenceMap.FIELDS.get(1), doiId + FIELD_SEPARATOR + doiId2); + result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue(result, LIT_DOI_ID, doiId + FIELD_SEPARATOR + doiId2); } private List getUniProtKBReferences( From e6f9c2904dd783b17908c2187e6b9b656d97da12 Mon Sep 17 00:00:00 2001 From: automated changes Date: Fri, 3 Nov 2023 15:45:55 +0000 Subject: [PATCH 4/4] code format with spotless automatic --- .../core/parser/tsv/uniprot/EntryReferenceMapTest.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java index e95cb1d75..751f58085 100644 --- a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryReferenceMapTest.java @@ -69,8 +69,7 @@ void testMultipleLitPubmedId() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue( - result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue(result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); } @Test @@ -86,8 +85,7 @@ void testMixedIds() { EntryReferenceMap mapper = new EntryReferenceMap(references); Map result = mapper.attributeValues(); - verifyFieldValue( - result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); + verifyFieldValue(result, LIT_PUBMED_ID, pubmedId + FIELD_SEPARATOR + pubmedId2); verifyFieldValue(result, LIT_DOI_ID, doiId + FIELD_SEPARATOR + doiId2); }