diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 04159396d..8ab770c4d 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -30,7 +30,7 @@ core:settings: - 'wget --no-cache --no-cookies -O ${MAVEN_SETTINGS} "${URL_MAVEN_SETTINGS}"' script: - if [ ! -f ${MAVEN_SETTINGS} ]; - then echo "CI settings missing"; + then echo "CI settings file missing"; fi artifacts: when: always @@ -38,7 +38,7 @@ core:settings: paths: - ./${MAVEN_SETTINGS} rules: - - if: '$CI_COMMIT_REF_NAME == "master"' + - if: '$CI_COMMIT_REF_NAME == "main"' - when: manual allow_failure: true @@ -59,7 +59,7 @@ core:test: junit: - ./*/target/surefire-reports/TEST-*.xml rules: - - if: '$CI_COMMIT_REF_NAME == "master"' + - if: '$CI_COMMIT_REF_NAME == "main"' - when: manual allow_failure: true @@ -76,7 +76,7 @@ core:sonar: - ./target - ./*/target rules: - - if: '$CI_COMMIT_REF_NAME == "master"' + - if: '$CI_COMMIT_REF_NAME == "main"' - when: manual allow_failure: true @@ -87,18 +87,11 @@ core:deploy: - core:settings - core:sonar only: - - master - -#after_script: -# - 'pwd' -# - 'ls -al' -# - 'ls -al */' -# - 'ls -al */*/' -# - 'ls -al */*/*/' + - main store:downstream: stage: bridge - trigger: uniprot/deployment/mirror-uniprot-store + trigger: uniprot/deployment/uniprot-store when: on_success only: - - master \ No newline at end of file + - main \ No newline at end of file diff --git a/controlled-vocabulary/src/main/java/org/uniprot/cv/xdb/UniProtDatabaseTypes.java b/controlled-vocabulary/src/main/java/org/uniprot/cv/xdb/UniProtDatabaseTypes.java index d3dba33c2..8423f0de8 100644 --- a/controlled-vocabulary/src/main/java/org/uniprot/cv/xdb/UniProtDatabaseTypes.java +++ b/controlled-vocabulary/src/main/java/org/uniprot/cv/xdb/UniProtDatabaseTypes.java @@ -50,6 +50,12 @@ public List getDBTypesByCategory(UniProtDatabaseCategory .collect(Collectors.toList()); } + public List getInternalDatabaseDetails() { + return UniProtDatabaseTypes.INSTANCE.getAllDbTypes().stream() + .filter(dbDetail -> "internal".equals(dbDetail.getType())) + .collect(Collectors.toList()); + } + private void init() { String source = @@ -72,6 +78,7 @@ private void init() { String linkedReason = item.optString("linkedReason", null); String idMappingName = item.optString("idMappingName", null); + String type = item.optString("type", null); List attributes = new ArrayList<>(); List properties = item.getProperties("attributes"); @@ -97,7 +104,8 @@ private void init() { attributes, isImplicit, linkedReason, - idMappingName); + idMappingName, + type); types.add(xdbType); }); typeMap = diff --git a/controlled-vocabulary/src/main/resources/META-INF/drlineconfiguration.json b/controlled-vocabulary/src/main/resources/META-INF/drlineconfiguration.json index 125a8db9a..16ae10b10 100644 --- a/controlled-vocabulary/src/main/resources/META-INF/drlineconfiguration.json +++ b/controlled-vocabulary/src/main/resources/META-INF/drlineconfiguration.json @@ -1224,7 +1224,7 @@ "name": "TAIR", "displayName": "TAIR", "category": "ORG", - "uriLink": "https://www.arabidopsis.org/servlets/TairObject?accession=%id", + "uriLink": "https://www.arabidopsis.org/servlets/TairObject?type=locus&name=%id", "attributes": [ { "name": "GeneName", @@ -1904,5 +1904,54 @@ "displayName": "AlphaFoldDB", "category": "3DS", "uriLink": "https://alphafold.ebi.ac.uk/entry/%id" - } + }, + { + "name": "eMIND", + "displayName": "eMIND", + "category": "MISC", + "uriLink": "https://research.bioinformatics.udel.edu/itextmine/integrate/doc/emind/medline/%id", + "type": "internal" + }, + { + "name":"PGenN", + "displayName":"PGenN", + "category":"MISC", + "uriLink":"https://research.bioinformatics.udel.edu/itextmine/pgenn/doc/pgenn/medline/%id", + "type":"internal" + }, + { + "name":"Alzforum", + "displayName":"Alzforum", + "category":"GVD", + "uriLink":"https://www.alzforum.org/node/%id", + "type":"internal" + }, + { + "name":"GeneRIF", + "displayName":"GeneRIF", + "category":"SEQ", + "uriLink":"https://www.ncbi.nlm.nih.gov/gene?Db=gene&Cmd=DetailsSearch&Term=%id", + "type":"internal" + }, + { + "name":"IC4R", + "displayName":"IC4R", + "category":"ORG", + "uriLink":"http://ic4r.org/osGene/%id", + "type":"internal" + }, + { + "name":"ORCID", + "displayName":"ORCID", + "category":"MISC", + "uriLink":"https://orcid.org/%id", + "type":"internal" + }, + { + "name":"PubTator", + "displayName":"PubTator", + "category":"MISC", + "uriLink":"https://www.ncbi.nlm.nih.gov/research/pubtator/?query=%id", + "type":"internal" + } ] \ No newline at end of file diff --git a/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/CrossReferenceValidatorIT.java b/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/CrossReferenceValidatorIT.java index 10ac978de..817a2bf24 100644 --- a/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/CrossReferenceValidatorIT.java +++ b/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/CrossReferenceValidatorIT.java @@ -69,6 +69,7 @@ void testFailedValidation() { opType.getAttributes(), false, null, + null, null); // validate, the category should mismatch diff --git a/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/UniProtKBDatabaseTypesTest.java b/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/UniProtKBDatabaseTypesTest.java index 21084dd3b..821456860 100644 --- a/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/UniProtKBDatabaseTypesTest.java +++ b/controlled-vocabulary/src/test/java/org/uniprot/cv/xdb/UniProtKBDatabaseTypesTest.java @@ -288,7 +288,7 @@ void testProteomesType() { @Test void testDatabaseFieldSize() { - verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(SEQUENCE_DATABASES), 6); + verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(SEQUENCE_DATABASES), 7); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(D3_STRUCTURE_DATABASES), 12); verifyGroupSize( @@ -301,7 +301,7 @@ void testDatabaseFieldSize() { 13); verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(PTM_DATABASES), 10); verifyGroupSize( - UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(GENERIC_VARIATION_DATABASES), 5); + UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(GENERIC_VARIATION_DATABASES), 6); verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(D2_GEL_DATABASES), 7); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(PROTEOMIC_DATABASES), 12); @@ -314,13 +314,13 @@ void testDatabaseFieldSize() { 15); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(ORGANISM_SPECIFIC_DATABASES), - 40); + 41); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(PHYLOGENOMIC_DATABASES), 9); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(ENZYME_AND_PATHWAY_DATABASES), 10); - verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(MISCELLANEOUS), 11); + verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(MISCELLANEOUS), 15); verifyGroupSize( UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(GENE_EXPRESSION_DATABASES), 5); verifyGroupSize( @@ -331,6 +331,14 @@ void testDatabaseFieldSize() { verifyGroupSize(UniProtDatabaseTypes.INSTANCE.getDBTypesByCategory(PROTEOMES_DATABASES), 1); } + @Test + void testInternalCrossRefs() { + List internalCrossRefs = + UniProtDatabaseTypes.INSTANCE.getInternalDatabaseDetails(); + + assertEquals(7, internalCrossRefs.size()); + } + private void verifyGroupSize(List dbTypesByCategory, int size) { assertEquals(size, dbTypesByCategory.size()); } diff --git a/core-domain/src/main/java/org/uniprot/core/cv/xdb/UniProtDatabaseDetail.java b/core-domain/src/main/java/org/uniprot/core/cv/xdb/UniProtDatabaseDetail.java index dfc77def5..b761f9b70 100644 --- a/core-domain/src/main/java/org/uniprot/core/cv/xdb/UniProtDatabaseDetail.java +++ b/core-domain/src/main/java/org/uniprot/core/cv/xdb/UniProtDatabaseDetail.java @@ -9,7 +9,7 @@ public class UniProtDatabaseDetail implements Serializable { private static final long serialVersionUID = 8751881513996820892L; - private static final UniProtDatabaseAttribute DEFAULT_ATTRIBUTE = + public static final UniProtDatabaseAttribute DEFAULT_ATTRIBUTE = new UniProtDatabaseAttribute("Description", "description", null); private String name; private String displayName; @@ -20,6 +20,7 @@ public class UniProtDatabaseDetail implements Serializable { private boolean implicit = false; private String linkedReason = null; private String idMappingName; + private String type; UniProtDatabaseDetail() { this.attributes = new ArrayList<>(); @@ -35,6 +36,28 @@ public UniProtDatabaseDetail( boolean implicit, String linkedReason, String idMappingName) { + this( + name, + displayName, + category, + uriLink, + attributes, + implicit, + linkedReason, + idMappingName, + null); + } + + public UniProtDatabaseDetail( + String name, + String displayName, + UniProtDatabaseCategory category, + String uriLink, + List attributes, + boolean implicit, + String linkedReason, + String idMappingName, + String type) { super(); this.name = name; this.displayName = displayName; @@ -47,6 +70,7 @@ public UniProtDatabaseDetail( if ((attributes != null) && !attributes.isEmpty()) this.attributes.addAll(attributes); else this.attributes.add(DEFAULT_ATTRIBUTE); this.idMappingName = idMappingName; + this.type = type; } public String getName() { @@ -81,6 +105,10 @@ public String getIdMappingName() { return idMappingName; } + public String getType() { + return this.type; + } + @Override public int hashCode() { return Objects.hash( @@ -91,7 +119,8 @@ public int hashCode() { this.uriLink, this.implicit, this.linkedReason, - this.idMappingName); + this.idMappingName, + this.type); } @Override @@ -109,6 +138,7 @@ public boolean equals(Object obj) { && Objects.equals(this.uriLink, other.uriLink) && Objects.equals(this.implicit, other.implicit) && Objects.equals(this.linkedReason, other.linkedReason) - && Objects.equals(this.idMappingName, other.idMappingName); + && Objects.equals(this.idMappingName, other.idMappingName) + && Objects.equals(this.type, other.type); } } diff --git a/core-domain/src/main/java/org/uniprot/core/proteome/GenomeAssemblySource.java b/core-domain/src/main/java/org/uniprot/core/proteome/GenomeAssemblySource.java index ab688b7fa..38051e931 100644 --- a/core-domain/src/main/java/org/uniprot/core/proteome/GenomeAssemblySource.java +++ b/core-domain/src/main/java/org/uniprot/core/proteome/GenomeAssemblySource.java @@ -16,7 +16,7 @@ public enum GenomeAssemblySource implements EnumDisplay { ENSEMBLPROTISTS("EnsemblProtists"), ENSEMBLMETAZOA("EnsemblMetazoa"), ENSEMBL("Ensembl"), - REFSEQ("Refseq"), + REFSEQ("RefSeq"), WORMBASE("WormBase"); private final String name; diff --git a/core-domain/src/main/java/org/uniprot/core/proteome/ProteomeEntry.java b/core-domain/src/main/java/org/uniprot/core/proteome/ProteomeEntry.java index a9ba5ed01..e90eda78b 100644 --- a/core-domain/src/main/java/org/uniprot/core/proteome/ProteomeEntry.java +++ b/core-domain/src/main/java/org/uniprot/core/proteome/ProteomeEntry.java @@ -50,4 +50,6 @@ public interface ProteomeEntry extends Serializable { GenomeAnnotation getGenomeAnnotation(); List getExclusionReasons(); + + ProteomeStatistics getProteomeStatistics(); } diff --git a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilder.java b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilder.java index 7d3e4b810..ade66886b 100644 --- a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilder.java +++ b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilder.java @@ -35,6 +35,7 @@ public class ProteomeEntryBuilder implements Builder { private GenomeAnnotation genomeAnnotation; private List exclusionReasons = new ArrayList<>(); private Integer proteinCount; + private ProteomeStatistics proteomeStatistics; @Override public @Nonnull ProteomeEntry build() { @@ -59,7 +60,8 @@ public class ProteomeEntryBuilder implements Builder { genomeAssembly, genomeAnnotation, exclusionReasons, - proteinCount); + proteinCount, + proteomeStatistics); } public static @Nonnull ProteomeEntryBuilder from(@Nonnull ProteomeEntry instance) { @@ -84,7 +86,8 @@ public class ProteomeEntryBuilder implements Builder { .genomeAssembly(instance.getGenomeAssembly()) .genomeAnnotation(instance.getGenomeAnnotation()) .exclusionReasonsSet(instance.getExclusionReasons()) - .proteinCount(instance.getProteinCount()); + .proteinCount(instance.getProteinCount()) + .proteomeStatistics(instance.getProteomeStatistics()); } public @Nonnull ProteomeEntryBuilder proteomeId(ProteomeId id) { @@ -225,4 +228,9 @@ public class ProteomeEntryBuilder implements Builder { Utils.addOrIgnoreNull(exclusionReason, exclusionReasons); return this; } + + public @Nonnull ProteomeEntryBuilder proteomeStatistics(ProteomeStatistics proteomeStatistics) { + this.proteomeStatistics = proteomeStatistics; + return this; + } } diff --git a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryImpl.java b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryImpl.java index cace8a707..ef6a37e67 100644 --- a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryImpl.java +++ b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeEntryImpl.java @@ -34,12 +34,13 @@ public class ProteomeEntryImpl implements ProteomeEntry { private final GenomeAnnotation genomeAnnotation; private final List taxonLineage; private final List exclusionReasons; + private final ProteomeStatistics proteomeStatistics; // no arg constructor for JSON deserialization ProteomeEntryImpl() { this( null, null, null, null, null, null, null, null, null, null, null, null, null, null, - null, null, null, null, null, null, null); + null, null, null, null, null, null, null, null); } ProteomeEntryImpl( @@ -63,7 +64,8 @@ public class ProteomeEntryImpl implements ProteomeEntry { GenomeAssembly genomeAssembly, GenomeAnnotation genomeAnnotation, List exclusionReasons, - Integer proteinCount) { + Integer proteinCount, + ProteomeStatistics proteomeStatistics) { super(); this.id = id; this.taxonomy = taxonomy; @@ -88,6 +90,7 @@ public class ProteomeEntryImpl implements ProteomeEntry { this.genomeAnnotation = genomeAnnotation; this.exclusionReasons = Utils.unmodifiableList(exclusionReasons); this.proteinCount = proteinCount; + this.proteomeStatistics = proteomeStatistics; } @Override @@ -195,6 +198,11 @@ public List getExclusionReasons() { return exclusionReasons; } + @Override + public ProteomeStatistics getProteomeStatistics() { + return proteomeStatistics; + } + @Override public int hashCode() { return Objects.hash( @@ -214,7 +222,8 @@ public int hashCode() { proteomeCompletenessReport, genomeAssembly, proteinCount, - genomeAnnotation); + genomeAnnotation, + proteomeStatistics); } @Override @@ -240,6 +249,7 @@ public boolean equals(Object obj) { && Objects.equals(genomeAssembly, other.genomeAssembly) && Objects.equals(geneCount, other.geneCount) && Objects.equals(proteinCount, other.proteinCount) - && Objects.equals(genomeAnnotation, other.genomeAnnotation); + && Objects.equals(genomeAnnotation, other.genomeAnnotation) + && Objects.equals(proteomeStatistics, other.proteomeStatistics); } } diff --git a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImpl.java b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImpl.java index 3f425e4b1..132f25bd6 100644 --- a/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImpl.java +++ b/core-domain/src/main/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImpl.java @@ -6,6 +6,7 @@ import org.uniprot.core.proteome.ProteomeStatistics; public class ProteomeStatisticsImpl extends StatisticsImpl implements ProteomeStatistics { + private static final long serialVersionUID = -8585233468042758658L; private final long isoformProteinCount; ProteomeStatisticsImpl( @@ -14,6 +15,10 @@ public class ProteomeStatisticsImpl extends StatisticsImpl implements ProteomeSt this.isoformProteinCount = isoformProteinCount; } + ProteomeStatisticsImpl() { + this(0, 0, 0); + } + @Override public long getIsoformProteinCount() { return isoformProteinCount; diff --git a/core-domain/src/test/java/org/uniprot/core/UniProtKBDatabaseMock.java b/core-domain/src/test/java/org/uniprot/core/UniProtKBDatabaseMock.java index e14fb20ea..432cadd35 100644 --- a/core-domain/src/test/java/org/uniprot/core/UniProtKBDatabaseMock.java +++ b/core-domain/src/test/java/org/uniprot/core/UniProtKBDatabaseMock.java @@ -20,10 +20,10 @@ public UniProtKBDatabaseMock(String name) { public @Nonnull UniProtDatabaseDetail getUniProtDatabaseDetail() { if (Utils.notNullNotEmpty(this.name)) { return new UniProtDatabaseDetail( - this.name, this.name, null, null, null, false, null, null); + this.name, this.name, null, null, null, false, null, null, null); } else { return new UniProtDatabaseDetail( - "dummy", "dummyName", null, null, null, false, null, null); + "dummy", "dummyName", null, null, null, false, null, null, null); } } diff --git a/core-domain/src/test/java/org/uniprot/core/cv/xdb/UniProtKBDatabaseDetailTest.java b/core-domain/src/test/java/org/uniprot/core/cv/xdb/UniProtKBDatabaseDetailTest.java index fd6ee19f4..0fdbda077 100644 --- a/core-domain/src/test/java/org/uniprot/core/cv/xdb/UniProtKBDatabaseDetailTest.java +++ b/core-domain/src/test/java/org/uniprot/core/cv/xdb/UniProtKBDatabaseDetailTest.java @@ -18,6 +18,7 @@ class UniProtKBDatabaseDetailTest { private String uriLink; private List attributes; private String idMappingName; + private String type; @BeforeEach void setUp() { @@ -34,6 +35,7 @@ void setUp() { this.name, this.displayName, this.uriLink)) .collect(Collectors.toList()); this.idMappingName = "idMappingName-" + random; + this.type = "type-" + random; } @Test @@ -99,6 +101,24 @@ void needDefaultConstructorForJsonDeserialization() { assertEquals(1, obj.getAttributes().size()); assertNull(obj.getLinkedReason()); assertFalse(obj.isImplicit()); + assertNull(obj.getType()); + } + + @Test + void testCreateObjectWithType() { + UniProtDatabaseDetail dbDetails = + new UniProtDatabaseDetail( + this.name, + this.displayName, + this.category, + this.uriLink, + this.attributes, + false, + null, + this.idMappingName, + this.type); + assertNotNull(dbDetails); + assertEquals(this.type, dbDetails.getType()); } private UniProtDatabaseDetail createUniProtDatabaseDetail(boolean passAttribute) { diff --git a/core-domain/src/test/java/org/uniprot/core/proteome/GenomeAssemblySourceTest.java b/core-domain/src/test/java/org/uniprot/core/proteome/GenomeAssemblySourceTest.java index bb675cc76..05b8b0cbd 100644 --- a/core-domain/src/test/java/org/uniprot/core/proteome/GenomeAssemblySourceTest.java +++ b/core-domain/src/test/java/org/uniprot/core/proteome/GenomeAssemblySourceTest.java @@ -23,6 +23,7 @@ void getName_toDisplayName_areSame() { @Test void testToDisplayName() { assertSame("EnsemblFungi", GenomeAssemblySource.ENSEMBLFUNGI.getDisplayName()); + assertSame("RefSeq", GenomeAssemblySource.REFSEQ.getDisplayName()); } @Nested diff --git a/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilderTest.java b/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilderTest.java index 2aa0f9f43..117f7bfc9 100644 --- a/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilderTest.java +++ b/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeEntryBuilderTest.java @@ -11,7 +11,10 @@ import java.util.List; import org.junit.jupiter.api.Test; -import org.uniprot.core.citation.*; +import org.uniprot.core.citation.Citation; +import org.uniprot.core.citation.JournalArticle; +import org.uniprot.core.citation.Submission; +import org.uniprot.core.citation.SubmissionDatabase; import org.uniprot.core.citation.impl.JournalArticleBuilder; import org.uniprot.core.citation.impl.SubmissionBuilder; import org.uniprot.core.proteome.*; @@ -350,4 +353,12 @@ void ExclusionReasons() { assertThat(proteome.getExclusionReasons(), hasItem(ExclusionReason.METAGENOME)); assertThat(proteome.getExclusionReasons(), hasItem(ExclusionReason.MIXED_CULTURE)); } + + @Test + void testProteomeStatistics() { + ProteomeStatistics proteomeStatistics = new ProteomeStatisticsBuilder().build(); + ProteomeEntry proteome = + new ProteomeEntryBuilder().proteomeStatistics(proteomeStatistics).build(); + assertSame(proteomeStatistics, proteome.getProteomeStatistics()); + } } diff --git a/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImplTest.java b/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImplTest.java index e4e27baf5..e2e3bebc1 100644 --- a/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImplTest.java +++ b/core-domain/src/test/java/org/uniprot/core/proteome/impl/ProteomeStatisticsImplTest.java @@ -1,6 +1,7 @@ package org.uniprot.core.proteome.impl; -import static org.junit.jupiter.api.Assertions.*; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import org.junit.jupiter.api.Test; @@ -20,6 +21,12 @@ void getCountsAreAccurate() { assertEquals(ISOFORM_PROTEIN_COUNT, PROTEOME_STATISTICS.getIsoformProteinCount()); } + @Test + void defaultConstructor() { + ProteomeStatisticsImpl that = new ProteomeStatisticsImpl(); + assertEquals(new ProteomeStatisticsImpl(0, 0, 0), that); + } + @Test void equals_whenTrue() { ProteomeStatisticsImpl that = diff --git a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMap.java b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMap.java index e99d8eb55..9e637cd76 100644 --- a/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMap.java +++ b/core-parser/src/main/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMap.java @@ -1,26 +1,29 @@ package org.uniprot.core.parser.tsv.uniprot; +import static org.uniprot.core.util.Utils.*; + import java.util.*; import java.util.stream.Collectors; import org.uniprot.core.Property; +import org.uniprot.core.cv.xdb.UniProtDatabaseDetail; import org.uniprot.core.parser.tsv.NamedValueMap; import org.uniprot.core.uniprotkb.xdb.UniProtKBCrossReference; -import org.uniprot.core.util.Utils; public class EntryCrossReferenceMap implements NamedValueMap { private static final String CROSS_REF = "xref_"; + private static final String FULL_SUFFIX = "_full"; private final List dbReferences; - private static final Map D3MethodMAP = new HashMap<>(); + private static final Map d3MethodMap = new HashMap<>(); static { - D3MethodMAP.put("X-ray", "X-ray crystallography"); - D3MethodMAP.put("NMR", "NMR spectroscopy"); - D3MethodMAP.put("EM", "Electron microscopy"); - D3MethodMAP.put("Model", "Model"); - D3MethodMAP.put("Neutron", "Neutron diffraction"); - D3MethodMAP.put("Fiber", "Fiber diffraction"); - D3MethodMAP.put("IR", "Infrared spectroscopy"); + d3MethodMap.put("X-ray", "X-ray crystallography"); + d3MethodMap.put("NMR", "NMR spectroscopy"); + d3MethodMap.put("EM", "Electron microscopy"); + d3MethodMap.put("Model", "Model"); + d3MethodMap.put("Neutron", "Neutron diffraction"); + d3MethodMap.put("Fiber", "Fiber diffraction"); + d3MethodMap.put("IR", "Infrared spectroscopy"); } public static boolean contains(List fields) { @@ -33,7 +36,7 @@ public static boolean contains(List fields) { } public EntryCrossReferenceMap(List dbReferences) { - this.dbReferences = Utils.unmodifiableList(dbReferences); + this.dbReferences = unmodifiableList(dbReferences); } @Override @@ -55,32 +58,60 @@ private void addToMap( if (type.equalsIgnoreCase("GO")) { EntryGoCrossReferenceMap dlGoXref = new EntryGoCrossReferenceMap(xrefs); Map goMap = dlGoXref.attributeValues(); - goMap.forEach(map::put); + map.putAll(goMap); } else if (type.equalsIgnoreCase("PROTEOMES")) { map.put( CROSS_REF + type.toLowerCase(), xrefs.stream() - .map(EntryCrossReferenceMap::proteomeXrefToString) + .map(this::proteomeXrefToString) .collect(Collectors.joining("; "))); } else { map.put( CROSS_REF + type.toLowerCase(), xrefs.stream() - .map(EntryCrossReferenceMap::dbXrefToString) + .map(this::dbXrefToString) .collect(Collectors.joining(";", "", ";"))); + + if (isMultiValueXref(xrefs)) { + map.put( + CROSS_REF + type.toLowerCase() + FULL_SUFFIX, + xrefs.stream() + .map(this::dbXrefFullToString) + .collect(Collectors.joining(";", "", ";"))); + } + if (type.equalsIgnoreCase("PDB")) { map.put("structure_3d", pdbXrefTo3DString(xrefs)); } } } + private static boolean isMultiValueXref(List xrefs) { + boolean result = false; + UniProtKBCrossReference xref = xrefs.get(0); + if (notNullNotEmpty(xref.getProperties())) { + if (xref.getProperties().size() > 1) { + result = true; + } else { // else only one property + Property firstProperty = xref.getProperties().get(0); + result = notDefaultProperty(firstProperty); + } + } + return result; + } + + private static boolean notDefaultProperty(Property property) { + return !property.getKey() + .equalsIgnoreCase(UniProtDatabaseDetail.DEFAULT_ATTRIBUTE.getName()); + } + private String pdbXrefTo3DString(List xrefs) { Map result = xrefs.stream() .flatMap(val -> val.getProperties().stream()) .filter(val -> val.getKey().equalsIgnoreCase("Method")) .map(Property::getValue) - .map(D3MethodMAP::get) + .map(d3MethodMap::get) .filter(Objects::nonNull) .collect( Collectors.groupingBy( @@ -91,16 +122,43 @@ private String pdbXrefTo3DString(List xrefs) { .collect(Collectors.joining("; ")); } - public static String dbXrefToString(UniProtKBCrossReference xref) { + private String dbXrefToString(UniProtKBCrossReference xref) { StringBuilder sb = new StringBuilder(); sb.append(xref.getId()); + if (xref.getIsoformId() != null && !xref.getIsoformId().isEmpty()) { sb.append(" [").append(xref.getIsoformId()).append("]"); } + return sb.toString(); } - public static String proteomeXrefToString(UniProtKBCrossReference xref) { + private String dbXrefFullToString(UniProtKBCrossReference xref) { + StringBuilder sb = new StringBuilder(); + + sb.append("\"").append(xref.getId()); + if (xref.hasProperties()) { + String values = dbXrefPropertiesToString(xref); + if (!values.isEmpty()) { + sb.append("; ").append(values).append("."); + } + } + if (xref.getIsoformId() != null && !xref.getIsoformId().isEmpty()) { + sb.append(" [").append(xref.getIsoformId()).append("]"); + } + sb.append("\""); + return sb.toString(); + } + + private String dbXrefPropertiesToString(UniProtKBCrossReference xref) { + List properties = xref.getProperties(); + return properties.stream() + .map(Property::getValue) + .map(String::strip) + .collect(Collectors.joining("; ")); + } + + private String proteomeXrefToString(UniProtKBCrossReference xref) { StringBuilder sb = new StringBuilder(); sb.append(xref.getId()).append(": ").append(xref.getProperties().get(0).getValue()); diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMapTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMapTest.java index 708351152..ecb5d7a53 100644 --- a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMapTest.java +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/EntryCrossReferenceMapTest.java @@ -23,7 +23,7 @@ void testGetDataEmpty() { } @Test - void hasEmbl() { + void hasMultipleEmblAndFullMaps() { List xrefs = new ArrayList<>(); xrefs.add( createXref( @@ -43,12 +43,16 @@ void hasEmbl() { null)); EntryCrossReferenceMap dl = new EntryCrossReferenceMap(xrefs); Map result = dl.attributeValues(); - assertEquals(1, result.size()); + assertEquals(2, result.size()); verify("AY189288;AK022746;", "xref_embl", result); + verify( + "\"AY189288; AAO86732.1; -; mRNA.\";\"AK022746; BAB14220.1; -; mRNA.\";", + "xref_embl_full", + result); } @Test - void hasEmblAndEnsembl() { + void hasMultipleEmblAndEnsemblWithFullMap() { List xrefs = new ArrayList<>(); xrefs.add( createXref( @@ -86,13 +90,21 @@ void hasEmblAndEnsembl() { null)); EntryCrossReferenceMap dl = new EntryCrossReferenceMap(xrefs); Map result = dl.attributeValues(); - assertEquals(2, result.size()); + assertEquals(4, result.size()); verify("AY189288;AK022746;", "xref_embl", result); + verify( + "\"AY189288; AAO86732.1; -; mRNA.\";\"AK022746; BAB14220.1; -; mRNA.\";", + "xref_embl_full", + result); verify("ENST00000330899 [P31689-1];ENST00000439351;", "xref_ensembl", result); + verify( + "\"ENST00000330899; ENSP00000369127; ENSG00000086061. [P31689-1]\";\"ENST00000439351; ENSP00000414398; ENSG00000090520.\";", + "xref_ensembl_full", + result); } @Test - void hasPdbAndSmr() { + void hasPdbAndSmrAlsoHasFullMapped() { List xrefs = new ArrayList<>(); xrefs.add(createXref(new UniProtKBDatabaseImpl("PDB"), "2LO1", "NMR", "-", "A=1-70", null)); xrefs.add(createXref(new UniProtKBDatabaseImpl("PDB"), "2M6Y", "NMR", "-", "A=1-67", null)); @@ -108,15 +120,19 @@ void hasPdbAndSmr() { xrefs.add(createXref(new UniProtKBDatabaseImpl("SMR"), "P31689", "-", null, null, null)); EntryCrossReferenceMap dl = new EntryCrossReferenceMap(xrefs); Map result = dl.attributeValues(); - assertEquals(3, result.size()); + assertEquals(4, result.size()); verify("2LO1;2M6Y;5TKG;", "xref_pdb", result); + verify( + "\"2LO1; NMR; -; A=1-70.\";\"2M6Y; NMR; -; A=1-67.\";\"5TKG; X-ray; 1.20 A; A/B=16-23.\";", + "xref_pdb_full", + result); verify("P31689;", "xref_smr", result); String pdb3d = "NMR spectroscopy (2); X-ray crystallography (1)"; verify(pdb3d, "structure_3d", result); } @Test - void hasIntactAndString() { + void hasIntactAndStringMixedFullAndSingleIds() { List xrefs = new ArrayList<>(); xrefs.add( createXref(new UniProtKBDatabaseImpl("IntAct"), "P31689", "97", null, null, null)); @@ -131,13 +147,14 @@ void hasIntactAndString() { null)); EntryCrossReferenceMap dl = new EntryCrossReferenceMap(xrefs); Map result = dl.attributeValues(); - assertEquals(2, result.size()); + assertEquals(3, result.size()); verify("P31689;", "xref_intact", result); + verify("\"P31689; 97.\";", "xref_intact_full", result); verify("9606.ENSP00000369127;", "xref_string", result); } @Test - void hasChemblAndSwissLipids() { + void hasChemblAndSwissLipidsOnlySingleIds() { List xrefs = new ArrayList<>(); xrefs.add( createXref( @@ -164,7 +181,7 @@ void hasChemblAndSwissLipids() { } @Test - void testBbXrefToString() { + void testSingleDbXrefToString() { UniProtKBCrossReference dbxref = createXref( new UniProtKBDatabaseImpl("EMBL"), @@ -173,8 +190,69 @@ void testBbXrefToString() { "-", "mRNA", null); - String result = EntryCrossReferenceMap.dbXrefToString(dbxref); - assertEquals("AY189288", result); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("AY189288;", "xref_embl", result); + } + + @Test + void testSingleDbXrefToStringWithIsoforms() { + UniProtKBCrossReference dbxref = + createXref( + new UniProtKBDatabaseImpl("EMBL"), + "AY189288", + "AAO86732.1", + "-", + "mRNA", + "P12345-2"); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("AY189288 [P12345-2];", "xref_embl", result); + } + + @Test + void testSingleDbXrefFullToStringAllIds() { + UniProtKBCrossReference dbxref = + createXref( + new UniProtKBDatabaseImpl("EMBL"), + "AY189288", + "AAO86732.1", + "AAO86732.2", + "mRNA", + null); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("\"AY189288; AAO86732.1; AAO86732.2; mRNA.\";", "xref_embl_full", result); + } + + @Test + void testSingleDbXrefFullToStringMissingIdsWithDash() { + UniProtKBCrossReference dbxref = + createXref( + new UniProtKBDatabaseImpl("EMBL"), + "AY189288", + "AAO86732.1", + "-", + "mRNA", + null); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("\"AY189288; AAO86732.1; -; mRNA.\";", "xref_embl_full", result); + } + + @Test + void testSingleDbXrefFullToStringWithIsoforms() { + UniProtKBCrossReference dbxref = + createXref( + new UniProtKBDatabaseImpl("EMBL"), + "AY189288", + "-", + "AAO86732.2", + "mRNA", + "P12345-2"); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("\"AY189288; -; AAO86732.2; mRNA. [P12345-2]\";", "xref_embl_full", result); } @Test @@ -188,8 +266,9 @@ void testProteomeXrefToString() { null, null, null); - String result = EntryCrossReferenceMap.proteomeXrefToString(dbxref); - assertEquals("UP000006548: Chromosome 4", result); + EntryCrossReferenceMap dl = new EntryCrossReferenceMap(List.of(dbxref)); + Map result = dl.attributeValues(); + verify("UP000006548: Chromosome 4", "xref_proteomes", result); } private void verify(String expected, String field, Map result) { diff --git a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/UniProtKBEntryValueMapperTest.java b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/UniProtKBEntryValueMapperTest.java index 3af7d97d8..e4dc63d5e 100644 --- a/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/UniProtKBEntryValueMapperTest.java +++ b/core-parser/src/test/java/org/uniprot/core/parser/tsv/uniprot/UniProtKBEntryValueMapperTest.java @@ -1,13 +1,13 @@ package org.uniprot.core.parser.tsv.uniprot; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNull; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.nio.charset.Charset; -import java.util.Arrays; import java.util.List; import java.util.Map; @@ -76,7 +76,7 @@ private static String inputStreamToString(InputStream is) { @Test void testInactiveEntries() { - List fields = Arrays.asList("accession", "id", "protein_name"); + List fields = List.of("accession", "id", "protein_name"); EntryInactiveReason inactiveReason = new EntryInactiveReasonBuilder().type(InactiveReasonType.DELETED).build(); UniProtKBEntry inactiveEntry = @@ -91,7 +91,7 @@ void testInactiveEntries() { @Test void testIdAccession() { - List fields = Arrays.asList("accession", "id"); + List fields = List.of("accession", "id"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -100,7 +100,7 @@ void testIdAccession() { @Test void testInfo() { - List fields = Arrays.asList("reviewed", "version", "protein_existence"); + List fields = List.of("reviewed", "version", "protein_existence"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("reviewed", "reviewed", result); @@ -110,7 +110,7 @@ void testInfo() { @Test void testSequence() { - List fields = Arrays.asList("length", "mass", "sequence_version", "sequence"); + List fields = List.of("length", "mass", "sequence_version", "sequence"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("541", "length", result); @@ -133,7 +133,7 @@ void testSequence() { @Test void testDefault() { List fields = - Arrays.asList("accession", "id", "protein_name", "gene_names", "organism_name"); + List.of("accession", "id", "protein_name", "gene_names", "organism_name"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -147,7 +147,7 @@ void testDefault() { @Test void testECnumber() { - List fields = Arrays.asList("accession", "protein_name", "ec"); + List fields = List.of("accession", "protein_name", "ec"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); String proteinName = @@ -160,7 +160,7 @@ void testECnumber() { @Test void testGene() { List fields = - Arrays.asList("gene_names", "gene_primary", "gene_synonym", "gene_oln", "gene_orf"); + List.of("gene_names", "gene_primary", "gene_synonym", "gene_oln", "gene_orf"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("SLC1A5 ASCT2 M7V1 RDR RDRC", "gene_names", result); @@ -172,7 +172,7 @@ void testGene() { @Test void testOrganism() { - List fields = Arrays.asList("organism_name", "organism_id"); + List fields = List.of("organism_name", "organism_id"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Homo sapiens (Human)", "organism_name", result); @@ -181,7 +181,7 @@ void testOrganism() { @Test void testVirusHosts() { - List fields = Arrays.asList("accession", "organism_name", "virus_hosts", "lineage"); + List fields = List.of("accession", "organism_name", "virus_hosts", "lineage"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("P03431", "accession", result); @@ -195,7 +195,7 @@ void testVirusHosts() { @Test void testAlterProduct() { - List fields = Arrays.asList("accession", "cc_alternative_products"); + List fields = List.of("accession", "cc_alternative_products"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -213,8 +213,7 @@ void testAlterProduct() { @Test void testComments() { List fields = - Arrays.asList( - "accession", "cc_function", "cc_domain", "cc_subunit", "cc_interaction"); + List.of("accession", "cc_function", "cc_domain", "cc_subunit", "cc_interaction"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -253,7 +252,7 @@ void testComments() { @Test void testComments2() { List fields = - Arrays.asList( + List.of( "accession", "cc_interaction", "cc_subcellular_location", @@ -281,7 +280,7 @@ void testComments2() { @Test void testProteinFamily() { - List fields = Arrays.asList("accession", "protein_families", "cc_similarity"); + List fields = List.of("accession", "protein_families", "cc_similarity"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("P03431", "accession", result); @@ -295,8 +294,7 @@ void testProteinFamily() { @Test void testSequenceCaution() { - List fields = - Arrays.asList("accession", "cc_sequence_caution", "error_gmodel_pred"); + List fields = List.of("accession", "cc_sequence_caution", "error_gmodel_pred"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ84MC7, fields); verify("Q84MC7", "accession", result); @@ -310,7 +308,7 @@ void testSequenceCaution() { @Test void testBPCP() { List fields = - Arrays.asList( + List.of( "accession", "absorption", "kinetics", @@ -361,7 +359,7 @@ void testBPCP() { @Test void testFeatures() { List fields = - Arrays.asList( + List.of( "accession", "ft_chain", "ft_topo_dom", @@ -455,7 +453,7 @@ void testFeatures() { @Test void testNumberOfFeatures() { - List fields = Arrays.asList("accession", "feature"); + List fields = List.of("accession", "feature"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -469,7 +467,7 @@ void testNumberOfFeatures() { @Test void testReferences() { - List fields = Arrays.asList("accession", "lit_pubmed_id"); + List fields = List.of("accession", "lit_pubmed_id"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -482,7 +480,7 @@ void testReferences() { @Test void testGOTerm() { - List fields = Arrays.asList("accession", "go", "go_c", "go_f", "go_p", "go_id"); + List fields = List.of("accession", "go", "go_c", "go_f", "go_p", "go_id"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); @@ -549,12 +547,14 @@ void testGOTerm() { @Test void testXRefs1() { List fields = - Arrays.asList("accession", "xref_embl", "xref_ccds", "xref_refseq", "xref_smr"); + List.of("accession", "xref_embl", "xref_ccds", "xref_refseq", "xref_smr"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); String embl = - "U53347;AF102826;AF105423;GQ919058;AK292690;AK299137;AK301661;AK316546;AC008622;CH471126;BC000062;AF334818;"; + "U53347;AF102826;AF105423;GQ919058;" + + "AK292690;AK299137;AK301661;AK316546;" + + "AC008622;CH471126;BC000062;AF334818;"; String ccds = "CCDS12692.1 [Q15758-1];CCDS46125.1 [Q15758-2];CCDS46126.1 [Q15758-3];"; String refseq = "NP_001138616.1 [Q15758-3];NP_001138617.1 [Q15758-2];NP_005619.1 [Q15758-1];"; @@ -565,10 +565,48 @@ void testXRefs1() { verify(smr, "xref_smr", result); } + @Test + void testXRefs1Full() { + List fields = + List.of( + "accession", + "xref_embl_full", + "xref_ccds_full", + "xref_refseq_full", + "xref_smr_full"); + Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); + + verify("Q15758", "accession", result); + + String emblFull = + "\"U53347; AAC50629.1; -; mRNA.\";" + + "\"AF102826; AAD09812.1; -; mRNA.\";" + + "\"AF105423; AAD27806.1; -; mRNA.\";" + + "\"GQ919058; ACX53626.1; -; mRNA.\";" + + "\"AK292690; BAF85379.1; -; mRNA.\";" + + "\"AK299137; BAG61189.1; -; mRNA.\";" + + "\"AK301661; BAG63136.1; -; mRNA.\";" + + "\"AK316546; BAH14917.1; -; mRNA.\";" + + "\"AC008622; -; NOT_ANNOTATED_CDS; Genomic_DNA.\";" + + "\"CH471126; EAW57446.1; -; Genomic_DNA.\";" + + "\"BC000062; AAH00062.1; -; mRNA.\";" + + "\"AF334818; AAK77026.1; -; mRNA.\";"; + verify(emblFull, "xref_embl_full", result); + assertNull(result.get("xref_ccds_full")); + + String refseqFull = + "\"NP_001138616.1; NM_001145144.1. [Q15758-3]\";" + + "\"NP_001138617.1; NM_001145145.1. [Q15758-2]\";" + + "\"NP_005619.1; NM_005628.2. [Q15758-1]\";"; + verify(refseqFull, "xref_refseq_full", result); + + assertNull(result.get("xref_smr_full")); + } + @Test void testXRefs2() { List fields = - Arrays.asList( + List.of( "accession", "xref_smr", "xref_biogrid", @@ -590,10 +628,33 @@ void testXRefs2() { verify(string, "xref_string", result); } + @Test + void testXRefs2FullXrefs() { + List fields = + List.of( + "accession", + "xref_smr_full", + "xref_biogrid_full", + "xref_intact_full", + "xref_mint_full", + "xref_string_full"); + Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); + + verify("Q15758", "accession", result); + + String biogrid = "\"112401; 92.\";"; + String intact = "\"Q15758; 73.\";"; + assertNull(result.get("xref_smr_full")); + verify(biogrid, "xref_biogrid_full", result); + verify(intact, "xref_intact_full", result); + assertNull(result.get("xref_mint_full")); + assertNull(result.get("xref_string_full")); + } + @Test void testXRefs3() { List fields = - Arrays.asList( + List.of( "accession", "xref_drugbank", "xref_guidetopharmacology", @@ -616,34 +677,35 @@ void testXRefs3() { } @Test - void testXRefs4() { + void testXRefs3Full() { List fields = - Arrays.asList( + List.of( "accession", - "xref_drugbank", - "xref_guidetopharmacology", - "xref_tcdb", - "xref_dmdm", - "xref_maxqb"); + "xref_drugbank_full", + "xref_guidetopharmacology_full", + "xref_tcdb_full", + "xref_dmdm_full", + "xref_maxqb_full"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); verify("Q15758", "accession", result); - String drugbank = "DB00174;DB13146;DB00130;"; - String guidetopharmacology = "874;"; - String tcdb = "2.A.23.3.3;"; - String dmdm = "21542389;"; + String drugbank = + "\"DB00174; Asparagine.\";\"DB13146; Fluciclovine (18F).\";\"DB00130; L-Glutamine.\";"; + String tcdb = + "\"2.A.23.3.3; the dicarboxylate/amino acid:cation (na(+) or h(+)) symporter (daacs) family.\";"; + String maxqb = "Q15758;"; - verify(drugbank, "xref_drugbank", result); - verify(guidetopharmacology, "xref_guidetopharmacology", result); - verify(tcdb, "xref_tcdb", result); - verify(dmdm, "xref_dmdm", result); - verify(maxqb, "xref_maxqb", result); + verify(drugbank, "xref_drugbank_full", result); + assertNull(result.get("xref_guidetopharmacology_full")); + verify(tcdb, "xref_tcdb_full", result); + assertNull(result.get("xref_dmdm_full")); + assertNull(result.get("xref_maxqb_full")); } @Test - void testXRefs5() { + void testXRefs4() { List fields = - Arrays.asList( + List.of( "accession", "xref_ensembl", "xref_reactome", @@ -666,9 +728,40 @@ void testXRefs5() { verify(pfam, "xref_pfam", result); } + @Test + void testXRefs4Full() { + List fields = + List.of( + "accession", + "xref_ensembl_full", + "xref_reactome_full", + "xref_interpro_full", + "xref_prosite_full", + "xref_pfam_full"); + Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); + + verify("Q15758", "accession", result); + + String ensemblFull = + "\"ENST00000412532; ENSP00000397924; ENSG00000105281. [Q15758-3]\";" + + "\"ENST00000434726; ENSP00000406532; ENSG00000105281. [Q15758-2]\";" + + "\"ENST00000542575; ENSP00000444408; ENSG00000105281. [Q15758-1]\";"; + String reactomeFull = "\"R-HSA-352230; Amino acid transport across the plasma membrane.\";"; + String interproFull = + "\"IPR001991; Na-dicarboxylate_symporter.\";\"IPR018107; Na-dicarboxylate_symporter_CS.\";\"IPR036458; Na:dicarbo_symporter_sf.\";"; + String prositeFull = + "\"PS00713; NA_DICARBOXYL_SYMP_1; 1.\";\"PS00714; NA_DICARBOXYL_SYMP_2; 1.\";"; + String pfamFull = "\"PF00375; SDF; 1.\";"; + verify(ensemblFull, "xref_ensembl_full", result); + verify(reactomeFull, "xref_reactome_full", result); + verify(interproFull, "xref_interpro_full", result); + verify(prositeFull, "xref_prosite_full", result); + verify(pfamFull, "xref_pfam_full", result); + } + @Test void testProteome() { - List fields = Arrays.asList("accession", "xref_proteomes"); + List fields = List.of("accession", "xref_proteomes"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("P03431", "accession", result); @@ -678,19 +771,22 @@ void testProteome() { @Test void testPdb() { - List fields = Arrays.asList("accession", "xref_pdb", "structure_3d"); + List fields = List.of("accession", "xref_pdb", "structure_3d"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("P03431", "accession", result); String pdb = "2ZNL;2ZTT;3A1G;"; + String pdbFull = + "\"2ZNL; X-ray; 2.30 A; B=1-81.\";\"2ZTT; X-ray; 2.10 A; A/C=679-757.\";\"3A1G; X-ray; 1.70 A; A/C=678-757.\";"; String d3d = "X-ray crystallography (3)"; verify(pdb, "xref_pdb", result); + verify(pdbFull, "xref_pdb_full", result); verify(d3d, "structure_3d", result); } @Test - void testkeyword() { - List fields = Arrays.asList("accession", "keyword", "keywordid"); + void testKeyword() { + List fields = List.of("accession", "keyword", "keywordid"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("P03431", "accession", result); @@ -711,7 +807,7 @@ void testkeyword() { @Test void testExtraAttributeCommentCountForQ15758() { - List fields = Arrays.asList("comment_count"); + List fields = List.of("comment_count"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); String expectedCommentCount = @@ -723,7 +819,7 @@ void testExtraAttributeCommentCountForQ15758() { @Test void testExtraAttributeCommentCountForP03431() { - List fields = Arrays.asList("comment_count"); + List fields = List.of("comment_count"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); String expectedCommentCount = @@ -735,7 +831,7 @@ void testExtraAttributeCommentCountForP03431() { @Test void testExtraAttributeFeatureCount() { - List fields = Arrays.asList("feature_count"); + List fields = List.of("feature_count"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryQ15758, fields); String expectedCommentCount = @@ -746,7 +842,7 @@ void testExtraAttributeFeatureCount() { @Test void testExtraAttributeUniParcId() { - List fields = Arrays.asList("uniparc_id"); + List fields = List.of("uniparc_id"); Map result = new UniProtKBEntryValueMapper().mapEntity(entryP03431, fields); verify("UP1234567890", "uniparc_id", result); diff --git a/json-parser/src/main/java/org/uniprot/core/json/parser/proteome/ProteomeJsonConfig.java b/json-parser/src/main/java/org/uniprot/core/json/parser/proteome/ProteomeJsonConfig.java index 9040fee57..cf9bf96d2 100644 --- a/json-parser/src/main/java/org/uniprot/core/json/parser/proteome/ProteomeJsonConfig.java +++ b/json-parser/src/main/java/org/uniprot/core/json/parser/proteome/ProteomeJsonConfig.java @@ -5,37 +5,14 @@ import org.uniprot.core.CrossReference; import org.uniprot.core.Database; import org.uniprot.core.Value; -import org.uniprot.core.citation.Author; -import org.uniprot.core.citation.Book; -import org.uniprot.core.citation.ElectronicArticle; -import org.uniprot.core.citation.Journal; -import org.uniprot.core.citation.JournalArticle; -import org.uniprot.core.citation.Locator; -import org.uniprot.core.citation.Patent; -import org.uniprot.core.citation.PublicationDate; -import org.uniprot.core.citation.Submission; -import org.uniprot.core.citation.Thesis; -import org.uniprot.core.citation.Unpublished; -import org.uniprot.core.citation.impl.AuthorImpl; -import org.uniprot.core.citation.impl.BookImpl; -import org.uniprot.core.citation.impl.ElectronicArticleImpl; -import org.uniprot.core.citation.impl.JournalArticleImpl; -import org.uniprot.core.citation.impl.JournalImpl; -import org.uniprot.core.citation.impl.PatentImpl; -import org.uniprot.core.citation.impl.PublicationDateImpl; -import org.uniprot.core.citation.impl.SubmissionImpl; -import org.uniprot.core.citation.impl.ThesisImpl; -import org.uniprot.core.citation.impl.UnpublishedImpl; +import org.uniprot.core.citation.*; +import org.uniprot.core.citation.impl.*; import org.uniprot.core.impl.CrossReferenceImpl; import org.uniprot.core.impl.DefaultDatabase; import org.uniprot.core.impl.ValueImpl; import org.uniprot.core.json.parser.JsonConfig; import org.uniprot.core.json.parser.deserializer.LocalDateDeserializer; -import org.uniprot.core.json.parser.serializer.AuthorSerializer; -import org.uniprot.core.json.parser.serializer.JournalSerializer; -import org.uniprot.core.json.parser.serializer.LocalDateSerializer; -import org.uniprot.core.json.parser.serializer.LocatorSerializer; -import org.uniprot.core.json.parser.serializer.PublicationDateSerializer; +import org.uniprot.core.json.parser.serializer.*; import org.uniprot.core.json.parser.uniprot.serializer.UniProtKBAccessionSerializer; import org.uniprot.core.proteome.*; import org.uniprot.core.proteome.impl.*; @@ -89,6 +66,7 @@ private ObjectMapper initObjectMapper() { mod.addAbstractTypeMapping(ProteomeId.class, ProteomeIdImpl.class); mod.addAbstractTypeMapping(Taxonomy.class, TaxonomyImpl.class); mod.addAbstractTypeMapping(TaxonomyLineage.class, TaxonomyLineageImpl.class); + mod.addAbstractTypeMapping(ProteomeStatistics.class, ProteomeStatisticsImpl.class); mod.addAbstractTypeMapping(Value.class, ValueImpl.class); mod.addAbstractTypeMapping(RedundantProteome.class, RedundantProteomeImpl.class); diff --git a/json-parser/src/test/java/org/uniprot/core/json/parser/proteome/ProteomeTest.java b/json-parser/src/test/java/org/uniprot/core/json/parser/proteome/ProteomeTest.java index 586579f6b..dd26abe1f 100644 --- a/json-parser/src/test/java/org/uniprot/core/json/parser/proteome/ProteomeTest.java +++ b/json-parser/src/test/java/org/uniprot/core/json/parser/proteome/ProteomeTest.java @@ -150,6 +150,13 @@ public static ProteomeEntry getCompleteProteomeEntry() { .genomeAnnotation(genomeAnnotation) .build(); + ProteomeStatistics proteomeStatistics = + new ProteomeStatisticsBuilder() + .reviewedProteinCount(1) + .unreviewedProteinCount(10) + .isoformProteinCount(23) + .build(); + return new ProteomeEntryBuilder() .proteomeId(proteomeId) .description(description) @@ -177,6 +184,7 @@ public static ProteomeEntry getCompleteProteomeEntry() { .genomeAnnotation(genomeAnnotation) .exclusionReasonsAdd(ExclusionReason.MIXED_CULTURE) .proteinCount(250) + .proteomeStatistics(proteomeStatistics) .build(); } diff --git a/readme b/readme index 0c2b33833..8a7e3c2dd 100644 --- a/readme +++ b/readme @@ -1,2 +1,2 @@ # Parent UniProt -contain core modules for uniprot: common, domain, parsers, flatfile, xml, util +Contains core modules for uniprot: common, domain, parsers, flatfile, xml, util diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/AbstractConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/AbstractConverterTest.java index 7594f5d70..cc0e01a93 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/AbstractConverterTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/AbstractConverterTest.java @@ -24,8 +24,8 @@ public abstract class AbstractConverterTest { XMLGregorianCalendar.class, new MethodBasedFactory<>( AbstractConverterTest.class.getMethod("createXMLGregorianCalendar"))); - } catch (NoSuchMethodException e) { - e.printStackTrace(); + } catch (NoSuchMethodException nse) { + nse.printStackTrace(); System.exit(0); } objectCreator = new LoremIpsumObjectCreator(classBindings);