From a8964d8117ed910b75fbb20e2547f113ce657498 Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Tue, 10 Sep 2024 16:18:34 +0100 Subject: [PATCH 1/5] make dbRef optional, add light xml converter and a new xsd for xref --- .../uniparc/UniParcEntryLightConverter.java | 48 ++++++++++++ .../src/main/resources/xsd/dbReference.xsd | 38 ++++++++++ xml-parser/src/main/resources/xsd/uniparc.xsd | 2 +- .../UniParcEntryLightConverterTest.java | 73 +++++++++++++++++++ 4 files changed, 160 insertions(+), 1 deletion(-) create mode 100644 xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverter.java create mode 100644 xml-parser/src/main/resources/xsd/dbReference.xsd create mode 100644 xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverter.java new file mode 100644 index 000000000..fcb0bc7c0 --- /dev/null +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverter.java @@ -0,0 +1,48 @@ +package org.uniprot.core.xml.uniparc; + +import org.uniprot.core.uniparc.UniParcEntryLight; +import org.uniprot.core.uniparc.impl.UniParcEntryLightBuilder; +import org.uniprot.core.xml.Converter; +import org.uniprot.core.xml.jaxb.uniparc.Entry; +import org.uniprot.core.xml.jaxb.uniparc.ObjectFactory; + +public class UniParcEntryLightConverter implements Converter { + private static final String UNIPARC = "uniparc"; + private final ObjectFactory xmlFactory; + private final SequenceFeatureConverter seqFeatureConverter; + private final SequenceConverter sequenceConverter; + + public UniParcEntryLightConverter(){ + this(new ObjectFactory()); + } + + public UniParcEntryLightConverter(ObjectFactory xmlFactory) { + this.xmlFactory = xmlFactory; + this.seqFeatureConverter = new SequenceFeatureConverter(xmlFactory); + this.sequenceConverter = new SequenceConverter(xmlFactory); + } + + @Override + public UniParcEntryLight fromXml(Entry xmlObj) { + UniParcEntryLightBuilder builder = new UniParcEntryLightBuilder(); + builder.uniParcId(xmlObj.getAccession()) + .sequence(sequenceConverter.fromXml(xmlObj.getSequence())) + .sequenceFeaturesSet( + xmlObj.getSignatureSequenceMatch().stream() + .map(seqFeatureConverter::fromXml) + .toList()); + return builder.build(); + } + + @Override + public Entry toXml(UniParcEntryLight uniObj) { + Entry entry = xmlFactory.createEntry(); + entry.setDataset(UNIPARC); + entry.setAccession(uniObj.getUniParcId()); + entry.setSequence(sequenceConverter.toXml(uniObj.getSequence())); + uniObj.getSequenceFeatures().stream() + .map(seqFeatureConverter::toXml) + .forEach(val -> entry.getSignatureSequenceMatch().add(val)); + return entry; + } +} diff --git a/xml-parser/src/main/resources/xsd/dbReference.xsd b/xml-parser/src/main/resources/xsd/dbReference.xsd new file mode 100644 index 000000000..18b2af897 --- /dev/null +++ b/xml-parser/src/main/resources/xsd/dbReference.xsd @@ -0,0 +1,38 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/xml-parser/src/main/resources/xsd/uniparc.xsd b/xml-parser/src/main/resources/xsd/uniparc.xsd index e7efa3628..fb119d7d1 100644 --- a/xml-parser/src/main/resources/xsd/uniparc.xsd +++ b/xml-parser/src/main/resources/xsd/uniparc.xsd @@ -25,7 +25,7 @@ + minOccurs="0" maxOccurs="unbounded" /> diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java new file mode 100644 index 000000000..104417e97 --- /dev/null +++ b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java @@ -0,0 +1,73 @@ +package org.uniprot.core.xml.uniparc; + +import org.junit.jupiter.api.Test; +import org.uniprot.core.Location; +import org.uniprot.core.Sequence; +import org.uniprot.core.impl.SequenceBuilder; +import org.uniprot.core.uniparc.SequenceFeature; +import org.uniprot.core.uniparc.SignatureDbType; +import org.uniprot.core.uniparc.UniParcEntryLight; +import org.uniprot.core.uniparc.impl.InterProGroupBuilder; +import org.uniprot.core.uniparc.impl.SequenceFeatureBuilder; +import org.uniprot.core.uniparc.impl.UniParcEntryLightBuilder; +import org.uniprot.core.xml.jaxb.uniparc.Entry; + +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +class UniParcEntryLightConverterTest { + + @Test + void objectToXMLAndXMLToObjectTest() { + UniParcEntryLight uniParcEntryLight = createUniParcEntryLight(); + UniParcEntryLightConverter converter = new UniParcEntryLightConverter(); + Entry xmlObj = converter.toXml(uniParcEntryLight); + System.out.println(UniParcXmlTestHelper.toXmlString(xmlObj, Entry.class, "entry")); + UniParcEntryLight converted = converter.fromXml(xmlObj); + assertEquals(uniParcEntryLight, converted); + } + + private UniParcEntryLight createUniParcEntryLight() { + UniParcEntryLightBuilder builder = new UniParcEntryLightBuilder(); + String sequenceStr = + "MALYSISKPVGSKINKHSYQDENTLVGKQALSKGTEKTKLSTNFEINLPRRTVLSDVSNV" + + "GKNNADEKDTKKAKRSFDESNLSTNEEADKPVESKFVKKLKVYSKNADPSVETLQKDRVS" + + "NVDDHLSSNPLMAEEYAPEIFEYIRKLDLKCLPNPKYMDQQKELTWKMREILNEWLVEIH" + + "SNFCLMPETLYLAVNIIDRFLSRRSCSLSKFQLTGITALLIASKYEEVMCPSIQNFVYMT" + + "DGAFTVEDVCVAERYMLNVLNFDLSYPSPLNFLRKISQAEGYDAQTRTLGKYLTEIYLFD" + + "HDLLRYPMSKIAAAAMYLSRRLLRRGPWTPKLVESSGGYEEHELKEIAYIMLHYHNKPLE" + + "HKAFFQKYSSKRFLKASIFVHQLVRQRYSVNRTDDDDLQSEPSSSLTNDGH"; + Sequence sequence = new SequenceBuilder(sequenceStr).build(); + List sfs = new ArrayList<>(); + SequenceFeatureBuilder sfBuilder = new SequenceFeatureBuilder(); + sfBuilder + .signatureDbType(SignatureDbType.PANTHER) + .signatureDbId("PTHR11977") + .locationsAdd(new Location(49, 790)) + .interproGroup( + new InterProGroupBuilder().id("IPR007122").name("Villin/Gelsolin").build()); + + SequenceFeatureBuilder sfBuilder2 = new SequenceFeatureBuilder(); + sfBuilder2 + .signatureDbType(SignatureDbType.PFAM) + .signatureDbId("PF00626") + .locationsAdd(new Location(81, 163)) + .locationsAdd(new Location(202, 267)) + .locationsAdd(new Location(330, 398)) + .locationsAdd(new Location(586, 653)) + .locationsAdd(new Location(692, 766)) + .interproGroup( + new InterProGroupBuilder() + .id("IPR007123") + .name("Gelsolin-like domain") + .build()); + sfs.add(sfBuilder.build()); + sfs.add(sfBuilder2.build()); + builder.uniParcId("UPI0000083A08") + .sequence(sequence) + .sequenceFeaturesSet(sfs); + return builder.build(); + } +} From 06937df356a7b5afa68f42ef3f7279ed86a72d49 Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Wed, 11 Sep 2024 16:38:25 +0100 Subject: [PATCH 2/5] update xsd and add a new converter --- xml-parser/pom.xml | 11 ++ .../UniParcCrossReferenceConverter.java | 169 ++++++++++++++++++ .../src/main/resources/xsd/dbReference.xsd | 41 ++--- .../UniParcCrossReferenceConverterTest.java | 44 +++++ 4 files changed, 245 insertions(+), 20 deletions(-) create mode 100644 xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java create mode 100644 xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java diff --git a/xml-parser/pom.xml b/xml-parser/pom.xml index a7ccf6ff7..809382d98 100644 --- a/xml-parser/pom.xml +++ b/xml-parser/pom.xml @@ -24,6 +24,7 @@ org.uniprot.core.xml.jaxb.coordinate org.uniprot.core.xml.jaxb.uniref org.uniprot.core.xml.jaxb.uniparc + org.uniprot.core.xml.jaxb.dbreference org.uniprot.core.xml.jaxb.unirule org.uniprot.core.xml.jaxb.feature @@ -215,6 +216,16 @@ -XhashCode + + true + ${xsd-location}/dbReference.xsd + ${dbreference-xsd-pojo-package} + + -verbose + -Xequals + -XhashCode + + diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java new file mode 100644 index 000000000..ad3f678ff --- /dev/null +++ b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java @@ -0,0 +1,169 @@ +package org.uniprot.core.xml.dbreference; + +import com.google.common.base.Strings; +import org.uniprot.core.uniparc.UniParcCrossReference; +import org.uniprot.core.uniparc.UniParcDatabase; +import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder; +import org.uniprot.core.uniprotkb.taxonomy.Organism; +import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; +import org.uniprot.core.util.Utils; +import org.uniprot.core.xml.Converter; +import org.uniprot.core.xml.XmlReaderException; +import org.uniprot.core.xml.jaxb.dbreference.DbReference; +import org.uniprot.core.xml.jaxb.dbreference.ObjectFactory; +import org.uniprot.core.xml.jaxb.dbreference.PropertyType; + +import org.uniprot.core.xml.uniprot.XmlConverterHelper; +import org.uniprot.cv.taxonomy.TaxonomicNode; +import org.uniprot.cv.taxonomy.TaxonomyRepo; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; + +public class UniParcCrossReferenceConverter + implements Converter { + + public static final String PROPERTY_GENE_NAME = "gene_name"; + public static final String PROPERTY_PROTEIN_NAME = "protein_name"; + public static final String PROPERTY_CHAIN = "chain"; + public static final String PROPERTY_NCBI_GI = "NCBI_GI"; + public static final String PROPERTY_PROTEOME_ID = "proteome_id"; + public static final String PROPERTY_COMPONENT = "component"; + public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id"; + public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession"; + + private final ObjectFactory xmlFactory; + private final TaxonomyRepo taxonomyRepo; + + public UniParcCrossReferenceConverter() { + this(new ObjectFactory(), null); + } + + public UniParcCrossReferenceConverter(ObjectFactory xmlFactory, TaxonomyRepo taxonomyRepo) { + this.xmlFactory = xmlFactory; + this.taxonomyRepo = taxonomyRepo; + } + + @Override + public UniParcCrossReference fromXml(DbReference xmlObj) { + UniParcCrossReferenceBuilder builder = new UniParcCrossReferenceBuilder(); + builder.database(UniParcDatabase.typeOf(xmlObj.getType())) + .id(xmlObj.getId()) + .active(xmlObj.getActive().equals("Y")) + .versionI(xmlObj.getVersionI()) + .created(XmlConverterHelper.dateFromXml(xmlObj.getCreated())) + .lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast())); + + for (PropertyType property : xmlObj.getProperty()) { + switch (property.getType()) { + case PROPERTY_GENE_NAME: + builder.geneName(property.getValue()); + break; + case PROPERTY_PROTEIN_NAME: + builder.proteinName(property.getValue()); + break; + case PROPERTY_CHAIN: + builder.chain(property.getValue()); + break; + case PROPERTY_NCBI_GI: + builder.ncbiGi(property.getValue()); + break; + case PROPERTY_PROTEOME_ID: + builder.proteomeId(property.getValue()); + break; + case PROPERTY_COMPONENT: + builder.component(property.getValue()); + break; + case PROPERTY_NCBI_TAXONOMY_ID: + builder.organism(convertTaxonomy(property.getValue())); + break; + case PROPERTY_UNIPROTKB_ACCESSION: + builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, property.getValue()); + break; + default: + throw new XmlReaderException( + "Unable to read xml property: " + + xmlObj.getType() + + "value: " + + property.getValue()); + } + } + if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion()); + return builder.build(); + } + + @Override + public DbReference toXml(UniParcCrossReference uniObj) { + DbReference xmlObj = xmlFactory.createDbReference(); + xmlObj.setActive(uniObj.isActive() ? "Y" : "N"); + xmlObj.setId(uniObj.getId()); + xmlObj.setType(uniObj.getDatabase().getDisplayName()); + xmlObj.setVersionI(uniObj.getVersionI()); + if (uniObj.getVersion() != null) xmlObj.setVersion(uniObj.getVersion()); + xmlObj.setCreated(XmlConverterHelper.dateToXml(uniObj.getCreated())); + xmlObj.setLast(XmlConverterHelper.dateToXml(uniObj.getLastUpdated())); + + List properties = new ArrayList<>(); + if (Utils.notNullNotEmpty(uniObj.getGeneName())) { + properties.add(createProperty(PROPERTY_GENE_NAME, uniObj.getGeneName())); + } + if (Utils.notNullNotEmpty(uniObj.getProteinName())) { + properties.add(createProperty(PROPERTY_PROTEIN_NAME, uniObj.getProteinName())); + } + if (Utils.notNullNotEmpty(uniObj.getChain())) { + properties.add(createProperty(PROPERTY_CHAIN, uniObj.getChain())); + } + if (Utils.notNullNotEmpty(uniObj.getNcbiGi())) { + properties.add(createProperty(PROPERTY_NCBI_GI, uniObj.getNcbiGi())); + } + if (Utils.notNullNotEmpty(uniObj.getProteomeId())) { + properties.add(createProperty(PROPERTY_PROTEOME_ID, uniObj.getProteomeId())); + } + if (Utils.notNullNotEmpty(uniObj.getComponent())) { + properties.add(createProperty(PROPERTY_COMPONENT, uniObj.getComponent())); + } + if (Utils.notNull(uniObj.getOrganism())) { + String taxonId = String.valueOf(uniObj.getOrganism().getTaxonId()); + properties.add(createProperty(PROPERTY_NCBI_TAXONOMY_ID, taxonId)); + } + xmlObj.getProperty().addAll(properties); + if (Utils.notNullNotEmpty(uniObj.getProperties())) { + uniObj.getProperties().stream() + .map(prop -> createProperty(prop.getKey(), prop.getValue())) + .forEach(val -> xmlObj.getProperty().add(val)); + } + + return xmlObj; + } + + private Organism convertTaxonomy(String taxId) { + OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId)); + Optional opNode = getTaxonomyNode(taxId); + if (opNode.isPresent()) { + TaxonomicNode node = opNode.get(); + builder.scientificName(node.scientificName()); + if (!Strings.isNullOrEmpty(node.commonName())) { + builder.commonName(node.commonName()); + } + if (!Strings.isNullOrEmpty(node.synonymName())) { + builder.synonymsAdd(node.synonymName()); + } + } + + return builder.build(); + } + + private Optional getTaxonomyNode(String taxId) { + if (taxonomyRepo == null) { + return Optional.empty(); + } else return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId)); + } + + private PropertyType createProperty(String key, String value) { + PropertyType xmlObj = xmlFactory.createPropertyType(); + xmlObj.setType(key); + xmlObj.setValue(value); + return xmlObj; + } +} diff --git a/xml-parser/src/main/resources/xsd/dbReference.xsd b/xml-parser/src/main/resources/xsd/dbReference.xsd index 18b2af897..b9883367f 100644 --- a/xml-parser/src/main/resources/xsd/dbReference.xsd +++ b/xml-parser/src/main/resources/xsd/dbReference.xsd @@ -5,30 +5,31 @@ xmlns="http://uniprot.org/dbReference"> + + Describes a collection of dbReference entries. + - + - - - - - - - - - - - - - - - + + + + + + + + + + + + + + diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java new file mode 100644 index 000000000..d8f0583b7 --- /dev/null +++ b/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java @@ -0,0 +1,44 @@ +package org.uniprot.core.xml.dbreference; + +import org.junit.jupiter.api.Test; +import org.uniprot.core.uniparc.UniParcCrossReference; +import org.uniprot.core.uniparc.UniParcDatabase; +import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder; +import org.uniprot.core.uniprotkb.taxonomy.Organism; +import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; +import org.uniprot.core.xml.jaxb.dbreference.DbReference; + +import java.time.LocalDate; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.uniprot.core.xml.uniparc.UniParcDBCrossReferenceConverter.PROPERTY_UNIPROTKB_ACCESSION; + +class UniParcCrossReferenceConverterTest { + @Test + void testComplete() { + Organism taxonomy = new OrganismBuilder().taxonId(7227).build(); + UniParcCrossReferenceBuilder builder = new UniParcCrossReferenceBuilder(); + builder.database(UniParcDatabase.TREMBL) + .id("A0A0C4DHG2-PB") + .versionI(1) + .version(1) + .active(true) + .organism(taxonomy) + .proteinName("Gelsolin, isoform J") + .proteomeId("proteomeValue") + .geneName("Gel") + .component("ComponentValue") + .ncbiGi("ncbiGiValue") + .chain("chainValue") + .propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, "P21802") + .created(LocalDate.of(2015, 4, 1)) + .lastUpdated(LocalDate.of(2019, 5, 8)); + + UniParcCrossReference xref = builder.build(); + UniParcCrossReferenceConverter converter = new UniParcCrossReferenceConverter(); + DbReference xmlObj = converter.toXml(xref); + UniParcCrossReference converted = converter.fromXml(xmlObj); + assertEquals(xref, converted); + + } +} From eed267f5f6bfced10e5a0ce0b089285e122dae8c Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Thu, 12 Sep 2024 13:26:18 +0100 Subject: [PATCH 3/5] rename xsd file --- xml-parser/pom.xml | 2 +- .../resources/xsd/{dbReference.xsd => uniparc-dbreference.xsd} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename xml-parser/src/main/resources/xsd/{dbReference.xsd => uniparc-dbreference.xsd} (100%) diff --git a/xml-parser/pom.xml b/xml-parser/pom.xml index 809382d98..15a2680be 100644 --- a/xml-parser/pom.xml +++ b/xml-parser/pom.xml @@ -218,7 +218,7 @@ true - ${xsd-location}/dbReference.xsd + ${xsd-location}/uniparc-dbreference.xsd ${dbreference-xsd-pojo-package} -verbose diff --git a/xml-parser/src/main/resources/xsd/dbReference.xsd b/xml-parser/src/main/resources/xsd/uniparc-dbreference.xsd similarity index 100% rename from xml-parser/src/main/resources/xsd/dbReference.xsd rename to xml-parser/src/main/resources/xsd/uniparc-dbreference.xsd From a50d98c4fef6070883ad7f227387524f58185af2 Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Fri, 11 Oct 2024 13:08:00 +0100 Subject: [PATCH 4/5] move duplicate code to a util class --- .../xml/CrossReferenceConverterUtils.java | 81 +++++++++++++++++++ .../UniParcCrossReferenceConverter.java | 71 +--------------- .../UniParcDBCrossReferenceConverter.java | 78 ++---------------- .../UniParcCrossReferenceConverterTest.java | 3 +- .../UniParcDBCrossReferenceConverterTest.java | 2 +- .../UniParcEntryLightConverterTest.java | 1 - 6 files changed, 94 insertions(+), 142 deletions(-) create mode 100644 xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java b/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java new file mode 100644 index 000000000..9ac824646 --- /dev/null +++ b/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java @@ -0,0 +1,81 @@ +package org.uniprot.core.xml; + +import com.google.common.base.Strings; +import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder; +import org.uniprot.core.uniprotkb.taxonomy.Organism; +import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; +import org.uniprot.cv.taxonomy.TaxonomicNode; +import org.uniprot.cv.taxonomy.TaxonomyRepo; + +import java.util.Optional; + +public class CrossReferenceConverterUtils { + public static final String PROPERTY_GENE_NAME = "gene_name"; + public static final String PROPERTY_PROTEIN_NAME = "protein_name"; + public static final String PROPERTY_CHAIN = "chain"; + public static final String PROPERTY_NCBI_GI = "NCBI_GI"; + public static final String PROPERTY_PROTEOME_ID = "proteome_id"; + public static final String PROPERTY_COMPONENT = "component"; + public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id"; + public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession"; + + private CrossReferenceConverterUtils(){} + + public static void populateUniParcCrossReferenceBuilder(String propertyType, String propertyValue, UniParcCrossReferenceBuilder builder, TaxonomyRepo taxonomyRepo) { + switch (propertyValue) { + case PROPERTY_GENE_NAME: + builder.geneName(propertyValue); + break; + case PROPERTY_PROTEIN_NAME: + builder.proteinName(propertyValue); + break; + case PROPERTY_CHAIN: + builder.chain(propertyValue); + break; + case PROPERTY_NCBI_GI: + builder.ncbiGi(propertyValue); + break; + case PROPERTY_PROTEOME_ID: + builder.proteomeId(propertyValue); + break; + case PROPERTY_COMPONENT: + builder.component(propertyValue); + break; + case PROPERTY_NCBI_TAXONOMY_ID: + builder.organism(CrossReferenceConverterUtils.convertTaxonomy(propertyValue, taxonomyRepo)); + break; + case PROPERTY_UNIPROTKB_ACCESSION: + builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, propertyValue); + break; + default: + throw new XmlReaderException( + "Unable to read xml property: " + + propertyType + + "value: " + + propertyValue); + } + } + + private static Organism convertTaxonomy(String taxId, TaxonomyRepo taxonomyRepo) { + OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId)); + Optional opNode = getTaxonomyNode(taxId, taxonomyRepo); + if (opNode.isPresent()) { + TaxonomicNode node = opNode.get(); + builder.scientificName(node.scientificName()); + if (!Strings.isNullOrEmpty(node.commonName())) { + builder.commonName(node.commonName()); + } + if (!Strings.isNullOrEmpty(node.synonymName())) { + builder.synonymsAdd(node.synonymName()); + } + } + + return builder.build(); + } + + private static Optional getTaxonomyNode(String taxId, TaxonomyRepo taxonomyRepo) { + if (taxonomyRepo == null) { + return Optional.empty(); + } else return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId)); + } +} diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java index ad3f678ff..3544e3954 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java @@ -8,11 +8,10 @@ import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; import org.uniprot.core.util.Utils; import org.uniprot.core.xml.Converter; -import org.uniprot.core.xml.XmlReaderException; +import org.uniprot.core.xml.CrossReferenceConverterUtils; import org.uniprot.core.xml.jaxb.dbreference.DbReference; import org.uniprot.core.xml.jaxb.dbreference.ObjectFactory; import org.uniprot.core.xml.jaxb.dbreference.PropertyType; - import org.uniprot.core.xml.uniprot.XmlConverterHelper; import org.uniprot.cv.taxonomy.TaxonomicNode; import org.uniprot.cv.taxonomy.TaxonomyRepo; @@ -21,18 +20,10 @@ import java.util.List; import java.util.Optional; +import static org.uniprot.core.xml.CrossReferenceConverterUtils.*; + public class UniParcCrossReferenceConverter implements Converter { - - public static final String PROPERTY_GENE_NAME = "gene_name"; - public static final String PROPERTY_PROTEIN_NAME = "protein_name"; - public static final String PROPERTY_CHAIN = "chain"; - public static final String PROPERTY_NCBI_GI = "NCBI_GI"; - public static final String PROPERTY_PROTEOME_ID = "proteome_id"; - public static final String PROPERTY_COMPONENT = "component"; - public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id"; - public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession"; - private final ObjectFactory xmlFactory; private final TaxonomyRepo taxonomyRepo; @@ -56,38 +47,7 @@ public UniParcCrossReference fromXml(DbReference xmlObj) { .lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast())); for (PropertyType property : xmlObj.getProperty()) { - switch (property.getType()) { - case PROPERTY_GENE_NAME: - builder.geneName(property.getValue()); - break; - case PROPERTY_PROTEIN_NAME: - builder.proteinName(property.getValue()); - break; - case PROPERTY_CHAIN: - builder.chain(property.getValue()); - break; - case PROPERTY_NCBI_GI: - builder.ncbiGi(property.getValue()); - break; - case PROPERTY_PROTEOME_ID: - builder.proteomeId(property.getValue()); - break; - case PROPERTY_COMPONENT: - builder.component(property.getValue()); - break; - case PROPERTY_NCBI_TAXONOMY_ID: - builder.organism(convertTaxonomy(property.getValue())); - break; - case PROPERTY_UNIPROTKB_ACCESSION: - builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, property.getValue()); - break; - default: - throw new XmlReaderException( - "Unable to read xml property: " - + xmlObj.getType() - + "value: " - + property.getValue()); - } + CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(xmlObj.getType(), property.getValue(), builder, taxonomyRepo); } if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion()); return builder.build(); @@ -137,29 +97,6 @@ public DbReference toXml(UniParcCrossReference uniObj) { return xmlObj; } - private Organism convertTaxonomy(String taxId) { - OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId)); - Optional opNode = getTaxonomyNode(taxId); - if (opNode.isPresent()) { - TaxonomicNode node = opNode.get(); - builder.scientificName(node.scientificName()); - if (!Strings.isNullOrEmpty(node.commonName())) { - builder.commonName(node.commonName()); - } - if (!Strings.isNullOrEmpty(node.synonymName())) { - builder.synonymsAdd(node.synonymName()); - } - } - - return builder.build(); - } - - private Optional getTaxonomyNode(String taxId) { - if (taxonomyRepo == null) { - return Optional.empty(); - } else return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId)); - } - private PropertyType createProperty(String key, String value) { PropertyType xmlObj = xmlFactory.createPropertyType(); xmlObj.setType(key); diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java index 9a71713be..c47b839ef 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java @@ -1,25 +1,21 @@ package org.uniprot.core.xml.uniparc; -import java.util.ArrayList; -import java.util.List; -import java.util.Optional; - import org.uniprot.core.uniparc.UniParcCrossReference; import org.uniprot.core.uniparc.UniParcDatabase; import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder; -import org.uniprot.core.uniprotkb.taxonomy.Organism; -import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; import org.uniprot.core.util.Utils; import org.uniprot.core.xml.Converter; -import org.uniprot.core.xml.XmlReaderException; +import org.uniprot.core.xml.CrossReferenceConverterUtils; import org.uniprot.core.xml.jaxb.uniparc.DbReferenceType; import org.uniprot.core.xml.jaxb.uniparc.ObjectFactory; import org.uniprot.core.xml.jaxb.uniparc.PropertyType; import org.uniprot.core.xml.uniprot.XmlConverterHelper; -import org.uniprot.cv.taxonomy.TaxonomicNode; import org.uniprot.cv.taxonomy.TaxonomyRepo; -import com.google.common.base.Strings; +import java.util.ArrayList; +import java.util.List; + +import static org.uniprot.core.xml.CrossReferenceConverterUtils.*; /** * @author jluo @@ -28,14 +24,6 @@ public class UniParcDBCrossReferenceConverter implements Converter { - public static final String PROPERTY_GENE_NAME = "gene_name"; - public static final String PROPERTY_PROTEIN_NAME = "protein_name"; - public static final String PROPERTY_CHAIN = "chain"; - public static final String PROPERTY_NCBI_GI = "NCBI_GI"; - public static final String PROPERTY_PROTEOME_ID = "proteome_id"; - public static final String PROPERTY_COMPONENT = "component"; - public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id"; - public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession"; private final ObjectFactory xmlFactory; private final TaxonomyRepo taxonomyRepo; @@ -60,38 +48,7 @@ public UniParcCrossReference fromXml(DbReferenceType xmlObj) { .lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast())); for (PropertyType property : xmlObj.getProperty()) { - switch (property.getType()) { - case PROPERTY_GENE_NAME: - builder.geneName(property.getValue()); - break; - case PROPERTY_PROTEIN_NAME: - builder.proteinName(property.getValue()); - break; - case PROPERTY_CHAIN: - builder.chain(property.getValue()); - break; - case PROPERTY_NCBI_GI: - builder.ncbiGi(property.getValue()); - break; - case PROPERTY_PROTEOME_ID: - builder.proteomeId(property.getValue()); - break; - case PROPERTY_COMPONENT: - builder.component(property.getValue()); - break; - case PROPERTY_NCBI_TAXONOMY_ID: - builder.organism(convertTaxonomy(property.getValue())); - break; - case PROPERTY_UNIPROTKB_ACCESSION: - builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, property.getValue()); - break; - default: - throw new XmlReaderException( - "Unable to read xml property: " - + xmlObj.getType() - + "value: " - + property.getValue()); - } + CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(xmlObj.getType(), property.getValue(), builder, taxonomyRepo); } if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion()); return builder.build(); @@ -141,29 +98,6 @@ public DbReferenceType toXml(UniParcCrossReference uniObj) { return xmlObj; } - private Organism convertTaxonomy(String taxId) { - OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId)); - Optional opNode = getTaxonomyNode(taxId); - if (opNode.isPresent()) { - TaxonomicNode node = opNode.get(); - builder.scientificName(node.scientificName()); - if (!Strings.isNullOrEmpty(node.commonName())) { - builder.commonName(node.commonName()); - } - if (!Strings.isNullOrEmpty(node.synonymName())) { - builder.synonymsAdd(node.synonymName()); - } - } - - return builder.build(); - } - - private Optional getTaxonomyNode(String taxId) { - if (taxonomyRepo == null) { - return Optional.empty(); - } else return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId)); - } - private PropertyType createProperty(String key, String value) { PropertyType xmlObj = xmlFactory.createPropertyType(); xmlObj.setType(key); diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java index d8f0583b7..626aa64b6 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverterTest.java @@ -11,7 +11,8 @@ import java.time.LocalDate; import static org.junit.jupiter.api.Assertions.assertEquals; -import static org.uniprot.core.xml.uniparc.UniParcDBCrossReferenceConverter.PROPERTY_UNIPROTKB_ACCESSION; +import static org.uniprot.core.xml.CrossReferenceConverterUtils.PROPERTY_UNIPROTKB_ACCESSION; + class UniParcCrossReferenceConverterTest { @Test diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverterTest.java index 4ed30c247..70bed5bd4 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverterTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverterTest.java @@ -2,7 +2,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; -import static org.uniprot.core.xml.uniparc.UniParcDBCrossReferenceConverter.PROPERTY_UNIPROTKB_ACCESSION; +import static org.uniprot.core.xml.CrossReferenceConverterUtils.PROPERTY_UNIPROTKB_ACCESSION; import java.time.LocalDate; diff --git a/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java index 104417e97..1677e844e 100644 --- a/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java +++ b/xml-parser/src/test/java/org/uniprot/core/xml/uniparc/UniParcEntryLightConverterTest.java @@ -24,7 +24,6 @@ void objectToXMLAndXMLToObjectTest() { UniParcEntryLight uniParcEntryLight = createUniParcEntryLight(); UniParcEntryLightConverter converter = new UniParcEntryLightConverter(); Entry xmlObj = converter.toXml(uniParcEntryLight); - System.out.println(UniParcXmlTestHelper.toXmlString(xmlObj, Entry.class, "entry")); UniParcEntryLight converted = converter.fromXml(xmlObj); assertEquals(uniParcEntryLight, converted); } From bfdb37443863248ee04cf72807f553f0def7609b Mon Sep 17 00:00:00 2001 From: Shadab Ahmad Date: Fri, 11 Oct 2024 14:31:18 +0100 Subject: [PATCH 5/5] fix a typo --- .../org/uniprot/core/xml/CrossReferenceConverterUtils.java | 2 +- .../xml/dbreference/UniParcCrossReferenceConverter.java | 7 +------ .../core/xml/uniparc/UniParcDBCrossReferenceConverter.java | 2 +- 3 files changed, 3 insertions(+), 8 deletions(-) diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java b/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java index 9ac824646..c9ca29d03 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/CrossReferenceConverterUtils.java @@ -22,7 +22,7 @@ public class CrossReferenceConverterUtils { private CrossReferenceConverterUtils(){} public static void populateUniParcCrossReferenceBuilder(String propertyType, String propertyValue, UniParcCrossReferenceBuilder builder, TaxonomyRepo taxonomyRepo) { - switch (propertyValue) { + switch (propertyType) { case PROPERTY_GENE_NAME: builder.geneName(propertyValue); break; diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java index 3544e3954..3e019b590 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/dbreference/UniParcCrossReferenceConverter.java @@ -1,11 +1,8 @@ package org.uniprot.core.xml.dbreference; -import com.google.common.base.Strings; import org.uniprot.core.uniparc.UniParcCrossReference; import org.uniprot.core.uniparc.UniParcDatabase; import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder; -import org.uniprot.core.uniprotkb.taxonomy.Organism; -import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder; import org.uniprot.core.util.Utils; import org.uniprot.core.xml.Converter; import org.uniprot.core.xml.CrossReferenceConverterUtils; @@ -13,12 +10,10 @@ import org.uniprot.core.xml.jaxb.dbreference.ObjectFactory; import org.uniprot.core.xml.jaxb.dbreference.PropertyType; import org.uniprot.core.xml.uniprot.XmlConverterHelper; -import org.uniprot.cv.taxonomy.TaxonomicNode; import org.uniprot.cv.taxonomy.TaxonomyRepo; import java.util.ArrayList; import java.util.List; -import java.util.Optional; import static org.uniprot.core.xml.CrossReferenceConverterUtils.*; @@ -47,7 +42,7 @@ public UniParcCrossReference fromXml(DbReference xmlObj) { .lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast())); for (PropertyType property : xmlObj.getProperty()) { - CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(xmlObj.getType(), property.getValue(), builder, taxonomyRepo); + CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(property.getType(), property.getValue(), builder, taxonomyRepo); } if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion()); return builder.build(); diff --git a/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java index c47b839ef..bf47e761f 100644 --- a/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java +++ b/xml-parser/src/main/java/org/uniprot/core/xml/uniparc/UniParcDBCrossReferenceConverter.java @@ -48,7 +48,7 @@ public UniParcCrossReference fromXml(DbReferenceType xmlObj) { .lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast())); for (PropertyType property : xmlObj.getProperty()) { - CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(xmlObj.getType(), property.getValue(), builder, taxonomyRepo); + CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(property.getType(), property.getValue(), builder, taxonomyRepo); } if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion()); return builder.build();