Skip to content

Commit

Permalink
Merge branch 'uniparc_light_xml' into uniparc_light_vd_v2
Browse files Browse the repository at this point in the history
  • Loading branch information
ahmadshadab committed Oct 11, 2024
2 parents e21338c + bfdb374 commit 5798f2d
Show file tree
Hide file tree
Showing 10 changed files with 405 additions and 74 deletions.
11 changes: 11 additions & 0 deletions xml-parser/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
<coordinate-xsd-pojo-package>org.uniprot.core.xml.jaxb.coordinate</coordinate-xsd-pojo-package>
<uniref-xsd-pojo-package>org.uniprot.core.xml.jaxb.uniref</uniref-xsd-pojo-package>
<uniparc-xsd-pojo-package>org.uniprot.core.xml.jaxb.uniparc</uniparc-xsd-pojo-package>
<dbreference-xsd-pojo-package>org.uniprot.core.xml.jaxb.dbreference</dbreference-xsd-pojo-package>
<unirule-xsd-pojo-package>org.uniprot.core.xml.jaxb.unirule</unirule-xsd-pojo-package>
<feature-xsd-pojo-package>org.uniprot.core.xml.jaxb.feature</feature-xsd-pojo-package>
</properties>
Expand Down Expand Up @@ -215,6 +216,16 @@
<extensionArg>-XhashCode</extensionArg>
</extensionArgs>
</xsdOption>
<xsdOption>
<extension>true</extension>
<xsd>${xsd-location}/uniparc-dbreference.xsd</xsd>
<packagename>${dbreference-xsd-pojo-package}</packagename>
<extensionArgs>
<extensionArg>-verbose</extensionArg>
<extensionArg>-Xequals</extensionArg>
<extensionArg>-XhashCode</extensionArg>
</extensionArgs>
</xsdOption>
</xsdOptions>
</configuration>
</execution>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
package org.uniprot.core.xml;

import com.google.common.base.Strings;
import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder;
import org.uniprot.core.uniprotkb.taxonomy.Organism;
import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder;
import org.uniprot.cv.taxonomy.TaxonomicNode;
import org.uniprot.cv.taxonomy.TaxonomyRepo;

import java.util.Optional;

/**
 * Static helpers shared by the UniParc cross-reference XML converters.
 *
 * <p>Holds the XML property type names and the logic that maps a single
 * (type, value) property pair onto a {@link UniParcCrossReferenceBuilder}.
 */
public class CrossReferenceConverterUtils {
    public static final String PROPERTY_GENE_NAME = "gene_name";
    public static final String PROPERTY_PROTEIN_NAME = "protein_name";
    public static final String PROPERTY_CHAIN = "chain";
    public static final String PROPERTY_NCBI_GI = "NCBI_GI";
    public static final String PROPERTY_PROTEOME_ID = "proteome_id";
    public static final String PROPERTY_COMPONENT = "component";
    public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id";
    public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession";

    // Utility class: not meant to be instantiated.
    private CrossReferenceConverterUtils() {}

    /**
     * Applies one XML property to the given builder.
     *
     * @param propertyType the XML property type; must be one of the {@code PROPERTY_*} constants
     * @param propertyValue the XML property value
     * @param builder the builder to populate
     * @param taxonomyRepo repository used to look up organism names for
     *     {@link #PROPERTY_NCBI_TAXONOMY_ID}; may be {@code null}, in which case only the
     *     taxon id is set on the organism
     * @throws XmlReaderException if {@code propertyType} is not a known property type
     */
    public static void populateUniParcCrossReferenceBuilder(
            String propertyType,
            String propertyValue,
            UniParcCrossReferenceBuilder builder,
            TaxonomyRepo taxonomyRepo) {
        switch (propertyType) {
            case PROPERTY_GENE_NAME:
                builder.geneName(propertyValue);
                break;
            case PROPERTY_PROTEIN_NAME:
                builder.proteinName(propertyValue);
                break;
            case PROPERTY_CHAIN:
                builder.chain(propertyValue);
                break;
            case PROPERTY_NCBI_GI:
                builder.ncbiGi(propertyValue);
                break;
            case PROPERTY_PROTEOME_ID:
                builder.proteomeId(propertyValue);
                break;
            case PROPERTY_COMPONENT:
                builder.component(propertyValue);
                break;
            case PROPERTY_NCBI_TAXONOMY_ID:
                builder.organism(convertTaxonomy(propertyValue, taxonomyRepo));
                break;
            case PROPERTY_UNIPROTKB_ACCESSION:
                // Kept as a generic property rather than a dedicated builder field.
                builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, propertyValue);
                break;
            default:
                // Separator added so the type and the value do not run together in the message.
                throw new XmlReaderException(
                        "Unable to read xml property: "
                                + propertyType
                                + ", value: "
                                + propertyValue);
        }
    }

    /**
     * Builds an {@link Organism} for the given taxon id, adding the scientific name and any
     * non-empty common name / synonym when the repository knows the node.
     */
    private static Organism convertTaxonomy(String taxId, TaxonomyRepo taxonomyRepo) {
        OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId));
        getTaxonomyNode(taxId, taxonomyRepo)
                .ifPresent(
                        node -> {
                            builder.scientificName(node.scientificName());
                            if (!Strings.isNullOrEmpty(node.commonName())) {
                                builder.commonName(node.commonName());
                            }
                            if (!Strings.isNullOrEmpty(node.synonymName())) {
                                builder.synonymsAdd(node.synonymName());
                            }
                        });
        return builder.build();
    }

    /** Looks up the taxonomy node, or returns empty when no repository is supplied. */
    private static Optional<TaxonomicNode> getTaxonomyNode(
            String taxId, TaxonomyRepo taxonomyRepo) {
        if (taxonomyRepo == null) {
            return Optional.empty();
        }
        return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId));
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
package org.uniprot.core.xml.dbreference;

import org.uniprot.core.uniparc.UniParcCrossReference;
import org.uniprot.core.uniparc.UniParcDatabase;
import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder;
import org.uniprot.core.util.Utils;
import org.uniprot.core.xml.Converter;
import org.uniprot.core.xml.CrossReferenceConverterUtils;
import org.uniprot.core.xml.jaxb.dbreference.DbReference;
import org.uniprot.core.xml.jaxb.dbreference.ObjectFactory;
import org.uniprot.core.xml.jaxb.dbreference.PropertyType;
import org.uniprot.core.xml.uniprot.XmlConverterHelper;
import org.uniprot.cv.taxonomy.TaxonomyRepo;

import java.util.ArrayList;
import java.util.List;

import static org.uniprot.core.xml.CrossReferenceConverterUtils.*;

public class UniParcCrossReferenceConverter
implements Converter<DbReference, UniParcCrossReference> {
private final ObjectFactory xmlFactory;
private final TaxonomyRepo taxonomyRepo;

public UniParcCrossReferenceConverter() {
this(new ObjectFactory(), null);
}

public UniParcCrossReferenceConverter(ObjectFactory xmlFactory, TaxonomyRepo taxonomyRepo) {
this.xmlFactory = xmlFactory;
this.taxonomyRepo = taxonomyRepo;
}

@Override
public UniParcCrossReference fromXml(DbReference xmlObj) {
UniParcCrossReferenceBuilder builder = new UniParcCrossReferenceBuilder();
builder.database(UniParcDatabase.typeOf(xmlObj.getType()))
.id(xmlObj.getId())
.active(xmlObj.getActive().equals("Y"))
.versionI(xmlObj.getVersionI())
.created(XmlConverterHelper.dateFromXml(xmlObj.getCreated()))
.lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast()));

for (PropertyType property : xmlObj.getProperty()) {
CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(property.getType(), property.getValue(), builder, taxonomyRepo);
}
if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion());
return builder.build();
}

@Override
public DbReference toXml(UniParcCrossReference uniObj) {
DbReference xmlObj = xmlFactory.createDbReference();
xmlObj.setActive(uniObj.isActive() ? "Y" : "N");
xmlObj.setId(uniObj.getId());
xmlObj.setType(uniObj.getDatabase().getDisplayName());
xmlObj.setVersionI(uniObj.getVersionI());
if (uniObj.getVersion() != null) xmlObj.setVersion(uniObj.getVersion());
xmlObj.setCreated(XmlConverterHelper.dateToXml(uniObj.getCreated()));
xmlObj.setLast(XmlConverterHelper.dateToXml(uniObj.getLastUpdated()));

List<PropertyType> properties = new ArrayList<>();
if (Utils.notNullNotEmpty(uniObj.getGeneName())) {
properties.add(createProperty(PROPERTY_GENE_NAME, uniObj.getGeneName()));
}
if (Utils.notNullNotEmpty(uniObj.getProteinName())) {
properties.add(createProperty(PROPERTY_PROTEIN_NAME, uniObj.getProteinName()));
}
if (Utils.notNullNotEmpty(uniObj.getChain())) {
properties.add(createProperty(PROPERTY_CHAIN, uniObj.getChain()));
}
if (Utils.notNullNotEmpty(uniObj.getNcbiGi())) {
properties.add(createProperty(PROPERTY_NCBI_GI, uniObj.getNcbiGi()));
}
if (Utils.notNullNotEmpty(uniObj.getProteomeId())) {
properties.add(createProperty(PROPERTY_PROTEOME_ID, uniObj.getProteomeId()));
}
if (Utils.notNullNotEmpty(uniObj.getComponent())) {
properties.add(createProperty(PROPERTY_COMPONENT, uniObj.getComponent()));
}
if (Utils.notNull(uniObj.getOrganism())) {
String taxonId = String.valueOf(uniObj.getOrganism().getTaxonId());
properties.add(createProperty(PROPERTY_NCBI_TAXONOMY_ID, taxonId));
}
xmlObj.getProperty().addAll(properties);
if (Utils.notNullNotEmpty(uniObj.getProperties())) {
uniObj.getProperties().stream()
.map(prop -> createProperty(prop.getKey(), prop.getValue()))
.forEach(val -> xmlObj.getProperty().add(val));
}

return xmlObj;
}

private PropertyType createProperty(String key, String value) {
PropertyType xmlObj = xmlFactory.createPropertyType();
xmlObj.setType(key);
xmlObj.setValue(value);
return xmlObj;
}
}
Original file line number Diff line number Diff line change
@@ -1,25 +1,21 @@
package org.uniprot.core.xml.uniparc;

import java.util.ArrayList;
import java.util.List;
import java.util.Optional;

import org.uniprot.core.uniparc.UniParcCrossReference;
import org.uniprot.core.uniparc.UniParcDatabase;
import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder;
import org.uniprot.core.uniprotkb.taxonomy.Organism;
import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder;
import org.uniprot.core.util.Utils;
import org.uniprot.core.xml.Converter;
import org.uniprot.core.xml.XmlReaderException;
import org.uniprot.core.xml.CrossReferenceConverterUtils;
import org.uniprot.core.xml.jaxb.uniparc.DbReferenceType;
import org.uniprot.core.xml.jaxb.uniparc.ObjectFactory;
import org.uniprot.core.xml.jaxb.uniparc.PropertyType;
import org.uniprot.core.xml.uniprot.XmlConverterHelper;
import org.uniprot.cv.taxonomy.TaxonomicNode;
import org.uniprot.cv.taxonomy.TaxonomyRepo;

import com.google.common.base.Strings;
import java.util.ArrayList;
import java.util.List;

import static org.uniprot.core.xml.CrossReferenceConverterUtils.*;

/**
* @author jluo
Expand All @@ -28,14 +24,6 @@
public class UniParcDBCrossReferenceConverter
implements Converter<DbReferenceType, UniParcCrossReference> {

public static final String PROPERTY_GENE_NAME = "gene_name";
public static final String PROPERTY_PROTEIN_NAME = "protein_name";
public static final String PROPERTY_CHAIN = "chain";
public static final String PROPERTY_NCBI_GI = "NCBI_GI";
public static final String PROPERTY_PROTEOME_ID = "proteome_id";
public static final String PROPERTY_COMPONENT = "component";
public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id";
public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession";

private final ObjectFactory xmlFactory;
private final TaxonomyRepo taxonomyRepo;
Expand All @@ -60,38 +48,7 @@ public UniParcCrossReference fromXml(DbReferenceType xmlObj) {
.lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast()));

for (PropertyType property : xmlObj.getProperty()) {
switch (property.getType()) {
case PROPERTY_GENE_NAME:
builder.geneName(property.getValue());
break;
case PROPERTY_PROTEIN_NAME:
builder.proteinName(property.getValue());
break;
case PROPERTY_CHAIN:
builder.chain(property.getValue());
break;
case PROPERTY_NCBI_GI:
builder.ncbiGi(property.getValue());
break;
case PROPERTY_PROTEOME_ID:
builder.proteomeId(property.getValue());
break;
case PROPERTY_COMPONENT:
builder.component(property.getValue());
break;
case PROPERTY_NCBI_TAXONOMY_ID:
builder.organism(convertTaxonomy(property.getValue()));
break;
case PROPERTY_UNIPROTKB_ACCESSION:
builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, property.getValue());
break;
default:
throw new XmlReaderException(
"Unable to read xml property: "
+ xmlObj.getType()
+ "value: "
+ property.getValue());
}
CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(property.getType(), property.getValue(), builder, taxonomyRepo);
}
if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion());
return builder.build();
Expand Down Expand Up @@ -141,29 +98,6 @@ public DbReferenceType toXml(UniParcCrossReference uniObj) {
return xmlObj;
}

private Organism convertTaxonomy(String taxId) {
OrganismBuilder builder = new OrganismBuilder().taxonId(Long.parseLong(taxId));
Optional<TaxonomicNode> opNode = getTaxonomyNode(taxId);
if (opNode.isPresent()) {
TaxonomicNode node = opNode.get();
builder.scientificName(node.scientificName());
if (!Strings.isNullOrEmpty(node.commonName())) {
builder.commonName(node.commonName());
}
if (!Strings.isNullOrEmpty(node.synonymName())) {
builder.synonymsAdd(node.synonymName());
}
}

return builder.build();
}

private Optional<TaxonomicNode> getTaxonomyNode(String taxId) {
if (taxonomyRepo == null) {
return Optional.empty();
} else return taxonomyRepo.retrieveNodeUsingTaxID(Integer.parseInt(taxId));
}

private PropertyType createProperty(String key, String value) {
PropertyType xmlObj = xmlFactory.createPropertyType();
xmlObj.setType(key);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package org.uniprot.core.xml.uniparc;

import org.uniprot.core.uniparc.UniParcEntryLight;
import org.uniprot.core.uniparc.impl.UniParcEntryLightBuilder;
import org.uniprot.core.xml.Converter;
import org.uniprot.core.xml.jaxb.uniparc.Entry;
import org.uniprot.core.xml.jaxb.uniparc.ObjectFactory;

/**
 * Converts between the JAXB {@link Entry} element and the {@link UniParcEntryLight}
 * domain object. Only the accession, the sequence and the sequence features are mapped.
 */
public class UniParcEntryLightConverter implements Converter<Entry, UniParcEntryLight> {
    private static final String UNIPARC = "uniparc";
    private final ObjectFactory xmlFactory;
    private final SequenceFeatureConverter seqFeatureConverter;
    private final SequenceConverter sequenceConverter;

    /** Creates a converter backed by a fresh {@link ObjectFactory}. */
    public UniParcEntryLightConverter() {
        this(new ObjectFactory());
    }

    /**
     * @param xmlFactory factory used to create JAXB objects; also handed to the nested
     *     sequence and sequence-feature converters
     */
    public UniParcEntryLightConverter(ObjectFactory xmlFactory) {
        this.xmlFactory = xmlFactory;
        this.seqFeatureConverter = new SequenceFeatureConverter(xmlFactory);
        this.sequenceConverter = new SequenceConverter(xmlFactory);
    }

    /** Builds a light UniParc entry from the XML entry's accession, sequence and features. */
    @Override
    public UniParcEntryLight fromXml(Entry xmlObj) {
        UniParcEntryLightBuilder lightBuilder = new UniParcEntryLightBuilder();
        lightBuilder.uniParcId(xmlObj.getAccession());
        lightBuilder.sequence(sequenceConverter.fromXml(xmlObj.getSequence()));
        lightBuilder.sequenceFeaturesSet(
                xmlObj.getSignatureSequenceMatch().stream()
                        .map(seqFeatureConverter::fromXml)
                        .toList());
        return lightBuilder.build();
    }

    /**
     * Builds the XML entry for a light UniParc entry; the dataset is always set to
     * {@code "uniparc"}.
     */
    @Override
    public Entry toXml(UniParcEntryLight uniObj) {
        Entry xmlEntry = xmlFactory.createEntry();
        xmlEntry.setDataset(UNIPARC);
        xmlEntry.setAccession(uniObj.getUniParcId());
        xmlEntry.setSequence(sequenceConverter.toXml(uniObj.getSequence()));
        // Convert each feature and append it to the entry's signature-sequence-match list.
        uniObj.getSequenceFeatures()
                .forEach(
                        feature ->
                                xmlEntry.getSignatureSequenceMatch()
                                        .add(seqFeatureConverter.toXml(feature)));
        return xmlEntry;
    }
}
Loading

0 comments on commit 5798f2d

Please sign in to comment.