Skip to content

Commit

Permalink
improve uniparc xref voldemort index
Browse files Browse the repository at this point in the history
  • Loading branch information
LeonardoGonzales committed Dec 3, 2024
1 parent a13cefc commit 606d6ef
Show file tree
Hide file tree
Showing 5 changed files with 144 additions and 59 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
package org.uniprot.core.parser.fasta.uniparc;

import org.uniprot.core.Sequence;
import org.uniprot.core.uniparc.UniParcCrossReference;
import org.uniprot.core.uniparc.UniParcEntry;
import org.uniprot.core.uniparc.UniParcEntryLight;

import static org.uniprot.core.uniparc.impl.UniParcEntryLightBuilder.HAS_ACTIVE_CROSS_REF;
Expand All @@ -9,8 +11,20 @@
* @author jluo
* @date: 24 Jun 2019
*/
public class UniParcEntryLightFastaParser {
private UniParcEntryLightFastaParser(){}
public class UniParcFastaParser {
private UniParcFastaParser(){}


/**
 * Renders a full UniParc entry as a FASTA string.
 *
 * <p>The status written to the output is {@code active} when at least one cross reference of
 * the entry reports itself as active, and {@code inactive} otherwise (including when the entry
 * has no cross references at all).
 *
 * @param entry the UniParc entry to render
 * @return the value produced by {@code getFastaString} for the entry id, status and sequence
 */
public static String toFasta(UniParcEntry entry) {
    boolean hasActiveXref = false;
    for (UniParcCrossReference xref : entry.getUniParcCrossReferences()) {
        if (xref.isActive()) {
            // One active cross reference is enough; no need to inspect the rest.
            hasActiveXref = true;
            break;
        }
    }
    String status = hasActiveXref ? "active" : "inactive";
    return getFastaString(entry.getUniParcId().getValue(), status, entry.getSequence());
}

public static String toFasta(UniParcEntryLight entry) {
String status = "active";
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package org.uniprot.core.parser.fasta.uniparc;

import static org.junit.jupiter.api.Assertions.*;
import static org.uniprot.core.uniparc.impl.UniParcEntryLightBuilder.HAS_ACTIVE_CROSS_REF;

import org.junit.jupiter.api.Test;
import org.uniprot.core.Property;
import org.uniprot.core.Sequence;
import org.uniprot.core.impl.SequenceBuilder;
import org.uniprot.core.uniparc.*;
import org.uniprot.core.uniparc.impl.*;
import org.uniprot.core.uniprotkb.taxonomy.Organism;
import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder;

import java.time.LocalDate;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
* @author jluo
* @date: 24 Jun 2019
*/
class UniParcFastaParserTest {

    /** FASTA output expected for the fixture entry when it is reported as active. */
    public static final String EXPECTED_FASTA_RESULT = """
            >UPI0000083A08 status=active
            MSMAMARALATLGRLRYRVSGQLPLLDETAIEVMAGGQFLDGRKAREELGFFSTTALDDT
            LLRAIDWFRDNGYFNA""";

    /** FASTA output expected for the fixture entry when it is reported as inactive. */
    public static final String EXPECTED_FASTA_RESULT_INACTIVE = """
            >UPI0000083A08 status=inactive
            MSMAMARALATLGRLRYRVSGQLPLLDETAIEVMAGGQFLDGRKAREELGFFSTTALDDT
            LLRAIDWFRDNGYFNA""";

    /** A full entry whose cross references are all active is rendered with status=active. */
    @Test
    void testUniParcEntryToFasta() {
        UniParcEntry entry = create();
        String fasta = UniParcFastaParser.toFasta(entry);
        assertEquals(EXPECTED_FASTA_RESULT, fasta);
    }

    /** A light entry built without extra attributes is rendered with status=active. */
    @Test
    void testUniParcEntryLightToFasta() {
        UniParcEntryLight entry = createEntryLight();
        String fasta = UniParcFastaParser.toFasta(entry);
        assertEquals(EXPECTED_FASTA_RESULT, fasta);
    }

    /** A light entry flagged with HAS_ACTIVE_CROSS_REF=false is rendered with status=inactive. */
    @Test
    void testUniParcEntryLightToFastaInactive() {
        UniParcEntryLight entry = createEntryLight();
        entry = UniParcEntryLightBuilder.from(entry).extraAttributesAdd(HAS_ACTIVE_CROSS_REF, false).build();
        String fasta = UniParcFastaParser.toFasta(entry);
        assertEquals(EXPECTED_FASTA_RESULT_INACTIVE, fasta);
    }

    /** Builds the full-entry fixture: fixed id, fixture sequence and two active cross references. */
    private static UniParcEntry create() {
        Sequence sequence = getSequence();
        List<UniParcCrossReference> xrefs = getXrefs();
        return new UniParcEntryBuilder()
                .uniParcId(new UniParcIdBuilder("UPI0000083A08").build())
                .uniParcCrossReferencesSet(xrefs)
                .sequence(sequence)
                .build();
    }

    /** Builds the light-entry fixture with the same id and sequence as the full entry. */
    private static UniParcEntryLight createEntryLight() {
        return new UniParcEntryLightBuilder()
                .uniParcId("UPI0000083A08")
                .sequence(getSequence())
                .build();
    }

    /** Returns the 76-residue sequence shared by both fixtures. */
    private static Sequence getSequence() {
        String seq =
                "MSMAMARALATLGRLRYRVSGQLPLLDETAIEVMAGGQFLDGRKAREELGFFSTTALDDT" + "LLRAIDWFRDNGYFNA";
        return new SequenceBuilder(seq).build();
    }

    /** Returns two active cross references (SWISSPROT and TREMBL), each with its own property. */
    private static List<UniParcCrossReference> getXrefs() {
        Organism taxonomy =
                new OrganismBuilder().taxonId(9606).scientificName("Homo sapiens").build();
        List<Property> properties = new ArrayList<>();
        properties.add(new Property("prop1", "pvalue"));
        UniParcCrossReference xref =
                new UniParcCrossReferenceBuilder()
                        .versionI(3)
                        .database(UniParcDatabase.SWISSPROT)
                        .id("P12345")
                        .version(7)
                        .active(true)
                        .created(LocalDate.of(2017, 5, 17))
                        .lastUpdated(LocalDate.of(2017, 2, 27))
                        .propertiesSet(properties)
                        .organism(taxonomy)
                        .proteinName("some pname")
                        .geneName("some gname")
                        .build();

        List<Property> properties2 = new ArrayList<>();
        // Fix: the second property belongs to the second cross reference's own list; it was
        // previously appended to the first list, leaving properties2 empty.
        properties2.add(new Property("prop2", "pvalue2"));
        Organism taxonomy2 = new OrganismBuilder().taxonId(10090).scientificName("MOUSE").build();

        UniParcCrossReference xref2 =
                new UniParcCrossReferenceBuilder()
                        .versionI(1)
                        .database(UniParcDatabase.TREMBL)
                        .id("P52346")
                        .version(7)
                        .active(true)
                        .created(LocalDate.of(2017, 2, 12))
                        .lastUpdated(LocalDate.of(2017, 4, 23))
                        .propertiesSet(properties2)
                        .organism(taxonomy2)
                        .proteinName("some pname")
                        .proteomeId("UP00000564")
                        .component("chromosome 1")
                        .build();

        return Arrays.asList(xref, xref2);
    }

}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.uniprot.core.xml;

import com.google.common.base.Strings;
import org.uniprot.core.uniparc.UniParcCrossReference;
import org.uniprot.core.uniparc.impl.UniParcCrossReferenceBuilder;
import org.uniprot.core.uniprotkb.taxonomy.Organism;
import org.uniprot.core.uniprotkb.taxonomy.impl.OrganismBuilder;
Expand All @@ -18,6 +19,7 @@ public class CrossReferenceConverterUtils {
public static final String PROPERTY_COMPONENT = "component";
public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id";
public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession";
public static final String PROPERTY_SOURCES = UniParcCrossReference.PROPERTY_SOURCES;

private CrossReferenceConverterUtils(){}

Expand Down Expand Up @@ -47,6 +49,9 @@ public static void populateUniParcCrossReferenceBuilder(String propertyType, Str
case PROPERTY_UNIPROTKB_ACCESSION:
builder.propertiesAdd(PROPERTY_UNIPROTKB_ACCESSION, propertyValue);
break;
case PROPERTY_SOURCES:
builder.propertiesAdd(PROPERTY_SOURCES, propertyValue);
break;
default:
throw new XmlReaderException(
"Unable to read xml property: "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ public class UniParcDBCrossReferenceConverter
public static final String PROPERTY_COMPONENT = "component";
public static final String PROPERTY_NCBI_TAXONOMY_ID = "NCBI_taxonomy_id";
public static final String PROPERTY_UNIPROTKB_ACCESSION = "UniProtKB_accession";
public static final String PROPERTY_SOURCE = "source";

private final ObjectFactory xmlFactory;
private final TaxonomyRepo taxonomyRepo;
Expand All @@ -57,13 +56,6 @@ public UniParcCrossReference fromXml(DbReferenceType xmlObj) {
.lastUpdated(XmlConverterHelper.dateFromXml(xmlObj.getLast()));

for (PropertyType property : xmlObj.getProperty()) {

//TODO: Change it
/*
case PROPERTY_SOURCE:
builder.propertiesAdd(PROPERTY_SOURCE, property.getValue());
break;
*/
CrossReferenceConverterUtils.populateUniParcCrossReferenceBuilder(property.getType(), property.getValue(), builder, taxonomyRepo);
}
if (xmlObj.getVersion() != null) builder.version(xmlObj.getVersion());
Expand Down

0 comments on commit 606d6ef

Please sign in to comment.