Skip to content

Commit

Permalink
EVA-3425 make sure reference and alternate is always upper case (#192)
Browse files Browse the repository at this point in the history
* make sure reference and alternate is always upper case
  • Loading branch information
nitin-ebi authored Oct 26, 2023
1 parent f280604 commit e027cbc
Show file tree
Hide file tree
Showing 24 changed files with 238 additions and 24 deletions.
6 changes: 4 additions & 2 deletions src/main/java/uk/ac/ebi/eva/commons/models/data/Variant.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ public Variant(String chromosome, int start, int end, String reference, String a
this.chromosome = chromosome;
this.start = start;
this.end = end;
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.reference = (reference != null) ? reference : "";
this.alternate = (alternate != null) ? alternate : "";

Expand Down Expand Up @@ -229,7 +231,7 @@ public String getReference() {
}

/**
 * Sets the reference allele, normalised to upper case, and refreshes the cached length.
 *
 * <p>A {@code null} argument is stored as an empty string, which is how the constructor treats
 * a {@code null} reference. Upper-casing uses {@link java.util.Locale#ROOT} so the stored value
 * does not depend on the JVM default locale.
 *
 * @param reference reference allele in any letter case; {@code null} is stored as {@code ""}
 */
public void setReference(String reference) {
    this.reference = (reference != null) ? reference.toUpperCase(java.util.Locale.ROOT) : "";
    // Length is the longer of the two stored alleles.
    this.length = Math.max(this.reference.length(), this.alternate.length());
}

Expand All @@ -238,7 +240,7 @@ public String getAlternate() {
}

/**
 * Sets the alternate allele, normalised to upper case, and refreshes the cached length.
 *
 * <p>A {@code null} argument is stored as an empty string, which is how the constructor treats
 * a {@code null} alternate. Upper-casing uses {@link java.util.Locale#ROOT} so the stored value
 * does not depend on the JVM default locale.
 *
 * @param alternate alternate allele in any letter case; {@code null} is stored as {@code ""}
 */
public void setAlternate(String alternate) {
    this.alternate = (alternate != null) ? alternate.toUpperCase(java.util.Locale.ROOT) : "";
    // Length is the longer of the two stored alleles.
    this.length = Math.max(this.reference.length(), this.alternate.length());
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
* Entry that associates a variant and a file in a variant archive. It contains
Expand Down Expand Up @@ -82,7 +83,7 @@ public VariantSourceEntry(String fileId, String studyId) {
public VariantSourceEntry(String fileId, String studyId, String[] secondaryAlternates, String format) {
this.fileId = fileId;
this.studyId = studyId;
this.secondaryAlternates = secondaryAlternates;
setSecondaryAlternates(secondaryAlternates);
this.format = format;

this.samplesData = new ArrayList<>();
Expand Down Expand Up @@ -111,7 +112,10 @@ public String[] getSecondaryAlternates() {
}

/**
 * Stores an upper-cased copy of the given secondary alternate alleles.
 *
 * <p>The caller's array is never retained: a new array is always created. Upper-casing uses
 * {@link java.util.Locale#ROOT} so the result does not depend on the JVM default locale.
 *
 * @param secondaryAlternates alleles in any letter case; {@code null} is stored as an empty array
 * @throws NullPointerException if the array contains a {@code null} element
 */
public void setSecondaryAlternates(String[] secondaryAlternates) {
    if (secondaryAlternates == null) {
        this.secondaryAlternates = new String[0];
        return;
    }
    this.secondaryAlternates = Arrays.stream(secondaryAlternates)
            .map(allele -> allele.toUpperCase(java.util.Locale.ROOT))
            .toArray(String[]::new);
}

public String getFormat() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ public VariantStats(String chromosome, int position, String referenceAllele, Str
Variant.VariantType variantType, float maf, float mgf, String mafAllele, String mgfGenotype,
int numMissingAlleles, int numMissingGenotypes, int numMendelErrors, float percentCasesDominant,
float percentControlsDominant, float percentCasesRecessive, float percentControlsRecessive) {
this.refAllele = referenceAllele;
this.altAllele = alternateAlleles;
this.refAllele = Objects.nonNull(referenceAllele) ? referenceAllele.toUpperCase(): null;
this.altAllele = Objects.nonNull(alternateAlleles) ? alternateAlleles.toUpperCase(): null;
this.variantType = variantType;

this.maf = maf;
Expand Down Expand Up @@ -134,15 +134,15 @@ public String getRefAllele() {
}

/**
 * Sets the reference allele, normalised to upper case.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the stored value does not depend on the
 * JVM default locale.
 *
 * @param refAllele reference allele in any letter case; {@code null} is stored as {@code null}
 */
public void setRefAllele(String refAllele) {
    this.refAllele = (refAllele == null) ? null : refAllele.toUpperCase(java.util.Locale.ROOT);
}

/**
 * @return the alternate allele currently held by this object
 */
public String getAltAllele() {
    return this.altAllele;
}

/**
 * Sets the alternate allele, normalised to upper case.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the stored value does not depend on the
 * JVM default locale.
 *
 * @param altAllele alternate allele in any letter case; {@code null} is stored as {@code null}
 */
public void setAltAllele(String altAllele) {
    this.altAllele = (altAllele == null) ? null : altAllele.toUpperCase(java.util.Locale.ROOT);
}

public Variant.VariantType getVariantType() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

import java.util.Collections;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

/**
Expand Down Expand Up @@ -83,6 +84,8 @@ public Annotation(String chromosome, int start, int end, String referenceAllele,
this.vepVersion = vepVersion;
this.vepCacheVersion = vepCacheVersion;

referenceAllele = Objects.nonNull(referenceAllele) ? referenceAllele.toUpperCase() : null;
alternativeAllele = Objects.nonNull(alternativeAllele) ? alternativeAllele.toUpperCase() : null;
this.id = buildAnnotationId(chromosome, start, referenceAllele, alternativeAllele, vepVersion, vepCacheVersion);
this.xrefs = new HashSet<>();
this.consequenceTypes = new HashSet<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@

import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

/**
Expand Down Expand Up @@ -116,6 +117,8 @@ public class VariantDocument {
public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Map<String, Set<String>> hgvs, Set<String> ids,
Set<VariantSourceEntryMongo> variantSources) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand All @@ -139,6 +142,8 @@ public VariantDocument(Variant.VariantType variantType, String chromosome, int s
public VariantDocument(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Set<HgvsMongo> hgvs, Set<String> ids,
Set<VariantSourceEntryMongo> variantSources) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand All @@ -160,6 +165,8 @@ public VariantDocument(Variant.VariantType variantType, String chromosome, int s
}

public static String buildVariantId(String chromosome, int start, String reference, String alternate) {
reference = reference.toUpperCase();
alternate = alternate.toUpperCase();
StringBuilder builder = new StringBuilder(chromosome);
builder.append("_");
builder.append(start);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import static uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument.ALTERNATE_FIELD;
Expand Down Expand Up @@ -79,6 +80,8 @@ public class SimplifiedVariant {

public SimplifiedVariant(Variant.VariantType variantType, String chromosome, int start, int end, int length,
String reference, String alternate, Map<String, Set<String>> hgvs) {
reference = Objects.nonNull(reference) ? reference.toUpperCase() : null;
alternate = Objects.nonNull(alternate) ? alternate.toUpperCase() : null;
this.id = buildVariantId(chromosome, start, reference, alternate);
this.variantType = variantType;
this.chromosome = chromosome;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,13 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

/**
* Mongo database representation of Variant Source entry.
Expand Down Expand Up @@ -73,8 +75,10 @@ public VariantSourceEntryMongo(String fileId, String studyId, String[] alternate
this.fileId = fileId;
this.studyId = studyId;
if (alternates != null && alternates.length > 0) {
this.alternates = new String[alternates.length];
System.arraycopy(alternates, 0, this.alternates, 0, alternates.length);
this.alternates = Arrays.stream(alternates)
.map(a->a.toUpperCase())
.collect(Collectors.toList())
.toArray(new String[0]);
}
attrs = buildAttributes(attributes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import java.util.Arrays;
import java.util.HashSet;
import java.util.Objects;
import java.util.Set;

public class AccessionReportLineMapper extends VariantVcfFactory implements LineMapper<Variant> {
Expand All @@ -37,7 +38,7 @@ public Variant mapLine(String line, int lineNumber) {
String chromosome = fields[0];
int position = Integer.parseInt(fields[1]);
String reference = getReference(fields);
String alternateAllele = fields[4];
String alternateAllele = Objects.nonNull(fields[4]) ? fields[4].toUpperCase() : null ;

VariantCoreFields keyFields = getVariantCoreKeyFields(chromosome, position, reference, alternateAllele);
Variant variant = new Variant(chromosome, (int) keyFields.getStart(), (int) keyFields.getEnd(), keyFields.getReference(), keyFields.getAlternate());
Expand All @@ -48,7 +49,7 @@ public Variant mapLine(String line, int lineNumber) {
}

/**
 * Reads the reference allele from the fourth column ({@code fields[3]}) of a split line.
 *
 * <p>A value of {@code "."} is returned as an empty string; any other value is upper-cased with
 * {@link java.util.Locale#ROOT} so the result does not depend on the JVM default locale.
 *
 * @param fields the columns of one input line
 * @return the upper-cased reference allele, or {@code ""} when the column is {@code "."}
 */
private String getReference(String[] fields) {
    String rawReference = fields[3];
    if (rawReference.equals(".")) {
        return "";
    }
    return rawReference.toUpperCase(java.util.Locale.ROOT);
}

private Set<String> getIds(String[] fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -182,8 +182,8 @@ private Map<String,String> parseVariant(String variantString, String coordinates
parsedVariant.put("chromosome", leftVariantFields[0]);
}
parsedVariant.put("start", leftVariantFields[leftVariantFields.length-2]);
parsedVariant.put("reference", leftVariantFields[leftVariantFields.length-1]);
parsedVariant.put("alternative", variantFields[1]);
parsedVariant.put("reference", leftVariantFields[leftVariantFields.length-1].toUpperCase());
parsedVariant.put("alternative", variantFields[1].toUpperCase());
} catch (ArrayIndexOutOfBoundsException e) {
logger.error("Unexpected variant format for column 1: "+variantString);
throw e;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Collectors;

import static java.lang.Math.max;

Expand Down Expand Up @@ -121,11 +122,14 @@ private Set<String> getIds(String[] fields) {
}

/**
 * Reads the reference allele from the fourth column ({@code fields[3]}) of a split line.
 *
 * <p>A value of {@code "."} is returned as an empty string; any other value is upper-cased with
 * {@link java.util.Locale#ROOT} so the result does not depend on the JVM default locale.
 *
 * @param fields the columns of one input line
 * @return the upper-cased reference allele, or {@code ""} when the column is {@code "."}
 */
private String getReference(String[] fields) {
    String rawReference = fields[3];
    if (rawReference.equals(".")) {
        return "";
    }
    return rawReference.toUpperCase(java.util.Locale.ROOT);
}

/**
 * Splits the fifth column ({@code fields[4]}) on commas and upper-cases every alternate allele.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the result does not depend on the JVM
 * default locale. The {@code chromosome}, {@code position} and {@code reference} parameters are
 * not used by this implementation.
 *
 * @param fields     the columns of one input line
 * @param chromosome unused here
 * @param position   unused here
 * @param reference  unused here
 * @return the upper-cased alternate alleles, in the order they appear in the column
 */
private String[] getAlternateAlleles(String[] fields, String chromosome, int position, String reference) {
    return Arrays.stream(fields[4].split(","))
            .map(allele -> allele.toUpperCase(java.util.Locale.ROOT))
            .toArray(String[]::new);
}

private float getQuality(String[] fields) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ public EnsemblVariant(String chromosome, long start, long end, String reference,
this.chromosome = chromosome;
this.start = start;
this.end = end;
this.reference = reference;
this.alternate = alternate;
this.reference = reference.toUpperCase();
this.alternate = alternate.toUpperCase();
transformToEnsemblFormat();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import uk.ac.ebi.eva.pipeline.Application;

import java.util.Map;
import java.util.Objects;

/**
* Statistics related to a set of samples for a given variant.
Expand Down Expand Up @@ -122,15 +123,15 @@ public String getReference() {
}

/**
 * Sets the reference allele, normalised to upper case.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the stored value does not depend on the
 * JVM default locale.
 *
 * @param reference reference allele in any letter case; {@code null} is stored as {@code null}
 */
void setReference(String reference) {
    this.reference = (reference == null) ? null : reference.toUpperCase(java.util.Locale.ROOT);
}

/**
 * @return the alternate allele currently held by this object
 */
public String getAlternate() {
    return this.alternate;
}

/**
 * Sets the alternate allele, normalised to upper case.
 *
 * <p>Upper-casing uses {@link java.util.Locale#ROOT} so the stored value does not depend on the
 * JVM default locale.
 *
 * @param alternate alternate allele in any letter case; {@code null} is stored as {@code null}
 */
void setAlternate(String alternate) {
    this.alternate = (alternate == null) ? null : alternate.toUpperCase(java.util.Locale.ROOT);
}

public String getCohortId() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package uk.ac.ebi.eva.commons.models.data;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Checks that {@link VariantSourceEntry} upper-cases the secondary alternates given to its
 * four-argument constructor.
 */
public class VariantSourceEntryTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        String[] lowerCaseAlternates = {"a", "t"};
        VariantSourceEntry entry = new VariantSourceEntry(null, null, lowerCaseAlternates, null);

        // Each stored allele must come back upper-cased, in the original order.
        assertEquals("A", entry.getSecondaryAlternates()[0]);
        assertEquals("T", entry.getSecondaryAlternates()[1]);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package uk.ac.ebi.eva.commons.models.data;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Checks that {@link VariantStats} upper-cases the alleles given to its three-argument
 * constructor.
 */
public class VariantStatsTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        VariantStats stats = new VariantStats("a", "t", null);

        String storedReference = stats.getRefAllele();
        assertEquals("A", storedReference);

        String storedAlternate = stats.getAltAllele();
        assertEquals("T", storedAlternate);
    }
}
14 changes: 14 additions & 0 deletions src/test/java/uk/ac/ebi/eva/commons/models/data/VariantTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package uk.ac.ebi.eva.commons.models.data;

import org.junit.Test;

import static org.junit.Assert.assertEquals;

/**
 * Checks that {@link Variant} upper-cases the reference and alternate given to its constructor.
 */
public class VariantTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        Variant lowerCaseVariant = new Variant("1", 1, 1, "c", "t");

        String storedReference = lowerCaseVariant.getReference();
        assertEquals("C", storedReference);

        String storedAlternate = lowerCaseVariant.getAlternate();
        assertEquals("T", storedAlternate);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
package uk.ac.ebi.eva.commons.models.mongo;

import org.junit.Test;
import uk.ac.ebi.eva.commons.models.mongo.entity.Annotation;

import static org.junit.Assert.assertEquals;

/**
 * Checks that the id built by the {@link Annotation} constructor contains upper-cased alleles.
 */
public class AnnotationTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        Annotation lowerCaseAnnotation = new Annotation("chr", 1, 1, "a", "t", "vep", "vep_cache");

        String annotationId = lowerCaseAnnotation.getId();

        assertEquals("chr_1_A_T_vep_vep_cache", annotationId);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package uk.ac.ebi.eva.commons.models.mongo;

import org.junit.Test;
import uk.ac.ebi.eva.commons.models.data.Variant;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;

import java.util.Map;
import java.util.Set;

import static org.junit.Assert.assertEquals;

/**
 * Checks that {@link VariantDocument} upper-cases the reference and alternate given to its
 * constructor.
 */
public class VariantDocumentTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        // The typed null selects the constructor overload that takes a Map of HGVS names.
        Map<String, Set<String>> noHgvs = null;
        VariantDocument document = new VariantDocument(
                Variant.VariantType.SNV, "chr", 1, 2, 1, "a", "t", noHgvs, null, null);

        String storedReference = document.getReference();
        assertEquals("A", storedReference);

        String storedAlternate = document.getAlternate();
        assertEquals("T", storedAlternate);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package uk.ac.ebi.eva.commons.models.mongo.projections;

import org.junit.Test;
import uk.ac.ebi.eva.commons.models.data.Variant;
import uk.ac.ebi.eva.commons.models.mongo.entity.projections.SimplifiedVariant;

import java.util.HashMap;

import static org.junit.Assert.assertEquals;

/**
 * Checks that {@link SimplifiedVariant} upper-cases the reference and alternate given to its
 * constructor.
 */
public class SimplifiedVariantTest {

    @Test
    public void testChangeRefAltToUpperCase() {
        SimplifiedVariant lowerCaseVariant = new SimplifiedVariant(
                Variant.VariantType.SNV, "chr", 1, 2, 1, "a", "t", new HashMap<>());

        String storedReference = lowerCaseVariant.getReference();
        assertEquals("A", storedReference);

        String storedAlternate = lowerCaseVariant.getAlternate();
        assertEquals("T", storedAlternate);
    }
}
Loading

0 comments on commit e027cbc

Please sign in to comment.