Skip to content

Commit

Permalink
EVA-3571 New stats calculator (#196)
Browse files Browse the repository at this point in the history
* Added new stats calculator
  • Loading branch information
nitin-ebi authored Aug 12, 2024
1 parent 36f5f07 commit 022ca8f
Show file tree
Hide file tree
Showing 16 changed files with 771 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@
<filtering>true</filtering>
<includes>
<include>test-mongo.properties</include>
<include>test-stats.properties</include>
<include>opencga/conf/storage-mongodb.properties</include>
</includes>
</testResource>
Expand All @@ -208,6 +209,7 @@
<filtering>false</filtering>
<excludes>
<exclude>test-mongo.properties</exclude>
<exclude>test-stats.properties</exclude>
<exclude>opencga/conf/storage-mongodb.properties</exclude>
</excludes>
</testResource>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ private Genotype normalizeGenotypeAlleles(Genotype g) {
}
}

void setGenotypesCount(Map<Genotype, Integer> genotypesCount) {
/**
 * Replaces the genotype-to-count map held by this object.
 *
 * @param genotypesCount map from genotype to its Integer count; the reference is stored as-is
 *                       (no copy is made)
 */
public void setGenotypesCount(Map<Genotype, Integer> genotypesCount) {
this.genotypesCount = genotypesCount;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -266,4 +266,8 @@ public Set<VariantStatsMongo> getVariantStatsMongo() {
public Set<VariantAnnotation> getAnnotations() {
return annotations;
}

/**
 * Replaces the statistics set stored in the {@code variantStatsMongo} field.
 *
 * @param variantStats the new set of statistics; the reference is assigned directly (no copy is made)
 */
public void setStats(Set<VariantStatsMongo> variantStats) {
this.variantStatsMongo = variantStats;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -174,4 +174,19 @@ private BasicDBObject buildAttributes(Map<String, String> attributes) {
return attrs;
}

/**
 * @return the {@code samp} field of this object (the same instance, not a copy)
 */
public BasicDBObject getSampleData() {
return samp;
}

/**
 * @return the study identifier stored in the {@code studyId} field
 */
public String getStudyId() {
return studyId;
}

/**
 * @return the file identifier stored in the {@code fileId} field
 */
public String getFileId() {
return fileId;
}

/**
 * @return the {@code alternates} array itself, not a copy, so changes made by the caller
 *         to the returned array are visible through this object
 */
public String[] getAlternates() {
return alternates;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,20 @@ public class BeanNames {
public static final String VARIANT_ANNOTATION_READER = "variant-annotation-reader";
public static final String VARIANT_READER = "variant-reader";
public static final String ACCESSION_REPORT_READER = "accession-report-reader";
public static final String VARIANT_STATS_READER = "variant-stats-reader";

public static final String VEP_ANNOTATION_PROCESSOR = "vep-annotation-processor";
public static final String ANNOTATION_PARSER_PROCESSOR = "annotation-parser-processor";
public static final String ANNOTATION_COMPOSITE_PROCESSOR = "annotation-composite-processor";
public static final String VARIANT_STATS_PROCESSOR = "variant-stats-processor";

public static final String GENE_WRITER = "gene-writer";
public static final String ANNOTATION_WRITER = "annotation-writer";
public static final String ANNOTATION_IN_VARIANT_WRITER = "annotation-in-variant-writer";
public static final String COMPOSITE_ANNOTATION_VARIANT_WRITER = "composite-annotation-variant-writer";
public static final String VARIANT_WRITER = "variant-writer";
public static final String ACCESSION_IMPORTER = "accession-importer";
public static final String VARIANT_STATS_WRITER = "variant-stats-writer";

public static final String ANNOTATION_SKIP_STEP_DECIDER = "annotation-skip-step-decider";
public static final String STATISTICS_SKIP_STEP_DECIDER = "statistics-skip-step-decider";
Expand All @@ -60,6 +63,7 @@ public class BeanNames {
public static final String DROP_FILES_BY_STUDY_STEP = "drop-files-by-study-step";
public static final String LOAD_ANNOTATION_METADATA_STEP = "annotation-metadata-step";
public static final String ACCESSION_IMPORT_STEP = "accession-import-step";
public static final String CALCULATE_AND_LOAD_STATISTICS_STEP = "calculate-load-statistics-step";

public static final String AGGREGATED_VCF_JOB = "aggregated-vcf-job";
public static final String ANNOTATE_VARIANTS_JOB = "annotate-variants-job";
Expand All @@ -69,4 +73,5 @@ public class BeanNames {
public static final String CALCULATE_STATISTICS_JOB = "calculate-statistics-job";
public static final String DROP_STUDY_JOB = "drop-study-job";
public static final String ACCESSION_IMPORT_JOB = "accession-import-job";
public static final String CALCULATE_AND_LOAD_STATISTICS_JOB = "calculate-load-statistics-job";
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.io.readers;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.mongodb.core.MongoTemplate;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.readers.VariantStatsReader;
import uk.ac.ebi.eva.pipeline.parameters.ChunkSizeParameters;
import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters;
import uk.ac.ebi.eva.pipeline.parameters.InputParameters;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_READER;

/**
 * Spring configuration that registers the reader bean named {@code VARIANT_STATS_READER}.
 */
@Configuration
public class VariantStatsReaderConfiguration {

    /**
     * Creates the step-scoped {@link VariantStatsReader}.
     *
     * @param databaseParameters  handed unchanged to the reader
     * @param mongoTemplate       handed unchanged to the reader
     * @param inputParameters     source of the study id given to the reader
     * @param chunkSizeParameters source of the chunk size given to the reader
     * @return a new reader producing {@link VariantDocument} items
     */
    @Bean(VARIANT_STATS_READER)
    @StepScope
    public ItemStreamReader<VariantDocument> variantStatsReader(DatabaseParameters databaseParameters,
                                                                MongoTemplate mongoTemplate,
                                                                InputParameters inputParameters,
                                                                ChunkSizeParameters chunkSizeParameters) {
        final String studyId = inputParameters.getStudyId();
        return new VariantStatsReader(
                databaseParameters,
                mongoTemplate,
                studyId,
                chunkSizeParameters.getChunkSize());
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.io.writers;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemWriter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.mongodb.core.MongoTemplate;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.writers.VariantStatsWriter;
import uk.ac.ebi.eva.pipeline.parameters.DatabaseParameters;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_WRITER;

/**
 * Spring configuration that registers the writer bean named {@code VARIANT_STATS_WRITER}.
 */
@Configuration
public class VariantStatsWriterConfiguration {

    /**
     * Creates the step-scoped {@link VariantStatsWriter}.
     *
     * @param databaseParameters handed unchanged to the writer
     * @param mongoTemplate      handed unchanged to the writer
     * @return a new writer consuming {@link VariantDocument} items
     */
    @Bean(VARIANT_STATS_WRITER)
    @StepScope
    public ItemWriter<VariantDocument> variantStatsWriter(DatabaseParameters databaseParameters,
                                                          MongoTemplate mongoTemplate) {
        final VariantStatsWriter statsWriter = new VariantStatsWriter(databaseParameters, mongoTemplate);
        return statsWriter;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.Job;
import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.JobBuilderFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import org.springframework.context.annotation.Scope;
import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.CalculateAndLoadStatisticsStepConfiguration;
import uk.ac.ebi.eva.pipeline.parameters.NewJobIncrementer;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_AND_LOAD_STATISTICS_JOB;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_AND_LOAD_STATISTICS_STEP;

/**
 * Configuration of the job registered as {@code CALCULATE_AND_LOAD_STATISTICS_JOB}.
 * <p>
 * The job consists of a single step, {@code CALCULATE_AND_LOAD_STATISTICS_STEP}, which is imported from
 * {@link CalculateAndLoadStatisticsStepConfiguration}.
 * <p>
 * TODO add a new PopulationStatisticsJobParametersValidator
 */
@Configuration
@EnableBatchProcessing
@Import({CalculateAndLoadStatisticsStepConfiguration.class})
public class CalculateAndLoadStatisticsJobConfiguration {

    private static final Logger logger = LoggerFactory.getLogger(CalculateAndLoadStatisticsJobConfiguration.class);

    /** The only step of the job, resolved by its bean name. */
    @Autowired
    @Qualifier(CALCULATE_AND_LOAD_STATISTICS_STEP)
    private Step calculateAndLoadStatisticsStep;

    /**
     * Builds the job. The bean is prototype-scoped, so a new {@link Job} is built each time it is requested.
     *
     * @param jobBuilderFactory factory used to create the job builder
     * @return a job that uses a {@link NewJobIncrementer} and starts with the statistics step
     */
    @Bean(CALCULATE_AND_LOAD_STATISTICS_JOB)
    @Scope("prototype")
    public Job calculateAndLoadStatisticsJob(JobBuilderFactory jobBuilderFactory) {
        // Parameterized form; renders the same message as the previous concatenation.
        logger.debug("Building '{}'", CALCULATE_AND_LOAD_STATISTICS_JOB);

        return jobBuilderFactory
                .get(CALCULATE_AND_LOAD_STATISTICS_JOB)
                .incrementer(new NewJobIncrementer())
                .start(calculateAndLoadStatisticsStep)
                .build();
    }

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs.steps;

import org.springframework.batch.core.Step;
import org.springframework.batch.core.configuration.annotation.EnableBatchProcessing;
import org.springframework.batch.core.configuration.annotation.StepBuilderFactory;
import org.springframework.batch.core.step.tasklet.TaskletStep;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.batch.item.ItemStreamReader;
import org.springframework.batch.item.ItemWriter;
import org.springframework.batch.repeat.policy.SimpleCompletionPolicy;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Import;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.configuration.ChunkSizeCompletionPolicyConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.io.readers.VariantStatsReaderConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.io.writers.VariantStatsWriterConfiguration;
import uk.ac.ebi.eva.pipeline.configuration.jobs.steps.processors.VariantStatsProcessorConfiguration;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.CALCULATE_AND_LOAD_STATISTICS_STEP;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_PROCESSOR;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_READER;
import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_WRITER;


/**
 * Spring configuration that registers the chunk-oriented step named
 * {@code CALCULATE_AND_LOAD_STATISTICS_STEP}, importing the reader, processor, writer and
 * chunk-size completion policy configurations it depends on.
 */
@Configuration
@EnableBatchProcessing
@Import({VariantStatsReaderConfiguration.class, VariantStatsWriterConfiguration.class,
        VariantStatsProcessorConfiguration.class, ChunkSizeCompletionPolicyConfiguration.class})
public class CalculateAndLoadStatisticsStepConfiguration {

    /**
     * Assembles the step from its reader, processor and writer.
     *
     * @param variantStatsReader        bean named {@code VARIANT_STATS_READER}
     * @param variantStatsProcessor     bean named {@code VARIANT_STATS_PROCESSOR}
     * @param variantStatsWriter        bean named {@code VARIANT_STATS_WRITER}
     * @param stepBuilderFactory        factory used to create the step builder
     * @param chunkSizeCompletionPolicy completion policy passed to {@code chunk(...)}
     * @return the built step
     */
    @Bean(CALCULATE_AND_LOAD_STATISTICS_STEP)
    public Step calculateAndLoadStatisticsStep(
            @Qualifier(VARIANT_STATS_READER) ItemStreamReader<VariantDocument> variantStatsReader,
            @Qualifier(VARIANT_STATS_PROCESSOR) ItemProcessor<VariantDocument, VariantDocument> variantStatsProcessor,
            @Qualifier(VARIANT_STATS_WRITER) ItemWriter<VariantDocument> variantStatsWriter,
            StepBuilderFactory stepBuilderFactory,
            SimpleCompletionPolicy chunkSizeCompletionPolicy) {
        return stepBuilderFactory
                .get(CALCULATE_AND_LOAD_STATISTICS_STEP)
                .<VariantDocument, VariantDocument>chunk(chunkSizeCompletionPolicy)
                .reader(variantStatsReader)
                .processor(variantStatsProcessor)
                .writer(variantStatsWriter)
                .build();
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright 2024 EMBL - European Bioinformatics Institute
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.ac.ebi.eva.pipeline.configuration.jobs.steps.processors;

import org.springframework.batch.core.configuration.annotation.StepScope;
import org.springframework.batch.item.ItemProcessor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import uk.ac.ebi.eva.commons.models.mongo.entity.VariantDocument;
import uk.ac.ebi.eva.pipeline.io.processors.VariantStatsProcessor;

import static uk.ac.ebi.eva.pipeline.configuration.BeanNames.VARIANT_STATS_PROCESSOR;

/**
 * Spring configuration that registers the processor bean named {@code VARIANT_STATS_PROCESSOR}.
 */
@Configuration
public class VariantStatsProcessorConfiguration {

    /**
     * Creates the step-scoped {@link VariantStatsProcessor}.
     *
     * @return a new processor that takes and returns {@link VariantDocument} items
     */
    @Bean(VARIANT_STATS_PROCESSOR)
    @StepScope
    public ItemProcessor<VariantDocument, VariantDocument> variantStatsProcessor() {
        final VariantStatsProcessor statsProcessor = new VariantStatsProcessor();
        return statsProcessor;
    }
}
Loading

0 comments on commit 022ca8f

Please sign in to comment.