From 57be79c9d55c0bfc4efc5581605e2bb2362916c8 Mon Sep 17 00:00:00 2001 From: Robert Langlois Date: Thu, 21 Apr 2016 09:56:41 -0700 Subject: [PATCH] IPA-4283: Update all cycle states (cherry picked from commit cc55ac6) --- interop/logic/metric/q_metric.h | 32 ++++- interop/logic/summary/cycle_state_summary.h | 92 ++++++++++++++ interop/logic/summary/error_summary.h | 113 ++---------------- interop/logic/summary/extraction_summary.h | 46 ++----- interop/logic/summary/map_cycle_to_read.h | 79 ++++++++++++ interop/logic/summary/quality_summary.h | 74 +++--------- interop/logic/summary/run_summary.h | 48 +++++++- interop/model/metrics/q_metric.h | 4 +- interop/model/summary/cycle_state_summary.h | 2 +- src/apps/cyclesim.cpp | 9 +- src/apps/summary.cpp | 3 + src/ext/swig/interop.i | 10 +- src/interop/CMakeLists.txt | 3 +- .../inc/corrected_intensity_metrics_test.h | 53 ++++++++ .../interop/metrics/inc/error_metrics_test.h | 4 +- .../metrics/inc/extraction_metrics_test.h | 2 + .../interop/metrics/inc/q_metrics_test.h | 6 + .../interop/metrics/summary_metrics_test.cpp | 1 + 18 files changed, 372 insertions(+), 209 deletions(-) create mode 100644 interop/logic/summary/cycle_state_summary.h create mode 100644 interop/logic/summary/map_cycle_to_read.h diff --git a/interop/logic/metric/q_metric.h b/interop/logic/metric/q_metric.h index 5249bd85a..690c00e91 100644 --- a/interop/logic/metric/q_metric.h +++ b/interop/logic/metric/q_metric.h @@ -181,7 +181,7 @@ namespace illumina { namespace interop { namespace logic { namespace metric { * This only for legacy platforms that use older q-metric formats, which do not include bin information * in the header. * - * @param set q_metric set + * @param metrics q_metric set * @param q_score_bins vector of q-score bins * @param instrument type */ @@ -190,4 +190,34 @@ namespace illumina { namespace interop { namespace logic { namespace metric { const size_t count = count_legacy_q_score_bins(metrics); populate_legacy_q_score_bins(q_score_bins, instrument, count); } + /** Test whether the q-values are compressed + * + * @param set q_metric set + * @return number of q-vals + */ + inline size_t count_qvals(const model::metric_base::metric_set& metrics) + { + return metrics.size() > 0 ? metrics.metrics()[0].size() : 0; + } + /** Test whether the q-values are compressed + * + * @param set q_metric set + * @return true if the histogram is compressed (no all zero columns) + */ + inline bool is_compressed(const model::metric_base::metric_set& metrics) + { + const size_t q_val_count = count_qvals(metrics); + return q_val_count > 0 && q_val_count != 50; + } + /** Get the index for the given q-value + * + * @param metrics q_metric set + * @param qval threshold + * @return index of q-val above given threshold + */ + inline size_t index_for_q_value(const model::metric_base::metric_set& metrics, const size_t qval) + { + if(!is_compressed(metrics)) return qval; + return metrics.index_for_q_value(qval); + } }}}} diff --git a/interop/logic/summary/cycle_state_summary.h b/interop/logic/summary/cycle_state_summary.h new file mode 100644 index 000000000..025733d6c --- /dev/null +++ b/interop/logic/summary/cycle_state_summary.h @@ -0,0 +1,92 @@ +/** Update the cycle state for the summary model + * + * @file + * @date 4/21/16 + * @version 1.0 + * @copyright GNU Public License. + */ +#pragma once +#include "interop/logic/summary/map_cycle_to_read.h" + + +namespace illumina { namespace interop { namespace logic { namespace summary { + +/** Define a member function of cycle_state_summary that sets a cycle state */ +typedef void (model::summary::cycle_state_summary::*set_cycle_state_func_t )(const model::run::cycle_range&); +/** Vector of vectors of cycle_range objects */ +typedef std::vector > cycle_range_vector2d_t; + +/** Cache errors for all tiles up to a give max cycle + * + * This function only includes errors from useable cycles (not the last cycle). + * + * @param beg iterator to start of a collection of error metrics + * @param end iterator to end of a collection of error metrics + * @param cycle_to_read map that takes a cycle and returns the read-number cycle-in-read pair + * @param summary_by_lane_read destination cache by read then by lane a collection of errors + */ +template +void cache_cycle_state_by_lane_read(I beg, I end, + const read_cycle_vector_t& cycle_to_read, + cycle_range_vector2d_t& summary_by_lane_read) +{ + typedef std::map< size_t, model::run::cycle_range > cycle_range_tile_t; + typedef std::vector< cycle_range_tile_t > cycle_range_by_read_tile_t; + cycle_range_by_read_tile_t tmp (summary_by_lane_read.size()); + for(;beg != end;++beg) + { + INTEROP_ASSERT(beg->cycle() > 0); + INTEROP_ASSERT((beg->cycle()-1) < cycle_to_read.size()); + const read_cycle& read = cycle_to_read[beg->cycle()-1]; + if(read.number == 0) continue; + INTEROP_ASSERT((read.number-1) < tmp.size()); + const size_t id = static_cast(model::metric_base::base_metric::id(beg->lane(), beg->tile())); + tmp[read.number-1][id].update(beg->cycle()); + } + for(size_t read=0;read(model::metric_base::base_metric::lane_from_id(ebeg->first)-1); + INTEROP_ASSERT(lane < summary_by_lane_read[read].size()); + summary_by_lane_read[read][lane].update(ebeg->second.last_cycle()); + } + } +} + + +/** Summarize the cycle state for a particular metric + * + * @param beg iterator to start of a collection of error metrics + * @param end iterator to end of a collection of error metrics + * @param cycle_to_read map cycle to the read number and cycle within read number + * @param pointer to function that sets a cycle state + * @param run destination run summary + */ +template +void summarize_cycle_state(I beg, + I end, + const read_cycle_vector_t& cycle_to_read, + set_cycle_state_func_t set_cycle_state_fun, + model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) +{ + + + cycle_range_vector2d_t cycle_range(run.size(), std::vector< model::run::cycle_range>(run.lane_count())); + cache_cycle_state_by_lane_read(beg, end, cycle_to_read, cycle_range); + + model::run::cycle_range overall_cycle_state; + for(size_t read=0;read -void map_read_to_cycle_number(I beg, I end, std::vector& cycle_to_read, UnaryOp op) -{ - cycle_to_read.resize(std::accumulate(beg, end, size_t(0), op::total_cycle_sum(op))+1); - std::fill(cycle_to_read.begin(), cycle_to_read.end(), 0); - for(;beg != end;++beg) - { - for(size_t cycle=op(*beg).first_cycle()-1, last_cycle=op(*beg).last_cycle(), cycle_in_read=1; - cycle < last_cycle;++cycle_in_read, ++cycle) - { - INTEROP_ASSERT(cycle < cycle_to_read.size()); - cycle_to_read[cycle] = read_cycle(op(*beg).number(), cycle_in_read); - } - INTEROP_ASSERT((op(*beg).last_cycle()-1) < cycle_to_read.size()); - cycle_to_read[op(*beg).last_cycle()-1] = read_cycle();// Remove last cycle, not usable - } -} -/** Create a map from cycle to a pair: read number and cycle in read - * - * @param beg iterator to start of a collection of read infos - * @param end iterator to end of a collection of read infos - * @param cycle_to_read map that takes a cycle and returns the read-number cycle-in-read pair - */ -template -void map_read_to_cycle_number(I beg, I end, std::vector& cycle_to_read) -{ - map_read_to_cycle_number(beg, end, cycle_to_read, op::default_get_read()); -} /** Cache errors for all tiles up to a give max cycle * @@ -98,11 +49,11 @@ void cache_error_by_lane_read(I beg, INTEROP_ASSERT(beg->cycle() > 0); INTEROP_ASSERT((beg->cycle()-1) < cycle_to_read.size()); const read_cycle& read = cycle_to_read[beg->cycle()-1]; - if(read.cycle > max_cycle || read.number == 0) continue; + if(read.cycle_within_read > max_cycle || read.is_last_cycle_in_read) continue; key_t key = std::make_pair(beg->lane(), beg->tile()); const size_t read_number = read.number-1; max_error_cycle[read_number] = std::max(max_error_cycle[read_number], static_cast(beg->cycle())); - INTEROP_ASSERT(read_number < tmp.size()); + INTEROP_ASSERTMSG(read_number < tmp.size(), read.number << " " << read.cycle_within_read << ", " << beg->cycle()); if(tmp[read_number].find(key) == tmp[read.number-1].end()) tmp[read_number][key] = std::make_pair(0.0f, 0.0f); tmp[read_number][key].first+=beg->errorRate(); @@ -146,45 +97,6 @@ inline void error_summary_from_cache(summary_by_lane_read& summary_by_lan } } -/** Cache errors for all tiles up to a give max cycle - * - * This function only includes errors from useable cycles (not the last cycle). - * - * @param beg iterator to start of a collection of error metrics - * @param end iterator to end of a collection of error metrics - * @param cycle_to_read map that takes a cycle and returns the read-number cycle-in-read pair - * @param summary_by_lane_read destination cache by read then by lane a collection of errors - */ -template -void cache_cycle_state_by_lane_read(I beg, I end, - const std::vector& cycle_to_read, - std::vector >& summary_by_lane_read) -{ - typedef std::map< size_t, model::run::cycle_range > cycle_range_tile_t; - typedef std::vector< cycle_range_tile_t > cycle_range_by_read_tile_t; - cycle_range_by_read_tile_t tmp (summary_by_lane_read.size()); - for(;beg != end;++beg) - { - INTEROP_ASSERT(beg->cycle() > 0); - INTEROP_ASSERT((beg->cycle()-1) < cycle_to_read.size()); - const read_cycle& read = cycle_to_read[beg->cycle()-1]; - if(read.number == 0) continue; - INTEROP_ASSERT((read.number-1) < tmp.size()); - const size_t id = static_cast(model::metric_base::base_metric::id(beg->lane(), beg->tile())); - tmp[read.number-1][id].update(beg->cycle()); - } - for(size_t read=0;read(model::metric_base::base_metric::lane_from_id(ebeg->first)-1); - INTEROP_ASSERT(lane < summary_by_lane_read[read].size()); - summary_by_lane_read[read][lane].update(ebeg->second.last_cycle()); - } - } -} - /** Summarize a collection error metrics * * @sa model::summary::lane_summary::error_rate @@ -201,13 +113,16 @@ void cache_cycle_state_by_lane_read(I beg, I end, * * @param beg iterator to start of a collection of error metrics * @param end iterator to end of a collection of error metrics + * @param cycle_to_read map cycle to the read number and cycle within read number * @param run destination run summary */ template -void summarize_error_metrics(I beg, I end, model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) +void summarize_error_metrics(I beg, + I end, + const read_cycle_vector_t& cycle_to_read, + model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) { typedef summary_by_lane_read summary_by_lane_read_t; - typedef std::vector > cycle_range_read_lane_t; typedef model::summary::metric_stat& (model::summary::lane_summary::*error_functor_t )(); typedef std::pair cycle_functor_pair_t; @@ -215,9 +130,6 @@ void summarize_error_metrics(I beg, I end, model::summary::run_summary &run) thr if(run.size() == 0) return; summary_by_lane_read_t temp(run, std::distance(beg, end)); - std::vector cycle_to_read; - map_read_to_cycle_number(run.begin(), run.end(), cycle_to_read); - cycle_functor_pair_t cycle_functor_pairs[] = { cycle_functor_pair_t(35u, &model::summary::lane_summary::error_rate_35), cycle_functor_pair_t(50u, &model::summary::lane_summary::error_rate_50), @@ -234,14 +146,10 @@ void summarize_error_metrics(I beg, I end, model::summary::run_summary &run) thr cache_error_by_lane_read(beg, end, std::numeric_limits::max(), cycle_to_read, temp); - cycle_range_read_lane_t cycle_range(run.size(), std::vector< model::run::cycle_range>(run.lane_count())); - cache_cycle_state_by_lane_read(beg, end, cycle_to_read, cycle_range); - float error_rate = 0; size_t total = 0; float error_rate_nonindex = 0; size_t total_nonindex = 0; - model::run::cycle_range overall_cycle_state; for(size_t read=0;read0) run[read].summary().error_rate(divide(error_rate_by_read,static_cast(total_by_read))); error_rate += error_rate_by_read; @@ -273,7 +179,6 @@ void summarize_error_metrics(I beg, I end, model::summary::run_summary &run) thr } run.nonindex_summary().error_rate(divide(error_rate_nonindex,static_cast(total_nonindex))); run.total_summary().error_rate(divide(error_rate,static_cast(total))); - run.cycle_state().error_cycle_range(overall_cycle_state); } } diff --git a/interop/logic/summary/extraction_summary.h b/interop/logic/summary/extraction_summary.h index 52cf0a6fd..0c1c2f630 100644 --- a/interop/logic/summary/extraction_summary.h +++ b/interop/logic/summary/extraction_summary.h @@ -10,6 +10,7 @@ #include "interop/model/model_exceptions.h" #include "interop/logic/summary/summary_statistics.h" #include "interop/model/metrics/extraction_metric.h" +#include "interop/logic/summary/map_cycle_to_read.h" #include "interop/model/summary/run_summary.h" @@ -21,33 +22,6 @@ namespace logic { namespace summary { - /** Create a map from cycle to read number (only if it is the first cycle of the read) - * - * @param beg iterator to start of a collection of read infos - * @param end iterator to end of a collection of read infos - * @param is_first_cycle_of_read map that takes a cycle and returns the read-number if it is the first cycle in the read - * @param op unary operator that takes some object and returns a read info - */ - template - void map_is_first_cycle_of_read(I beg, I end, std::vector& is_first_cycle_of_read, UnaryOp op) - { - is_first_cycle_of_read.resize(std::accumulate(beg, end, size_t(0), op::total_cycle_sum(op))+1); - std::fill(is_first_cycle_of_read.begin(), is_first_cycle_of_read.end(), 0); - for(;beg != end;++beg) is_first_cycle_of_read[op(*beg).first_cycle()] = op(*beg).number(); - } - /** Create a map from cycle to read number (only if it is the first cycle of the read) - * - * @param beg iterator to start of a collection of read infos - * @param end iterator to end of a collection of read infos - * @param is_first_cycle_of_read map that takes a cycle and returns the read-number if it is the first cycle in the read - */ - template - void map_is_first_cycle_of_read(I beg, I end, std::vector& is_first_cycle_of_read) - { - map_is_first_cycle_of_read(beg, end, is_first_cycle_of_read, op::default_get_read()); - } - - /** Summarize a collection extraction metrics * * @sa model::summary::lane_summary::first_cycle_intensity @@ -57,29 +31,31 @@ namespace summary * * @param beg iterator to start of a collection of extraction metrics * @param end iterator to end of a collection of extraction metrics + * @param cycle_to_read map cycle to the read number and cycle within read number * @param channel channel to use for intensity reporting * @param run destination run summary */ template - void summarize_extraction_metrics(I beg, I end, const size_t channel, model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) + void summarize_extraction_metrics(I beg, + I end, + const read_cycle_vector_t& cycle_to_read, + const size_t channel, + model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) { typedef typename model::metrics::extraction_metric::ushort_t ushort_t; typedef summary_by_lane_read summary_by_lane_read_t; if(beg == end) return; if(run.size()==0)return; - summary_by_lane_read_t temp(run, std::distance(beg, end)); - std::vector is_first_cycle_of_read; - logic::summary::map_is_first_cycle_of_read(run.begin(), run.end(), is_first_cycle_of_read); for(;beg != end;++beg) { - const size_t read = is_first_cycle_of_read[beg->cycle()]; - if(read == 0) continue; // If read is zero, then the cycle is invalid (not the first cycle of read) - INTEROP_ASSERT((read-1) < temp.read_count()); + const size_t read = cycle_to_read[beg->cycle()-1].number-1; + if(cycle_to_read[beg->cycle()-1].cycle_within_read > 1) continue; + INTEROP_ASSERT(read < temp.read_count()); const size_t lane = beg->lane()-1; if(lane >= temp.lane_count()) throw model::index_out_of_bounds_exception("Lane exceeds lane count in RunInfo.xml"); - temp(read-1, lane).push_back(beg->max_intensity(channel)); + temp(read, lane).push_back(beg->max_intensity(channel)); } float first_cycle_intensity = 0; diff --git a/interop/logic/summary/map_cycle_to_read.h b/interop/logic/summary/map_cycle_to_read.h new file mode 100644 index 000000000..f09f3125a --- /dev/null +++ b/interop/logic/summary/map_cycle_to_read.h @@ -0,0 +1,79 @@ +/** Map each cycle to its read info + * + * @file + * @date 4/20/16 + * @version 1.0 + * @copyright GNU Public License. + */ +#pragma once +#include +#include +#include "interop/logic/summary/summary_statistics.h" +#include "interop/model/metric_base/base_metric.h" + +namespace illumina { namespace interop { namespace logic { namespace summary { + + +/** Read number and cycle in read + */ +struct read_cycle +{ + /** Constructor + * + * @param r read number + * @param c cycle in read + */ + read_cycle(const size_t r = 0, const size_t c = 0) : number(r), cycle_within_read(c), is_last_cycle_in_read(false) { } + + /** Read number */ + size_t number; + /** Cycle within read */ + size_t cycle_within_read; + /** Is last cycle in read */ + bool is_last_cycle_in_read; +}; + +/** Vector of read_cycle objects */ +typedef std::vector read_cycle_vector_t; + +/** Create a map from cycle to a pair: read number and cycle in read + * + * @param beg iterator to start of a collection of read infos + * @param end iterator to end of a collection of read infos + * @param cycle_to_read map that takes a cycle and returns the read-number cycle-in-read pair + * @param op unary operator that takes some object and returns a read info + */ +template +void map_read_to_cycle_number(I beg, I end, read_cycle_vector_t& cycle_to_read, UnaryOp op) +{ + cycle_to_read.resize(std::accumulate(beg, end, size_t(0), op::total_cycle_sum(op))); + std::fill(cycle_to_read.begin(), cycle_to_read.end(), 0); + for(;beg != end;++beg) + { + for(size_t cycle=op(*beg).first_cycle()-1, last_cycle=op(*beg).last_cycle(), cycle_in_read=1; + cycle < last_cycle;++cycle_in_read, ++cycle) + { + INTEROP_ASSERT(cycle < cycle_to_read.size()); + cycle_to_read[cycle] = read_cycle(op(*beg).number(), cycle_in_read); + } + INTEROP_ASSERT((op(*beg).last_cycle()-1) < cycle_to_read.size()); + cycle_to_read[op(*beg).last_cycle()-1].is_last_cycle_in_read = true; + } +} +/** Create a map from cycle to a pair: read number and cycle in read + * + * @param beg iterator to start of a collection of read infos + * @param end iterator to end of a collection of read infos + * @param cycle_to_read map that takes a cycle and returns the read-number cycle-in-read pair + */ +template +void map_read_to_cycle_number(I beg, I end, read_cycle_vector_t& cycle_to_read) +{ + map_read_to_cycle_number(beg, end, cycle_to_read, op::default_get_read()); +} + + + + + +}}}} diff --git a/interop/logic/summary/quality_summary.h b/interop/logic/summary/quality_summary.h index ba5eb670b..00383d6a9 100644 --- a/interop/logic/summary/quality_summary.h +++ b/interop/logic/summary/quality_summary.h @@ -10,51 +10,12 @@ #include #include "interop/model/model_exceptions.h" #include "interop/logic/summary/summary_statistics.h" +#include "interop/logic/summary/map_cycle_to_read.h" #include "interop/model/metrics/q_metric.h" #include "interop/model/summary/run_summary.h" -namespace illumina -{ -namespace interop -{ -namespace logic -{ -namespace summary -{ - /** Create a map from cycle to read number (only if it is the first cycle of the read) - * - * @param beg iterator to start of a collection of read infos - * @param end iterator to end of a collection of read infos - * @param is_first_cycle_of_read map that takes a cycle and returns the read-number - * @param op unary operator that takes some object and returns a read info - */ - template - void map_read_to_cycle(I beg, I end, std::vector& cycle_to_read, UnaryOp op) - { - cycle_to_read.resize(std::accumulate(beg, end, size_t(0), op::total_cycle_sum(op))+1); - std::fill(cycle_to_read.begin(), cycle_to_read.end(), 0); - for(;beg != end;++beg) - { - for(size_t cycle=op(*beg).first_cycle()-1, last_cycle=op(*beg).last_cycle(), cycle_in_read=1;cycle < last_cycle;++cycle_in_read, ++cycle) - { - INTEROP_ASSERT(cycle < cycle_to_read.size()); - cycle_to_read[cycle] = op(*beg).number(); - } - INTEROP_ASSERT((op(*beg).last_cycle()-1) < cycle_to_read.size()); - cycle_to_read[op(*beg).last_cycle()-1] = 0;// Remove last cycle, not usable - } - } - /** Create a map from cycle to read number (only if it is the first cycle of the read) - * - * @param beg iterator to start of a collection of read infos - * @param end iterator to end of a collection of read infos - * @param is_first_cycle_of_read map that takes a cycle and returns the read-number - */ - template - void map_read_to_cycle(I beg, I end, std::vector& cycle_to_read) - { - map_read_to_cycle(beg, end, cycle_to_read, op::default_get_read()); - } +namespace illumina { namespace interop { namespace logic { namespace summary { + /** Contains number above Q30 and total number of calls */ struct qval_total @@ -97,11 +58,16 @@ namespace summary * * @param beg iterator to start of a collection of q metrics * @param end iterator to end of a collection of q metrics + * @param cycle_to_read map cycle to the read number and cycle within read number * @param qval30_index index of Q30 bin * @param run destination run summary */ template - void summarize_quality_metrics(I beg, I end, const size_t qval_index, model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) + void summarize_quality_metrics(I beg, + I end, + const read_cycle_vector_t& cycle_to_read, + const size_t qval_index, + model::summary::run_summary &run) throw( model::index_out_of_bounds_exception ) { typedef model::metrics::q_metric::uint_t uint_t; typedef model::summary::lane_summary lane_summary; @@ -116,26 +82,24 @@ namespace summary size_vector2d_t metrics_in_read(run.size(), size_vector_t(run.lane_count())); tile_lookup_t tile_lookup; - std::vector cycle_to_read; - map_read_to_cycle(run.begin(), run.end(), cycle_to_read); - for(;beg != end;++beg) { INTEROP_ASSERT(beg->cycle() > 0); INTEROP_ASSERT((beg->cycle()-1) < cycle_to_read.size()); - const size_t read_number = cycle_to_read[beg->cycle()-1]; - if(read_number == 0) continue; + const size_t read_number = cycle_to_read[beg->cycle()-1].number-1; + if(cycle_to_read[beg->cycle()-1].is_last_cycle_in_read) continue; const size_t lane = beg->lane()-1; - INTEROP_ASSERT((read_number-1) < cache.size()); - INTEROP_ASSERT(lane < cache[read_number-1].size()); - INTEROP_ASSERT((read_number-1) < metrics_in_read.size()); - INTEROP_ASSERT(lane < metrics_in_read[read_number-1].size()); + INTEROP_ASSERT(read_number < cache.size()); + INTEROP_ASSERT(lane < cache[read_number].size()); + INTEROP_ASSERT(read_number < metrics_in_read.size()); + INTEROP_ASSERT(lane < metrics_in_read[read_number].size()); if(lane >= run.lane_count()) throw model::index_out_of_bounds_exception("Lane exceeds lane count in RunInfo.xml"); - cache[read_number-1][lane].above_qval+=beg->total_over_qscore(static_cast(qval_index)); - cache[read_number-1][lane].total += beg->sum_qscore(); + cache[read_number][lane].above_qval+=beg->total_over_qscore(static_cast(qval_index)); + cache[read_number][lane].total += beg->sum_qscore(); tile_lookup.insert(beg->tile()); - metrics_in_read[read_number - 1][lane] += 1; + metrics_in_read[read_number][lane] += 1; } + ::uint64_t total_useable_calls = 0; ::uint64_t useable_calls_gt_q30 = 0; float projected_yield_g = 0; diff --git a/interop/logic/summary/run_summary.h b/interop/logic/summary/run_summary.h index e97d03bb0..049f072cd 100644 --- a/interop/logic/summary/run_summary.h +++ b/interop/logic/summary/run_summary.h @@ -56,15 +56,55 @@ namespace summary { using namespace model::metrics; if(metrics.empty()) return; + + summary.initialize(metrics.run_info().reads(), metrics.run_info().flowcell().lane_count()); + + + read_cycle_vector_t cycle_to_read; + map_read_to_cycle_number(summary.begin(), summary.end(), cycle_to_read); summarize_tile_metrics(metrics.get_set().begin(), metrics.get_set().end(), summary); - summarize_error_metrics(metrics.get_set().begin(), metrics.get_set().end(), summary); + summarize_error_metrics(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + summary); INTEROP_ASSERT(metrics.run_info().channels().size()>0); const size_t intensity_channel = utils::expected2actual_map(metrics.run_info().channels())[0]; - summarize_extraction_metrics(metrics.get_set().begin(), metrics.get_set().end(), intensity_channel, summary); - const size_t q30_index = metrics.get_set().index_for_q_value(30); - summarize_quality_metrics(metrics.get_set().begin(), metrics.get_set().end(), q30_index, summary); + summarize_extraction_metrics(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + intensity_channel, + summary); + + const size_t q30_index = logic::metric::index_for_q_value(metrics.get_set(), 30); + summarize_quality_metrics(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + q30_index, + summary); summarize_tile_count(metrics, summary); + + summarize_cycle_state(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + &model::summary::cycle_state_summary::error_cycle_range, + summary); + summarize_cycle_state(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + &model::summary::cycle_state_summary::extracted_cycle_range, + summary); + summarize_cycle_state(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + &model::summary::cycle_state_summary::qscored_cycle_range, + summary); + // Summarize called cycle state + summarize_cycle_state(metrics.get_set().begin(), + metrics.get_set().end(), + cycle_to_read, + &model::summary::cycle_state_summary::called_cycle_range, + summary); } } diff --git a/interop/model/metrics/q_metric.h b/interop/model/metrics/q_metric.h index a077f6688..2accccf33 100644 --- a/interop/model/metrics/q_metric.h +++ b/interop/model/metrics/q_metric.h @@ -159,10 +159,12 @@ namespace illumina { */ q_score_bin::bin_type max_q_value()const { - return m_bin_count == static_cast(MAX_Q_BINS) || m_bin_count == 0 ? static_cast(MAX_Q_BINS) : m_qscoreBins.back().upper(); + return m_bin_count == static_cast(MAX_Q_BINS) || + m_bin_count == 0 ? static_cast(MAX_Q_BINS) : m_qscoreBins.back().upper(); } /** Get the index for the given q-value * + * @note Never call this function directly, use interop::logic::metric::index_for_q_value * @param qval q-value * @return index; */ diff --git a/interop/model/summary/cycle_state_summary.h b/interop/model/summary/cycle_state_summary.h index 638ac2d88..49b6dbee4 100644 --- a/interop/model/summary/cycle_state_summary.h +++ b/interop/model/summary/cycle_state_summary.h @@ -48,7 +48,7 @@ class cycle_state_summary } /** Get the base called cycle range * - * @return base called cycle range + * @return base called cycle range */ const run::cycle_range& called_cycle_range()const { diff --git a/src/apps/cyclesim.cpp b/src/apps/cyclesim.cpp index 9021d6674..095e1a2ef 100644 --- a/src/apps/cyclesim.cpp +++ b/src/apps/cyclesim.cpp @@ -274,7 +274,14 @@ int copy_cycle_metrics(const std::string& input, const std::string& output, unsi for(const_iterator beg = metrics.metrics().begin(), end=metrics.metrics().end();beg != end;++beg) { - if(beg->cycle() > max_cycle) continue; + if(beg->tile()%2==0 && max_cycle > 1) + { + if (beg->cycle() > (max_cycle-1)) continue; + } + else + { + if (beg->cycle() > max_cycle) continue; + } subset.push_back(*beg); } diff --git a/src/apps/summary.cpp b/src/apps/summary.cpp index c9f486dfa..c53281069 100644 --- a/src/apps/summary.cpp +++ b/src/apps/summary.cpp @@ -312,4 +312,7 @@ void print_summary(std::ostream& out, const run_summary& summary) print_array(out, values, width); } } + out << "Extracted: " << format(summary.cycle_state().extracted_cycle_range()) << "\n"; + out << "Called: " << format(summary.cycle_state().called_cycle_range()) << "\n"; + out << "Scored: " << format(summary.cycle_state().qscored_cycle_range()) << "\n"; } diff --git a/src/ext/swig/interop.i b/src/ext/swig/interop.i index ac7e504d9..d88ceebe3 100644 --- a/src/ext/swig/interop.i +++ b/src/ext/swig/interop.i @@ -150,8 +150,15 @@ WRAP_TEMPLATE_BASE(index_metric) %ignore illumina::interop::model::summary::run_summary::end; %ignore illumina::interop::model::summary::run_summary::operator[]; + %{ #include "interop/model/run/cycle_range.h" +#include "interop/model/run/read_info.h" +%} +%include "interop/model/run/cycle_range.h" +%include "interop/model/run/read_info.h" + +%{ #include "interop/model/summary/cycle_state_summary.h" #include "interop/model/summary/metric_summary.h" #include "interop/model/summary/lane_summary.h" @@ -164,7 +171,6 @@ WRAP_TEMPLATE_BASE(index_metric) #include "interop/model/run/image_dimensions.h" #include "interop/model/run/info.h" #include "interop/model/run/parameters.h" -#include "interop/model/run/read_info.h" #include "interop/model/run_metrics.h" #include "interop/logic/metric/q_metric.h" @@ -172,7 +178,6 @@ WRAP_TEMPLATE_BASE(index_metric) %} -%include "interop/model/run/cycle_range.h" %include "interop/model/summary/cycle_state_summary.h" %include "interop/model/summary/metric_summary.h" %include "interop/model/summary/lane_summary.h" @@ -184,7 +189,6 @@ WRAP_TEMPLATE_BASE(index_metric) %include "interop/model/run/image_dimensions.h" %include "interop/model/run/info.h" %include "interop/model/run/parameters.h" -%include "interop/model/run/read_info.h" // // Setup typemaps for summary metrics diff --git a/src/interop/CMakeLists.txt b/src/interop/CMakeLists.txt index 2f1368d94..b0003c55c 100644 --- a/src/interop/CMakeLists.txt +++ b/src/interop/CMakeLists.txt @@ -1,5 +1,4 @@ - set(SRCS model/run/info.cpp model/run/parameters.cpp @@ -79,7 +78,7 @@ set(HEADERS ../../interop/util/linear_hierarchy.h ../../interop/util/object_list.h ../../interop/util/exception_specification.h - ) + ../../interop/logic/summary/map_cycle_to_read.h ../../interop/logic/summary/cycle_state_summary.h) if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "AppleClang" OR CMAKE_COMPILER_IS_GNUCC) foreach(SRC ${SRCS}) diff --git a/src/tests/interop/metrics/inc/corrected_intensity_metrics_test.h b/src/tests/interop/metrics/inc/corrected_intensity_metrics_test.h index 29b62c9b5..14e14b48e 100644 --- a/src/tests/interop/metrics/inc/corrected_intensity_metrics_test.h +++ b/src/tests/interop/metrics/inc/corrected_intensity_metrics_test.h @@ -10,6 +10,7 @@ #include #include "metric_test.h" #include "interop/model/metrics/corrected_intensity_metric.h" +#include "interop/model/summary/run_summary.h" namespace illumina{ namespace interop { namespace unittest { @@ -75,6 +76,32 @@ namespace illumina{ namespace interop { namespace unittest { }; return to_string(tmp); } + /** Get number lanes in data + * + * @return 8 lanes + */ + static size_t lane_count(){return 8;} + /** Get reads describing data + * + * @return reads vector + */ + static std::vector reads() + { + std::vector reads(1, model::run::read_info(1, 1, 27, false)); + return reads; + } + /** Get the summary for these metrics + * + * @return run summary + */ + static model::summary::run_summary summary() + { + std::vector read_infos = reads(); + model::summary::run_summary summary(read_infos.begin(), read_infos.end(), lane_count()); + summary[0][0].cycle_state().called_cycle_range(model::run::cycle_range(1, 25)); + summary.cycle_state().called_cycle_range(model::run::cycle_range(1, 25)); + return summary; + } }; /** This test compares byte values taken from an InterOp file for three records produced by RTA 2.7.x @@ -128,6 +155,32 @@ namespace illumina{ namespace interop { namespace unittest { }; return to_string(tmp); } + /** Get number lanes in data + * + * @return 8 lanes + */ + static size_t lane_count(){return 8;} + /** Get reads describing data + * + * @return reads vector + */ + static std::vector reads() + { + std::vector reads(1, model::run::read_info(1, 1, 27, false)); + return reads; + } + /** Get the summary for these metrics + * + * @return run summary + */ + static model::summary::run_summary summary() + { + std::vector read_infos = reads(); + model::summary::run_summary summary(read_infos.begin(), read_infos.end(), lane_count()); + summary[0][0].cycle_state().called_cycle_range(model::run::cycle_range(3, 3)); + summary.cycle_state().called_cycle_range(model::run::cycle_range(3, 3)); + return summary; + } }; /** Interface between fixtures and Google Test */ template diff --git a/src/tests/interop/metrics/inc/error_metrics_test.h b/src/tests/interop/metrics/inc/error_metrics_test.h index 6b72590af..e7d60d26d 100644 --- a/src/tests/interop/metrics/inc/error_metrics_test.h +++ b/src/tests/interop/metrics/inc/error_metrics_test.h @@ -84,12 +84,12 @@ struct error_v3 : metric_test summary[0][6].error_rate_50()=model::summary::metric_stat(00.0f, 0, 0.0f); summary[0][6].error_rate_75()=model::summary::metric_stat(0.0f, 0, 0.0f); summary[0][6].error_rate_100()=model::summary::metric_stat(0.0f, 0, 0.0f); - summary[0][6].cycle_state().error_cycle_range(model::run::cycle_range(2, 2)); + summary[0][6].cycle_state().error_cycle_range(model::run::cycle_range(3, 3)); summary[0].summary().error_rate(0.67515134811401367f); summary.total_summary().error_rate(0.67515134811401367f); summary.nonindex_summary().error_rate(0.67515134811401367f); summary[0][6].tile_count(1); - summary.cycle_state().error_cycle_range(model::run::cycle_range(2, 2)); + summary.cycle_state().error_cycle_range(model::run::cycle_range(3, 3)); return summary; } }; diff --git a/src/tests/interop/metrics/inc/extraction_metrics_test.h b/src/tests/interop/metrics/inc/extraction_metrics_test.h index ea6f3dcd5..e31c1eddc 100644 --- a/src/tests/interop/metrics/inc/extraction_metrics_test.h +++ b/src/tests/interop/metrics/inc/extraction_metrics_test.h @@ -91,10 +91,12 @@ namespace illumina{ namespace interop { namespace unittest { std::vector read_infos = reads(); model::summary::run_summary summary(read_infos.begin(), read_infos.end(), lane_count()); summary[0][6].first_cycle_intensity()=model::summary::metric_stat(321, 24.75883674621582f, 312); + summary[0][6].cycle_state().extracted_cycle_range(model::run::cycle_range(1, 1)); summary[0].summary().first_cycle_intensity(321); summary.total_summary().first_cycle_intensity(321); summary.nonindex_summary().first_cycle_intensity(321); summary[0][6].tile_count(3); + summary.cycle_state().extracted_cycle_range(model::run::cycle_range(1, 1)); return summary; } }; diff --git a/src/tests/interop/metrics/inc/q_metrics_test.h b/src/tests/interop/metrics/inc/q_metrics_test.h index b40a74bbc..23717c8ac 100644 --- a/src/tests/interop/metrics/inc/q_metrics_test.h +++ b/src/tests/interop/metrics/inc/q_metrics_test.h @@ -107,6 +107,7 @@ namespace illumina{ namespace interop { namespace unittest { summary[0][0].projected_yield_g(0.0098816361278295517); summary[0][0].yield_g(0.0074112270958721638f); summary[0][0].percent_gt_q30(95.733200073242188f); + summary[0][0].cycle_state().qscored_cycle_range(model::run::cycle_range(1, 2)); summary[0].summary().projected_yield_g(0.0098816361278295517); summary[0].summary().yield_g(0.0074112270958721638f); summary[0].summary().percent_gt_q30(95.733200073242188f); @@ -116,6 +117,7 @@ namespace illumina{ namespace interop { namespace unittest { summary.nonindex_summary().percent_gt_q30(95.733200073242188f); summary.nonindex_summary().projected_yield_g(0.0098816361278295517); summary.nonindex_summary().yield_g(0.0074112270958721638f); + summary.cycle_state().qscored_cycle_range(model::run::cycle_range(1, 2)); return summary; } }; @@ -221,6 +223,7 @@ namespace illumina{ namespace interop { namespace unittest { summary[0][0].projected_yield_g(0.0056276721879839897f); summary[0][0].yield_g(0.0056276721879839897f); summary[0][0].percent_gt_q30(95.650672912597656f); + summary[0][0].cycle_state().qscored_cycle_range(model::run::cycle_range(1, 1)); summary[0].summary().projected_yield_g(0.0056276721879839897f); summary[0].summary().yield_g(0.0056276721879839897f); summary[0].summary().percent_gt_q30(95.650672912597656f); @@ -230,6 +233,7 @@ namespace illumina{ namespace interop { namespace unittest { summary.nonindex_summary().percent_gt_q30(95.650672912597656f); summary.nonindex_summary().projected_yield_g(0.0056276721879839897f); summary.nonindex_summary().yield_g(0.0056276721879839897); + summary.cycle_state().qscored_cycle_range(model::run::cycle_range(1, 1)); return summary; } }; @@ -321,6 +325,7 @@ namespace illumina{ namespace interop { namespace unittest { summary[0][6].projected_yield_g(0.0095612816512584686f); summary[0][6].yield_g(0.009561280719935894f); summary[0][6].percent_gt_q30(90.1163330078125f); + summary[0][6].cycle_state().qscored_cycle_range(model::run::cycle_range(3, 3)); summary[0].summary().projected_yield_g(0.0095612816512584686f); summary[0].summary().yield_g(0.0095612816512584686f); summary[0].summary().percent_gt_q30(90.1163330078125f); @@ -330,6 +335,7 @@ namespace illumina{ namespace interop { namespace unittest { summary.nonindex_summary().percent_gt_q30(90.1163330078125f); summary.nonindex_summary().projected_yield_g(0.0095612816512584686f); summary.nonindex_summary().yield_g(0.0095612816512584686f); + summary.cycle_state().qscored_cycle_range(model::run::cycle_range(3, 3)); return summary; } }; diff --git a/src/tests/interop/metrics/summary_metrics_test.cpp b/src/tests/interop/metrics/summary_metrics_test.cpp index 02c644e40..1da5f5b79 100644 --- a/src/tests/interop/metrics/summary_metrics_test.cpp +++ b/src/tests/interop/metrics/summary_metrics_test.cpp @@ -143,6 +143,7 @@ TYPED_TEST(summary_metrics_test, lane_summary) const model::summary::cycle_state_summary& actual_cycle_summary = actual_lane_summary.cycle_state(); const model::summary::cycle_state_summary& expected_cycle_summary = expected_lane_summary.cycle_state(); + EXPECT_CYCLE_EQ(actual_cycle_summary.called_cycle_range(), expected_cycle_summary.called_cycle_range()); EXPECT_CYCLE_EQ(actual_cycle_summary.extracted_cycle_range(), expected_cycle_summary.extracted_cycle_range()); EXPECT_CYCLE_EQ(actual_cycle_summary.called_cycle_range(), expected_cycle_summary.called_cycle_range()); EXPECT_CYCLE_EQ(actual_cycle_summary.qscored_cycle_range(), expected_cycle_summary.qscored_cycle_range());