Skip to content

Commit

Permalink
Add support for error metrics v6 (#296)
Browse files Browse the repository at this point in the history
* Support future platforms using error metrics v6

* Fix version history

* Fix windows python build

* Fix testing branches on same day
  • Loading branch information
ezralanglois authored Aug 18, 2022
1 parent c829797 commit 367e44a
Show file tree
Hide file tree
Showing 16 changed files with 570 additions and 50 deletions.
1 change: 1 addition & 0 deletions .github/workflows/python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ jobs:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
packages_dir: ./dist
repository_url: https://test.pypi.org/legacy/
skip_existing: true

publish:
needs: [build]
Expand Down
2 changes: 2 additions & 0 deletions docs/src/changes.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@

Date | Description
---------- | -----------
2022-08-09 | Support parsing error metrics v6
2022-08-09 | Fix potential bugs when parsing Q-metrics
2022-08-13 | Issue-282: Support universal build for M1 mac
2022-08-13 | Issue-282: Remove support for Python 3.5 (not working on Mac M1)
2022-08-13 | Issue-282: Update Windows Agent to 2019 from 2016 (2016 discontinued)
Expand Down
39 changes: 39 additions & 0 deletions interop/io/metric_file_stream.h
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,26 @@ namespace illumina { namespace interop { namespace io
std::istringstream in(buffer);
read_metrics(in, metrics, buffer.length(), rebuild);
}
/** Serialize a metric set into an in-memory binary InterOp byte buffer
 *
 * The metrics are written to a temporary output string stream; the bytes produced
 * then replace the current contents of `buffer`.
 *
 * @param buffer string that receives the byte buffer
 * @param metrics metric set to write
 * @param version version of the format to write (-1 means use latest)
 * @throw bad_format_exception
 * @throw incomplete_file_exception
 * @throw model::index_out_of_bounds_exception
 */
template<class MetricSet>
void write_interop_to_string(std::string& buffer
        , const MetricSet& metrics, const ::int16_t version = -1) INTEROP_THROW_SPEC(
        (interop::io::bad_format_exception,
        interop::io::incomplete_file_exception,
        model::index_out_of_bounds_exception))
{
    // Collect the binary output in memory before handing it to the caller
    std::ostringstream byte_stream;
    write_metrics(byte_stream, metrics, version);
    buffer = byte_stream.str();
}
/** Read the binary InterOp file into the given metric set
*
* @param buffer string holding a byte buffer
Expand All @@ -108,6 +128,25 @@ namespace illumina { namespace interop { namespace io
std::istringstream in(buffer);
return read_header(in, metrics);
}
/** Serialize only the binary InterOp file header of a metric set into a string
 *
 * The header is written to a temporary output string stream; the bytes produced
 * then replace the current contents of `buffer`.
 *
 * @param buffer string that receives the byte buffer
 * @param metrics metric set whose header is written
 * @param version version of the format to write (-1 means use the version reported by `metrics`)
 * @throw bad_format_exception
 * @throw incomplete_file_exception
 * @throw model::index_out_of_bounds_exception
 */
template<class MetricSet>
void write_header_to_string(std::string& buffer, const MetricSet& metrics, ::int16_t version=-1) INTEROP_THROW_SPEC(
        (interop::io::bad_format_exception,
        interop::io::incomplete_file_exception,
        model::index_out_of_bounds_exception))
{
    // -1 is the sentinel for "no explicit version requested"
    if(version == -1)
    {
        version = metrics.version();
    }
    std::ostringstream header_stream;
    write_metric_header<typename MetricSet::metric_type>(header_stream, version, metrics);
    buffer = header_stream.str();
}
/** Read the binary InterOp file into the given metric set
*
* @snippet src/examples/example1.cpp Reading a binary InterOp file
Expand Down
17 changes: 10 additions & 7 deletions interop/model/metrics/corrected_intensity_metric.h
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ namespace illumina { namespace interop { namespace model { namespace metrics
* @param lane lane number
* @param tile tile number
* @param cycle cycle number
* @param called_count_vec number of clusters called per base
* @param called_count number of clusters called per base
*/
corrected_intensity_metric(const uint_t lane,
const uint_t tile,
Expand Down Expand Up @@ -434,10 +434,10 @@ namespace illumina { namespace interop { namespace model { namespace metrics
*/
float percent_base(const constants::dna_bases index) const INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
{
uint_t total = total_calls(index == constants::NC);
const uint_t total = total_calls(index == constants::NC );
if (total == 0)
return std::numeric_limits<float>::quiet_NaN();
return called_counts(index) / static_cast<float>(total) * 100;
return called_counts(index) / static_cast<float>(total) * 100.0f;
}

/** Get the percentage per base (does not include no calls)
Expand All @@ -447,11 +447,14 @@ namespace illumina { namespace interop { namespace model { namespace metrics
*/
float_array_t percent_bases() const
{
uint_t total = total_calls();
float_array_t percent_bases(called_counts_array().size() - 1);
if(m_called_counts.empty())
{
return float_array_t();
}
const uint_t total = total_calls();
float_array_t percent_bases(constants::NUM_OF_BASES);
for (size_t i = 0; i < percent_bases.size(); ++i)
percent_bases[i] = (total == 0) ? std::numeric_limits<float>::quiet_NaN() :
called_counts_array()[i + 1] / static_cast<float>(total) * 100;
percent_bases[i] = static_cast<float>(m_called_counts[i + 1]) / static_cast<float>(total) * 100.0f;
return percent_bases;
}

Expand Down
176 changes: 160 additions & 16 deletions interop/model/metrics/error_metric.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,90 @@

namespace illumina { namespace interop { namespace model { namespace metrics
{
/** Header information for an error metric file, used for format version 6
 *
 * Holds the number of adapter sequences, the length shared by those sequences
 * and the sequences themselves.
 */
class error_metric_header : public metric_base::base_cycle_metric::header_type
{
public:
    /** Constructor
     *
     * The length of the first adapter is taken as the expected length of every adapter.
     *
     * @param adapters adapter sequences to store in the header
     */
    error_metric_header(const std::vector<std::string>& adapters) :
            m_number_adapters(static_cast<uint16_t>(adapters.size())),
            m_adapter_length(0)
    {
        if(!adapters.empty())
        {
            m_adapter_length = static_cast<uint16_t>(adapters.front().size());
        }
        for(size_t i = 0; i < adapters.size(); ++i)
        {
            INTEROP_ASSERTMSG(adapters[i].size() == m_adapter_length,
                              "Adapter Sequence (" << adapters[i] << ") length doesn't match expected adapter length");
            m_adapter_sequences.push_back(adapters[i]);
        }
    }

    /** Constructor for a header without any adapter sequences
     */
    error_metric_header() :
            m_number_adapters(0),
            m_adapter_length(0)
    {
    }

    /** Number of adapter sequences in the model
     *
     * @return number of adapter sequences
     */
    uint16_t number_adapters() const
    {
        return m_number_adapters;
    }

    /** Length of each adapter sequence
     *
     * @return adapter length
     */
    uint16_t adapter_length() const
    {
        return m_adapter_length;
    }

    /** Adapter sequences
     *
     * @return adapter sequences stored in this header
     */
    const std::vector<std::string>& adapter_sequences() const
    {
        return m_adapter_sequences;
    }

    /** Reset the adapter information held here, then clear the base header
     */
    void clear()
    {
        m_number_adapters = 0;
        m_adapter_length = 0;
        m_adapter_sequences.clear();
        metric_base::base_cycle_metric::header_type::clear();
    }

    /** Generate a default header
     *
     * @return header built from an empty list of adapters
     */
    static error_metric_header default_header()
    {
        const std::vector<std::string> no_adapters;
        return error_metric_header(no_adapters);
    }

private:
    uint16_t m_number_adapters;
    uint16_t m_adapter_length;
    std::vector<std::string> m_adapter_sequences;

    template<class MetricType, int Version>
    friend
    struct io::generic_layout;
};

/** @brief Error rate for a spiked in PhiX control sample
*
* The error metric is the calculated error rate, as determined by a spiked in PhiX control sample.
* This metric is available for each lane and tile for every cycle.
*
* @note Supported versions: 3, 4, and 5
* @note Supported versions: 3, 4, 5 and 6
*/
class error_metric : public metric_base::base_cycle_metric
{
Expand All @@ -40,29 +118,35 @@ namespace illumina { namespace interop { namespace model { namespace metrics
MAX_MISMATCH = 5,
/** Unique type code for metric */
TYPE = constants::Error,
/** Latest version of the InterOp format */
LATEST_VERSION = 5
/** Latest version of the InterOp format */
LATEST_VERSION = 6
};
/** Define a uint array using an underlying vector
*/
typedef std::vector<uint_t> uint_array_t;
/** Vector of floats points */
typedef std::vector<float> float_vector;
/** Error metric set header */
typedef error_metric_header header_type;
public:
/** Constructor
*/
error_metric() :
metric_base::base_cycle_metric(0, 0, 0),
m_error_rate(std::numeric_limits<float>::quiet_NaN()),
m_phix_adapter_rate(std::numeric_limits<float>::quiet_NaN()),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
metric_base::base_cycle_metric(0, 0, 0),
m_error_rate(std::numeric_limits<float>::quiet_NaN()),
m_phix_adapter_rate(std::numeric_limits<float>::quiet_NaN()),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
{
}
/** Constructor
*/
error_metric(const header_type&) :
metric_base::base_cycle_metric(0, 0, 0),
m_error_rate(std::numeric_limits<float>::quiet_NaN()),
m_phix_adapter_rate(std::numeric_limits<float>::quiet_NaN()),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
error_metric(const header_type& header) :
metric_base::base_cycle_metric(0, 0, 0),
m_error_rate(std::numeric_limits<float>::quiet_NaN()),
m_phix_adapter_rate(std::numeric_limits<float>::quiet_NaN()),
m_phix_adapter_rates(header.number_adapters(), 0),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
{
}

Expand All @@ -78,13 +162,34 @@ namespace illumina { namespace interop { namespace model { namespace metrics
const uint_t cycle,
const float error,
const float phix_adapter_rate) :
metric_base::base_cycle_metric(lane, tile, cycle),
m_error_rate(error),
m_phix_adapter_rate(phix_adapter_rate),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
metric_base::base_cycle_metric(lane, tile, cycle),
m_error_rate(error),
m_phix_adapter_rate(phix_adapter_rate),
m_mismatch_cluster_count(MAX_MISMATCH, 0)
{
}

/** Constructor
 *
 * The mean adapter rate is derived from the per-adapter rates. When no rates
 * are given it is NaN, the same value the default constructor uses.
 *
 * @param lane lane number
 * @param tile tile number
 * @param cycle cycle number
 * @param error error rate for current cycle
 * @param phix_adapter_rates rates of each adapter
 */
error_metric(const uint_t lane,
             const uint_t tile,
             const uint_t cycle,
             const float error,
             const float_vector &phix_adapter_rates) :
        metric_base::base_cycle_metric(lane, tile, cycle),
        m_error_rate(error),
        // Start from NaN: set_mean_adapter_rate() does not touch this member when
        // the vector is empty, which previously left it uninitialized.
        m_phix_adapter_rate(std::numeric_limits<float>::quiet_NaN()),
        m_phix_adapter_rates(phix_adapter_rates),
        m_mismatch_cluster_count(MAX_MISMATCH, 0)
{
    set_mean_adapter_rate();
}

public:
/** @defgroup error_metric Error Metrics
*
Expand Down Expand Up @@ -114,6 +219,34 @@ namespace illumina { namespace interop { namespace model { namespace metrics
{
return m_phix_adapter_rate;
}
/** Calculated adapter trim rate per adapter of PhiX clusters
 *
 * @note Supported by v6
 * @return vector of adapter rates per adapter
 */
const float_vector& phix_adapter_rates() const
{
return m_phix_adapter_rates;
}
/** Calculated adapter trim rate of PhiX clusters for a single adapter
 *
 * @note Supported by v6
 * @param n index of the adapter
 * @return adapter rate for given adapter index
 * @throw model::index_out_of_bounds_exception listed in the throw specification; presumably raised by the
 *        bounds check when n is not below the number of adapter rates (confirm against INTEROP_BOUNDS_CHECK)
 */
float phix_adapter_rate_at(const size_t n) const INTEROP_THROW_SPEC((model::index_out_of_bounds_exception))
{
INTEROP_BOUNDS_CHECK(n, m_phix_adapter_rates.size(), "Index out of bounds");
return m_phix_adapter_rates[n];
}
/** Number of adapters on PhiX
 *
 * @note Supported by v6
 * @return number of adapters (size of the per-adapter rate vector)
 */
size_t phix_adapter_count() const
{
return m_phix_adapter_rates.size();
}

/** Number of clusters at given number of mismatches
*
Expand Down Expand Up @@ -175,8 +308,19 @@ namespace illumina { namespace interop { namespace model { namespace metrics
{ return "Error"; }

private:

/** Recompute the mean adapter rate from the per-adapter rates
 *
 * When there are no per-adapter rates the current mean adapter rate is left unchanged.
 */
void set_mean_adapter_rate()
{
    const size_t adapter_count = m_phix_adapter_rates.size();
    if(adapter_count != 0)
    {
        float rate_sum = 0;
        for(size_t idx = 0; idx < adapter_count; ++idx)
        {
            rate_sum += m_phix_adapter_rates[idx];
        }
        m_phix_adapter_rate = rate_sum / adapter_count;
    }
}

float m_error_rate;
float m_phix_adapter_rate;
float_vector m_phix_adapter_rates;
uint_array_t m_mismatch_cluster_count;
template<class MetricType, int Version>
friend
Expand Down
1 change: 0 additions & 1 deletion src/ext/python/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -689,7 +689,6 @@ def imaging(run_metrics, dtype='f4', **extra):

if not isinstance(dtype, str):
dtype = np.dtype(dtype).str

return np.core.records.fromarrays(data.transpose()
, names=",".join(headers)
, formats=",".join([dtype for _ in headers]))
Expand Down
Loading

0 comments on commit 367e44a

Please sign in to comment.