Skip to content

Commit

Permalink
patch 1.3.1- gba85597c mapq0 fix
Browse files Browse the repository at this point in the history
  • Loading branch information
grizk-illumina committed Dec 8, 2022
1 parent 2e0ec68 commit c4cb6c9
Show file tree
Hide file tree
Showing 15 changed files with 78 additions and 25 deletions.
27 changes: 25 additions & 2 deletions src/include/align/Alignment.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,19 @@ class Alignment : public AlignmentHeader {
return ret;
}

const map::SeedChain& chain() const
{
assert(chain_);
return *chain_;
}
void setChain(const map::SeedChain& c)
{
assert(!chain_ || &c == chain_);
chain_ = &c;
}
bool hasOnlyRandomSamples() const { return isUnmapped() || chain().hasOnlyRandomSamples(); }
bool isExtra() const { return isUnmapped() || chain().isExtra(); }

uint32_t setCigarOperations(const std::string& operations, int softClipStart = 0)
{
auto ret = cigar_.setOperationSequence(operations, softClipStart);
Expand Down Expand Up @@ -246,8 +259,9 @@ class Alignment : public AlignmentHeader {
short getNm() const { return getMismatchCount(); }

private:
Cigar cigar_;
const Alignment* sa_ = 0;
const map::SeedChain* chain_ = 0;
Cigar cigar_;
const Alignment* sa_ = 0;
/// lengths clipped at the beginning and at the end of the sequence
//int clip_[2];
//std::vector<unsigned char> optional_;
Expand Down Expand Up @@ -449,6 +463,15 @@ class AlignmentPair {
return (nullptr != seedChains_[0]) && (nullptr != seedChains_[1]) && seedChains_[0]->isPerfect() &&
seedChains_[1]->isPerfect();
}
bool hasOnlyRandomSamples() const
{
return (at(0).isUnmapped() || at(0).hasOnlyRandomSamples()) &&
(at(1).isUnmapped() || at(1).hasOnlyRandomSamples());
}
bool isExtra() const
{
return (at(0).isUnmapped() || at(0).isExtra()) && (at(1).isUnmapped() || at(1).isExtra());
}
bool isFiltered() const { return at(0).isFiltered() || at(1).isFiltered(); }
friend std::ostream& operator<<(std::ostream& os, const AlignmentPair& p)
{
Expand Down
7 changes: 5 additions & 2 deletions src/include/align/PairBuilder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class PairBuilder {
const int aln_cfg_sec_phred_delta_;
const bool aln_cfg_sec_aligns_hard_;
const int aln_cfg_mapq_min_len_;
const int aln_cfg_sample_mapq0_;

mutable std::vector<int> reported_;

Expand All @@ -63,7 +64,8 @@ class PairBuilder {
const int aln_cfg_sec_score_delta,
const int aln_cfg_sec_phred_delta,
const bool aln_cfg_sec_aligns_hard,
const int aln_cfg_mapq_min_len)
const int aln_cfg_mapq_min_len,
const int aln_cfg_sample_mapq0)
: similarity_(similarity),
alnMinScore_(alnMinScore),
aln_cfg_unpaired_pen_(aln_cfg_unpaired_pen),
Expand All @@ -72,7 +74,8 @@ class PairBuilder {
aln_cfg_sec_score_delta_(aln_cfg_sec_score_delta),
aln_cfg_sec_phred_delta_(aln_cfg_sec_phred_delta),
aln_cfg_sec_aligns_hard_(aln_cfg_sec_aligns_hard),
aln_cfg_mapq_min_len_(aln_cfg_mapq_min_len)
aln_cfg_mapq_min_len_(aln_cfg_mapq_min_len),
aln_cfg_sample_mapq0_(aln_cfg_sample_mapq0)
{
}

Expand Down
7 changes: 5 additions & 2 deletions src/include/align/SinglePicker.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ class SinglePicker {
const int aln_cfg_sec_phred_delta_;
const bool aln_cfg_sec_aligns_hard_;
const int aln_cfg_mapq_min_len_;
const int aln_cfg_sample_mapq0_;

public:
typedef sequences::Read Read;
Expand All @@ -56,15 +57,17 @@ class SinglePicker {
const int aln_cfg_sec_score_delta,
const int aln_cfg_sec_phred_delta,
const bool aln_cfg_sec_aligns_hard,
const int aln_cfg_mapq_min_len)
const int aln_cfg_mapq_min_len,
const int aln_cfg_sample_mapq0)
: similarity_(similarity),
alnMinScore_(alnMinScore),
suppMinScoreAdj_(suppMinScoreAdj),
aln_cfg_sec_aligns_(aln_cfg_sec_aligns),
aln_cfg_sec_score_delta_(aln_cfg_sec_score_delta),
aln_cfg_sec_phred_delta_(aln_cfg_sec_phred_delta),
aln_cfg_sec_aligns_hard_(aln_cfg_sec_aligns_hard),
aln_cfg_mapq_min_len_(aln_cfg_mapq_min_len)
aln_cfg_mapq_min_len_(aln_cfg_mapq_min_len),
aln_cfg_sample_mapq0_(aln_cfg_sample_mapq0)
{
}

Expand Down
9 changes: 7 additions & 2 deletions src/include/bam/Bam.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
**/
#pragma once

#include <assert.h>
#include <cstdint>

namespace dragenos {
Expand Down Expand Up @@ -92,10 +93,14 @@ class BamRecordAccessor {
const std::size_t readLength() const { return h_->l_seq; }

const char* nameBegin() const { return h_->read_name; }
const char* nameEnd() const { return nameBegin() + h_->l_read_name - 1; }
const char* nameEnd() const { return nameBegin() + h_->l_read_name; }
const Name getName(const char qnameSuffixDelim) const
{
return Name(nameBegin(), std::find(nameBegin(), nameEnd(), qnameSuffixDelim));
assert(nameBegin() != nameEnd()); // unexpected empty name
const char* end = nameEnd() - 1;
assert(!*end); // name includes 0 terminator according to specs
Name ret(nameBegin(), std::find(nameBegin(), end, qnameSuffixDelim));
return ret;
}

const char* cigarBegin() const { return nameEnd(); }
Expand Down
2 changes: 1 addition & 1 deletion src/include/io/Bam2ReadTransformer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ class BamToReadTransformer {
const auto name = bra.getName(qnameSuffixDelim_);
const auto bases = bra.getBases();
const auto qscores = bra.getQscores();
tmpName_.assign(name.first, name.second - 1);
tmpName_.assign(name.first, name.second);
tmpBases_.clear();
if (bra.reverse()) {
for (auto it = bases.first; bases.second != it; ++it) {
Expand Down
1 change: 1 addition & 0 deletions src/include/map/SeedChain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ class SeedChain {
void setReverseComplement(bool reverseComplement) { reverseComplement_ = reverseComplement; }
bool isReverseComplement() const { return reverseComplement_; }
bool hasOnlyRandomSamples() const { return randomSamplesOnly_; }
bool setRandomSamplesOnly(bool randomSamplesOnly) { return randomSamplesOnly_ = randomSamplesOnly; }
/**
** \brief check if the chain would accept a seed mapping to the reference with that position and
*orientation
Expand Down
1 change: 1 addition & 0 deletions src/include/options/DragenOsOptions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ class DragenOsOptions : public common::Options {
uint32_t alignerMapqMax_ = 60; // Aligner.mapq-max
uint32_t alignerUnpairedPen_ = 80; // Aligner.unpaired-pen
int alignerXsPairPen_ = 25; // Aligner.xs-pair-penalty
int alignerSampleMapq0_ = 1; // Aligner.sample-map0

int alignerSecAligns_ = 0; // Aligner.sec-aligns
int alignerSecScoreDelta_ = 0; // Aligner.sec-score-delta
Expand Down
7 changes: 5 additions & 2 deletions src/lib/align/Aligner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void Aligner::updateIneligibility(

void Aligner::generateUngappedAlignment(const Read& read, map::SeedChain& seedChain, Alignment& alignment)
{
alignment.setChain(seedChain);
FlagType flags = !read.getPosition() ? Alignment::FIRST_IN_TEMPLATE : Alignment::LAST_IN_TEMPLATE;
flags |= seedChain.isReverseComplement() ? Alignment::REVERSE_COMPLEMENT : Alignment::NONE;
flags |= seedChain.isFiltered() ? Alignment::UNMAPPED : Alignment::NONE;
Expand Down Expand Up @@ -287,15 +288,15 @@ int Aligner::initializeUngappedAlignmentScores(
int malus = 0;

Database databaseBestLastToSeqLeft;
size_t bestToLastRefStart = referenceOffset + bestLast + 1;
size_t bestToLastRefStart = referenceOffset + bestLast + 1;
databaseBestLastToSeqLeft.reserve(seqLeft + 1);
refSeq_.getBases(bestToLastRefStart, bestToLastRefStart + seqLeft, databaseBestLastToSeqLeft);

/*
databaseBestLastToSeqLeft.reserve(seqLeft - bestLast + 1);
refSeq_.getBases(referenceOffset + bestLast + 1, referenceOffset + bestLast + 1 + seqLeft, databaseBestLastToSeqLeft);
*/

for (int i = bestLast + 1; i < seqLeft; ++i) {
const auto readBase = readBases[i];
const unsigned char referenceBase = databaseBestLastToSeqLeft[i - (bestLast + 1)];
Expand Down Expand Up @@ -492,6 +493,8 @@ bool Aligner::rescuePair(
rescued,
anchoredIdx)) {
chainBuilders_[rescuedIdx].addSeedChain(rescuedSeedChain);
rescued.setChain(chainBuilders_[rescuedIdx].back());

unpairedAlignments_[rescuedIdx].append(rescued);
makePair(
insertSizeParameters,
Expand Down
2 changes: 2 additions & 0 deletions src/lib/align/AlignmentRescue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,8 @@ bool AlignmentRescue::scan(
rescuedChain.addSeedPosition(map::SeedPosition(seed, referencePosition, 0), false);
}
rescuedChain.setPerfect(not conflict);
rescuedChain.setExtra(anchoredChain.isExtra());
rescuedChain.setRandomSamplesOnly(anchoredChain.hasOnlyRandomSamples());
//if (log) std::cerr << "scanning... succeeded" << std::endl;
DRAGEN_RESCUE_LOG << "RESCUE: "
<< "first_seed_pos=" << rescuedChain.firstReadBase()
Expand Down
8 changes: 6 additions & 2 deletions src/lib/align/PairBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,12 @@ void PairBuilder::updateEndMapq(

const MapqType mapq_prod_pen = pe_mapq - (sub_mapq_pen_v >> 7);

const MapqType mapq =
(INVALID_SCORE != xs_score_diff) ? std::min(xs_heur_mapq, mapq_prod_pen) : mapq_prod_pen;
const bool mapq0 = (aln_cfg_sample_mapq0_ >= 1 && best->hasOnlyRandomSamples()) ||
(aln_cfg_sample_mapq0_ >= 2 && best->isExtra());
const MapqType mapq = mapq0 ? 0
: (INVALID_SCORE != xs_score_diff)
? std::min(std::max(0, xs_heur_mapq), mapq_prod_pen)
: mapq_prod_pen;

#ifdef TRACE_SCORING
std::cerr << "[SCORING]\t"
Expand Down
6 changes: 5 additions & 1 deletion src/lib/align/SinglePicker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,11 @@ void SinglePicker::updateMapq(const int readLength, Alignments& alignments, Alig
<< "r" << 0 << " a2m_mapq=" << a2m_mapq << " sub_count=" << sub_count
<< " sub_count_log2=" << sub_count_log2 << " sub_mapq_pen_v=" << sub_mapq_pen_v << std::endl;
#endif // TRACE_SCORING
best->setMapq(a2m_mapq - (sub_mapq_pen_v >> 7));
const bool mapq0 = (aln_cfg_sample_mapq0_ >= 1 && best->hasOnlyRandomSamples()) ||
(aln_cfg_sample_mapq0_ >= 2 && best->isExtra());
const MapqType mapq = mapq0 ? 0 : a2m_mapq - (sub_mapq_pen_v >> 7);

best->setMapq(mapq);
const ScoreType xs = secondBestScore >= alnMinScore_ ? secondBestScore : INVALID_SCORE;
best->setXs(xs);
}
Expand Down
4 changes: 2 additions & 2 deletions src/lib/align/tests/integration/AlignerGtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ TEST(Aligner, updateMapqSingleEnded)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::SinglePicker picker(similarityScores, 19, 8, 0, 0, 0, false, 50);
const align::SinglePicker picker(similarityScores, 19, 8, 0, 0, 0, false, 50, 0);

const unsigned char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTC";
Expand Down Expand Up @@ -137,7 +137,7 @@ TEST(Aligner, updateMapqSingleEnded1XRepeat)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::SinglePicker picker(similarityScores, 19, 8, 0, 0, 0, false, 50);
const align::SinglePicker picker(similarityScores, 19, 8, 0, 0, 0, false, 50, 0);

const unsigned char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTC";
Expand Down
10 changes: 5 additions & 5 deletions src/lib/align/tests/integration/PairBuilderGtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ TEST(PairBuilder, updateMapq)
const short match = 1;
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50, 0);

const unsigned char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCTTATG";
Expand Down Expand Up @@ -135,7 +135,7 @@ TEST(PairBuilder, pickBest1)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50, 0);

char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCTTATG";
Expand Down Expand Up @@ -232,7 +232,7 @@ TEST(PairBuilder, pickBest2)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50, 0);

char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCTTATG";
Expand Down Expand Up @@ -338,7 +338,7 @@ TEST(PairBuilder, pickBest3)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50, 0);

char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCTTATG";
Expand Down Expand Up @@ -444,7 +444,7 @@ TEST(PairBuilder, pickBest4)
const short mismatch = -1;
const dragenos::align::SimilarityScores similarityScores(match, mismatch);
const ReferenceDirDummy referenceDir;
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50);
const align::PairBuilder pairBuilder(similarityScores, 19, 80, 25, 0, 0, 0, false, 50, 0);

char reference[] =
"TCCATCGAGATGGACGCCGTTGGCGCTCTCCGTCTTTCTCCATTGCGTCGTGGCCTTGCTATTGACTCTACTGTAGACATTTTTACTTTTTATGTCCCTCTTATG";
Expand Down
6 changes: 4 additions & 2 deletions src/lib/workflow/DualFastq2SamWorkflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,8 @@ void DualFastq2SamWorkflow::alignDualFastqBlock(
options_.alignerSecScoreDelta_,
options_.alignerSecPhredDelta_,
options_.alignerSecAlignsHard_,
options_.alignerMapqMinLen_);
options_.alignerMapqMinLen_,
options_.alignerSampleMapq0_);

// aligner is not stateless, make sure each thread uses its own.
align::Aligner aligner(
Expand Down Expand Up @@ -441,7 +442,8 @@ void DualFastq2SamWorkflow::parseDualFastq(
options_.alignerSecScoreDelta_,
options_.alignerSecPhredDelta_,
options_.alignerSecAlignsHard_,
options_.alignerMapqMinLen_);
options_.alignerMapqMinLen_,
options_.alignerSampleMapq0_);

fastq::FastqNRecordReader r1Reader(r1Stream);
fastq::FastqNRecordReader r2Reader(r2Stream);
Expand Down
6 changes: 4 additions & 2 deletions src/lib/workflow/Input2SamWorkflow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,8 @@ void parseSingleInput(
options.alignerSecScoreDelta_,
options.alignerSecPhredDelta_,
options.alignerSecAlignsHard_,
options.alignerMapqMinLen_);
options.alignerMapqMinLen_,
options.alignerSampleMapq0_);

const sam::SamGenerator sam(htConfig);

Expand Down Expand Up @@ -257,7 +258,8 @@ void parseSingleInput(
options.alignerSecScoreDelta_,
options.alignerSecPhredDelta_,
options.alignerSecAlignsHard_,
options.alignerMapqMinLen_);
options.alignerMapqMinLen_,
options.alignerSampleMapq0_);

align::Aligner aligner(
refSeq,
Expand Down

0 comments on commit c4cb6c9

Please sign in to comment.