Skip to content

Commit

Permalink
Merge pull request #53 from walaj/parallel
Browse files Browse the repository at this point in the history
Merge parallel into master
  • Loading branch information
walaj authored Mar 4, 2019
2 parents 7e93e58 + d8d06f1 commit d12cf22
Show file tree
Hide file tree
Showing 11 changed files with 129 additions and 97 deletions.
2 changes: 1 addition & 1 deletion src/svaba/AlignmentFragment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ AlignmentFragment::AlignmentFragment(const SeqLib::BamRecord &talign, bool flip)

m_align = talign;

talign.GetIntTag("SQ", sub_n);
assert(talign.GetIntTag("SQ", sub_n));

// orient cigar so it is on the contig orientation.
// need to do this to get the right ordering of the contig fragments below
Expand Down
149 changes: 82 additions & 67 deletions src/svaba/BreakPoint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@

#include "svaba_params.h"

// n is the max integer given the int size (e.g. 255). x is string with int
#define INTNSTOI(x,n) std::min((int)n, std::stoi(x));

// define repeats
static std::vector<std::string> repr = {"AAAAAAAAAAAAAAAA", "TTTTTTTTTTTTTTTT",
"CCCCCCCCCCCCCCCC", "GGGGGGGGGGGGGGGG",
Expand Down Expand Up @@ -63,13 +66,14 @@ using namespace SeqLib;
for (auto& s : allele)
max_lod = std::max(max_lod, s.second.LO);

ss << b1.chr_name << sep << b1.gr.pos1 << sep << b1.gr.strand << sep
<< b2.chr_name << sep << b2.gr.pos1 << sep << b2.gr.strand << sep
<< ref << sep << alt << sep
<< getSpan() << sep
<< b1.mapq << sep << b2.mapq << sep
<< b1.nm << sep << b2.nm << sep
<< dc.mapq1 << sep << dc.mapq2 << sep
ss << b1.chr_name << sep << b1.gr.pos1 << sep << b1.gr.strand << sep //1-3
<< b2.chr_name << sep << b2.gr.pos1 << sep << b2.gr.strand << sep //4-6
<< ref << sep << alt << sep //7-8
<< getSpan() << sep //9
<< b1.mapq << sep << b2.mapq << sep //10-11
<< b1.nm << sep << b2.nm << sep //12-13
<< dc.mapq1 << sep << dc.mapq2 << sep //14-15
<< a.split << sep << a.cigar << sep << a.alt << sep << a.cov << sep // ALL NEW 9/2018
//<< dc.ncount << sep << dc.tcount << sep
<< b1.sub_n << sep << b2.sub_n << sep
<< (homology.length() ? homology : "x") << sep
Expand Down Expand Up @@ -259,25 +263,29 @@ BreakPoint::BreakPoint(const std::string &line, const SeqLib::BamHeader& h) {
//case 15: dc.tcount = std::stoi(val); break;
case 14: dc.mapq1 = std::stoi(val); break;
case 15: dc.mapq2 = std::stoi(val); break;
case 16: b1.sub_n = std::stoi(val); break;
case 17: b2.sub_n = std::stoi(val); break;
case 18: homology = (val == "x" ? "" : val); break;
case 19: insertion = (val == "x" ? "" : val); break;
case 20: cname = val; break;
case 21: num_align = std::stoi(val); break;
case 22: confidence = val; break;
case 23: evidence = val; break;
case 24: quality = std::stoi(val); break;
case 25: secondary = val == "1";
case 26: somatic_score = std::stod(val); break;
case 27: somatic_lod = std::stod(val); break;
case 28: a.LO = std::stod(val); break;
case 29: pon = std::stoi(val); break;
case 30: repeat_seq = val; break;
case 31: blacklist = (val=="1"); break;
case 32: rs = val; break;
case 33: read_names = val; break;
case 34: bxtable = val; break;
case 16: a.split = std::stoi(val); break;
case 17: a.cigar = std::stoi(val); break;
case 18: a.alt = std::stoi(val); break;
case 19: a.cov = std::stoi(val); break;
case 20: b1.sub_n = std::stoi(val); break;
case 21: b2.sub_n = std::stoi(val); break;
case 22: homology = (val == "x" ? "" : val); break;
case 23: insertion = (val == "x" ? "" : val); break;
case 24: cname = val; break;
case 25: num_align = std::stoi(val); break;
case 26: confidence = val; break;
case 27: evidence = val; break;
case 28: quality = std::stoi(val); break;
case 29: secondary = val == "1";
case 30: somatic_score = std::stod(val); break;
case 31: somatic_lod = std::stod(val); break;
case 32: a.LO = std::stod(val); break;
case 33: pon = std::stoi(val); break;
case 34: repeat_seq = val; break;
case 35: blacklist = (val=="1"); break;
case 36: rs = val; break;
case 37: read_names = val; break;
case 38: bxtable = val; break;
default:
aaa.indel = evidence == "INDEL";
aaa.fromString(val);
Expand Down Expand Up @@ -1238,46 +1246,48 @@ ReducedBreakPoint::ReducedBreakPoint(const std::string &line, const SeqLib::BamH
alt_s = val;
break;
case 9: break; //span = stoi(val); break; // automatically calculated
case 10:
case 10: //mapq1
b1 = ReducedBreakEnd(GenomicRegion(chr1, pos1, pos1, h), std::stoi(val), chr_name1); b1.gr.strand = strand1; break;
case 11:
case 11: //mapq2
b2 = ReducedBreakEnd(GenomicRegion(chr2, pos2, pos2, h), std::stoi(val), chr_name2); b2.gr.strand = strand2; break;
case 12: b1.nm = std::stoi(val); break;
case 13: b2.nm = std::stoi(val); break;
case 16: b1.sub_n = std::min((int)255, std::stoi(val)); break;
case 17: b2.sub_n = std::min((int)255, std::stoi(val)); break;
//case 14: dc.ncount = std::min((int)255, std::stoi(val)); break;
//case 15: dc.tcount = std::min((int)255,std::stoi(val)); break;
case 14: dc.mapq1 = std::stoi(val); break;
case 15: dc.mapq2 = std::stoi(val); break;
case 18:
case 12: b1.nm = INTNSTOI(val,255); break;
case 13: b2.nm = INTNSTOI(val,255); break;
case 14: dc.mapq1 = INTNSTOI(val, 255); break;
case 15: dc.mapq2 = INTNSTOI(val, 255); break;
//case 16: split (not needed, since in genotype)
//case 17: cigar (not needed, since in genotype)
//case 18: alt (not needed, since in genotype)
case 19: cov = INTNSTOI(val,65535); break;
case 20: b1.sub_n = INTNSTOI(val,255); break;
case 21: b2.sub_n = INTNSTOI(val,255); break;
case 22:
homology_s = val;
break;
case 19:
case 23:
insertion_s = val;
break;
case 20: cname_s = val; break;
case 21: num_align = std::min((int)31, std::stoi(val)); break;
case 22:
case 24: cname_s = val; break;
case 25: num_align = std::min((int)31, std::stoi(val)); break;
case 26:
pass = val == "PASS";
confidence_s = val;
break;
case 23:
case 27:
evidence_s = val;
indel = val == "INDEL";
imprecise = val == "DSCRD";
break;
case 24: quality = std::stod(val); break; //std::min((int)255,std::stoi(val)); break;
case 25: secondary = val == "1" ? 1 : 0;
case 26: somatic_score = std::stod(val); break;
case 27: somatic_lod = std::stod(val); break;
case 28: true_lod = std::stod(val); break;
case 29: pon = std::min(255,std::stoi(val)); break;
case 30: repeat_s = val; break; // repeat_seq
case 31: blacklist = (val=="1" ? 1 : 0); break;
case 32: dbsnp = val != "x"; break;
case 33: read_names_s = val; break; //reads
case 34: bxtable_s = val; break; //bx tags
case 28: quality = std::stod(val); break; //std::min((int)255,std::stoi(val)); break;
case 29: secondary = val == "1" ? 1 : 0;
case 30: somatic_score = std::stod(val); break;
case 31: somatic_lod = std::stod(val); break;
case 32: true_lod = std::stod(val); break;
case 33: pon = std::min(255,std::stoi(val)); break;
case 34: repeat_s = val; break; // repeat_seq
case 35: blacklist = (val=="1" ? 1 : 0); break;
case 36: dbsnp = val != "x"; break;
case 37: read_names_s = val; break; //reads
case 38: bxtable_s = val; break; //bx tags
default:
format_s.push_back(val);
}
Expand Down Expand Up @@ -1672,25 +1682,30 @@ bool ReducedBreakPoint::operator<(const ReducedBreakPoint& bp) const {
else if (std::strcmp(evidence, bp.evidence) > 0) // >
return false;

if (nsplit > bp.nsplit)
if (cov > bp.cov)
return true;
else if (nsplit < bp.nsplit)
else if (cov < bp.cov)
return false;

//if (nsplit > bp.nsplit)
// return true;
//else if (nsplit < bp.nsplit)
// return false;

if (tsplit > bp.tsplit)
return true;
else if (tsplit < bp.tsplit)
return false;
//if (tsplit > bp.tsplit)
// return true;
//else if (tsplit < bp.tsplit)
// return false;

if (dc.ncount > bp.dc.ncount)
return true;
else if (dc.ncount < bp.dc.ncount)
return false;
//if (dc.ncount > bp.dc.ncount)
// return true;
//else if (dc.ncount < bp.dc.ncount)
// return false;

if (dc.tcount > bp.dc.tcount)
return true;
else if (dc.tcount < bp.dc.tcount)
return false;
//if (dc.tcount > bp.dc.tcount)
// return true;
//else if (dc.tcount < bp.dc.tcount)
// return false;

// break the tie somehow
if (cname > bp.cname)
Expand Down
7 changes: 4 additions & 3 deletions src/svaba/BreakPoint.h
Original file line number Diff line number Diff line change
Expand Up @@ -137,8 +137,9 @@ struct ReducedBreakEnd {
double somatic_lod = 0; // LogOdds that variant not in normal
double true_lod = 0;

uint32_t nsplit:8, tsplit:8, af_n:7, num_align:5, secondary:1, dbsnp:1, pass:1, blacklist:1, indel:1, imprecise:1;
uint32_t tcov_support:8, ncov_support:8, tcov:8, ncov:8;
//uint32_t nsplit:8, tsplit:8,
//uint32_t tcov_support:8, ncov_support:8, tcov:8, ncov:8;
uint32_t cov:16, af_n:7, num_align:5, secondary:1, dbsnp:1, pass:1, blacklist:1, indel:1, imprecise:1;
uint32_t tcigar:8, ncigar:8, dummy:8, af_t:8;
float quality;
uint8_t pon;
Expand Down Expand Up @@ -196,7 +197,7 @@ struct ReducedBreakEnd {
struct BreakPoint {

static std::string header() {
return "chr1\tpos1\tstrand1\tchr2\tpos2\tstrand2\tref\talt\tspan\tmapq1\tmapq2\tnm1\tnm2\tdisc_mapq1\tdisc_mapq2\tsub_n1\tsub_n2\thomology\tinsertion\tcontig\tnumalign\tconfidence\tevidence\tquality\tsecondary_alignment\tsomatic_score\tsomatic_lod\ttrue_lod\tpon_samples\trepeat_seq\tgraylist\tDBSNP\treads\tbxtags";
return "chr1\tpos1\tstrand1\tchr2\tpos2\tstrand2\tref\talt\tspan\tmapq1\tmapq2\tnm1\tnm2\tdisc_mapq1\tdisc_mapq2\tsplit\tcigar\talt\tcov\tsub_n1\tsub_n2\thomology\tinsertion\tcontig\tnumalign\tconfidence\tevidence\tquality\tsecondary_alignment\tsomatic_score\tsomatic_lod\ttlod\tpon_samples\trepeat_seq\tgraylist\tDBSNP\treads\tbxtags";
}

double somatic_score = 0;
Expand Down
4 changes: 2 additions & 2 deletions src/svaba/DiscordantCluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,7 @@ using namespace SeqLib;
mapq1 = __getMeanMapq(false);
mapq2 = __getMeanMapq(true);
assert(mapq1 >= 0);
assert(mapq2 >= -1); // can have -1 as placeholder if no mate reads (bc didnt do lookup)x
assert(mapq2 >= -1); // can have -1 as placeholder if no mate reads (bc didnt do lookup)

// orient them correctly so that left end is first
if (m_reg2 < m_reg1) {
Expand Down Expand Up @@ -394,7 +394,7 @@ using namespace SeqLib;
return -1;

if (tmapq.size() > 0)
mean = std::accumulate(tmapq.begin(), tmapq.end(), 0.0) / tmapq.size();
mean = ((double)std::accumulate(tmapq.begin(), tmapq.end(), 0.0)) / ((double)tmapq.size());

// actually, get the median
//if (tmapq.size() > 0)
Expand Down
11 changes: 6 additions & 5 deletions src/svaba/run_svaba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ static struct timespec start;

// learned value
static int max_mapq_possible;
static std::string args = "svaba "; // hold string of what the input args were
static std::string args = "svaba"; // hold string of what the input args were
static int32_t readlen = 0;

namespace opt {
Expand Down Expand Up @@ -122,6 +122,7 @@ namespace opt {
static size_t mate_region_lookup_limit = 400;
static bool interchrom_lookup = true;
static int32_t max_reads_per_assembly = -1; // set default of 50000 in parseRunOptions
static bool no_bad_avoid = true; // if true, don't avoid previously bad regions

// additional optional params
static int chunk = 25000;
Expand Down Expand Up @@ -646,6 +647,7 @@ void runsvaba(int argc, char** argv) {
os_discordant << DiscordantCluster::header() << std::endl;

// put args into string for VCF later
args += "(v" + std::string(SVABA_VERSION) + ") ";
for (int i = 0; i < argc; ++i)
args += std::string(argv[i]) + " ";

Expand Down Expand Up @@ -930,7 +932,6 @@ bool runWorkItem(const SeqLib::GenomicRegion& region, svabaThreadUnit& wu, long
wu.badd.Concat(w.second.readBam(&log_file));
wu.badd.MergeOverlappingIntervals();
wu.badd.CreateTreeMap();


// adjust the counts
if (w.first.at(0) == 't') {
Expand Down Expand Up @@ -1475,7 +1476,7 @@ CountPair run_mate_collection_loop(const SeqLib::GenomicRegion& region, WalkerMa
for (auto& s : tmp_somatic_mate_regions) {

// check if its not bad from mate region
if (badd.size())
if (badd.size() && !opt::no_bad_avoid)
if (badd.CountOverlaps(s))
continue;

Expand Down Expand Up @@ -1847,15 +1848,15 @@ void collect_and_clear_reads(WalkerMap& walkers, svabaReadVector& brv, std::vect
dedupe.insert(sr);
}
}

// concat together all of the learning sequences
if (opt::ec_correct_type != "s")
assert(!w.second.all_seqs.size());
for (auto& r : w.second.all_seqs) {
learn_seqs.push_back(strdup(r));
free(r); // free what was alloced in
}

w.second.all_seqs.clear();
w.second.reads.clear();
}
Expand Down
2 changes: 2 additions & 0 deletions src/svaba/run_svaba.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

#include "workqueue.h"

#define SVABA_VERSION "1.1.0"

// typedefs
typedef std::map<std::string, std::string> BamMap;
typedef std::pair<int, int> CountPair;
Expand Down
4 changes: 2 additions & 2 deletions src/svaba/svaba.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@
#include "refilter.h"
#include "run_svaba.h"

#define AUTHOR "Jeremiah Wala <[email protected]>"
#define AUTHOR "Jeremiah Wala <[email protected]"

static const char *SVABA_USAGE_MESSAGE =
"------------------------------------------------------------\n"
"-------- SvABA - SV and indel detection by assembly --------\n"
"------------------------------------------------------------\n"
"Program: SvABA \n"
"Version: 1.0.1 \n"
"Version: " SVABA_VERSION "\n"
"Contact: Jeremiah Wala [ [email protected] ]\n"
"Usage: svaba <command> [options]\n\n"
"Commands:\n"
Expand Down
2 changes: 0 additions & 2 deletions src/svaba/svabaBamWalker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,6 @@ SeqLib::GRC svabaBamWalker::readBam(std::ofstream * log) {
// keep track of regions where we hit the weird-read limit
SeqLib::GRC bad_regions;


// buffer to store reads from a region
// if we don't hit the limit, then add them to be assembled
svabaReadVector this_reads;
Expand Down Expand Up @@ -135,7 +134,6 @@ SeqLib::GRC svabaBamWalker::readBam(std::ofstream * log) {
<< " weird reads. Limit: " << SeqLib::AddCommas(m_limit) << std::endl;
if (log)
(*log) << ss.str();
std::cerr << ss.str();

if (m_region.size())
bad_regions.add(m_region[tb->m_region_idx]);
Expand Down
10 changes: 4 additions & 6 deletions src/svaba/svabaUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,11 @@ static std::string POLYTC = "TCTCTCTCTCTCTCTCTCTCTCTC";
// set the time string
time_t t = time(0); // get time now
struct tm * now = localtime( & t );
std::stringstream month;
std::stringstream mdate;
if ( (now->tm_mon+1) < 10)
month << "0" << now->tm_mon+1;
else
month << now->tm_mon+1;
mdate << (now->tm_year + 1900) << month.str() << now->tm_mday;
mdate << (now->tm_year + 1900)
<< (now->tm_mon + 1 < 10 ? "0": "") << (now->tm_mon + 1)
<< (now->tm_mday < 10 ? "0" : "")
<< now->tm_mday;
return mdate.str();
}

Expand Down
Loading

0 comments on commit d12cf22

Please sign in to comment.