From 0de53ff674279b5c20d173574086c0a809ad1612 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 27 Oct 2024 16:15:41 -0700 Subject: [PATCH 1/3] src/abismal.cpp: updating the definition of ambiguous for paired-end reads to use Hamming distance as tiebreaker when alignment scores are identical. This has the effect of not marking near-identical concordant mappings as ambiguous by simply choosing one among the two. Since the ends would be mapped as single-end the major consequence is to make it easier to identify concordant mappers downstream when merging them into fragments --- src/abismal.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/abismal.cpp b/src/abismal.cpp index 2a3ac3a..1beb0f3 100644 --- a/src/abismal.cpp +++ b/src/abismal.cpp @@ -445,13 +445,15 @@ struct pe_element { } bool update(const score_t scr, const se_element &s1, const se_element &s2) { - if (scr > aln_score) { + const auto rd = r1.diffs + r2.diffs; + const auto sd = s1.diffs + s2.diffs; + if (scr > aln_score || (scr == aln_score && sd < rd)) { r1 = s1; r2 = s2; aln_score = scr; return true; } - else if (scr == aln_score) { + else if (scr == aln_score && sd == rd) { r1.set_ambig(); return false; } From 43ad31af55bb5c5db18e7f76d760ce0fea738dba Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 27 Oct 2024 16:16:07 -0700 Subject: [PATCH 2/3] data/md5sum.txt: updating hashes for change to how ambiguity is defined within abismal --- data/md5sum.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/data/md5sum.txt b/data/md5sum.txt index 1be10d3..af69295 100644 --- a/data/md5sum.txt +++ b/data/md5sum.txt @@ -2,15 +2,15 @@ 04f73fa36d90fbe9e157e5c3214e585a tests/reads.mstats da3003dc18e6ecfcf0586705444ca6f8 tests/reads_pbat_pe_1.fq bbfd18b0cc7cf93ad4ed317ef996f3ee tests/reads_pbat_pe_2.fq -7d2e72d75fe2e55f978dffa76a492c61 tests/reads_pbat_pe.mstats -2415d44925cc23fa60a0d78c9af04509 tests/reads_pbat_pe.sam 
+d1dccf9708510156c9fda4f1071c7961 tests/reads_pbat_pe.mstats +4da080480c5709961854141cac61ee97 tests/reads_pbat_pe.sam d49efb9f420b4edeb4647217fa247e6d tests/reads_pe_1.fq 3627c807f259109ea0c4c2b7bcd12320 tests/reads_pe_2.fq -0efe8fb80106f2edb370e8f7e7c1bbb6 tests/reads_pe.mstats -3c10bd0ce3f7d458a05a334a351d96ff tests/reads_pe.sam +83a686ffbef21d1e215d5472e5d09720 tests/reads_pe.mstats +60a9b686be708329cc7b6ca3cc666667 tests/reads_pe.sam 3dd98274d8b707878aaad8246c6df9f6 tests/reads_rpbat_pe_1.fq fcf5e4d93ac62dafcfb279e95103ad0e tests/reads_rpbat_pe_2.fq -19329a8ec659a82dc82e56a0ca6999b0 tests/reads_rpbat_pe.mstats -c6980b863b86d7e491e3f9358943bcd3 tests/reads_rpbat_pe.sam +06a71b984820d14d25968cb513e8e95c tests/reads_rpbat_pe.mstats +76ce5d7d379e2bd571b32ccb0afbe786 tests/reads_rpbat_pe.sam 0f2ad3720fd50961494222a3cf1dbef1 tests/reads.sam bcbf01be810cbf4051292813eb6b9225 tests/tRex1.idx From 25fd5fc82cb17dcc7392e7cc215166b4c533a096 Mon Sep 17 00:00:00 2001 From: Andrew D Smith Date: Sun, 27 Oct 2024 16:26:50 -0700 Subject: [PATCH 3/3] data/md5sum.txt: updating hashes because the previous PR hashes were forgotten --- data/md5sum.txt | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/data/md5sum.txt b/data/md5sum.txt index af69295..00f8a14 100644 --- a/data/md5sum.txt +++ b/data/md5sum.txt @@ -1,16 +1,16 @@ -8dc13cfc9c7af9c5ed368d5e2462275e tests/reads_1.fq -04f73fa36d90fbe9e157e5c3214e585a tests/reads.mstats -da3003dc18e6ecfcf0586705444ca6f8 tests/reads_pbat_pe_1.fq -bbfd18b0cc7cf93ad4ed317ef996f3ee tests/reads_pbat_pe_2.fq -d1dccf9708510156c9fda4f1071c7961 tests/reads_pbat_pe.mstats -4da080480c5709961854141cac61ee97 tests/reads_pbat_pe.sam -d49efb9f420b4edeb4647217fa247e6d tests/reads_pe_1.fq -3627c807f259109ea0c4c2b7bcd12320 tests/reads_pe_2.fq -83a686ffbef21d1e215d5472e5d09720 tests/reads_pe.mstats -60a9b686be708329cc7b6ca3cc666667 tests/reads_pe.sam -3dd98274d8b707878aaad8246c6df9f6 tests/reads_rpbat_pe_1.fq 
-fcf5e4d93ac62dafcfb279e95103ad0e tests/reads_rpbat_pe_2.fq -06a71b984820d14d25968cb513e8e95c tests/reads_rpbat_pe.mstats -76ce5d7d379e2bd571b32ccb0afbe786 tests/reads_rpbat_pe.sam -0f2ad3720fd50961494222a3cf1dbef1 tests/reads.sam +e95a8739a378bc5628c76ffbe8293682 tests/reads_1.fq +447647120a08d3b58162414fae4f4a39 tests/reads.mstats +5487eb32b5492ddba3a95a8461e2ff4c tests/reads_pbat_pe_1.fq +aff19aeae4184c38cf364b3a94527098 tests/reads_pbat_pe_2.fq +adcdbadcd3cc14bfb57c170802e16bf7 tests/reads_pbat_pe.mstats +07773c36061cbe787c568d97b9ab0d5e tests/reads_pbat_pe.sam +514bf940e7d5f44e57291a75e1fc6629 tests/reads_pe_1.fq +64e17fc1e424f9cb21879d38ddf745e5 tests/reads_pe_2.fq +771991a3d074b6587f6faa8f538f7112 tests/reads_pe.mstats +759c446ff985911feb074fe70843e248 tests/reads_pe.sam +e94e71292fccd255bad3f3694efe7a4b tests/reads_rpbat_pe_1.fq +8bfa00acd0639dc66acd5aa8ac0369d5 tests/reads_rpbat_pe_2.fq +3a0a49feb314f6cb8fa49c4c114b52fc tests/reads_rpbat_pe.mstats +64566ed0651fabe719f911b86cf05dc6 tests/reads_rpbat_pe.sam +e7a2a3690bc17de0881e7fb2623841ec tests/reads.sam bcbf01be810cbf4051292813eb6b9225 tests/tRex1.idx