From 9cbcb07cdb09488f5b3501ed4ca4c7baed4a48e9 Mon Sep 17 00:00:00 2001 From: Cloud User Date: Mon, 3 Jun 2024 19:20:28 +0000 Subject: [PATCH] Added the option to output scores and remove the traceback pointer streaming in the top level function. Added the kernel configuration for local affine with scores without traceback, targeting the BSW kernel in Darwin-WGA. --- CMakeLists.txt | 10 +- include/align.h | 24 ++- include/seq_align_multiple.h | 9 +- .../banding_local_affine_scored.cpp | 189 ++++++++++++++++++ kernels/banding_local_affine_scored/params.h | 67 +++++++ src/align.cpp | 37 +++- src/seq_align_multiple.cpp | 39 +++- .../test_csim_banding_local_affine_scored.cpp | 184 +++++++++++++++++ 8 files changed, 526 insertions(+), 33 deletions(-) create mode 100644 kernels/banding_local_affine_scored/banding_local_affine_scored.cpp create mode 100644 kernels/banding_local_affine_scored/params.h create mode 100644 testbench/test_csim_banding_local_affine_scored.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 273f0117..f68faa8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,7 +38,7 @@ set(EXECUTABLE_TARGETS test_csim_global_two_piece_affine test_csim_banding_global_two_piece_affine test_csim_banding_local_affine - + test_csim_banding_local_affine_scored test_global_affine_real_data ) @@ -90,6 +90,13 @@ add_executable(test_csim_global_affine "kernels/global_affine/kernel_global_affine.cpp" ${COMMON_SRCS}) +# BSW in Darwin-WGA +add_executable(test_csim_banding_local_affine_scored + "kernels/banding_local_affine_scored/banding_local_affine_scored.cpp" + "testbench/test_csim_banding_local_affine_scored.cpp" + ${COMMON_SRCS}) + + add_executable(test_csim_local_affine "testbench/test_csim_local_affine.cpp" "kernels/local_affine/kernel_local_affine.cpp" @@ -192,6 +199,7 @@ target_include_directories(test_csim_global_two_piece_affine PRIVATE "${DP_HLS_H target_include_directories(test_global_affine_real_data PRIVATE "${DP_HLS_HOME}/kernels/global_affine") 
target_include_directories(test_csim_banding_global_two_piece_affine PRIVATE "${DP_HLS_HOME}/kernels/banding_global_two_piece_affine") target_include_directories(test_csim_banding_local_affine PRIVATE "${DP_HLS_HOME}/kernels/banding_local_affine") +target_include_directories(test_csim_banding_local_affine_scored PRIVATE "${DP_HLS_HOME}/kernels/banding_local_affine_scored") # Add include to Xilinx Library Files if (RAPTOR) diff --git a/include/align.h b/include/align.h index f871d187..7848feb2 100644 --- a/include/align.h +++ b/include/align.h @@ -254,11 +254,15 @@ namespace Align const idx_t query_length, const idx_t reference_length, const Penalties &penalties, - idx_t &tb_i, idx_t &tb_j, - tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] + idx_t &tb_i, idx_t &tb_j +#ifndef NO_TRACEBACK + , tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] +#endif +#ifdef SCORED + , type_t &score +#endif #ifdef CMAKEDEBUG - , - Container &debugger + , Container &debugger #endif ); @@ -324,11 +328,15 @@ namespace Align const idx_t query_length, const idx_t reference_length, const Penalties &penalties, - idx_t &tb_i, idx_t &tb_j, - tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] + idx_t &tb_i, idx_t &tb_j +#ifndef NO_TRACEBACK + , tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] +#endif +#ifdef SCORED + , type_t &score +#endif #ifdef CMAKEDEBUG - , - Container &debugger + , Container &debugger #endif ); diff --git a/include/seq_align_multiple.h b/include/seq_align_multiple.h index 8145ce4e..520a5e4c 100644 --- a/include/seq_align_multiple.h +++ b/include/seq_align_multiple.h @@ -25,8 +25,13 @@ extern "C" { idx_t (&query_lengths)[N_BLOCKS], idx_t (&reference_lengths)[N_BLOCKS], Penalties (&penalties)[N_BLOCKS], - idx_t (&tb_is)[N_BLOCKS], idx_t (&tb_js)[N_BLOCKS], - tbr_t (&tb_streams)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH][N_BLOCKS] + idx_t (&tb_is)[N_BLOCKS], idx_t (&tb_js)[N_BLOCKS] +#ifndef NO_TRACEBACK + , tbr_t 
(&tb_streams)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH][N_BLOCKS] +#endif +#ifdef SCORED + , type_t (&scores)[N_BLOCKS] +#endif #ifdef CMAKEDEBUG , Container (&debugger)[N_BLOCKS] #endif diff --git a/kernels/banding_local_affine_scored/banding_local_affine_scored.cpp b/kernels/banding_local_affine_scored/banding_local_affine_scored.cpp new file mode 100644 index 00000000..456d0c49 --- /dev/null +++ b/kernels/banding_local_affine_scored/banding_local_affine_scored.cpp @@ -0,0 +1,189 @@ +#include "frontend.h" + +void BSWAffine::PE::Compute(char_t local_query_val, + char_t local_reference_val, + score_vec_t up_prev, + score_vec_t diag_prev, + score_vec_t left_prev, + const Penalties penalties, + score_vec_t &write_score, + tbp_t &write_traceback) +{ + +// Define Traceback Pointer Navigation Direction + + const type_t insert_open = left_prev[1] + penalties.open + penalties.extend; // Insert open + const type_t insert_extend = left_prev[0] + penalties.open; // insert extend + const type_t delete_open = up_prev[1] + penalties.open + penalties.extend; // delete open + const type_t delete_extend = up_prev[2] + penalties.open; // delete extend + + bool insert_open_b = insert_open > insert_extend; + bool delete_open_b = delete_open > delete_extend; + write_score[0] = insert_open_b ? insert_open : insert_extend; + write_score[2] = delete_open_b ? delete_open : delete_extend; + tbp_t insert_tb = insert_open_b ? (tbp_t) 0 : TB_IMAT; + tbp_t delete_tb = delete_open_b ? (tbp_t) 0 : TB_DMAT; + + + const type_t match = (local_query_val == local_reference_val) ? diag_prev[1] + penalties.match : diag_prev[1] + penalties.mismatch; + + type_t max_value = write_score[0] > write_score[2] ? write_score[0] : write_score[2]; // compare between insertion and deletion + max_value = max_value > match ? max_value : match; + max_value = max_value >= 0 ? 
max_value : (type_t) 0; // compare with match/mismatch + write_score[1] = max_value; + + tbp_t dir_tb; + + // Set traceback pointer based on the direction of the maximum score. + if (max_value == write_score[0]) + { // Insert Case + dir_tb = TB_LEFT; + } + else if (max_value == write_score[2]) + { + dir_tb = TB_UP; + } + else if (max_value == write_score[1]) + { + dir_tb = TB_DIAG; + } else if (max_value == 0){ + dir_tb = TB_PH; + } + else + { + // Undefined behavior happens if the max score is none of the I, D, or M. + } + + write_traceback = dir_tb + insert_tb + delete_tb; +} + +void BSWAffine::InitializeScores( + score_vec_t (&init_col_scr)[MAX_QUERY_LENGTH], + score_vec_t (&init_row_scr)[MAX_REFERENCE_LENGTH], + Penalties penalties) +{ +// Utils::Init::ArrSet(init_col_scr, {NINF, 0.0, 0.0}); // qry +// Utils::Init::ArrSet(init_row_scr, {0.0, 0.0, NINF}); // reference layer 0 + + InitializeColumnScores: + for (int i = 0; i < MAX_QUERY_LENGTH; i++) + { + init_col_scr[i][0] = NINF; + init_col_scr[i][1] = 0.0; + init_col_scr[i][2] = 0.0; + } + InitializeRowScores: + for (int i = 0; i < MAX_REFERENCE_LENGTH; i++) + { + init_row_scr[i][0] = 0.0; + init_row_scr[i][1] = 0.0; + init_row_scr[i][2] = NINF; + } +} + +void BSWAffine::UpdatePEMaximum( + const wavefront_scores_inf_t scores, + ScorePack (&max)[PE_NUM], + const idx_t chunk_row_offset, const idx_t wavefront, + const idx_t p_cols, const idx_t ck_idx, + const bool (&predicate)[PE_NUM], + const idx_t query_len, const idx_t ref_len){ + + for (idx_t i = 0; i < PE_NUM; i++) + { +#pragma HLS unroll + if (predicate[i] && (scores[i + 1][LAYER_MAXIMIUM] > max[i].score)) + { + max[i].score = scores[i + 1][LAYER_MAXIMIUM]; + max[i].p_col = p_cols; + max[i].ck = ck_idx; + } + } +} + +void BSWAffine::InitializeMaxScores(ScorePack (&max)[PE_NUM], idx_t qry_len, idx_t ref_len) +{ + for (int i = 0; i < PE_NUM; i++) + { +#pragma HLS unroll + max[i].score = NINF; + max[i].p_col = 0; + max[i].ck = 0; + } +} + +void 
BSWAffine::Traceback::StateMapping(tbp_t tbp, TB_STATE &state, tbr_t &navigation) +{ + + if (state == TB_STATE::MM) + { + if (tbp(1, 0) == TB_DIAG) + { + navigation = AL_MMI; + } + else if (tbp(1, 0) == TB_UP) + { + state = TB_STATE::DEL; + navigation = AL_NULL; + } + else if (tbp(1, 0) == TB_LEFT) + { + state = TB_STATE::INS; + navigation = AL_NULL; + } else if (tbp(1, 0) == TB_PH){ + navigation = AL_END; + } + } + else if (state == TB_STATE::DEL) + { + if ((bool)tbp[3]) + { // deletion extending + // states remains the same. + // printf("delete extend"); + } + else + { // deletion closing + state = TB_STATE::MM; // set the state back to MM + } + navigation = AL_DEL; + } + else if (state == TB_STATE::INS) + { + if ((bool)tbp[2]) + { // insertion extending + // states remains the same. + // ("delete extend"); + } + else + { // insertion closing + state = TB_STATE::MM; // set the state back to MM + } + navigation = AL_INS; + } + else + { + // Placeholder for kernel side debug + } +} + +void BSWAffine::Traceback::StateInit(tbp_t tbp, TB_STATE &state) +{ + if (tbp(1, 0) == TB_DIAG) + { + state = TB_STATE::MM; + } + else if (tbp(1, 0) == TB_UP) + { + state = TB_STATE::DEL; + } + else if (tbp(1, 0) == TB_LEFT) + { + state = TB_STATE::INS; + } + else + { + // Placeholder for kernel side debug + } +} + +// <<< Local Affine Implementation <<< diff --git a/kernels/banding_local_affine_scored/params.h b/kernels/banding_local_affine_scored/params.h new file mode 100644 index 00000000..1cfc5235 --- /dev/null +++ b/kernels/banding_local_affine_scored/params.h @@ -0,0 +1,67 @@ +#ifndef PARAMS_H +#define PARAMS_H + +#include +#include +#include + +#define MAX_QUERY_LENGTH 256 +#define MAX_REFERENCE_LENGTH 256 + +#define BANDING FIXED +#define BANDWIDTH 64 + +#define SCORED +#define NO_TRACEBACK + +#define PE_NUM 32 + +#define ALIGN_TYPE BSWAffine +#define N_BLOCKS 8 +#define N_LAYERS 3 +#define LAYER_MAXIMIUM 1 // We need to indicate from which layer (main matrix) is the maximum 
score stored. + +// Primitive Types +typedef ap_uint<2> char_t; // Sequence Alphabet +typedef ap_fixed<16, 12> type_t; // Scores Type +typedef short idx_t; // Indexing Type, could be much less than 32. ap_uint<8> +typedef ap_uint<4> tbp_t; // Traceback Pointer Type + +// Define Zero Value +#define zero_fp ((type_t)0) +#define ZERO_CHAR (char_t(0)) + +// Define upper and lower bound for score type, aka type_t +#define INF 1024 +#define NINF -1024 + +// Legacy Debugger Configuration +#define DEBUG_OUTPUT_PATH "/home/yic033@AD.UCSD.EDU/DP-HLS-Debug/local_affine/" +#define DEBUG_FILENAME "debug_kernel" + +#define TB_PH (tbp_t) 0b00 +#define TB_LEFT (tbp_t) 0b01 +#define TB_DIAG (tbp_t) 0b10 +#define TB_UP (tbp_t) 0b11 + +#define TB_IMAT (tbp_t) 0b0100 // Insertion Matrix +#define TB_DMAT (tbp_t) 0b1000 // Deletion Matrix + + +struct Penalties { + type_t open; + type_t extend; + type_t mismatch; + type_t match; + type_t linear_gap; +}; + +enum TB_STATE { + MM = 0, // Match/Mismatch + INS = 1, // Insertion + DEL = 2, // Deletion + END = 3 // End +}; + + +#endif \ No newline at end of file diff --git a/src/align.cpp b/src/align.cpp index c348f0e3..a7b1f16b 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -356,11 +356,15 @@ void Align::Rectangular::AlignStatic( const idx_t query_length, const idx_t reference_length, const Penalties &penalties, - idx_t &tb_i, idx_t &tb_j, - tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] + idx_t &tb_i, idx_t &tb_j +#ifndef NO_TRACEBACK + , tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] +#endif +#ifdef SCORED + , type_t &score +#endif #ifdef CMAKEDEBUG - , - Container &debugger + , Container &debugger #endif ) { @@ -373,7 +377,6 @@ void Align::Rectangular::AlignStatic( tbp_t tbp_matrix[PE_NUM][TBMEM_SIZE]; bool col_pred[PE_NUM]; - #pragma HLS bind_storage variable = init_row_score type = ram_t2p impl = bram #pragma HLS array_partition variable = tbp_matrix type = cyclic factor = PRAGMA_PE_NUM dim = 1 @@ -447,6 +450,10 @@ 
void Align::Rectangular::AlignStatic( cout << "Traceback start idx physical: " << maximum.ck << " " << max_pe << " " << maximum.p_col << endl; #endif +#ifdef SCORED + score = maximum.score; +#endif + #ifndef NO_TRACEBACK Traceback::TracebackFixedSize(tbp_matrix, tb_out, maximum.ck, max_pe, maximum.p_col, tb_i, tb_j); #endif @@ -502,11 +509,15 @@ void Align::Fixed::AlignStatic( const idx_t query_length, const idx_t reference_length, const Penalties &penalties, - idx_t &tb_i, idx_t &tb_j, - tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] + idx_t &tb_i, idx_t &tb_j +#ifndef NO_TRACEBACK + , tbr_t (&tb_out)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH] +#endif +#ifdef SCORED + , type_t &score +#endif #ifdef CMAKEDEBUG - , - Container &debugger + , Container &debugger #endif ){ @@ -623,7 +634,12 @@ void Align::Fixed::AlignStatic( std::cout << "First TBP: " << tbp_matrix[max_pe][maximum.p_col] << std::endl; #endif +#ifdef SCORED + score = maximum.score; +#endif +#ifndef NO_TRACEBACK Traceback::TracebackFixedSize<2 * BANDWIDTH - 1>(tbp_matrix, tb_out, maximum.ck, max_pe, maximum.p_col, tb_i, tb_j); +#endif #ifdef CMAKEDEBUG std::cout << "Traceback done" << std::endl; #endif @@ -643,8 +659,7 @@ void Align::Fixed::ChunkCompute( ScorePack (&max)[PE_NUM], // write out so must pass by reference tbp_t (&chunk_tbp_out)[PE_NUM][TBMEM_SIZE] #ifdef CMAKEDEBUG - , - Container &debugger + , Container &debugger #endif ){ diff --git a/src/seq_align_multiple.cpp b/src/seq_align_multiple.cpp index b7b90fac..c2a405a1 100644 --- a/src/seq_align_multiple.cpp +++ b/src/seq_align_multiple.cpp @@ -39,14 +39,17 @@ extern "C" idx_t (&query_lengths)[N_BLOCKS], idx_t (&reference_lengths)[N_BLOCKS], Penalties (&penalties)[N_BLOCKS], - idx_t (&tb_is)[N_BLOCKS], idx_t (&tb_js)[N_BLOCKS], + idx_t (&tb_is)[N_BLOCKS], idx_t (&tb_js)[N_BLOCKS] +#ifndef NO_TRACEBACK + , tbr_t (&tb_streams)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH][N_BLOCKS] +#endif +#ifdef SCORED + , type_t (&scores)[N_BLOCKS] +#endif 
#ifdef CMAKEDEBUG - tbr_t (&tb_streams)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH][N_BLOCKS], - Container (&debugger)[N_BLOCKS]) -#else - tbr_t (&tb_streams)[MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH][N_BLOCKS]) + , Container (&debugger)[N_BLOCKS] #endif - { + ){ // Initialize local buffer to copy the input data char_t querys_b[N_BLOCKS][MAX_QUERY_LENGTH]; char_t references_b[N_BLOCKS][MAX_REFERENCE_LENGTH]; @@ -131,16 +134,20 @@ extern "C" query_lengths_b[i], reference_lengths_b[i], penalties_b[i], - tb_is_b[i], tb_js_b[i], - tb_streams_b[i] + tb_is_b[i], tb_js_b[i] +#ifndef NO_TRACEBACK + , tb_streams_b[i] +#endif +#ifdef SCORED + , scores[i] +#endif #ifdef CMAKEDEBUG - , - debugger[i] + , debugger[i] #endif ); } - // Utils::Kernel::top_level_writeout(tb_streams_b, tb_streams); +#ifndef NO_TRACEBACK WriteTBP: for (idx_t i = 0; i < MAX_QUERY_LENGTH + MAX_REFERENCE_LENGTH; i++) { @@ -150,6 +157,7 @@ extern "C" tb_streams[i][j] = tb_streams_b[j][i]; } } +#endif ExtractTracebackCoordinate: for (int i = 0; i < N_BLOCKS; i++) @@ -157,5 +165,14 @@ extern "C" tb_is[i] = tb_is_b[i]; tb_js[i] = tb_js_b[i]; } + +#ifdef SCORED + ExtractAlignmentScores: + for (int i = 0; i < N_BLOCKS; i++) + { + scores[i] = scores[i]; + } +#endif + } } diff --git a/testbench/test_csim_banding_local_affine_scored.cpp b/testbench/test_csim_banding_local_affine_scored.cpp new file mode 100644 index 00000000..18e78669 --- /dev/null +++ b/testbench/test_csim_banding_local_affine_scored.cpp @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include "params.h" +#include "seq_align_multiple.h" +#include "host_utils.h" +#include "solutions.h" + +#ifdef CMAKEDEBUG +#include "debug.h" +#endif + +using namespace std; + +#define INPUT_QUERY_LENGTH 128 +#define INPUT_REFERENCE_LENGTH 128 + +char_t base_to_num(char base) +{ + switch (base) + { + case 'A': + return 0; + case 'C': + return 1; + case 'G': + return 2; + case 'T': + return 3; + default: + return 0; +#ifdef CMAKEDEBUG + throw 
std::runtime_error("Unrecognized Nucleotide " + std::string(1, base) + " from A, C, G, and T.\n"); // or throw an exception +#endif + } +} + +struct Penalties_sol +{ + float extend; + float open; + float linear_gap; + float match; + float mismatch; +}; + +int main(){ + char alphabet[4] = {'A', 'T', 'G', 'C'}; + std::string query_string = Random::Sequence<4>(alphabet, INPUT_QUERY_LENGTH); + std::string reference_string = Random::Sequence<4>(alphabet, INPUT_REFERENCE_LENGTH); + + // Struct for Penalties in kernel + Penalties penalties[N_BLOCKS]; + for (int i = 0; i < N_BLOCKS; i++){ + penalties[i].extend = -1; + penalties[i].open = -1; + penalties[i].match = 3; + penalties[i].mismatch = -1; + } + + // Struct for penalties in solution + Penalties_sol penalties_sol[N_BLOCKS]; + for (Penalties_sol &penalty : penalties_sol) { + penalty.extend = -1; + penalty.open = -1; + penalty.match = 3; + penalty.mismatch = -1; + } + + // Reference and Query Strings + std::vector query(query_string.begin(), query_string.end()); + std::vector reference(reference_string.begin(), reference_string.end()); + +#ifdef CMAKEDEBUG + // Initialize Debugger + Container debuggers[N_BLOCKS]; + for (int i = 0; i < N_BLOCKS; i++){ + debuggers[i] = Container(); + } +#endif + + // Assert actual query length and reference length should be smaller than the maximum length + try { + if (query.size() > MAX_QUERY_LENGTH) throw std::runtime_error("Query length should less than MAX_QUERY_LENGTH, " + "actual query len " + std::to_string(query.size()) + ", Allocated qry len: " + std::to_string(MAX_QUERY_LENGTH)); + if (reference.size() > MAX_REFERENCE_LENGTH) throw std::runtime_error("Reference length should less than MAX_REFERENCE_LENGTH, " + "actual ref len " + std::to_string(reference.size()) + ", Allocated ref len: " + std::to_string(MAX_REFERENCE_LENGTH)); + } catch (const std::exception &e) { + std::cerr << "Exception: " << e.what() << std::endl; + throw; + } + + // Allocate query and reference buffer to 
pass to the kernel + char_t reference_buff[MAX_REFERENCE_LENGTH][N_BLOCKS]; + char_t query_buff[MAX_QUERY_LENGTH][N_BLOCKS]; + + // Allocate lengths for query and reference + idx_t qry_lengths[N_BLOCKS], ref_lengths[N_BLOCKS]; + + // Fill query buffer and references buffer for all blocks. + // Each buffer is of MAX size, but only the actual length + // elements is filled. + for (int b = 0; b < N_BLOCKS; b++) + { + for (int i = 0; i < query.size(); i++) + { + query_buff[i][b] = base_to_num(query[i]); + } + for (int i = 0; i < reference.size(); i++) + { + reference_buff[i][b] = base_to_num(reference[i]); + } + } + + // Fill the lengths of the query and reference + for (int b = 0; b < N_BLOCKS; b++) + { + qry_lengths[b] = query.size(); + ref_lengths[b] = reference.size(); + } + + // Allocate traceback streams + idx_t tb_is_d[N_BLOCKS], tb_js_d[N_BLOCKS]; + type_t scores[N_BLOCKS]; + + cout << "Kernel Started" << endl; + // Actual kernel calling + seq_align_multiple_static( + query_buff, + reference_buff, + qry_lengths, + ref_lengths, + penalties, + tb_is_d, tb_js_d, + scores +#ifdef CMAKEDEBUG + , debuggers +#endif + ); + + + // Print the query and reference strings + cout << "Query : " << query_string << endl; + cout << "Reference: " << reference_string << endl; + + + // Get the solution scores and traceback + array, MAX_QUERY_LENGTH>, N_LAYERS> sol_score_mat; + array, MAX_QUERY_LENGTH> sol_tb_mat; + map alignments; + // local_affine_solution(query_string, reference_string, penalties_sol[0], sol_score_mat, sol_tb_mat, alignments); + // print_matrix(sol_score_mat[0], "Solution Score Matrix Layer 0"); + // print_matrix(sol_tb_mat, "Solution Traceback Matrix"); + cout << "Solution Aligned Query : " << alignments["query"] << endl; + cout << "Solution Aligned Reference: " << alignments["reference"] << endl; +#ifdef CMAKEDEBUG + // Cast kernel scores to matrix scores + debuggers[0].cast_scores(); + debuggers[0].compare_scores(sol_score_mat, query.size(), reference.size()); 
// check if the scores from the kernel match the scores from the solution +#endif + + // reconstruct kernel alignments + array, N_BLOCKS> kernel_alignments; + int tb_is_h[N_BLOCKS]; + int tb_js_h[N_BLOCKS]; + string query_string_blocks[N_BLOCKS]; + string reference_string_blocks[N_BLOCKS]; + // for global alignments, adjust the lengths to be the lengths - 1 + for (int i = 0; i < N_BLOCKS; i++) { + tb_is_h[i] = (int) tb_is_d[i]; + tb_js_h[i] = (int) tb_js_d[i]; + query_string_blocks[i] = query_string; + reference_string_blocks[i] = reference_string; + } + + // Print each block's maximum-score coordinates and alignment score + for (int i = 0; i < N_BLOCKS; i++) { + cout << "Kernel " << i << ", Maximum Row: " << tb_is_h[i] << ", Maximum Column: " << tb_js_h[i] << endl; + cout << "Alignment Scores: " << scores[i] << endl; + } + + +} \ No newline at end of file