From 6259c8557ea7d8a41824448515abbc3a6c7682f4 Mon Sep 17 00:00:00 2001 From: Yingqi Cao Date: Tue, 21 May 2024 04:46:21 +0000 Subject: [PATCH] Current design causes II violation in banding kernel under 250MHz. Loosen it to 200MHz to resolve the issue. --- kernels/banding_global_linear/params.h | 14 ++--- src/align.cpp | 54 +++++++++---------- src/pe.cpp | 3 +- testbench/test_csim_banding_global_linear.cpp | 6 ++- 4 files changed, 41 insertions(+), 36 deletions(-) diff --git a/kernels/banding_global_linear/params.h b/kernels/banding_global_linear/params.h index d7c8271c..9d7016f8 100644 --- a/kernels/banding_global_linear/params.h +++ b/kernels/banding_global_linear/params.h @@ -4,20 +4,20 @@ #include #include -#define MAX_QUERY_LENGTH 16 -#define MAX_REFERENCE_LENGTH 16 +#define MAX_QUERY_LENGTH 256 +#define MAX_REFERENCE_LENGTH 256 -#define INPUT_QUERY_LENGTH 14 -#define INPUT_REFERENCE_LENGTH 15 +#define INPUT_QUERY_LENGTH 256 +#define INPUT_REFERENCE_LENGTH 256 #define ALIGN_TYPE BandingGlobalLinear #define N_BLOCKS 1 #define N_LAYERS 1 -const int PE_NUM = 4; +const int PE_NUM = 5; #define LAYER_MAXIMIUM 0 // We need to indicate from which layer (main matrix) is the maximum score stored. 
#define BANDING FIXED -#define BANDWIDTH 3 +#define BANDWIDTH 64 // Primitive Types typedef ap_uint<2> char_t; // Sequence Alphabet @@ -39,7 +39,7 @@ typedef ap_uint<2> tbp_t; // Traceback Pointer Type #define TB_UP (tbp_t) 0b11 // Legacy Debugger Configuration -#define DEBUG_OUTPUT_FILE "/home/centos/workspace/banding/DP-HLS/banding_global_linear_out.txt" +#define DEBUG_OUTPUT_FILE "/home/centos/workspace/DP-HLS/banding_global_linear_out.txt" struct Penalties { type_t mismatch; diff --git a/src/align.cpp b/src/align.cpp index 1ad999cc..a698314b 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -541,15 +541,15 @@ void Align::Fixed::AlignStatic( #ifdef CMAKEDEBUG // print l_lims and u_lims - std::cout << "Lower limits: "; - for (int j = 0; j < MAX_QUERY_LENGTH; j++) { - std::cout << l_lims[j] << " "; - } - std::cout << endl << "Upper limits: "; - for (int j = 0; j < MAX_QUERY_LENGTH; j++) { - std::cout << u_lims[j] << " "; - } - std::cout << endl; + // std::cout << "Lower limits: "; + // for (int j = 0; j < MAX_QUERY_LENGTH; j++) { + // std::cout << l_lims[j] << " "; + // } + // std::cout << endl << "Upper limits: "; + // for (int j = 0; j < MAX_QUERY_LENGTH; j++) { + // std::cout << u_lims[j] << " "; + // } + // std::cout << endl; #endif // Declare and initialize maximum scores. 
@@ -656,17 +656,17 @@ void Align::Fixed::ChunkCompute( ){ #ifdef CMAKEDEBUG - std::cout << "Started Chunk: " << ck_idx << std::endl; - // print local l lim and local u lim - std::cout << "Local Lower limits: "; - for (int j = 0; j < PE_NUM; j++) { - std::cout << local_l_lim[j] << " "; - } - std::cout << endl << "Local Upper limits: "; - for (int j = 0; j < PE_NUM; j++) { - std::cout << local_u_lim[j] << " "; - } - std::cout << endl; + // std::cout << "Started Chunk: " << ck_idx << std::endl; + // // print local l lim and local u lim + // std::cout << "Local Lower limits: "; + // for (int j = 0; j < PE_NUM; j++) { + // std::cout << local_l_lim[j] << " "; + // } + // std::cout << endl << "Local Upper limits: "; + // for (int j = 0; j < PE_NUM; j++) { + // std::cout << local_u_lim[j] << " "; + // } + // std::cout << endl; #endif bool predicate[PE_NUM]; @@ -688,7 +688,7 @@ void Align::Fixed::ChunkCompute( const idx_t chunk_end_col = local_u_lim[PE_NUM - 1]; // Set the upper left corner cell of the chunk, depending whether it's the first chunk. - dp_mem[0][0] = local_l_lim[0] > 0 ? init_row_scr[local_l_lim[0]-1] : init_col_scr[0]; + dp_mem[0][0] = local_l_lim[0] > 0 ? 
init_row_scr[chunk_start_col-1] : init_col_scr[0]; Iterating_Wavefronts: for (int i = chunk_start_col; i < chunk_end_col + PE_NUM; i++) @@ -709,12 +709,12 @@ void Align::Fixed::ChunkCompute( Align::Fixed::MapPredicate(local_l_lim, local_u_lim, i, col_pred, predicate); #ifdef CMAKEDEBUG // print predicate - std::cout << "Predicate: "; - for (int j = 0; j < PE_NUM; j++) - { - std::cout << predicate[j]; - } - std::cout << endl; + // std::cout << "Predicate: "; + // for (int j = 0; j < PE_NUM; j++) + // { + // std::cout << predicate[j]; + // } + // std::cout << endl; #endif // Align::ShiftReference(local_reference, reference, i, chunk_end_col); diff --git a/src/pe.cpp b/src/pe.cpp index 14f21a13..2e27791d 100644 --- a/src/pe.cpp +++ b/src/pe.cpp @@ -86,6 +86,7 @@ void PE::PEUnrollFixedSep( for (int i = 0; i < PE_NUM; i++) { #pragma HLS unroll + // FIXME: I can probably fuse the predicate and feeding NINF logic here. ALIGN_TYPE::PE::Compute( qry[i], ref[i], @@ -108,6 +109,6 @@ void PE::PEUnrollFixedSep( #endif } #ifdef CMAKEDEBUG - printf("\n"); + // printf("\n"); #endif } \ No newline at end of file diff --git a/testbench/test_csim_banding_global_linear.cpp b/testbench/test_csim_banding_global_linear.cpp index 7039958b..c0edda03 100644 --- a/testbench/test_csim_banding_global_linear.cpp +++ b/testbench/test_csim_banding_global_linear.cpp @@ -11,6 +11,9 @@ #include "solutions.h" #include "debug.h" +#define INPUT_QUERY_LENGTH 256 +#define INPUT_REFERENCE_LENGTH 256 + using namespace std; char tbp_to_char(tbp_t tbp){ @@ -201,10 +204,11 @@ int main(){ // print out the scores // print_matrix(scores_sol[k], "Solution Score Matrix, Layer: " + std::to_string(k)); +#ifdef CMAKEDEBUG fprint_matrix(debug_file, sol_score_mat[0], query_string, reference_string, "Solution Score Matrix, Layer: " + std::to_string(0)); fprint_matrix(debug_file, debuggers->scores_cpp[0], query_string, reference_string, "Kernel Score Matrix, Layer: " + std::to_string(0)); // print traceback pointer 
matrices fprint_matrix(debug_file, sol_tb_mat, "Solution Traceback Matrix, Layer: " + std::to_string(0)); - +#endif return 0; } \ No newline at end of file