Skip to content

Commit

Permalink
Merge branch 'optimize-banding' of github.com:TurakhiaLab/DP-HLS into…
Browse files Browse the repository at this point in the history
… optimize-banding
  • Loading branch information
ioeddk committed May 30, 2024
2 parents a523dc1 + 2a8c4d2 commit 459de50
Show file tree
Hide file tree
Showing 13 changed files with 275 additions and 80 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -fsanitize=address -fno-inline -D
# "-fsanitize=address" flag was used to check the stack smashing with Google Address Sanitizer. Use this flag with CLang and
# run the program to check.

set(DP_HLS_HOME "/home/centos/workspace/DP-HLS")
set(DP_HLS_HOME "/home/centos/workspace/banding/DP-HLS")

set(EXECUTABLE_TARGETS
baseline_local_linear
Expand Down
2 changes: 0 additions & 2 deletions include/PE.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,6 @@ namespace PE
const dp_mem_block_t dp_mem,
const input_char_block_t qry,
const input_char_block_t ref,
const bool entering, const bool exiting,
const idx_t entering_pe, const idx_t exiting_pe,
const Penalties penalties,
wavefront_scores_inf_t &score,
tbp_vec_t &tbp);
Expand Down
8 changes: 2 additions & 6 deletions include/align.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,9 +349,9 @@ namespace Align
const chunk_col_scores_inf_t &init_col_scr,
score_vec_t (&init_row_scr)[MAX_REFERENCE_LENGTH],
idx_t p_cols, const idx_t ck_idx,
const idx_t (&local_l_lim)[PE_NUM], const idx_t (&local_u_lim)[PE_NUM],
idx_t &l_lim_reg, idx_t &u_lim_reg,
const bool (&col_pred)[PE_NUM],
const idx_t global_query_length, const idx_t reference_length,
const idx_t global_query_length, const idx_t local_query_length, const idx_t reference_length,
const Penalties &penalties,
ScorePack (&max)[PE_NUM], // write out so must pass by reference
tbp_t (&chunk_tbp_out)[PE_NUM][TBMEM_SIZE]
Expand Down Expand Up @@ -401,10 +401,8 @@ namespace Align
void PrepareLocals(
const char_t (&query)[MAX_QUERY_LENGTH_],
const score_vec_t (&init_col_scr)[MAX_QUERY_LENGTH_],
const idx_t (&l_lim)[MAX_QUERY_LENGTH_], const idx_t (&u_lim)[MAX_QUERY_LENGTH_],
char_t (&local_query)[PE_NUM_],
chunk_col_scores_inf_t &init_col_scr_local,
idx_t (&local_l_lim)[PE_NUM_], idx_t (&local_u_lim)[PE_NUM_],
bool (&col_pred)[PE_NUM_], const idx_t local_query_len,
const idx_t idx)
{
Expand All @@ -415,8 +413,6 @@ namespace Align
init_col_scr_local[i + 1] = init_col_scr[idx + i];
local_query[i] = query[idx + i];
col_pred[i] = i < local_query_len;
local_l_lim[i] = l_lim[idx + i];
local_u_lim[i] = u_lim[idx + i];
}
}
}
Expand Down
94 changes: 94 additions & 0 deletions include/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include <array>
#include <string>
// #include <experimental/filesystem>
#include <map>
#include <fstream>
#include <unordered_map>
#include <hls_vector.h>
Expand Down Expand Up @@ -36,6 +37,27 @@ class Container {
array<array<tbr_t, MAX_REFERENCE_LENGTH>, MAX_QUERY_LENGTH> tb_mat_kernel;
array<array<char, MAX_REFERENCE_LENGTH>, MAX_QUERY_LENGTH> tb_mat_cpp; // this need to be translated

// Per-wavefront snapshots of the DP memory, keyed by (chunk index, wavefront index);
// filled by set_wf_dp_mem(). Each value is laid out as [2][PE_NUM + 1][N_LAYERS] floats.
std::map<std::pair<uint, uint>, std::array<std::array<std::array<float, N_LAYERS>, PE_NUM+1>, 2>> wf_dp_mem;
// Per-wavefront predicates, one bool per PE.
// NOTE(review): presumably keyed by (chunk index, wavefront index) like wf_dp_mem — confirm against the code that fills it.
std::map<std::pair<uint, uint>, std::array<bool, PE_NUM>> wf_predicates;

/**
 * @brief Debug record attached to a single DP cell.
 *
 * Every member carries a default initializer so that a default-constructed
 * record is fully defined. Without them, a record that only received its
 * dependency scores would expose indeterminate flags and PE indices to
 * dump_scores_infos().
 */
struct score_info {
    float up[N_LAYERS] = {};     // per-layer score read from dp_mem[i][0] by set_score_info_dependency()
    float left[N_LAYERS] = {};   // per-layer score read from dp_mem[i+1][0] by set_score_info_dependency()
    float diag[N_LAYERS] = {};   // per-layer score read from dp_mem[i][1] by set_score_info_dependency()
    float write[N_LAYERS] = {};  // NOTE(review): presumably the score written for this cell — no writer is visible here, confirm
    bool pred = false;           // PE predicate, set by set_score_info_predicates()
    bool exiting = false;        // set by set_score_info_entering_exiting()
    bool entering = false;       // set by set_score_info_entering_exiting()
    int entering_pe = 0;         // only printed by dump_scores_infos() when `entering` is true
    int exiting_pe = 0;          // only printed by dump_scores_infos() when `exiting` is true
};

/**
 * @brief Per-cell debug records, indexed by the cell's coordinate.
 *
 * The setters in this class build the key for PE i as
 * (chunk offset + i, wavefront index - i).
 */
std::map<std::pair<int, int>, score_info> scores_infos;

Container() {};

void cast_scores();
Expand All @@ -45,6 +67,78 @@ class Container {

void set_score(int chunk_row_offset, int chunk_col_offset, int pe_num, int wavefront, score_vec_t vals, bool pred);
void set_scores_wf(int chunk_row_offset, int chunk_col_offset, int wavefront, score_vec_t vals[PE_NUM], bool predicates[PE_NUM]);

/**
 * @brief Store a float snapshot of the DP memory for one wavefront.
 *
 * The snapshot swaps the first two dimensions of dp_mem: element
 * dp_mem[pe][slot][layer] is stored at snapshot[slot][pe][layer].
 * It is saved in wf_dp_mem under the key (ck_idx, wf_idx), replacing any
 * snapshot already stored for that key.
 *
 * @param ck_idx First component of the map key.
 * @param wf_idx Second component of the map key.
 * @param dp_mem DP memory to copy; indexed as [PE_NUM + 1][2][N_LAYERS].
 */
template <typename IDX_T>
void set_wf_dp_mem(IDX_T ck_idx, IDX_T wf_idx, dp_mem_block_t dp_mem){
    std::array<std::array<std::array<float, N_LAYERS>, PE_NUM+1>, 2> snapshot;
    for (int slot = 0; slot < 2; ++slot){
        for (int pe = 0; pe < PE_NUM+1; ++pe){
            for (int layer = 0; layer < N_LAYERS; ++layer){
                snapshot[slot][pe][layer] = dp_mem[pe][slot][layer];
            }
        }
    }
    wf_dp_mem[std::make_pair(ck_idx, wf_idx)] = snapshot;
}

template <typename IDX_T>
void set_score_info_dependency(IDX_T chunk_offset, IDX_T wf_idx, dp_mem_block_t dp_mem){
for (int i = 0; i < PE_NUM; i++){
score_info curr_info;
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)] = curr_info;
for (int k = 0; k < N_LAYERS; k++){
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].up[k] = dp_mem[i][0][k];
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].left[k] = dp_mem[i+1][0][k];
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].diag[k] = dp_mem[i][1][k];
}

}
}

template <typename IDX_T>
void set_score_info_entering_exiting(IDX_T chunk_offset, IDX_T wf_idx, bool entering, bool exiting, int entering_pe, int exiting_pe){
for (int i = 0; i < PE_NUM; i++){
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].entering = entering;
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].exiting = exiting;
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].entering_pe = entering_pe;
scores_infos[std::make_pair(chunk_offset + i, wf_idx - i)].exiting_pe = exiting_pe;
}
}

/**
 * @brief Dump every recorded score_info to a debug file.
 *
 * Writes a "Scores: " header, then one line per (cell, layer) pair in the
 * map's key order. Each line carries the coordinate, the layer, the up/left/
 * diag scores and the predicate; the entering/exiting PE is appended only
 * when the corresponding flag is set.
 *
 * @tparam N_LAYERS_ Number of layers to print per cell; it indexes the
 *                   score_info arrays, which are sized N_LAYERS.
 * @param file Output stream the dump is written to.
 */
template <int N_LAYERS_>
void dump_scores_infos(ofstream &file){
    file << "Scores: " << std::endl;
    for (auto it = this->scores_infos.begin(); it != this->scores_infos.end(); ++it) {
        const std::pair<int, int> &coord = it->first;
        const score_info &info = it->second;
        for (int layer = 0; layer < N_LAYERS_; ++layer){
            file << "Coordinate: (" << coord.first << ", " << coord.second << "), Layer: " << layer;
            file << ", Up: " << info.up[layer];
            file << ", Left: " << info.left[layer];
            file << ", Diag: " << info.diag[layer];
            file << ", Pred: " << info.pred << ", ";
            if (info.entering){
                file << "Entering PE: " << info.entering_pe << ", ";
            }
            if (info.exiting){
                file << "Exiting PE: " << info.exiting_pe << ", ";
            }
            // Keep the per-line flush so the file stays current while debugging.
            file << std::endl;
        }
    }
}
// set score info predicate
template <typename IDX_T>
void set_score_info_predicates(IDX_T ck_offset, IDX_T wf_idx, bool preds[PE_NUM]){
for (int i = 0; i < PE_NUM; i++){
this->scores_infos[std::make_pair(ck_offset + i, wf_idx-i)].pred = preds[i];
}
}

// template <typename IDX_T>
// void dump_tb_info(ofstream &file){

// }

void compare_scores(array<array<array<float, MAX_REFERENCE_LENGTH>, MAX_QUERY_LENGTH>, N_LAYERS> scores_sol,
int query_len, int ref_len);
Expand Down
6 changes: 3 additions & 3 deletions include/solutions.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,8 +341,8 @@ void fixed_banding_global_linear_solution(std::string query, std::string referen
int llim[SOL_MAX_QUERY_LENGTH], ulim[SOL_MAX_QUERY_LENGTH];
for (int i = 0; i < SOL_MAX_QUERY_LENGTH; i++)
{
llim[i] = max(0, i - SOL_BANDWIDTH);
ulim[i] = min(SOL_MAX_REFERENCE_LENGTH - 1, i + SOL_BANDWIDTH - 1);
llim[i] = i - SOL_BANDWIDTH;
ulim[i] = i + SOL_BANDWIDTH - 1;
}

// print ulim
Expand Down Expand Up @@ -2636,7 +2636,7 @@ void fprint_matrix(ofstream &file, array<array<T, N>, M> &mat, string name)
template <typename T, int M, int N>
void fprint_matrix(ofstream &file, array<array<T, N>, M> &mat, string query, string reference, string name)
{
int width = 2;
int width = 3;
file << name << endl;
file << std::right << std::setw(width) << " ";
file << std::right << std::setw(width) << " ";
Expand Down
28 changes: 28 additions & 0 deletions include/traceback.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,14 @@ namespace Traceback
int ck_idx, int pe_idx, int col_idx, int v_row, int v_col)
{

#ifdef CMAKEDEBUG
// set the initial state of the traceback to be AL_END
for (int i = 0; i < MAX_QUERY_LENGTH + MAX_REFERENCE_LENGTH; i++)
{
traceback_out[i] = AL_END;
}
#endif

#pragma HLS bind_storage variable = traceback_out type = fifo impl = uram
int pe = pe_idx; // row index, but in tbmat
int col = col_idx;
Expand All @@ -126,6 +134,26 @@ namespace Traceback
TB_STATE state;
ALIGN_TYPE::Traceback::StateInit(tbmat[pe][col], state);

#ifdef CMAKEDEBUG
// Print the contents of tbmat.
// The first row is a header of column indices, printed with a fixed field width; one row per PE follows.

for (int j = 0; j < TBMEM_SIZE; j++)
{
std::cout << std::setw(3) << j << " ";
}
std::cout << std::endl;
for (int i = 0; i < PE_NUM; i++)
{
for (int j = 0; j < TBMEM_SIZE; j++)
{
std::cout << std::setw(3) << tbmat[i][j].to_int() << " ";
}
std::cout << std::endl;
}
std::cout << std::endl;
#endif

traceback_loop:
while (navigation != AL_END) // Now solely this flag determines whether to stop the traceback.
{
Expand Down
41 changes: 23 additions & 18 deletions kernels/banding_global_linear/kernel_banding_global_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,19 @@ void BandingGlobalLinear::UpdatePEMaximum(
const bool (&predicate)[PE_NUM],
const idx_t query_len, const idx_t ref_len)
{
for (int i = 0; i < PE_NUM; i++)
{
#pragma HLS unroll
if (predicate[i] && chunk_row_offset + i == query_len - 1 && wavefront - i == ref_len - 1)
{
if (max[i].score < scores[i + 1][0])
{
max[i].score = scores[i + 1][0];
max[i].p_col = p_cols;
max[i].ck = ck_idx;
}
}
}
// for (int i = 0; i < PE_NUM; i++)
// {
// #pragma HLS unroll
// if (predicate[i] && chunk_row_offset + i == query_len - 1 && wavefront - i == ref_len - 1)
// {
// if (max[i].score < scores[i + 1][0])
// {
// max[i].score = scores[i + 1][0];
// max[i].p_col = p_cols;
// max[i].ck = ck_idx;
// }
// }
// }
}

void BandingGlobalLinear::InitializeMaxScores(ScorePack (&max)[PE_NUM], idx_t qry_len, idx_t ref_len)
Expand All @@ -102,11 +102,16 @@ void BandingGlobalLinear::InitializeMaxScores(ScorePack (&max)[PE_NUM], idx_t qr
max[i].ck = 0;
}

// idx_t max_pe = (qry_len - 1) % PE_NUM;
// idx_t max_ck = (qry_len - 1) / PE_NUM;
// max[max_pe].score = INF;
// max[max_pe].p_col = max_ck * (2 * BANDWIDTH + PE_NUM - 1 + PE_NUM - 1) + max_pe + ref_len - 1;
// max[max_pe].ck = max_ck;
idx_t max_pe = (qry_len - 1) % PE_NUM;
idx_t max_ck = (qry_len - 1) / PE_NUM;
max[max_pe].score = INF;
std::cout << "Magic Number: " << ( max_ck * PE_NUM - BANDWIDTH ) << std::endl;
max[max_pe].p_col = max_ck * (TB_CHUNK_WIDTH) + max_pe + ref_len - ( max_ck * PE_NUM - BANDWIDTH ) - 1; // FIXME: Problemmatic, the problem is that the last chunk doesn't have length ref_len.
max[max_pe].ck = max_ck;
#ifdef CMAKEDEBUG
std::cout << "Physical Column" << max[max_pe].p_col << std::endl;

#endif
}

void BandingGlobalLinear::Traceback::StateInit(tbp_t tbp, TB_STATE &state)
Expand Down
16 changes: 10 additions & 6 deletions kernels/banding_global_linear/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,18 @@
#include <ap_fixed.h>
#include <hls_vector.h>

#define MAX_QUERY_LENGTH 32
#define MAX_REFERENCE_LENGTH 32
#define MAX_QUERY_LENGTH 256
#define MAX_REFERENCE_LENGTH 256


#define INPUT_QUERY_LENGTH 256
#define INPUT_REFERENCE_LENGTH 256


#define ALIGN_TYPE BandingGlobalLinear
#define N_BLOCKS 1
#define N_LAYERS 1
const int PE_NUM = 8;
const int PE_NUM = 16;
#define LAYER_MAXIMIUM 0 // We need to indicate from which layer (main matrix) is the maximum score stored.

#define BANDING FIXED
Expand All @@ -19,7 +24,7 @@ const int PE_NUM = 8;
// Primitive Types
typedef ap_uint<2> char_t; // Sequence Alphabet
typedef ap_fixed<16, 12> type_t; // Scores Type <width, integer_width>
typedef short idx_t; // Indexing Type, could be much less than 32. ap_uint<8>
typedef int idx_t; // Indexing Type, could be much less than 32. ap_uint<8>
typedef ap_uint<2> tbp_t; // Traceback Pointer Type

// Define Zero Value
Expand All @@ -35,8 +40,7 @@ typedef ap_uint<2> tbp_t; // Traceback Pointer Type
#define TB_DIAG (tbp_t) 0b10
#define TB_UP (tbp_t) 0b11

// Legacy Debugger Configuration
#define DEBUG_OUTPUT_FILE "/home/centos/workspace/DP-HLS/banding_global_linear_out.txt"
#define DEBUG_OUTPUT_FILE "/home/centos/workspace/banding/DP-HLS/banding_global_linear_out.txt"

struct Penalties {
type_t mismatch;
Expand Down
Loading

0 comments on commit 459de50

Please sign in to comment.