diff --git a/.gitignore b/.gitignore index 1398da8c..c1ff4db0 100644 --- a/.gitignore +++ b/.gitignore @@ -94,3 +94,4 @@ compile_configs compile_configs output.txt +banding_global_linaer.txt diff --git a/scripts/extract_lines.sh b/scripts/extract_lines.sh new file mode 100755 index 00000000..49418166 --- /dev/null +++ b/scripts/extract_lines.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +base_folder="/home/centos/workspace/DP-HLS/reports_compile/banding_global_linear" +output_file="banding_global_linaer.txt" +name="banding_global_linear" + +# Truncate the output file, creating it if it does not exist yet +> "$output_file" + +# Iterate over the base-folder entries matching "$name"_* (non-directories are skipped) +for subfolder in "$base_folder"/"$name"_*; do + if [ -d "$subfolder" ]; then + config_number=$(basename "$subfolder") + rpt_file="$subfolder/imp/impl_1_kernel_util_routed.rpt" + + if [ -f "$rpt_file" ]; then + line=$(grep '^| seq_align_multiple_static ' "$rpt_file") + if [ -n "$line" ]; then + echo "$config_number: $line" >> "$output_file" + else + echo "$config_number: No matching line" >> "$output_file" + fi + else + echo "$config_number: No Report" >> "$output_file" + fi + fi +done diff --git a/src/align.cpp b/src/align.cpp index a3bebd78..c100dfab 100644 --- a/src/align.cpp +++ b/src/align.cpp @@ -546,7 +546,9 @@ void Align::Fixed::AlignStatic( #endif #endif +#ifndef NO_TRACEBACK #pragma HLS array_partition variable = tbp_matrix type = cyclic factor = PRAGMA_PE_NUM dim = 1 +#endif #ifdef CMAKEDEBUG // print l_lims and u_lims @@ -718,6 +720,9 @@ void Align::Fixed::ChunkCompute( for (idx_t i = chunk_start_col; i < chunk_end_col + local_query_length; i++) { #pragma HLS pipeline II = 1 + +// It's weird that if we don't remove this line after removing the tbp_matrix in no-traceback mode, the synthesis will run into +// an infinite loop while implementing init_row_scr. 
#pragma HLS dependence variable = init_row_scr type = inter direction = RAW false #ifdef CMAKEDEBUG diff --git a/src/hosts/host_bainging_local_affine_scored.cpp b/src/hosts/host_bainging_local_affine_scored.cpp new file mode 100644 index 00000000..bbee0685 --- /dev/null +++ b/src/hosts/host_bainging_local_affine_scored.cpp @@ -0,0 +1,181 @@ +// FIXME: Weird: if params.h is included, the host code fails to compile with a strange error. +// Need a more elegant way. Currently we just redefine those types. + +#include "xcl2.hpp" +#include +#include +#include +#include +#include "host_utils.h" +#include "dp_hls_common.h" +#include +#include + + +int main(int argc, char **argv) { + if (argc != 2) { + std::cout << "Usage: " << argv[0] << " " << std::endl; + return EXIT_FAILURE; + } + + std::string binaryFile = argv[1]; + cl_int err; + cl::Context context; + cl::Kernel krnl_seq_align; + cl::CommandQueue q; + + // Allocate memory for each array + // std::vector> querys_chars(N_BLOCKS * MAX_QUERY_LENGTH); + // std::vector> references_chars(N_BLOCKS * MAX_REFERENCE_LENGTH); + std::vector> querys(N_BLOCKS * MAX_QUERY_LENGTH); + std::vector> references(N_BLOCKS * MAX_REFERENCE_LENGTH); + std::vector> query_lengths(N_BLOCKS); + std::vector> reference_lengths(N_BLOCKS); + std::vector> penalties(N_BLOCKS); // One penalties entry per block (each is filled in the init loop below) + std::vector> traceback_start_is(N_BLOCKS); // Per-block traceback start positions (presumably row index i and column index j) + std::vector> traceback_start_js(N_BLOCKS); + std::vector> scores(N_BLOCKS); + + // Initialize data + char alphabet[] = {'A', 'T', 'C', 'G'}; // sequences are currently just random draws over this alphabet + string querys_strings = Random::Sequence<4>(alphabet, N_BLOCKS * MAX_QUERY_LENGTH); + string references_strings = Random::Sequence<4>(alphabet, N_BLOCKS * MAX_REFERENCE_LENGTH); + const char *query_ptr = querys_strings.c_str(); + const char *reference_ptr = references_strings.c_str(); + for (int i = 0; i < N_BLOCKS; i++) { + query_lengths[i] = 
MAX_QUERY_LENGTH; + reference_lengths[i] = MAX_REFERENCE_LENGTH; + for (int j = 0; j < MAX_QUERY_LENGTH; j++) { + querys[i * MAX_QUERY_LENGTH + j] = (type_t) HostUtils::Sequence::base_to_num(*query_ptr++); + } + for (int j = 0; j < MAX_REFERENCE_LENGTH; j++) { + references[i * MAX_REFERENCE_LENGTH + j] = (type_t) HostUtils::Sequence::base_to_num(*reference_ptr++); + } + // Initialize Penalties + penalties[i].open = type_t(-2); + penalties[i].extend = type_t(-1); + penalties[i].mismatch = type_t(-3); + penalties[i].match = type_t(2); + } + + // OPENCL HOST CODE AREA START + auto devices = xcl::get_xil_devices(); + auto fileBuf = xcl::read_binary_file(binaryFile); + cl::Program::Binaries bins{{fileBuf.data(), fileBuf.size()}}; + bool valid_device = false; + for (unsigned int i = 0; i < devices.size(); i++) { + auto device = devices[i]; + OCL_CHECK(err, context = cl::Context(device, nullptr, nullptr, nullptr, &err)); + OCL_CHECK(err, q = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err)); + std::cout << "Trying to program device[" << i << "]: " << device.getInfo() << std::endl; + cl::Program program(context, {device}, bins, nullptr, &err); + if (err != CL_SUCCESS) { + std::cout << "Failed to program device[" << i << "] with xclbin file!\n"; + } else { + std::cout << "Device[" << i << "]: program successful!\n"; + OCL_CHECK(err, krnl_seq_align = cl::Kernel(program, "seq_align_multiple_static", &err)); + valid_device = true; + break; + } + } + if (!valid_device) { + std::cout << "Failed to program any device found, exit!\n"; + exit(EXIT_FAILURE); + } + + // Allocate Buffers in Global Memory and set kernel arguments + OCL_CHECK(err, cl::Buffer buffer_querys(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + sizeof(char_t) * querys.size(), querys.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_references(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + sizeof(char_t) * references.size(), references.data(), &err)); + OCL_CHECK(err, cl::Buffer 
buffer_query_lengths(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + sizeof(idx_t) * query_lengths.size(), query_lengths.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_reference_lengths(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + sizeof(idx_t) * reference_lengths.size(), reference_lengths.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_penalties(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY, + sizeof(Penalties) * penalties.size(), penalties.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_traceback_start_is(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + sizeof(idx_t) * traceback_start_is.size(), traceback_start_is.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_traceback_start_js(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + sizeof(idx_t) * traceback_start_js.size(), traceback_start_js.data(), &err)); + OCL_CHECK(err, cl::Buffer buffer_scores(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY, + sizeof(type_t) * scores.size(), scores.data(), &err)); + + // Set Kernel Arguments + OCL_CHECK(err, err = krnl_seq_align.setArg(0, buffer_querys)); + OCL_CHECK(err, err = krnl_seq_align.setArg(1, buffer_references)); + OCL_CHECK(err, err = krnl_seq_align.setArg(2, buffer_query_lengths)); + OCL_CHECK(err, err = krnl_seq_align.setArg(3, buffer_reference_lengths)); + OCL_CHECK(err, err = krnl_seq_align.setArg(4, buffer_penalties)); + OCL_CHECK(err, err = krnl_seq_align.setArg(5, buffer_traceback_start_is)); + OCL_CHECK(err, err = krnl_seq_align.setArg(6, buffer_traceback_start_js)); + OCL_CHECK(err, err = krnl_seq_align.setArg(7, buffer_scores)); + + // Start the timer, then copy input data to device global memory + auto start = std::chrono::high_resolution_clock::now(); + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_querys, buffer_references, buffer_query_lengths, + buffer_reference_lengths, buffer_penalties}, 0 /* 0 means from host*/)); + + // Launch the Kernel + OCL_CHECK(err, err = q.enqueueTask(krnl_seq_align)); + + + // Copy Result from Device 
Global Memory to Host Local Memory + OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_traceback_start_is, buffer_traceback_start_js, buffer_scores}, CL_MIGRATE_MEM_OBJECT_HOST)); + q.finish(); + auto end = std::chrono::high_resolution_clock::now(); + + // OPENCL HOST CODE AREA END + + // Print each block's query, reference, and alignment score + for (int i = 0; i < N_BLOCKS; i++) { + std::cout << "Query: " << querys_strings.substr(i * MAX_QUERY_LENGTH, MAX_QUERY_LENGTH) << std::endl; + std::cout << "Reference: " << references_strings.substr(i * MAX_REFERENCE_LENGTH, MAX_REFERENCE_LENGTH) << std::endl; + std::cout << "Alignment Scores: " << scores[i] << std::endl; + std::cout << std::endl; + } + + // set up the array to store the traceback lengths + // string query_strings_primitive[N_BLOCKS]; + // string reference_strings_primitive[N_BLOCKS]; + // for (int i = 0; i < N_BLOCKS; i++){ + // query_strings_primitive[i] = querys_strings.substr(i * MAX_QUERY_LENGTH, MAX_QUERY_LENGTH); + // reference_strings_primitive[i] = references_strings.substr(i * MAX_REFERENCE_LENGTH, MAX_REFERENCE_LENGTH); + // } + + // tbr_t tb_streams_primitive[N_BLOCKS][MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH]; + // for (int i = 0; i < N_BLOCKS; i++){ + // for (int j = 0; j < MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH; j++){ + // tb_streams_primitive[i][j] = tb_streams[i * (MAX_QUERY_LENGTH + MAX_REFERENCE_LENGTH) + j]; + // } + // } + + // int tb_qry_lengths[N_BLOCKS]; + // int tb_ref_lengths[N_BLOCKS]; + // for (int i = 0; i < N_BLOCKS; i++){ + // tb_qry_lengths[i] = traceback_start_is[i]; + // tb_ref_lengths[i] = traceback_start_js[i]; + // } + // std::cout << "Reconstructing Traceback" << std::endl; + // array, N_BLOCKS> kernel_alignments; + // kernel_alignments = ReconstructTracebackBlocks( + // query_strings_primitive, + // reference_strings_primitive, + // tb_qry_lengths, tb_ref_lengths, + // tb_streams_primitive); + + // // Print Actual Alignments + // for (int i = 0; i < N_BLOCKS; i++){ + // std::cout 
<< "Block " << i << " Results" << std::endl; + // std::cout << "Query : " << query_strings_primitive[i] << std::endl; + // std::cout << "Reference: " << reference_strings_primitive[i] << std::endl; + // std::cout << "Kernel Aligned Query : " << kernel_alignments[i]["query"] << std::endl; + // std::cout << "Kernel Aligned Reference: " << kernel_alignments[i]["reference"] << std::endl << std::endl; + // } + + // Print the elapsed host time between start and end (input migration, kernel task, and result migration are all enqueued in between) + std::cout << "Kernel execution time: " << std::chrono::duration_cast(end - start).count() << "ms" << std::endl; + + std::cout << "Kernel execution complete." << std::endl; + return EXIT_SUCCESS; +} diff --git a/src/seq_align_multiple.cpp b/src/seq_align_multiple.cpp index c2a405a1..7d3bf71e 100644 --- a/src/seq_align_multiple.cpp +++ b/src/seq_align_multiple.cpp @@ -58,8 +58,12 @@ extern "C" Penalties penalties_b[N_BLOCKS]; idx_t tb_is_b[N_BLOCKS]; idx_t tb_js_b[N_BLOCKS]; +#ifndef NO_TRACEBACK tbr_t tb_streams_b[N_BLOCKS][MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH]; - +#endif +#ifdef SCORED + type_t scores_b[N_BLOCKS]; +#endif // Attempted to use URAM but it didn't work. // #pragma HLS bind_storage variable = tb_streams_b type = fifo impl = uram // #pragma HLS bind_storage variable = querys_b type = ram_1p impl = uram @@ -72,7 +76,14 @@ extern "C" #pragma HLS array_partition variable = penalties_b type = complete dim = 1 #pragma HLS array_partition variable = tb_is_b type = complete dim = 1 #pragma HLS array_partition variable = tb_js_b type = complete dim = 1 + +#ifndef NO_TRACEBACK #pragma HLS array_partition variable = tb_streams_b type = complete dim = 1 +#endif + +#ifdef SCORED +#pragma HLS array_partition variable = scores_b type = complete dim = 1 +#endif // F1 doesn't support axis on the top level. But for other FPGA it might be more optimized. 
// #pragma HLS interface mode = axis port = querys_b @@ -139,7 +150,7 @@ extern "C" , tb_streams_b[i] #endif #ifdef SCORED - , scores[i] + , scores_b[i] #endif #ifdef CMAKEDEBUG , debugger[i] @@ -170,7 +181,7 @@ extern "C" ExtractAlignmentScores: for (int i = 0; i < N_BLOCKS; i++) { - scores[i] = scores[i]; + scores[i] = scores_b[i]; } #endif diff --git a/testbench/test_csim_banding_global_linear.cpp b/testbench/test_csim_banding_global_linear.cpp index 21009c13..dfce98e7 100644 --- a/testbench/test_csim_banding_global_linear.cpp +++ b/testbench/test_csim_banding_global_linear.cpp @@ -14,6 +14,9 @@ using namespace std; +#define INPUT_QUERY_LENGTH 256 +#define INPUT_REFERENCE_LENGTH 256 + char tbp_to_char(tbp_t tbp){ if (tbp == TB_DIAG) return 'D'; else if (tbp == TB_UP) return 'U'; @@ -220,8 +223,9 @@ int main(){ } debug_file << endl; +#ifdef CMAKEDEBUG debuggers[0].dump_scores_infos(debug_file); - +#endif return 0; } \ No newline at end of file