Skip to content

Commit

Permalink
Updated the no traceback optimization and scored option for the templ…
Browse files Browse the repository at this point in the history
…ate.
  • Loading branch information
ioeddk committed Jun 4, 2024
1 parent 6eb4773 commit 94f0d9f
Show file tree
Hide file tree
Showing 6 changed files with 233 additions and 4 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,4 @@ compile_configs

compile_configs
output.txt
banding_global_linaer.txt
27 changes: 27 additions & 0 deletions scripts/extract_lines.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Gathers the utilization row of the `seq_align_multiple_static` kernel from
# the routed-utilization report of every compile configuration and writes one
# "<config folder>: <result>" line per configuration into a summary file.

name="banding_global_linear"
base_folder="/home/centos/workspace/DP-HLS/reports_compile/banding_global_linear"
output_file="banding_global_linaer.txt"

# Start from an empty summary file (creates it when missing).
: > "$output_file"

# Visit every "<name>_*" entry below the base folder; only directories count.
for config_dir in "$base_folder"/"$name"_*; do
    [ -d "$config_dir" ] || continue

    config_id=$(basename "$config_dir")
    report="$config_dir/imp/impl_1_kernel_util_routed.rpt"

    # A configuration without a routed report is still recorded.
    if [ ! -f "$report" ]; then
        echo "$config_id: No Report" >> "$output_file"
        continue
    fi

    # The kernel's row in the report table starts with "| seq_align_multiple_static ".
    match=$(grep '^| seq_align_multiple_static ' "$report")
    if [ -z "$match" ]; then
        echo "$config_id: No matching line" >> "$output_file"
    else
        echo "$config_id: $match" >> "$output_file"
    fi
done
5 changes: 5 additions & 0 deletions src/align.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,9 @@ void Align::Fixed::AlignStatic(
#endif
#endif

#ifndef NO_TRACEBACK
#pragma HLS array_partition variable = tbp_matrix type = cyclic factor = PRAGMA_PE_NUM dim = 1
#endif

#ifdef CMAKEDEBUG
// print l_lims and u_lims
Expand Down Expand Up @@ -718,6 +720,9 @@ void Align::Fixed::ChunkCompute(
for (idx_t i = chunk_start_col; i < chunk_end_col + local_query_length; i++)
{
#pragma HLS pipeline II = 1

// It's weird that if we don't remove this line after removing the tbp_matrix in no-traceback mode, the synthesis runs into
// an infinite loop while implementing init_row_scr.
#pragma HLS dependence variable = init_row_scr type = inter direction = RAW false

#ifdef CMAKEDEBUG
Expand Down
181 changes: 181 additions & 0 deletions src/hosts/host_bainging_local_affine_scored.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
// FIXME: Weirdly, including params.h makes the host code fail to compile with a strange error.
// A more elegant solution is needed. For now those types are simply redefined.

#include "xcl2.hpp"
#include <vector>
#include <algorithm>
#include <ap_int.h>
#include <ap_fixed.h>
#include "host_utils.h"
#include "dp_hls_common.h"
#include <map>
#include <chrono>


int main(int argc, char **argv) {
if (argc != 2) {
std::cout << "Usage: " << argv[0] << " <XCLBIN File>" << std::endl;
return EXIT_FAILURE;
}

std::string binaryFile = argv[1];
cl_int err;
cl::Context context;
cl::Kernel krnl_seq_align;
cl::CommandQueue q;

// Allocate memory for each array
// std::vector<char, aligned_allocator<char_t>> querys_chars(N_BLOCKS * MAX_QUERY_LENGTH);
// std::vector<char, aligned_allocator<char_t>> references_chars(N_BLOCKS * MAX_REFERENCE_LENGTH);
std::vector<char_t, aligned_allocator<char_t>> querys(N_BLOCKS * MAX_QUERY_LENGTH);
std::vector<char_t, aligned_allocator<char_t>> references(N_BLOCKS * MAX_REFERENCE_LENGTH);
std::vector<idx_t, aligned_allocator<idx_t>> query_lengths(N_BLOCKS);
std::vector<idx_t, aligned_allocator<idx_t>> reference_lengths(N_BLOCKS);
std::vector<Penalties, aligned_allocator<Penalties>> penalties(N_BLOCKS); // Assuming a single penalties struct
std::vector<idx_t, aligned_allocator<idx_t>> traceback_start_is(N_BLOCKS); // Allocate buffer for the starting row and column of the buffer
std::vector<idx_t, aligned_allocator<idx_t>> traceback_start_js(N_BLOCKS);
std::vector<tbr_t, aligned_allocator<type_t>> scores(N_BLOCKS);

// Initialize data
char alphabet[] = {'A', 'T', 'C', 'G'}; // currently putting just random sequence here
string querys_strings = Random::Sequence<4>(alphabet, N_BLOCKS * MAX_QUERY_LENGTH);
string references_strings = Random::Sequence<4>(alphabet, N_BLOCKS * MAX_REFERENCE_LENGTH);
const char *query_ptr = querys_strings.c_str();
const char *reference_ptr = references_strings.c_str();
for (int i = 0; i < N_BLOCKS; i++) {
query_lengths[i] = MAX_QUERY_LENGTH;
reference_lengths[i] = MAX_REFERENCE_LENGTH;
for (int j = 0; j < MAX_QUERY_LENGTH; j++) {
querys[i * MAX_QUERY_LENGTH + j] = (type_t) HostUtils::Sequence::base_to_num(*query_ptr++);
}
for (int j = 0; j < MAX_REFERENCE_LENGTH; j++) {
references[i * MAX_REFERENCE_LENGTH + j] = (type_t) HostUtils::Sequence::base_to_num(*reference_ptr++);
}
// Initialize Penalties
penalties[i].open = type_t(-2);
penalties[i].extend = type_t(-1);
penalties[i].mismatch = type_t(-3);
penalties[i].match = type_t(2);
}

// OPENCL HOST CODE AREA START
auto devices = xcl::get_xil_devices();
auto fileBuf = xcl::read_binary_file(binaryFile);
cl::Program::Binaries bins{{fileBuf.data(), fileBuf.size()}};
bool valid_device = false;
for (unsigned int i = 0; i < devices.size(); i++) {
auto device = devices[i];
OCL_CHECK(err, context = cl::Context(device, nullptr, nullptr, nullptr, &err));
OCL_CHECK(err, q = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE, &err));
std::cout << "Trying to program device[" << i << "]: " << device.getInfo<CL_DEVICE_NAME>() << std::endl;
cl::Program program(context, {device}, bins, nullptr, &err);
if (err != CL_SUCCESS) {
std::cout << "Failed to program device[" << i << "] with xclbin file!\n";
} else {
std::cout << "Device[" << i << "]: program successful!\n";
OCL_CHECK(err, krnl_seq_align = cl::Kernel(program, "seq_align_multiple_static", &err));
valid_device = true;
break;
}
}
if (!valid_device) {
std::cout << "Failed to program any device found, exit!\n";
exit(EXIT_FAILURE);
}

// Allocate Buffers in Global Memory and set kernel arguments
OCL_CHECK(err, cl::Buffer buffer_querys(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(char_t) * querys.size(), querys.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_references(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(char_t) * references.size(), references.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_query_lengths(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(idx_t) * query_lengths.size(), query_lengths.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_reference_lengths(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(idx_t) * reference_lengths.size(), reference_lengths.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_penalties(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
sizeof(Penalties) * penalties.size(), penalties.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_traceback_start_is(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(idx_t) * traceback_start_is.size(), traceback_start_is.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_traceback_start_js(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(idx_t) * traceback_start_js.size(), traceback_start_js.data(), &err));
OCL_CHECK(err, cl::Buffer buffer_scores(context, CL_MEM_USE_HOST_PTR | CL_MEM_WRITE_ONLY,
sizeof(type_t) * scores.size(), scores.data(), &err));

// Set Kernel Arguments
OCL_CHECK(err, err = krnl_seq_align.setArg(0, buffer_querys));
OCL_CHECK(err, err = krnl_seq_align.setArg(1, buffer_references));
OCL_CHECK(err, err = krnl_seq_align.setArg(2, buffer_query_lengths));
OCL_CHECK(err, err = krnl_seq_align.setArg(3, buffer_reference_lengths));
OCL_CHECK(err, err = krnl_seq_align.setArg(4, buffer_penalties));
OCL_CHECK(err, err = krnl_seq_align.setArg(5, buffer_traceback_start_is));
OCL_CHECK(err, err = krnl_seq_align.setArg(6, buffer_traceback_start_js));
OCL_CHECK(err, err = krnl_seq_align.setArg(7, buffer_scores));

// Copy input data to device global memory
auto start = std::chrono::high_resolution_clock::now();
OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_querys, buffer_references, buffer_query_lengths,
buffer_reference_lengths, buffer_penalties}, 0 /* 0 means from host*/));

// Launch the Kernel
OCL_CHECK(err, err = q.enqueueTask(krnl_seq_align));


// Copy Result from Device Global Memory to Host Local Memory
OCL_CHECK(err, err = q.enqueueMigrateMemObjects({buffer_traceback_start_is, buffer_traceback_start_js, buffer_scores}, CL_MIGRATE_MEM_OBJECT_HOST));
q.finish();
auto end = std::chrono::high_resolution_clock::now();

// OPENCL HOST CODE AREA END

// Print raw traceback pointer streams
for (int i = 0; i < N_BLOCKS; i++) {
std::cout << "Query: " << querys_strings.substr(i * MAX_QUERY_LENGTH, MAX_QUERY_LENGTH) << std::endl;
std::cout << "Reference: " << references_strings.substr(i * MAX_REFERENCE_LENGTH, MAX_REFERENCE_LENGTH) << std::endl;
std::cout << "Alignment Scores: " << scores[i] << std::endl;
std::cout << std::endl;
}

// set up the array to store the traceback lengthes
// string query_strings_primitive[N_BLOCKS];
// string reference_strings_primitive[N_BLOCKS];
// for (int i = 0; i < N_BLOCKS; i++){
// query_strings_primitive[i] = querys_strings.substr(i * MAX_QUERY_LENGTH, MAX_QUERY_LENGTH);
// reference_strings_primitive[i] = references_strings.substr(i * MAX_REFERENCE_LENGTH, MAX_REFERENCE_LENGTH);
// }

// tbr_t tb_streams_primitive[N_BLOCKS][MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH];
// for (int i = 0; i < N_BLOCKS; i++){
// for (int j = 0; j < MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH; j++){
// tb_streams_primitive[i][j] = tb_streams[i * (MAX_QUERY_LENGTH + MAX_REFERENCE_LENGTH) + j];
// }
// }

// int tb_qry_lengths[N_BLOCKS];
// int tb_ref_lengths[N_BLOCKS];
// for (int i = 0; i < N_BLOCKS; i++){
// tb_qry_lengths[i] = traceback_start_is[i];
// tb_ref_lengths[i] = traceback_start_js[i];
// }
// std::cout << "Reconstructing Traceback" << std::endl;
// array<map<string, string>, N_BLOCKS> kernel_alignments;
// kernel_alignments = ReconstructTracebackBlocks(
// query_strings_primitive,
// reference_strings_primitive,
// tb_qry_lengths, tb_ref_lengths,
// tb_streams_primitive);

// // Print Actual Alignments
// for (int i = 0; i < N_BLOCKS; i++){
// std::cout << "Block " << i << " Results" << std::endl;
// std::cout << "Query : " << query_strings_primitive[i] << std::endl;
// std::cout << "Reference: " << reference_strings_primitive[i] << std::endl;
// std::cout << "Kernel Aligned Query : " << kernel_alignments[i]["query"] << std::endl;
// std::cout << "Kernel Aligned Reference: " << kernel_alignments[i]["reference"] << std::endl << std::endl;
// }

// Print time
std::cout << "Kernel execution time: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "ms" << std::endl;

std::cout << "Kernel execution complete." << std::endl;
return EXIT_SUCCESS;
}
17 changes: 14 additions & 3 deletions src/seq_align_multiple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,12 @@ extern "C"
Penalties penalties_b[N_BLOCKS];
idx_t tb_is_b[N_BLOCKS];
idx_t tb_js_b[N_BLOCKS];
#ifndef NO_TRACEBACK
tbr_t tb_streams_b[N_BLOCKS][MAX_REFERENCE_LENGTH + MAX_QUERY_LENGTH];

#endif
#ifdef SCORED
type_t scores_b[N_BLOCKS];
#endif
// Attempted to use URAM but it didn't work.
// #pragma HLS bind_storage variable = tb_streams_b type = fifo impl = uram
// #pragma HLS bind_storage variable = querys_b type = ram_1p impl = uram
Expand All @@ -72,7 +76,14 @@ extern "C"
#pragma HLS array_partition variable = penalties_b type = complete dim = 1
#pragma HLS array_partition variable = tb_is_b type = complete dim = 1
#pragma HLS array_partition variable = tb_js_b type = complete dim = 1

#ifndef NO_TRACEBACK
#pragma HLS array_partition variable = tb_streams_b type = complete dim = 1
#endif

#ifdef SCORED
#pragma HLS array_partition variable = scores_b type = complete dim = 1
#endif

// F1 doesn't support axis on the top level. But for other FPGA it might be more optimized.
// #pragma HLS interface mode = axis port = querys_b
Expand Down Expand Up @@ -139,7 +150,7 @@ extern "C"
, tb_streams_b[i]
#endif
#ifdef SCORED
, scores[i]
, scores_b[i]
#endif
#ifdef CMAKEDEBUG
, debugger[i]
Expand Down Expand Up @@ -170,7 +181,7 @@ extern "C"
ExtractAlignmentScores:
for (int i = 0; i < N_BLOCKS; i++)
{
scores[i] = scores[i];
scores[i] = scores_b[i];
}
#endif

Expand Down
6 changes: 5 additions & 1 deletion testbench/test_csim_banding_global_linear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

using namespace std;

#define INPUT_QUERY_LENGTH 256
#define INPUT_REFERENCE_LENGTH 256

char tbp_to_char(tbp_t tbp){
if (tbp == TB_DIAG) return 'D';
else if (tbp == TB_UP) return 'U';
Expand Down Expand Up @@ -220,8 +223,9 @@ int main(){
}
debug_file << endl;

#ifdef CMAKEDEBUG
debuggers[0].dump_scores_infos<N_LAYERS>(debug_file);

#endif

return 0;
}

0 comments on commit 94f0d9f

Please sign in to comment.