Align Pairwise with vectorised{} doesn't speed up the execution time #3134
-
Hi, I am currently writing a benchmark program to compare the runtime of my own sequence alignment implementation with seqan3. For this I am using the align_pairwise function. When measuring the runtime, I noticed that adding seqan3::align_cfg::vectorised{} to the configuration does not reduce the execution time. Here is my code:
#include<utility>
#include <thread>
#include <typeinfo>
#include <limits>
#include <seqan3/alignment/pairwise/align_pairwise.hpp>
#include <seqan3/alignment/scoring/nucleotide_scoring_scheme.hpp>
#include <seqan3/alphabet/nucleotide/dna4.hpp>
#include <seqan3/core/debug_stream.hpp>
#include <seqan3/alignment/configuration/align_config_output.hpp>
#include <seqan3/alignment/configuration/align_config_parallel.hpp>
#include <seqan3/alignment/configuration/align_config_vectorised.hpp>
#include <seqan3/io/sequence_file/input.hpp>
#include <seqan3/alphabet/container/bitpacked_sequence.hpp>
#include "../../helpers/timer.cuh"
using namespace seqan3::literals;
// Traits for seqan3::sequence_file_input: inherits everything from the DNA default traits
// and replaces only the sequence alphabet, so records are read as dna4 instead of dna5.
struct my_traits : public seqan3::sequence_file_input_default_traits_dna
{
    using sequence_alphabet = seqan3::dna4; // instead of dna5
};
// Benchmark driver: reads two sequence files, pairs their records by position and runs
// seqan3::align_pairwise twice on the same pairs -- once with the vectorised configuration
// and once without -- timing each run with a helpers::CpuTimer.
//
// Usage: <program> <sequence-file-1> <sequence-file-2>
// Returns 1 if fewer than two file arguments are given.
int main(int argc, char *argv[])
{
    // argv[1] and argv[2] are read below, so both arguments must be present.
    if (argc < 3)
    {
        seqan3::debug_stream << "Usage: <program> <sequence-file-1> <sequence-file-2>\n";
        return 1;
    }

    // here reading the files and converting them into correct format for computing
    seqan3::sequence_file_input<my_traits> file1{argv[1]};
    seqan3::sequence_file_input<my_traits> file2{argv[2]};
    std::vector<seqan3::dna4_vector> records1{};
    std::vector<seqan3::dna4_vector> records2{};
    for (auto &record : file1)
    {
        records1.push_back(std::move(record.sequence()));
    }
    for (auto &record : file2)
    {
        records2.push_back(std::move(record.sequence()));
    }
    // Materialise the (records1[i], records2[i]) pairs once so both runs align the same input.
    auto recordPairs = seqan3::views::zip(records1, records2) | seqan3::ranges::to<std::vector>();

    // Configure the alignment kernel.
    auto const alignment_method = seqan3::align_cfg::method_global{};
    auto const scoringScheme = seqan3::align_cfg::scoring_scheme{seqan3::nucleotide_scoring_scheme{seqan3::match_score{1}, seqan3::mismatch_score{-1}}};
    auto const gapCost = seqan3::align_cfg::gap_cost_affine{seqan3::align_cfg::open_score{0}, seqan3::align_cfg::extension_score{-1}};
    auto const output = seqan3::align_cfg::output_score{};
    auto const parallel = seqan3::align_cfg::parallel{std::thread::hardware_concurrency()};
    auto const vectorised = seqan3::align_cfg::vectorised{};
    auto const scoreType = seqan3::align_cfg::score_type<int16_t>{};

    // testing runtime of cases with and without vectorization
    seqan3::configuration vectorisedConfig =
        alignment_method | scoringScheme | gapCost | output | scoreType | parallel | vectorised;
    helpers::CpuTimer ComputingTimer("Timer for computing Sequences vectorised");
    ComputingTimer.start();
    auto scores = seqan3::align_pairwise(recordPairs, vectorisedConfig);
    // NOTE(review): seqan3 documents the returned range as lazily evaluated, so only calling
    // begin() is not guaranteed to compute every alignment. Consume the whole range inside the
    // timed region; summing the scores keeps every result in use.
    long long vectorisedScoreSum = 0;
    for (auto const &result : scores)
    {
        vectorisedScoreSum += result.score();
    }
    ComputingTimer.print();
    ComputingTimer.reset();

    seqan3::configuration nonVectorisedConfig = alignment_method | scoringScheme | gapCost | output | scoreType | parallel;
    helpers::CpuTimer ComputingTimer2("Timer for computing Sequences non vectorised");
    ComputingTimer2.start();
    auto scores2 = seqan3::align_pairwise(recordPairs, nonVectorisedConfig);
    long long nonVectorisedScoreSum = 0;
    for (auto const &result : scores2)
    {
        nonVectorisedScoreSum += result.score();
    }
    ComputingTimer2.print();
    ComputingTimer2.reset();

    // Both runs align the same pairs with the same scoring, so the two sums are expected to match;
    // printing them allows a quick cross-check of the two configurations.
    seqan3::debug_stream << "Score sum vectorised: " << vectorisedScoreSum
                         << ", non vectorised: " << nonVectorisedScoreSum << '\n';
} And I am using this cmake File: cmake_minimum_required (VERSION 3.4)
# Build script for the benchmark: one C++ executable linked against seqan3.
project (seqan3_benchmark CXX)
# NOTE(review): the only compiler flag set here is -O3; no -march/-m<isa> option is passed.
# Confirm whether the SIMD instruction sets needed by the vectorised alignment are enabled for this build.
set (CMAKE_CXX_FLAGS "-O3")
# add seqan3 to search path
list (APPEND CMAKE_PREFIX_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../seqan3/build_system")
# require seqan3 with a version between >=3.0.0 and <4.0.0
find_package (seqan3 3.0 REQUIRED)
# build app with seqan3
add_executable (seq3PairwiseAllignementBenchmark seq3PairwiseAllignementBenchmark.cpp)
target_link_libraries (seq3PairwiseAllignementBenchmark seqan3::seqan3) |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment 1 reply
-
Hey there! You indeed need some compiler flags to enable vectorization, for example: set (CMAKE_CXX_FLAGS "-O3 -march=native"). This will auto-detect all supported CPU flags and add them. You can check the supported instructions via: $ lscpu | grep Flags
I have not yet looked at your code in detail, but it seems good 👍. Don't hesitate to ask more questions, we are happy to help and to check whether we would do something differently. |
Beta Was this translation helpful? Give feedback.
Hey there!
You indeed need some compiler flags to enable vectorization.
The easiest is to use
-march=native
This will auto-detect all supported CPU flags and add them.
You can check supported instructions via
$ lscpu | grep Flags
sse4_1 sse4_2 avx avx2
are some of the interesting ones. There's a list of flags for GCC, but just using -march=native
is enough. I have not yet looked at your code in detail, but it seems good 👍.
Don't hesitate to ask more questions, we are happy to help and to check whether we would do something differently.