From 93c3f7a3ec65d7223952db0e428f198b25504bdf Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 10 Jul 2024 09:30:46 +0200 Subject: [PATCH 001/124] Use payload less for old short read code path --- src/snarl_seed_clusterer.cpp | 95 ++++++++++++++++++++---------------- src/snarl_seed_clusterer.hpp | 2 + src/zip_code.cpp | 4 +- src/zip_code.hpp | 4 +- 4 files changed, 62 insertions(+), 43 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index af127519047..9edb155d5e4 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -36,6 +36,7 @@ vector SnarlDistanceIndexClusterer::cluste zip.fill_in_zipcode(distance_index, seed_caches[i].pos); seed_caches[i].zipcode = std::move(zip); } + seed_caches[i].decoder = ZipCodeDecoder(&(seed_caches[i].zipcode)); } vector*> all_seed_caches = {&seed_caches}; @@ -80,6 +81,7 @@ vector> SnarlDistanceIndexClusterer zip.fill_in_zipcode(distance_index, all_seed_caches[read_num][i].pos); all_seed_caches[read_num][i].zipcode = std::move(zip); } + all_seed_caches[read_num][i].decoder = ZipCodeDecoder(&(all_seed_caches[read_num][i].zipcode)); } } vector*> seed_cache_pointers; @@ -359,39 +361,46 @@ cerr << "Add all seeds to nodes: " << endl; //The zipcodes are already filled in //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now - const ZipCode& old_cache = seed.zipcode; + const ZipCode& zip_code = seed.zipcode; + ZipCodeDecoder& decoder = seed.decoder; + + size_t node_depth = decoder.max_depth(); #ifdef DEBUG_CLUSTER cerr << "Using cached values for node " << id << ": " - << ", " << MIPayload::record_offset(old_cache, distance_index, id) - << ", " << MIPayload::parent_record_offset(old_cache, distance_index, id) - << ", " << MIPayload::node_record_offset(old_cache, distance_index, id) - << ", " << MIPayload::node_length(old_cache) - << ", " << MIPayload::prefix_sum(old_cache, 
distance_index, id) - << ", " << MIPayload::chain_component(old_cache, distance_index, id) << endl; + << ", " << MIPayload::record_offset(zip_code, distance_index, id) + << ", " << MIPayload::parent_record_offset(zip_code, distance_index, id) + << ", " << MIPayload::node_record_offset(zip_code, distance_index, id) + << ", " << MIPayload::node_length(zip_code) + << ", " << MIPayload::prefix_sum(zip_code, distance_index, id) + << ", " << MIPayload::chain_component(zip_code, distance_index, id) << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); - assert(MIPayload::record_offset(old_cache, distance_index, id) == distance_index.get_record_offset(handle)); - //assert(MIPayload::parent_record_offset(old_cache, distance_index, id) == + assert(MIPayload::record_offset(zip_code, distance_index, id) == distance_index.get_record_offset(handle)); + //assert(MIPayload::parent_record_offset(zip_code, distance_index, id) == // (distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) // :distance_index.get_record_offset(parent_handle))); - assert(MIPayload::node_record_offset(old_cache, distance_index, id) == distance_index.get_node_record_offset(handle)); - assert(MIPayload::node_length(old_cache) == distance_index.minimum_length(handle)); + assert(MIPayload::node_record_offset(zip_code, distance_index, id) == distance_index.get_node_record_offset(handle)); + assert(MIPayload::node_length(zip_code) == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) // ? 
std::numeric_limits::max() // : distance_index.get_prefix_sum_value(handle); - //assert(MIPayload::prefix_sum(old_cache, distance_index, id) == prefix_sum); - assert(MIPayload::chain_component(old_cache, distance_index, id) == (distance_index.is_multicomponent_chain(parent_handle) + //assert(MIPayload::prefix_sum(zip_code, distance_index, id) == prefix_sum); + assert(MIPayload::chain_component(zip_code, distance_index, id) == (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) : 0)); #endif + //Get the net_handle for the node the seed is on net_handle_t node_net_handle = distance_index.get_node_net_handle(id); + size_t node_chain_component = MIPayload::chain_component(seed.zipcode, distance_index, get_id(seed.pos)); + size_t node_record_offset = MIPayload::node_record_offset(zip_code, distance_index, id); + //Get the parent of the node @@ -399,37 +408,48 @@ cerr << "Add all seeds to nodes: " << endl; //If the grandparent is a root/root snarl, then make it the parent and the node a trivial chain //because they will be clustered here and added to the root instead of being added to the //snarl tree to be clustered - if (MIPayload::is_trivial_chain(old_cache)) { + ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); + ZipCode::code_type_t parent_type = node_depth == 0 ? node_type : decoder.get_code_type(node_depth-1); + auto parent_record_offset = MIPayload::parent_record_offset(zip_code, distance_index, id); + bool parent_is_root = parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE; + //TODO: idk why this doesn't work with the parent_type + bool parent_is_chain = MIPayload::parent_is_chain(zip_code, distance_index, id); + bool is_trivial_chain = node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE; + size_t prefix_sum = is_trivial_chain ? 
0 : decoder.get_offset_in_chain(node_depth, &distance_index); + size_t node_length = decoder.get_length(node_depth, &distance_index); + bool is_reversed_in_parent = decoder.get_is_reversed_in_parent(node_depth); + + if (node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE) { //If the node is a trivial chain, then the parent is just the node but recorded as a chain in the net handle parent = distance_index.get_net_handle_from_values (distance_index.get_record_offset(node_net_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE, - MIPayload::node_record_offset(old_cache, distance_index, id)); - if (MIPayload::parent_record_offset(old_cache, distance_index, id) == 0) { + node_record_offset); + if (parent_record_offset == 0) { //If the parent offset stored in the cache is the root, then this is a trivial chain //child of the root not in a root snarl, so remember the root as the parent and the //trivial chain as the node node_net_handle = parent; parent = distance_index.get_root(); - } else if (MIPayload::parent_is_root(old_cache) && !MIPayload::parent_is_chain(old_cache, distance_index, id)) { + } else if (parent_type == ZipCode::ROOT_SNARL) { //If the parent is a root snarl, then the node becomes the trivial chain //and we get the parent root snarl from the cache node_net_handle = parent; - parent = distance_index.get_net_handle_from_values(MIPayload::parent_record_offset(old_cache, distance_index, id), + parent = distance_index.get_net_handle_from_values(parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); } - } else if (MIPayload::parent_record_offset(old_cache, distance_index, id) == 0) { + } else if (parent_record_offset == 0) { //The parent is just the root parent = distance_index.get_root(); - } else if (MIPayload::parent_is_root(old_cache) && !MIPayload::parent_is_chain(old_cache, distance_index, id)) { + } else if (parent_type == ZipCode::ROOT_SNARL) { //If the parent is a root snarl - parent = 
distance_index.get_net_handle_from_values(MIPayload::parent_record_offset(old_cache, distance_index, id), + parent = distance_index.get_net_handle_from_values(parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); } else { //Otherwise the parent is an actual chain and we use the value from the cache - parent = distance_index.get_net_handle_from_values(MIPayload::parent_record_offset(old_cache, distance_index, id), + parent = distance_index.get_net_handle_from_values(parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); } @@ -456,11 +476,6 @@ cerr << "Add all seeds to nodes: " << endl; //Seed payload is: //record offset of node, record offset of parent, node record offset, node length, is_reversed, is_trivial_chain, parent is chain, parent is root, prefix sum, chain_component - bool is_trivial_chain = MIPayload::is_trivial_chain(old_cache); - size_t prefix_sum = MIPayload::prefix_sum(old_cache, distance_index, id); - size_t node_length = MIPayload::node_length(old_cache); - bool is_reversed_in_parent = MIPayload::is_reversed(old_cache, distance_index, id); - #ifdef DEBUG_CLUSTER //assert(prefix_sum == (is_trivial_chain ? 
std::numeric_limits::max() // : distance_index.get_prefix_sum_value(node_net_handle))); @@ -479,12 +494,13 @@ cerr << "Add all seeds to nodes: " << endl; //Add the parent chain or trivial chain bool new_parent = false; + //TODO: Could get depth from the zipcodes but the idea of depth isn't the same size_t depth; - if (MIPayload::is_trivial_chain(old_cache) && MIPayload::parent_is_chain(old_cache, distance_index, id) && MIPayload::parent_is_root(old_cache)) { + if ((node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE) && parent_type == ZipCode::ROOT_CHAIN) { //If the node is a trivial chain, and the parent we stored is a chain and root, //then the node is in a simple snarl on the root-level chain depth = 2; - } else if (MIPayload::parent_is_root(old_cache)) { + } else if (parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE) { //If the parent is a root (or root-level chain) depth = 1; } else { @@ -547,9 +563,9 @@ cerr << "Add all seeds to nodes: " << endl; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = MIPayload::chain_component(seed.zipcode, distance_index, get_id(seed.pos)); + parent_problem.children.back().chain_component = node_chain_component; parent_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - MIPayload::prefix_sum(seed.zipcode, distance_index, get_id(seed.pos))); + prefix_sum); //And the parent to chains_by_level @@ -560,15 +576,15 @@ cerr << "Add all seeds to nodes: " << endl; //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too if ( new_parent) { - if (is_trivial_chain && !MIPayload::parent_is_root(old_cache)) { - bool grandparent_is_simple_snarl = MIPayload::parent_is_chain(old_cache, distance_index, id); + if (is_trivial_chain && !parent_is_root) { + bool 
grandparent_is_simple_snarl = parent_is_chain; parent_problem.has_parent_handle = true; parent_problem.parent_net_handle = grandparent_is_simple_snarl ? distance_index.get_net_handle_from_values(distance_index.get_record_offset(node_net_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE, 1) - : distance_index.get_net_handle_from_values(MIPayload::parent_record_offset(old_cache, distance_index, id), + : distance_index.get_net_handle_from_values(parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); #ifdef DEBUG_CLUSTER @@ -579,14 +595,14 @@ cerr << "Add all seeds to nodes: " << endl; //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too parent_problem.has_grandparent_handle = true; parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( - MIPayload::parent_record_offset(old_cache, distance_index, id), + parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); #ifdef DEBUG_CLUSTER cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; #endif } - } else if (MIPayload::parent_is_root(old_cache) && MIPayload::parent_is_chain(old_cache, distance_index, id) && !is_trivial_chain) { + } else if (parent_is_root && parent_is_chain && !is_trivial_chain) { //The parent chain is a child of the root parent_problem.has_parent_handle = true; parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( @@ -602,9 +618,6 @@ cerr << "Add all seeds to nodes: " << endl; //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node - //Get the values from the seed. 
Some may be infinite and need to be re-set - size_t node_length = MIPayload::node_length(old_cache); - bool is_reversed_in_parent = MIPayload::is_reversed(old_cache, distance_index, id); //Create a new SnarlTreeNodeProblem for this node @@ -635,9 +648,9 @@ cerr << "Add all seeds to nodes: " << endl; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; node_problem.children.back().has_chain_values = true; - node_problem.children.back().chain_component = MIPayload::chain_component(seed.zipcode, distance_index, get_id(seed.pos)); + node_problem.children.back().chain_component = node_chain_component; node_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - MIPayload::prefix_sum(seed.zipcode, distance_index, get_id(seed.pos))); + prefix_sum); diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 1aac2857c09..7611e7dfade 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -123,6 +123,8 @@ class SnarlDistanceIndexClusterer { //Cached values (zip codes) from the minimizer ZipCode zipcode; + ZipCodeDecoder decoder; + //TODO: This doesn't actually get used but I'll use it if I use the zipcodes properly //std::unique_ptr zipcode_decoder; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a569b90bd87..d5ca65515f4 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -101,7 +101,9 @@ void ZipCode::from_vector(const std::vector& values) { ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : zipcode(zipcode), decoder(0) { - fill_in_full_decoder(); + if (zipcode != nullptr) { + fill_in_full_decoder(); + } } void ZipCodeDecoder::fill_in_full_decoder() { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index aefbdad9f9b..4055c38f48a 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -249,7 +249,7 @@ class ZipCodeDecoder { ///Constructor that goes through the zipcode and decodes it to fill in decoder ///If a depth is given, then only 
fill in up to depth snarl tree nodes ///Otherwise, fill in the whole zipcode - ZipCodeDecoder(const ZipCode* zipcode); + ZipCodeDecoder(const ZipCode* zipcode = nullptr); ///Go through the entire zipcode and fill in the decoder void fill_in_full_decoder(); @@ -319,6 +319,8 @@ class ZipCodeDecoder { /// unit test from the resulting information. void dump(std::ostream& out) const; + //TODO: I want to make a struct for holding all values of a code as real values + }; std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); From 783e452955d292223580acb5497f424c9582c3c9 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 10 Jul 2024 11:45:19 +0200 Subject: [PATCH 002/124] Use the distance index a bit less --- src/snarl_seed_clusterer.cpp | 2 +- src/zip_code.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 9edb155d5e4..92419327600 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -461,7 +461,7 @@ cerr << "Add all seeds to nodes: " << endl; assert( distance_index.start_end_traversal_of(parent) == distance_index.start_end_traversal_of(distance_index.get_parent(node_net_handle))); } #endif - if (!distance_index.is_root(parent)) { + if (!(parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) #ifdef DEBUG_CLUSTER diff --git a/src/zip_code.cpp b/src/zip_code.cpp index d5ca65515f4..99628fea186 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1847,6 +1847,7 @@ bool MIPayload::parent_is_chain(const ZipCode& zip, const SnarlDistanceIndex& di //Otherwise, check the last thing in the zipcode to get the node values size_t node_depth = decoder.decoder_length()-1; + ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); ZipCode::code_type_t parent_type = decoder.get_code_type(node_depth-1); if (parent_type == 
ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { //If the parent is an irregular snarl @@ -1855,9 +1856,8 @@ bool MIPayload::parent_is_chain(const ZipCode& zip, const SnarlDistanceIndex& di } else if (parent_type == ZipCode::REGULAR_SNARL) { - net_handle_t node_handle = distance_index.get_node_net_handle(id); - net_handle_t parent = distance_index.get_parent(node_handle); - if (distance_index.is_trivial_chain(parent)) { + if (node_type == ZipCode::CHAIN) { + net_handle_t parent = distance_index.get_parent(distance_index.get_node_net_handle(id)); if (distance_index.is_simple_snarl(distance_index.get_parent(parent))) { return true; } else { From 425c4cf786cfa8dd236a20bc0aaf92d6edcc59ec Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 10 Jul 2024 15:05:42 +0200 Subject: [PATCH 003/124] Stop making new decoders --- src/snarl_seed_clusterer.cpp | 60 +++++++++++++++++---------------- src/zip_code.cpp | 64 ++++++++++++++++-------------------- src/zip_code.hpp | 46 +++++++++++++------------- 3 files changed, 85 insertions(+), 85 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 92419327600..21fc7ad2715 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -369,11 +369,11 @@ cerr << "Add all seeds to nodes: " << endl; #ifdef DEBUG_CLUSTER cerr << "Using cached values for node " << id << ": " << ", " << MIPayload::record_offset(zip_code, distance_index, id) - << ", " << MIPayload::parent_record_offset(zip_code, distance_index, id) + << ", " << MIPayload::parent_record_offset(zip_code, decoder, distance_index, id) << ", " << MIPayload::node_record_offset(zip_code, distance_index, id) - << ", " << MIPayload::node_length(zip_code) - << ", " << MIPayload::prefix_sum(zip_code, distance_index, id) - << ", " << MIPayload::chain_component(zip_code, distance_index, id) << endl; + << ", " << MIPayload::node_length(zip_code, decoder) + << ", " << MIPayload::prefix_sum(zip_code, decoder, distance_index, 
id) + << ", " << MIPayload::chain_component(zip_code, decoder, distance_index, id) << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); @@ -383,12 +383,12 @@ cerr << "Add all seeds to nodes: " << endl; // (distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) // :distance_index.get_record_offset(parent_handle))); assert(MIPayload::node_record_offset(zip_code, distance_index, id) == distance_index.get_node_record_offset(handle)); - assert(MIPayload::node_length(zip_code) == distance_index.minimum_length(handle)); + assert(MIPayload::node_length(zip_code, decoder) == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) // ? std::numeric_limits::max() // : distance_index.get_prefix_sum_value(handle); //assert(MIPayload::prefix_sum(zip_code, distance_index, id) == prefix_sum); - assert(MIPayload::chain_component(zip_code, distance_index, id) == (distance_index.is_multicomponent_chain(parent_handle) + assert(MIPayload::chain_component(zip_code, decoder, distance_index, id) == (distance_index.is_multicomponent_chain(parent_handle) ? 
distance_index.get_chain_component(handle) : 0)); @@ -398,7 +398,7 @@ cerr << "Add all seeds to nodes: " << endl; //Get the net_handle for the node the seed is on net_handle_t node_net_handle = distance_index.get_node_net_handle(id); - size_t node_chain_component = MIPayload::chain_component(seed.zipcode, distance_index, get_id(seed.pos)); + size_t node_chain_component = MIPayload::chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); size_t node_record_offset = MIPayload::node_record_offset(zip_code, distance_index, id); @@ -410,10 +410,10 @@ cerr << "Add all seeds to nodes: " << endl; //snarl tree to be clustered ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); ZipCode::code_type_t parent_type = node_depth == 0 ? node_type : decoder.get_code_type(node_depth-1); - auto parent_record_offset = MIPayload::parent_record_offset(zip_code, distance_index, id); + auto parent_record_offset = MIPayload::parent_record_offset(zip_code, decoder, distance_index, id); bool parent_is_root = parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE; //TODO: idk why this doesn't work with the parent_type - bool parent_is_chain = MIPayload::parent_is_chain(zip_code, distance_index, id); + bool parent_is_chain = MIPayload::parent_is_chain(zip_code, decoder, distance_index, id); bool is_trivial_chain = node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE; size_t prefix_sum = is_trivial_chain ? 0 : decoder.get_offset_in_chain(node_depth, &distance_index); size_t node_length = decoder.get_length(node_depth, &distance_index); @@ -1937,11 +1937,13 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; size_t last_length = last_child.is_seed - ? 
MIPayload::node_length(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).zipcode) + ? MIPayload::node_length(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).zipcode, + clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).decoder) : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed ? MIPayload::chain_component(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).zipcode, + clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).decoder, distance_index, get_id(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).pos)) : clustering_problem.all_node_problems.at( @@ -2202,17 +2204,17 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))) { + } else if ( last_chain_component_end == MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = 
MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)); + distance_from_last_child_to_current_child = MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); //Distance is the current node's prefix sum minus the distance from the start of the chain to the last node - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)), + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)), distance_from_chain_start_to_last_node); } } @@ -2231,27 +2233,27 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_current_end_to_end_of_chain = 0; } else if (SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { //If this is the last node in the chain - if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))) { + if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { distance_from_current_end_to_end_of_chain = 0; } - } else if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))) { + } else if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, 
current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { //Length of the chain - (prefix sum + node length of the current node) distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, - SnarlDistanceIndex::sum(MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)), - MIPayload::node_length(current_child_seed.zipcode))); + SnarlDistanceIndex::sum(MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)), + MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder))); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)) != 0 ? std::numeric_limits::max() : MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0 ? 
std::numeric_limits::max() : MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2286,13 +2288,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)) != 0) { + if (MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { //Prefix sum + offset of the seed in the node current_child_seed.distance_left = SnarlDistanceIndex::sum(current_child_seed.distance_left, - MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))); + MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))); } current_child_seed.distance_right = SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain); @@ -2337,16 +2339,16 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : (last_child.net_handle == current_child.net_handle ? 
0 - : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, MIPayload::node_length(current_child_seed.zipcode))); + : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder))); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) //Left distance is the prefix sum (or inf if the node isn't in the first component of the chain) + offset of seed in node //Right distance is the right offst of the seed in the node + the distance from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)) != 0 ? std::numeric_limits::max() + MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0 ? 
std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, - MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos))), + MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2380,7 +2382,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node - distance_between -= MIPayload::node_length(current_child_seed.zipcode); + distance_between -= MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder); } #ifdef DEBUG_CLUSTER @@ -2489,9 +2491,9 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = MIPayload::prefix_sum(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)); - last_length = MIPayload::node_length(current_child_seed.zipcode); - last_chain_component_end = MIPayload::chain_component(current_child_seed.zipcode, distance_index, get_id(current_child_seed.pos)); + last_prefix_sum = MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); + last_length = MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder); + last_chain_component_end = MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); } @@ -3176,6 +3178,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr if (include_prefix_sum) { dist_left = SnarlDistanceIndex::sum(dist_left, MIPayload::prefix_sum( 
clustering_problem.all_seeds->at(read_num)->at(seed_i).zipcode, + clustering_problem.all_seeds->at(read_num)->at(seed_i).decoder, distance_index, get_id(clustering_problem.all_seeds->at(read_num)->at(seed_i).pos))); } //Since we only stored the proper distance left for seeds on chains @@ -3213,7 +3216,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr const SeedCache& first_seed = clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second); //TOOD: get_id is weird node_problem->fragment_best_left = SnarlDistanceIndex::sum(first_seed.distance_left, - include_prefix_sum ? MIPayload::prefix_sum(first_seed.zipcode, distance_index, get_id(clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second).pos)) : 0); + include_prefix_sum ? MIPayload::prefix_sum(first_seed.zipcode, first_seed.decoder, distance_index, get_id(clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second).pos)) : 0); //Record the new cluster for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++ ) { @@ -3260,6 +3263,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr if (include_prefix_sum) { offset = SnarlDistanceIndex::sum(offset, MIPayload::prefix_sum( clustering_problem.all_seeds->at(read_num)->at(seed_num).zipcode, + clustering_problem.all_seeds->at(read_num)->at(seed_num).decoder, distance_index, get_id( clustering_problem.all_seeds->at(read_num)->at(seed_num).pos))); } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 99628fea186..ffc261875cf 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -100,14 +100,14 @@ void ZipCode::from_vector(const std::vector& values) { } ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : - zipcode(zipcode), 
decoder(0) { + zipcode(zipcode), decoder(0), finished_decoding(false) { if (zipcode != nullptr) { fill_in_full_decoder(); } } void ZipCodeDecoder::fill_in_full_decoder() { - if (zipcode->byte_count() == 0) { + if (zipcode->byte_count() == 0 || finished_decoding) { //If the zipcode is empty return; } @@ -115,12 +115,16 @@ void ZipCodeDecoder::fill_in_full_decoder() { while (!done) { done = fill_in_next_decoder(); } + finished_decoding = true; } bool ZipCodeDecoder::fill_in_next_decoder() { #ifdef DEBUG_ZIPCODE cerr << "Decode one more thing in the zipcode. Currently decoded " << decoder_length() << " things" << endl; #endif + if (finished_decoding) { + return true; + } //The zipcode may be partially or fully filled in already, so first //check to see how much has been filled in @@ -167,6 +171,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? "chain or node" #ifdef DEBUG_ZIPCODE cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #endif + finished_decoding = true; return true; } else { //Otherwise, check if this is a node or a snarl. 
If it is a node, then there are three things remaining @@ -226,6 +231,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #ifdef DEBUG_ZIPCODE cerr << "\tThe last thing was a chain pretending to be a node so we're done" << endl; #endif + finished_decoding = true; return true; } //Now check if it was actually a real node @@ -245,6 +251,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #ifdef DEBUG_ZIPCODE cerr << "\tThe last thing was a node so we're done" << endl; #endif + finished_decoding = true; return true; } else { //Otherwise, the last thing was a chain @@ -311,12 +318,12 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } } -size_t ZipCodeDecoder::max_depth() { +size_t ZipCodeDecoder::max_depth() const { return decoder_length()-1; } -ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) { +ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { //Now get the code type //A snarl is always a snarl. 
A chain could actually be a node @@ -362,7 +369,7 @@ ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) { } } -size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) { +size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -405,7 +412,7 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* } } -size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) { +size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { if (depth == 0) { @@ -431,7 +438,7 @@ size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) { } } -size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) { +size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -458,7 +465,7 @@ size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDis } } -size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index) { +size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -490,7 +497,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } -bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) { +bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { if (depth == 0) { @@ -536,7 +543,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) { } } -net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) { +net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -579,7 +586,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist } } -net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) { +net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { net_handle_t n = distance_index->get_node_net_handle(id); for (size_t d = max_depth() ; d > depth ; d--) { n = distance_index->get_parent(n); @@ -591,7 +598,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, } -size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) { +size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { if (depth == 0) { @@ -632,7 +639,7 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) { } } } -size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) { +size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { #ifdef DEBUG_ZIPCODE assert(depth > 0); @@ -677,7 +684,7 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna } } -const bool ZipCodeDecoder::is_equal(ZipCodeDecoder& decoder1, ZipCodeDecoder& decoder2, +const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, const size_t& 
depth) { if (decoder1.max_depth() < depth && decoder2.max_depth() < depth ) { @@ -1653,10 +1660,8 @@ size_t MIPayload::record_offset(const ZipCode& code, const SnarlDistanceIndex& d return distance_index.get_record_offset(node_handle); } -size_t MIPayload::parent_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - ZipCodeDecoder decoder (&zip); bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; @@ -1717,8 +1722,7 @@ size_t MIPayload::node_record_offset(const ZipCode& zip, const SnarlDistanceInde return distance_index.get_node_record_offset(node_handle); } -size_t MIPayload::node_length(const ZipCode& zip) { - ZipCodeDecoder decoder (&zip); +size_t MIPayload::node_length(const ZipCode& zip, const ZipCodeDecoder& decoder) { if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1738,10 +1742,8 @@ size_t MIPayload::node_length(const ZipCode& zip) { } } -bool MIPayload::is_reversed(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +bool MIPayload::is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - ZipCodeDecoder decoder (&zip); - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1783,9 +1785,8 @@ bool MIPayload::is_reversed(const ZipCode& zip, const SnarlDistanceIndex& distan } } -bool MIPayload::is_trivial_chain(const ZipCode& zip) { +bool MIPayload::is_trivial_chain(const ZipCode& zip, const ZipCodeDecoder& decoder) { - ZipCodeDecoder decoder (&zip); bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { @@ -1823,10 +1824,8 @@ bool MIPayload::is_trivial_chain(const ZipCode& 
zip) { } } -bool MIPayload::parent_is_chain(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +bool MIPayload::parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - ZipCodeDecoder decoder (&zip); - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1877,10 +1876,8 @@ bool MIPayload::parent_is_chain(const ZipCode& zip, const SnarlDistanceIndex& di } -bool MIPayload::parent_is_root(const ZipCode& zip) { +bool MIPayload::parent_is_root(const ZipCode& zip, const ZipCodeDecoder& decoder) { - ZipCodeDecoder decoder (&zip); - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1904,10 +1901,8 @@ bool MIPayload::parent_is_root(const ZipCode& zip) { } -size_t MIPayload::prefix_sum(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::prefix_sum(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - ZipCodeDecoder decoder (&zip); - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1944,9 +1939,8 @@ size_t MIPayload::prefix_sum(const ZipCode& zip, const SnarlDistanceIndex& dista } } -size_t MIPayload::chain_component(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::chain_component(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - ZipCodeDecoder decoder (&zip); bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4055c38f48a..5d7d7bd4d06 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp 
@@ -244,6 +244,9 @@ class ZipCodeDecoder { ///The zipcode that this is decoding const ZipCode* zipcode; + ///Did we fill in the entire decoder + bool finished_decoding; + public: ///Constructor that goes through the zipcode and decodes it to fill in decoder @@ -259,60 +262,59 @@ class ZipCodeDecoder { bool fill_in_next_decoder(); ///What is the maximum depth of this zipcode? - ///This will entirely fill in the zipcode - size_t max_depth(); + size_t max_depth() const; ///How many codes in the zipcode have been decoded? - size_t decoder_length() {return decoder.size();} + size_t decoder_length() const {return decoder.size();} ///What type of snarl tree node is at the given depth (index into the zipcode) - ZipCode::code_type_t get_code_type(const size_t& depth) ; + ZipCode::code_type_t get_code_type(const size_t& depth) const ; ///Get the length of a snarl tree node given the depth in the snarl tree ///This requires the distance index for irregular snarls (except for a top-level snarl) ///Throws an exception if the distance index is not given when it is needed ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) ; + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl - size_t get_rank_in_snarl(const size_t& depth) ; + size_t get_rank_in_snarl(const size_t& depth) const ; ///Get the number of children in a snarl. 
Throw an exception if it isn't a snarl - size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) ; + size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; ///Get the prefix sum of a child of a chain ///This requires the distance index for irregular snarls (except for a top-level snarl) ///Throws an exception if the distance index is not given when it is needed ///Doesn't use a given distance index if it isn't needed - size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) ; + size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; ///Is the snarl tree node backwards relative to its parent - bool get_is_reversed_in_parent(const size_t& depth); + bool get_is_reversed_in_parent(const size_t& depth) const; ///Get the handle of the thing at the given depth. This can only be used for ///Root-level structures or irregular snarls - net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) ; + net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; ///Get the handle of the thing at the given depth. This can be used for anything but is slow, /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index); + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; ///Get the information that was stored to get the address in the distance index ///This is the connected component number for a root structure, or the address of ///an irregular snarl. Throws an error for anything else ///This is used for checking equality without looking at the distance index. 
///Use get_net_handle for getting the actual handle - size_t get_distance_index_address(const size_t& depth) ; + size_t get_distance_index_address(const size_t& depth) const; /// The minimum distance from start or end of the snarl to the left or right side of the child - size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side); + size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; ///Are the two decoders pointing to the same snarl tree node at the given depth ///This only checks if the values in the zipcode are the same at the given depth, ///so if the preceeding snarl tree nodes are different, ///then this might actually refer to different things - const static bool is_equal(ZipCodeDecoder& decoder1, ZipCodeDecoder& decoder2, + const static bool is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, const size_t& depth); /// Dump a ZipCodeDecoder to a stream so that it can be reconstructed for a @@ -343,23 +345,23 @@ struct MIPayload { //How do decode the zipcode to get the old payload values static size_t record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); - static size_t parent_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static size_t parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); static size_t node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); - static size_t node_length(const ZipCode& zip); + static size_t node_length(const ZipCode& zip, const ZipCodeDecoder& decoder); - static bool is_reversed(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static bool is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static bool is_trivial_chain 
(const ZipCode& zip); + static bool is_trivial_chain (const ZipCode& zip, const ZipCodeDecoder& decoder); - static bool parent_is_chain(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static bool parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static bool parent_is_root (const ZipCode& zip); + static bool parent_is_root (const ZipCode& zip, const ZipCodeDecoder& decoder); - static size_t prefix_sum (const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static size_t prefix_sum (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static size_t chain_component (const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static size_t chain_component (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); }; From 6e8f889941f497cca1a10ee5d434babaadacd970 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 10 Jul 2024 16:44:12 +0200 Subject: [PATCH 004/124] Get parent depth from zipcodes but idk if its any faster --- src/snarl_seed_clusterer.cpp | 38 ++++++++++++------------------------ 1 file changed, 13 insertions(+), 25 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 21fc7ad2715..1d9b38fc111 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -495,18 +495,15 @@ cerr << "Add all seeds to nodes: " << endl; //Add the parent chain or trivial chain bool new_parent = false; //TODO: Could get depth from the zipcodes but the idea of depth isn't the same - size_t depth; - if ((node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE) && parent_type == ZipCode::ROOT_CHAIN) { - //If the node is a trivial chain, and the parent we stored is a chain and root, - //then the node is in a simple snarl on the root-level chain - depth = 
2; - } else if (parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE) { - //If the parent is a root (or root-level chain) - depth = 1; - } else { - //Otherwise get it later from parent_node_cluster_offset_to_depth - depth = std::numeric_limits::max(); + size_t parent_depth = 0; + for (size_t d = 0 ; d <= node_depth ; d++) { + auto type = decoder.get_code_type(d); + if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { + parent_depth++; + } } + + new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it @@ -524,26 +521,17 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.seed_count_prefix_sum.back(), distance_index); } - //Get the depth from the parent if we didn't cache it - if (depth == std::numeric_limits::max()) { - depth = distance_index.get_depth(parent); - } - parent_to_depth.emplace(parent, depth); + parent_to_depth.emplace(parent, parent_depth); new_parent = true; - } else { - //If we've seen the parent before, just find its index into all_node_problems and its depth - if (depth == std::numeric_limits::max()) { - depth = parent_to_depth[parent]; - } } #ifdef DEBUG_CLUSTER - assert(depth == distance_index.get_depth(parent)); + assert(parent_depth == distance_index.get_depth(parent)); #endif //If chains_by_level isn't big enough for this depth, resize it and reserve space at each level - if (depth+1 > chains_by_level.size()) { - size_t to_add = (depth+1) - chains_by_level.size(); + if (parent_depth+1 > chains_by_level.size()) { + size_t to_add = (parent_depth+1) - chains_by_level.size(); for (size_t i = 0 ; i < to_add ; i++) { chains_by_level.emplace_back(); chains_by_level.back().reserve(clustering_problem.seed_count_prefix_sum.back()); @@ -570,7 +558,7 @@ cerr << "Add all seeds to nodes: " << endl; //And the parent to chains_by_level if (new_parent) { - 
chains_by_level[depth].emplace_back(parent); + chains_by_level[parent_depth].emplace_back(parent); } From 7fc2f1fd2bdfd3650c58484a6e165d89be4cd60e Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 10 Jul 2024 17:01:42 +0200 Subject: [PATCH 005/124] Save chain component --- src/snarl_seed_clusterer.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 1d9b38fc111..93a97b583c9 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -399,6 +399,7 @@ cerr << "Add all seeds to nodes: " << endl; //Get the net_handle for the node the seed is on net_handle_t node_net_handle = distance_index.get_node_net_handle(id); size_t node_chain_component = MIPayload::chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); + seed.chain_component=node_chain_component; size_t node_record_offset = MIPayload::node_record_offset(zip_code, distance_index, id); @@ -1930,10 +1931,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed - ? MIPayload::chain_component(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).zipcode, - clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).decoder, - distance_index, - get_id(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).pos)) + ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).chain_component : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; @@ -2192,7 +2190,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { + } else if ( last_chain_component_end == current_child_seed.chain_component) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance @@ -2221,13 +2219,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_current_end_to_end_of_chain = 0; } else if (SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { //If this is the last node in the chain - if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { + if (chain_problem->chain_component_end != current_child_seed.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { distance_from_current_end_to_end_of_chain = 0; } - } else if (chain_problem->chain_component_end != MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) { + } else if 
(chain_problem->chain_component_end != current_child_seed.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { @@ -2241,7 +2239,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0 ? std::numeric_limits::max() : MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.chain_component != 0 ? std::numeric_limits::max() : MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2276,7 +2274,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0) { + if (current_child_seed.chain_component != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { @@ -2334,7 +2332,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Right distance is the right offst of the seed in the node + the distance 
from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)) != 0 ? std::numeric_limits::max() + current_child_seed.chain_component != 0 ? std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2481,7 +2479,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c last_child = current_child; last_prefix_sum = MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); last_length = MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder); - last_chain_component_end = MIPayload::chain_component(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); + last_chain_component_end = current_child_seed.chain_component; } From 45d4987748f4d640000f188fb86289c606ebbcbe Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 11 Jul 2024 10:08:27 +0200 Subject: [PATCH 006/124] Cache more stuff in the seed from the zipcodes --- src/snarl_seed_clusterer.cpp | 53 +++++++++++++++++------------------- src/snarl_seed_clusterer.hpp | 5 +++- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 93a97b583c9..081a81b729a 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -399,7 +399,7 @@ cerr << "Add all seeds to nodes: " << endl; //Get the net_handle for the node the seed is on net_handle_t node_net_handle = distance_index.get_node_net_handle(id); size_t 
node_chain_component = MIPayload::chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); - seed.chain_component=node_chain_component; + seed.payload_chain_component=node_chain_component; size_t node_record_offset = MIPayload::node_record_offset(zip_code, distance_index, id); @@ -417,7 +417,9 @@ cerr << "Add all seeds to nodes: " << endl; bool parent_is_chain = MIPayload::parent_is_chain(zip_code, decoder, distance_index, id); bool is_trivial_chain = node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE; size_t prefix_sum = is_trivial_chain ? 0 : decoder.get_offset_in_chain(node_depth, &distance_index); + seed.payload_prefix_sum = prefix_sum; size_t node_length = decoder.get_length(node_depth, &distance_index); + seed.payload_node_length = node_length; bool is_reversed_in_parent = decoder.get_is_reversed_in_parent(node_depth); if (node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE) { @@ -1926,12 +1928,11 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; size_t last_length = last_child.is_seed - ? MIPayload::node_length(clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).zipcode, - clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).decoder) + ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload_node_length : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).chain_component + ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload_chain_component : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; @@ -2190,17 +2191,17 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == current_child_seed.chain_component) { + } else if ( last_chain_component_end == current_child_seed.payload_chain_component) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); + distance_from_last_child_to_current_child = current_child_seed.payload_prefix_sum; } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); //Distance is the current node's prefix sum minus the distance from the start of the chain to the last node - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)), + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.payload_prefix_sum, distance_from_chain_start_to_last_node); } } @@ -2219,27 +2220,27 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c 
distance_from_current_end_to_end_of_chain = 0; } else if (SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { //If this is the last node in the chain - if (chain_problem->chain_component_end != current_child_seed.chain_component) { + if (chain_problem->chain_component_end != current_child_seed.payload_chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { distance_from_current_end_to_end_of_chain = 0; } - } else if (chain_problem->chain_component_end != current_child_seed.chain_component) { + } else if (chain_problem->chain_component_end != current_child_seed.payload_chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { //Length of the chain - (prefix sum + node length of the current node) distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, - SnarlDistanceIndex::sum(MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)), - MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder))); + SnarlDistanceIndex::sum(current_child_seed.payload_prefix_sum, + current_child_seed.payload_node_length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.chain_component != 0 ? std::numeric_limits::max() : MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.payload_chain_component != 0 ? 
std::numeric_limits::max() : current_child_seed.payload_prefix_sum) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2274,13 +2275,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (current_child_seed.chain_component != 0) { + if (current_child_seed.payload_chain_component != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { //Prefix sum + offset of the seed in the node current_child_seed.distance_left = SnarlDistanceIndex::sum(current_child_seed.distance_left, - MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))); + current_child_seed.payload_prefix_sum); } current_child_seed.distance_right = SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain); @@ -2325,16 +2326,16 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : (last_child.net_handle == current_child.net_handle ? 
0 - : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder))); + : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload_node_length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) //Left distance is the prefix sum (or inf if the node isn't in the first component of the chain) + offset of seed in node //Right distance is the right offst of the seed in the node + the distance from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - current_child_seed.chain_component != 0 ? std::numeric_limits::max() + current_child_seed.payload_chain_component != 0 ? std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, - MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos))), + current_child_seed.payload_prefix_sum), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2368,7 +2369,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node - distance_between -= MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder); + distance_between -= current_child_seed.payload_node_length; } #ifdef DEBUG_CLUSTER @@ -2477,9 +2478,9 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = MIPayload::prefix_sum(current_child_seed.zipcode, current_child_seed.decoder, distance_index, get_id(current_child_seed.pos)); 
- last_length = MIPayload::node_length(current_child_seed.zipcode, current_child_seed.decoder); - last_chain_component_end = current_child_seed.chain_component; + last_prefix_sum = current_child_seed.payload_prefix_sum; + last_length = current_child_seed.payload_node_length; + last_chain_component_end = current_child_seed.payload_chain_component; } @@ -3163,9 +3164,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t dist_left = clustering_problem.all_seeds->at(read_num)->at(seed_i).distance_left; if (include_prefix_sum) { dist_left = SnarlDistanceIndex::sum(dist_left, - MIPayload::prefix_sum( clustering_problem.all_seeds->at(read_num)->at(seed_i).zipcode, - clustering_problem.all_seeds->at(read_num)->at(seed_i).decoder, - distance_index, get_id(clustering_problem.all_seeds->at(read_num)->at(seed_i).pos))); + clustering_problem.all_seeds->at(read_num)->at(seed_i).payload_prefix_sum); } //Since we only stored the proper distance left for seeds on chains size_t dist_right = structure_length - dist_left + 1; @@ -3202,7 +3201,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr const SeedCache& first_seed = clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second); //TOOD: get_id is weird node_problem->fragment_best_left = SnarlDistanceIndex::sum(first_seed.distance_left, - include_prefix_sum ? MIPayload::prefix_sum(first_seed.zipcode, first_seed.decoder, distance_index, get_id(clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second).pos)) : 0); + include_prefix_sum ? 
first_seed.payload_prefix_sum : 0); //Record the new cluster for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++ ) { @@ -3248,9 +3247,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t offset = clustering_problem.all_seeds->at(read_num)->at(seed_num).distance_left; if (include_prefix_sum) { offset = SnarlDistanceIndex::sum(offset, - MIPayload::prefix_sum( clustering_problem.all_seeds->at(read_num)->at(seed_num).zipcode, - clustering_problem.all_seeds->at(read_num)->at(seed_num).decoder, - distance_index, get_id( clustering_problem.all_seeds->at(read_num)->at(seed_num).pos))); + clustering_problem.all_seeds->at(read_num)->at(seed_num).payload_prefix_sum); } //First and last offset and last cluster head for this read diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 7611e7dfade..f6ea0d74cb9 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -134,7 +134,10 @@ class SnarlDistanceIndexClusterer { //to the right side of the node, relative to the chain size_t distance_left = std::numeric_limits::max(); size_t distance_right = std::numeric_limits::max(); - size_t chain_component = std::numeric_limits::max(); + //Values from the payload that we're saving + size_t payload_chain_component = std::numeric_limits::max(); + size_t payload_prefix_sum = std::numeric_limits::max(); + size_t payload_node_length = std::numeric_limits::max(); }; From d8c6e18bdc35af019090c5a782fa526097a386ac Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 12 Jul 2024 09:57:26 +0200 Subject: [PATCH 007/124] Try getting all values at once but it doesnt work so Im going to try going beck to the payload --- src/snarl_seed_clusterer.cpp | 15 ++- src/snarl_seed_clusterer.hpp | 3 + src/zip_code.cpp | 174 ++++++++++++++++++++++++++++++++--- src/zip_code.hpp | 49 ++++++---- 4 files changed, 210 insertions(+), 31 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp 
b/src/snarl_seed_clusterer.cpp index 081a81b729a..776dd66005e 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -37,6 +37,7 @@ vector SnarlDistanceIndexClusterer::cluste seed_caches[i].zipcode = std::move(zip); } seed_caches[i].decoder = ZipCodeDecoder(&(seed_caches[i].zipcode)); + seed_caches[i].payload = seed_caches[i].decoder.get_payload_from_zipcode(id(seed_caches[i].pos), distance_index); } vector*> all_seed_caches = {&seed_caches}; @@ -82,6 +83,7 @@ vector> SnarlDistanceIndexClusterer all_seed_caches[read_num][i].zipcode = std::move(zip); } all_seed_caches[read_num][i].decoder = ZipCodeDecoder(&(all_seed_caches[read_num][i].zipcode)); + all_seed_caches[read_num][i].payload = all_seed_caches[read_num][i].decoder.get_payload_from_zipcode(id(all_seed_caches[read_num][i].pos), distance_index); } } vector*> seed_cache_pointers; @@ -361,6 +363,7 @@ cerr << "Add all seeds to nodes: " << endl; //The zipcodes are already filled in //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now + const MIPayload& payload = seed.payload; const ZipCode& zip_code = seed.zipcode; ZipCodeDecoder& decoder = seed.decoder; @@ -398,9 +401,9 @@ cerr << "Add all seeds to nodes: " << endl; //Get the net_handle for the node the seed is on net_handle_t node_net_handle = distance_index.get_node_net_handle(id); - size_t node_chain_component = MIPayload::chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); + size_t node_chain_component = MIPayload::get_chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); seed.payload_chain_component=node_chain_component; - size_t node_record_offset = MIPayload::node_record_offset(zip_code, distance_index, id); + size_t node_record_offset = MIPayload::get_node_record_offset(zip_code, distance_index, id); @@ -411,10 +414,10 @@ cerr << "Add all seeds to nodes: " << endl; //snarl tree to 
be clustered ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); ZipCode::code_type_t parent_type = node_depth == 0 ? node_type : decoder.get_code_type(node_depth-1); - auto parent_record_offset = MIPayload::parent_record_offset(zip_code, decoder, distance_index, id); + auto parent_record_offset = MIPayload::get_parent_record_offset(zip_code, decoder, distance_index, id); bool parent_is_root = parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE; //TODO: idk why this doesn't work with the parent_type - bool parent_is_chain = MIPayload::parent_is_chain(zip_code, decoder, distance_index, id); + bool parent_is_chain = MIPayload::get_parent_is_chain(zip_code, decoder, distance_index, id); bool is_trivial_chain = node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE; size_t prefix_sum = is_trivial_chain ? 0 : decoder.get_offset_in_chain(node_depth, &distance_index); seed.payload_prefix_sum = prefix_sum; @@ -456,6 +459,10 @@ cerr << "Add all seeds to nodes: " << endl; SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); } + //cerr << "node and parent " << distance_index.net_handle_as_string(node_net_handle) << " " << distance_index.net_handle_as_string(parent) << endl; + //cerr << "node and parent " << distance_index.net_handle_as_string(payload.node_handle) << " " << distance_index.net_handle_as_string(payload.parent_handle) << endl; + //assert(node_net_handle == payload.node_handle); + //assert(parent == payload.parent_handle); #ifdef DEBUG_CLUSTER diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index f6ea0d74cb9..759762d7f09 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -125,6 +125,9 @@ class SnarlDistanceIndexClusterer { ZipCodeDecoder decoder; + //TODO: I think I can skip the zipcode now since I have the payload + MIPayload payload; + //TODO: This doesn't actually get used but I'll use it if I use the zipcodes 
properly //std::unique_ptr zipcode_decoder; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index ffc261875cf..766500fb6dd 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -480,7 +480,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista } size_t zip_value; size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OR_RANK_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } @@ -1653,14 +1653,14 @@ void ZipCodeCollection::deserialize(std::istream& in) { } -size_t MIPayload::record_offset(const ZipCode& code, const SnarlDistanceIndex& distance_index, const nid_t& id ) { +size_t MIPayload::get_record_offset(const ZipCode& code, const SnarlDistanceIndex& distance_index, const nid_t& id ) { //TODO: This is pointless but I'll keep it until I fix everything net_handle_t node_handle = distance_index.get_node_net_handle(id); return distance_index.get_record_offset(node_handle); } -size_t MIPayload::parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::get_parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; @@ -1715,14 +1715,14 @@ size_t MIPayload::parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& } } -size_t MIPayload::node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::get_node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { //TODO: This is pointless but I'll keep it until I fix everything net_handle_t node_handle = distance_index.get_node_net_handle(id); return 
distance_index.get_node_record_offset(node_handle); } -size_t MIPayload::node_length(const ZipCode& zip, const ZipCodeDecoder& decoder) { +size_t MIPayload::get_node_length(const ZipCode& zip, const ZipCodeDecoder& decoder) { if (decoder.decoder_length() == 1) { //If the root-level structure is a node @@ -1742,7 +1742,7 @@ size_t MIPayload::node_length(const ZipCode& zip, const ZipCodeDecoder& decoder) } } -bool MIPayload::is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { +bool MIPayload::get_is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { @@ -1785,7 +1785,7 @@ bool MIPayload::is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, c } } -bool MIPayload::is_trivial_chain(const ZipCode& zip, const ZipCodeDecoder& decoder) { +bool MIPayload::get_is_trivial_chain(const ZipCode& zip, const ZipCodeDecoder& decoder) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; @@ -1824,7 +1824,7 @@ bool MIPayload::is_trivial_chain(const ZipCode& zip, const ZipCodeDecoder& decod } } -bool MIPayload::parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { +bool MIPayload::get_parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { @@ -1876,7 +1876,7 @@ bool MIPayload::parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decode } -bool MIPayload::parent_is_root(const ZipCode& zip, const ZipCodeDecoder& decoder) { +bool MIPayload::get_parent_is_root(const ZipCode& zip, const ZipCodeDecoder& decoder) { bool root_is_chain = decoder.get_code_type(0) != 
ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { @@ -1901,7 +1901,7 @@ bool MIPayload::parent_is_root(const ZipCode& zip, const ZipCodeDecoder& decoder } -size_t MIPayload::prefix_sum(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::get_prefix_sum(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; if (decoder.decoder_length() == 1) { @@ -1939,7 +1939,7 @@ size_t MIPayload::prefix_sum(const ZipCode& zip, const ZipCodeDecoder& decoder, } } -size_t MIPayload::chain_component(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { +size_t MIPayload::get_chain_component(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; @@ -1974,5 +1974,157 @@ size_t MIPayload::chain_component(const ZipCode& zip, const ZipCodeDecoder& deco } } +MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { + MIPayload payload; + + if (decoder_length() == 1) { + cerr << "Root node" << endl; + //If the root-level structure is a node + payload.parent_is_root = true; + payload.parent_is_chain = true; + + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[0].second; + //Root is chain + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //root_identifier + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_handle = distance_index.get_handle_from_connected_component(zip_value); + cerr << "Got node from identifier " << zip_value << " " << distance_index.net_handle_as_string(payload.node_handle) << endl; + + //Root node 
length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + payload.node_length = zip_value; + payload.is_trivial_chain = true; + payload.is_reversed = false; + payload.parent_handle = distance_index.get_root(); + + } else if (decoder[max_depth() - 1].first) { + cerr << "Parent is chain" << endl; + //If the parent is a chain + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_is_chain = true; + payload.parent_is_root = false; + + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[0].second; + //is_chain + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + //root_identifier for root, chain length for anything else + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if (decoder_length() == 2) { + //If the node is a child of the root chain + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_type = ZipCode::ROOT_CHAIN; + } else { + payload.parent_handle = distance_index.get_parent(payload.node_handle); + payload.parent_type = ZipCode::CHAIN; + } + + //Node prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //Node length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //is_reversed + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //TODO: For top-level chains we got this from the distance index + payload.is_reversed = zip_value; + + payload.chain_component = distance_index.is_multicomponent_chain(payload.parent_handle) + ? 
distance_index.get_chain_component(payload.node_handle) + : 0; + + + + } else { + cerr << "Child of a snarl" << endl; + //If the node is a child of a snarl + + auto node_handle = distance_index.get_node_net_handle(id); + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(node_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE, + distance_index.get_node_record_offset(node_handle)); + payload.parent_is_chain = false; + payload.parent_is_root = decoder_length() == 2; + payload.is_trivial_chain = true; + + + size_t zip_value; + size_t zip_index = decoder[0].second; + if (payload.parent_is_root) { + //is_chain + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Identifier for root snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_type = ZipCode::ROOT_SNARL; + } else { + //is_regular + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //If this is a non-root snarl, get as much as we can from it + payload.parent_type = ZipCode::EMPTY; + if (zip_value == 0) { + payload.parent_type = ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + payload.parent_type = ZipCode::REGULAR_SNARL; + } else { + payload.parent_type = ZipCode::CYCLIC_SNARL; + } + + //Snarl prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + //Snarl length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Snarl child_count + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //is_reversed for regular snarl and record offset for irregular/cyclic snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + if (payload.parent_type == ZipCode::REGULAR_SNARL) { + //Snarl is reversed + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); + if (distance_index.is_simple_snarl(distance_index.get_parent(payload.parent_handle))) { + std::tie(payload.is_reversed, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } else { + payload.is_reversed = false; + } + } else { + payload.parent_handle = distance_index.get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + payload.is_reversed = false; + } + + } + //We should be at the node/trivial chain now + zip_index = decoder[max_depth()].second; + //Chain rank in snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Chain length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + + //Get the rest as default values + + } + payload.parent_depth = 0; + for (size_t d = 0 ; d <= max_depth() ; d++) { + auto type = get_code_type(d); + if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { + payload.parent_depth++; + } + } + + + + return payload; +} + } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 5d7d7bd4d06..1cb45dbd06e 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -323,6 +323,9 @@ class ZipCodeDecoder { //TODO: I want to make a struct for holding all values of a code as real values + ///Fill in a payload with values from the zipcode + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + }; std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); @@ -331,39 +334,53 @@ std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); /** The payload for the minimizer index. This stores distance information that gets used in clustering The payload now uses zip codes, so this gets used to go from a zip code to distance information - usable by the clusterer, which expects the old payload format + usable by the clusterer */ -struct MIPayload { +struct MIPayload { typedef std::uint64_t code_type; // We assume that this fits into gbwtgraph::Payload. 
//typedef std::pair payload_type; - - constexpr static gbwtgraph::Payload NO_CODE = {0, 0}; - constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); + constexpr static gbwtgraph::Payload NO_CODE = {0, 0}; + constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); //How do decode the zipcode to get the old payload values - static size_t record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static size_t get_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + + static size_t get_parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + + static size_t get_node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + + static size_t get_node_length(const ZipCode& zip, const ZipCodeDecoder& decoder); + + static bool get_is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static size_t parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + static bool get_is_trivial_chain (const ZipCode& zip, const ZipCodeDecoder& decoder); - static size_t node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); + static bool get_parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + static bool get_parent_is_root (const ZipCode& zip, const ZipCodeDecoder& decoder); - static size_t node_length(const ZipCode& zip, const ZipCodeDecoder& decoder); + static size_t get_prefix_sum (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + static size_t get_chain_component (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& 
distance_index, const nid_t& id); - static bool is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static bool is_trivial_chain (const ZipCode& zip, const ZipCodeDecoder& decoder); - static bool parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static bool parent_is_root (const ZipCode& zip, const ZipCodeDecoder& decoder); - static size_t prefix_sum (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + net_handle_t node_handle; + net_handle_t parent_handle; - static size_t chain_component (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); + size_t node_length = std::numeric_limits::max(); + size_t prefix_sum = std::numeric_limits::max(); + size_t chain_component = std::numeric_limits::max(); + //Depth according to the distance index + size_t parent_depth = 0; - + ZipCode::code_type_t parent_type = ZipCode::EMPTY; + bool is_reversed = false; + bool is_trivial_chain = false; + bool parent_is_chain = false; + bool parent_is_root = false; }; } From 0244761eed20ba3a98d27a51b5b9c288dff2b7bf Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 09:42:58 +0200 Subject: [PATCH 008/124] Get all cache values from zipcodes once --- src/snarl_seed_clusterer.cpp | 249 ++++++++--------------- src/snarl_seed_clusterer.hpp | 1 - src/zip_code.cpp | 371 +++-------------------------------- src/zip_code.hpp | 27 +-- 4 files changed, 119 insertions(+), 529 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 776dd66005e..857d724212a 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -371,111 +371,40 @@ cerr << "Add all seeds to nodes: " << endl; #ifdef DEBUG_CLUSTER cerr << "Using cached values for node " << id << ": " - << ", " << 
MIPayload::record_offset(zip_code, distance_index, id) - << ", " << MIPayload::parent_record_offset(zip_code, decoder, distance_index, id) - << ", " << MIPayload::node_record_offset(zip_code, distance_index, id) - << ", " << MIPayload::node_length(zip_code, decoder) - << ", " << MIPayload::prefix_sum(zip_code, decoder, distance_index, id) - << ", " << MIPayload::chain_component(zip_code, decoder, distance_index, id) << endl; + << ", " << seed.payload.record_offset + << ", " << seed.payload.parent_record_offset + << ", " << seed.payload.node_record_offset + << ", " << seed.payload.node_length + << ", " << seed.payload.prefix_sum + << ", " << seed.payload.chain_component << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); - assert(MIPayload::record_offset(zip_code, distance_index, id) == distance_index.get_record_offset(handle)); - //assert(MIPayload::parent_record_offset(zip_code, distance_index, id) == + assert(seed.payload.record_offset == distance_index.get_record_offset(handle)); + //assert(seed.payload.parent_record_offset == // (distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) // :distance_index.get_record_offset(parent_handle))); - assert(MIPayload::node_record_offset(zip_code, distance_index, id) == distance_index.get_node_record_offset(handle)); - assert(MIPayload::node_length(zip_code, decoder) == distance_index.minimum_length(handle)); + assert(seed.payload.node_record_offset == distance_index.get_node_record_offset(handle)); + assert(seed.payload.node_length == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) // ? 
std::numeric_limits::max() // : distance_index.get_prefix_sum_value(handle); - //assert(MIPayload::prefix_sum(zip_code, distance_index, id) == prefix_sum); - assert(MIPayload::chain_component(zip_code, decoder, distance_index, id) == (distance_index.is_multicomponent_chain(parent_handle) + //assert(seed.payload.prefix_sum == prefix_sum); + assert(seed.payload.chain_component == (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) : 0)); -#endif - - - - //Get the net_handle for the node the seed is on - net_handle_t node_net_handle = distance_index.get_node_net_handle(id); - size_t node_chain_component = MIPayload::get_chain_component(seed.zipcode, seed.decoder, distance_index, get_id(seed.pos)); - seed.payload_chain_component=node_chain_component; - size_t node_record_offset = MIPayload::get_node_record_offset(zip_code, distance_index, id); - - - - //Get the parent of the node - net_handle_t parent; - //If the grandparent is a root/root snarl, then make it the parent and the node a trivial chain - //because they will be clustered here and added to the root instead of being added to the - //snarl tree to be clustered - ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); - ZipCode::code_type_t parent_type = node_depth == 0 ? node_type : decoder.get_code_type(node_depth-1); - auto parent_record_offset = MIPayload::get_parent_record_offset(zip_code, decoder, distance_index, id); - bool parent_is_root = parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_CHAIN || parent_type == ZipCode::ROOT_NODE; - //TODO: idk why this doesn't work with the parent_type - bool parent_is_chain = MIPayload::get_parent_is_chain(zip_code, decoder, distance_index, id); - bool is_trivial_chain = node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE; - size_t prefix_sum = is_trivial_chain ? 
0 : decoder.get_offset_in_chain(node_depth, &distance_index); - seed.payload_prefix_sum = prefix_sum; - size_t node_length = decoder.get_length(node_depth, &distance_index); - seed.payload_node_length = node_length; - bool is_reversed_in_parent = decoder.get_is_reversed_in_parent(node_depth); - - if (node_type == ZipCode::CHAIN || node_type == ZipCode::ROOT_NODE) { - //If the node is a trivial chain, then the parent is just the node but recorded as a chain in the net handle - parent = distance_index.get_net_handle_from_values (distance_index.get_record_offset(node_net_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE, - node_record_offset); - if (parent_record_offset == 0) { - //If the parent offset stored in the cache is the root, then this is a trivial chain - //child of the root not in a root snarl, so remember the root as the parent and the - //trivial chain as the node - node_net_handle = parent; - parent = distance_index.get_root(); - } else if (parent_type == ZipCode::ROOT_SNARL) { - //If the parent is a root snarl, then the node becomes the trivial chain - //and we get the parent root snarl from the cache - node_net_handle = parent; - parent = distance_index.get_net_handle_from_values(parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); - } - } else if (parent_record_offset == 0) { - //The parent is just the root - parent = distance_index.get_root(); - } else if (parent_type == ZipCode::ROOT_SNARL) { - //If the parent is a root snarl - parent = distance_index.get_net_handle_from_values(parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); - } else { - //Otherwise the parent is an actual chain and we use the value from the cache - parent = distance_index.get_net_handle_from_values(parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); - } - //cerr << "node and parent " << 
distance_index.net_handle_as_string(node_net_handle) << " " << distance_index.net_handle_as_string(parent) << endl; - //cerr << "node and parent " << distance_index.net_handle_as_string(payload.node_handle) << " " << distance_index.net_handle_as_string(payload.parent_handle) << endl; - //assert(node_net_handle == payload.node_handle); - //assert(parent == payload.parent_handle); - - -#ifdef DEBUG_CLUSTER - if (!distance_index.is_root(parent)) { - cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(node_net_handle))) << endl; - assert( distance_index.start_end_traversal_of(parent) == distance_index.start_end_traversal_of(distance_index.get_parent(node_net_handle))); + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; + assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); } #endif - if (!(parent_type == ZipCode::ROOT_SNARL || parent_type == ZipCode::ROOT_NODE)) { + if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) #ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(parent) << endl; + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; #endif //Add the seed to its parent @@ -488,60 +417,52 @@ cerr << "Add all seeds to nodes: " << endl; #ifdef DEBUG_CLUSTER //assert(prefix_sum == (is_trivial_chain ? 
std::numeric_limits::max() - // : distance_index.get_prefix_sum_value(node_net_handle))); - cerr << "Node length should be " << distance_index.minimum_length(node_net_handle) << " actually " << node_length << endl; - assert(node_length == distance_index.minimum_length(node_net_handle)); - cerr << "Reversed in parent? " << distance_index.net_handle_as_string(node_net_handle) << " " << distance_index.net_handle_as_string(parent) << " " << is_reversed_in_parent << endl; - cerr << "is trivial? " << is_trivial_chain << endl; - if (!distance_index.is_root(parent)) { - cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(parent)) << endl; + // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); + cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; + assert(seed.payload.node_length == distance_index.minimum_length(seed.payload.node_handle)); + cerr << "Reversed in parent? " << distance_index.net_handle_as_string(seed.payload.node_handle) << " " << distance_index.net_handle_as_string(seed.payload.parent_handle) << " " << seed.payload.is_reversed << endl; + cerr << "is trivial? " << seed.payload.is_trivial_chain << endl; + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(seed.payload.parent_handle)) << endl; } - cerr << is_reversed_in_parent << " " << distance_index.is_reversed_in_parent(parent) << endl; + cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; - assert(is_reversed_in_parent == (is_trivial_chain ? distance_index.is_reversed_in_parent(parent) - : distance_index.is_reversed_in_parent(node_net_handle))); + assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? 
distance_index.is_reversed_in_parent(seed.payload.parent_handle) + : distance_index.is_reversed_in_parent(seed.payload.node_handle))); #endif //Add the parent chain or trivial chain bool new_parent = false; - //TODO: Could get depth from the zipcodes but the idea of depth isn't the same - size_t parent_depth = 0; - for (size_t d = 0 ; d <= node_depth ; d++) { - auto type = decoder.get_code_type(d); - if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { - parent_depth++; - } - } new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.parent_handle) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; - if (is_trivial_chain ) { - clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + if (seed.payload.is_trivial_chain ) { + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, node_length, std::numeric_limits::max(), std::numeric_limits::max()); + false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain - clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, 
clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index); } - parent_to_depth.emplace(parent, parent_depth); + parent_to_depth.emplace(seed.payload.parent_handle, seed.payload.parent_depth); new_parent = true; } #ifdef DEBUG_CLUSTER - assert(parent_depth == distance_index.get_depth(parent)); + assert(seed.payload.parent_depth == distance_index.get_depth(seed.payload.parent_handle)); #endif //If chains_by_level isn't big enough for this depth, resize it and reserve space at each level - if (parent_depth+1 > chains_by_level.size()) { - size_t to_add = (parent_depth+1) - chains_by_level.size(); + if (seed.payload.parent_depth+1 > chains_by_level.size()) { + size_t to_add = (seed.payload.parent_depth+1) - chains_by_level.size(); for (size_t i = 0 ; i < to_add ; i++) { chains_by_level.emplace_back(); chains_by_level.back().reserve(clustering_problem.seed_count_prefix_sum.back()); @@ -549,40 +470,40 @@ cerr << "Add all seeds to nodes: " << endl; } //Make sure the seed's distances are relative to the orientation in the parent - seed.distance_left = is_reversed_in_parent != is_rev(pos) ? node_length- get_offset(pos) + seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = is_reversed_in_parent != is_rev(pos) ? get_offset(pos) + 1 - : node_length- get_offset(pos); + seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 + : seed.payload.node_length- get_offset(pos); //Add this seed to its parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(parent)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.parent_handle)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = node_net_handle; + parent_problem.children.back().net_handle = seed.payload.node_handle; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = node_chain_component; + parent_problem.children.back().chain_component = seed.payload.chain_component; parent_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - prefix_sum); + seed.payload.prefix_sum); //And the parent to chains_by_level if (new_parent) { - chains_by_level[parent_depth].emplace_back(parent); + chains_by_level[seed.payload.parent_depth].emplace_back(seed.payload.parent_handle); } //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too if ( new_parent) { - if (is_trivial_chain && !parent_is_root) { - bool grandparent_is_simple_snarl = parent_is_chain; + if (seed.payload.is_trivial_chain && !seed.payload.parent_is_root) { + bool grandparent_is_simple_snarl = seed.payload.parent_is_chain; parent_problem.has_parent_handle = true; parent_problem.parent_net_handle = grandparent_is_simple_snarl - ? distance_index.get_net_handle_from_values(distance_index.get_record_offset(node_net_handle), + ? 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(seed.payload.node_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE, 1) - : distance_index.get_net_handle_from_values(parent_record_offset, + : distance_index.get_net_handle_from_values(seed.payload.parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); #ifdef DEBUG_CLUSTER @@ -593,14 +514,14 @@ cerr << "Add all seeds to nodes: " << endl; //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too parent_problem.has_grandparent_handle = true; parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( - parent_record_offset, + seed.payload.parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); #ifdef DEBUG_CLUSTER cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; #endif } - } else if (parent_is_root && parent_is_chain && !is_trivial_chain) { + } else if (seed.payload.parent_is_root && seed.payload.parent_is_chain && !seed.payload.is_trivial_chain) { //The parent chain is a child of the root parent_problem.has_parent_handle = true; parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( @@ -622,39 +543,39 @@ cerr << "Add all seeds to nodes: " << endl; bool new_node = false; if (seen_nodes.count(id) == 0) { new_node = true; - clustering_problem.net_handle_to_node_problem_index.emplace(node_net_handle, + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(node_net_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, node_length, std::numeric_limits::max(), + 
false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max()); //Remember the parent of this node, since it will be needed to remember the root snarl later - clustering_problem.all_node_problems.back().parent_net_handle = parent; + clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; seen_nodes.insert(id); } - seed.distance_left = is_reversed_in_parent != is_rev(pos) ? node_length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = is_reversed_in_parent != is_rev(pos) ? get_offset(pos) + 1 : node_length- get_offset(pos); + seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; + seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); - SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); + SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.node_handle)); node_problem.children.emplace_back(); - node_problem.children.back().net_handle = node_net_handle; + node_problem.children.back().net_handle = seed.payload.node_handle; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; node_problem.children.back().has_chain_values = true; - node_problem.children.back().chain_component = node_chain_component; + node_problem.children.back().chain_component = seed.payload.chain_component; node_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - prefix_sum); + seed.payload.prefix_sum); //Remember this seed as a child of the node if (new_node) { - nodes_to_cluster_now.emplace_back(node_net_handle); + nodes_to_cluster_now.emplace_back(seed.payload.node_handle); } } } @@ -1935,11 +1856,11 @@ void 
SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; size_t last_length = last_child.is_seed - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload_node_length + ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.node_length : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload_chain_component + ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.chain_component : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; @@ -2198,17 +2119,17 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == current_child_seed.payload_chain_component) { + } else if ( last_chain_component_end == current_child_seed.payload.chain_component) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = current_child_seed.payload_prefix_sum; + 
distance_from_last_child_to_current_child = current_child_seed.payload.prefix_sum; } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); //Distance is the current node's prefix sum minus the distance from the start of the chain to the last node - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.payload_prefix_sum, + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.payload.prefix_sum, distance_from_chain_start_to_last_node); } } @@ -2227,27 +2148,27 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_current_end_to_end_of_chain = 0; } else if (SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { //If this is the last node in the chain - if (chain_problem->chain_component_end != current_child_seed.payload_chain_component) { + if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { distance_from_current_end_to_end_of_chain = 0; } - } else if (chain_problem->chain_component_end != current_child_seed.payload_chain_component) { + } else if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { //Length of the chain - (prefix sum + node length of the current node) distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, - SnarlDistanceIndex::sum(current_child_seed.payload_prefix_sum, - current_child_seed.payload_node_length)); + SnarlDistanceIndex::sum(current_child_seed.payload.prefix_sum, + current_child_seed.payload.node_length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance 
from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.payload_chain_component != 0 ? std::numeric_limits::max() : current_child_seed.payload_prefix_sum) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.payload.chain_component != 0 ? std::numeric_limits::max() : current_child_seed.payload.prefix_sum) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2282,13 +2203,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (current_child_seed.payload_chain_component != 0) { + if (current_child_seed.payload.chain_component != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { //Prefix sum + offset of the seed in the node current_child_seed.distance_left = SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.payload_prefix_sum); + current_child_seed.payload.prefix_sum); } current_child_seed.distance_right = SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain); @@ -2333,16 +2254,16 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : (last_child.net_handle == current_child.net_handle ? 
0 - : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload_node_length)); + : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload.node_length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) //Left distance is the prefix sum (or inf if the node isn't in the first component of the chain) + offset of seed in node //Right distance is the right offst of the seed in the node + the distance from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - current_child_seed.payload_chain_component != 0 ? std::numeric_limits::max() + current_child_seed.payload.chain_component != 0 ? std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.payload_prefix_sum), + current_child_seed.payload.prefix_sum), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2376,7 +2297,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node - distance_between -= current_child_seed.payload_node_length; + distance_between -= current_child_seed.payload.node_length; } #ifdef DEBUG_CLUSTER @@ -2485,9 +2406,9 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = current_child_seed.payload_prefix_sum; - last_length = current_child_seed.payload_node_length; - last_chain_component_end = current_child_seed.payload_chain_component; + last_prefix_sum = current_child_seed.payload.prefix_sum; + last_length = current_child_seed.payload.node_length; + 
last_chain_component_end = current_child_seed.payload.chain_component; } @@ -3171,7 +3092,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t dist_left = clustering_problem.all_seeds->at(read_num)->at(seed_i).distance_left; if (include_prefix_sum) { dist_left = SnarlDistanceIndex::sum(dist_left, - clustering_problem.all_seeds->at(read_num)->at(seed_i).payload_prefix_sum); + clustering_problem.all_seeds->at(read_num)->at(seed_i).payload.prefix_sum); } //Since we only stored the proper distance left for seeds on chains size_t dist_right = structure_length - dist_left + 1; @@ -3208,7 +3129,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr const SeedCache& first_seed = clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second); //TOOD: get_id is weird node_problem->fragment_best_left = SnarlDistanceIndex::sum(first_seed.distance_left, - include_prefix_sum ? first_seed.payload_prefix_sum : 0); + include_prefix_sum ? 
first_seed.payload.prefix_sum : 0); //Record the new cluster for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++ ) { @@ -3254,7 +3175,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t offset = clustering_problem.all_seeds->at(read_num)->at(seed_num).distance_left; if (include_prefix_sum) { offset = SnarlDistanceIndex::sum(offset, - clustering_problem.all_seeds->at(read_num)->at(seed_num).payload_prefix_sum); + clustering_problem.all_seeds->at(read_num)->at(seed_num).payload.prefix_sum); } //First and last offset and last cluster head for this read diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 759762d7f09..e0ef9ea4c39 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -138,7 +138,6 @@ class SnarlDistanceIndexClusterer { size_t distance_left = std::numeric_limits::max(); size_t distance_right = std::numeric_limits::max(); //Values from the payload that we're saving - size_t payload_chain_component = std::numeric_limits::max(); size_t payload_prefix_sum = std::numeric_limits::max(); size_t payload_node_length = std::numeric_limits::max(); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 766500fb6dd..f8e1b3c3b5a 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1652,333 +1652,10 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } - -size_t MIPayload::get_record_offset(const ZipCode& code, const SnarlDistanceIndex& distance_index, const nid_t& id ) { - - //TODO: This is pointless but I'll keep it until I fix everything - net_handle_t node_handle = distance_index.get_node_net_handle(id); - return distance_index.get_record_offset(node_handle); -} - -size_t MIPayload::get_parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - - if (decoder.decoder_length() 
== 1) { - //If the root-level structure is a node - - return 0; - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain -#ifdef DEBUG_ZIPCODE - assert(distance_index.get_record_offset(decoder.get_net_handle(0, &distance_index)) == - distance_index.get_record_offset(distance_index.get_parent(distance_index.get_node_net_handle(id)))); -#endif - - return distance_index.get_record_offset(decoder.get_net_handle(0, &distance_index)); - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the root snarl -#ifdef DEBUG_ZIPCODE - assert(distance_index.get_record_offset(decoder.get_net_handle(0, &distance_index)) == - distance_index.get_record_offset(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(id))))); -#endif - - return distance_index.get_record_offset(decoder.get_net_handle(0, &distance_index)); - - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - ZipCode::code_type_t parent_type = decoder.get_code_type(node_depth-1); - if (parent_type == ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { - //If the parent is an irregular snarl - return decoder.get_distance_index_address(node_depth-1); - - } else { - //TODO: I'm not sure about what to do about this, I don't like doing it here - net_handle_t node_handle = distance_index.get_node_net_handle(id); - net_handle_t parent = distance_index.get_parent(node_handle); - if (distance_index.is_trivial_chain(parent)) { - net_handle_t grandparent = distance_index.get_parent(parent); - if (distance_index.is_simple_snarl(grandparent)) { - return distance_index.get_record_offset(distance_index.get_parent(grandparent)); - - } else { - return distance_index.get_record_offset(grandparent); - } - } else { - return distance_index.get_record_offset(parent); - } - } - } -} - -size_t 
MIPayload::get_node_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - //TODO: This is pointless but I'll keep it until I fix everything - net_handle_t node_handle = distance_index.get_node_net_handle(id); - return distance_index.get_node_record_offset(node_handle); -} - -size_t MIPayload::get_node_length(const ZipCode& zip, const ZipCodeDecoder& decoder) { - - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return decoder.get_length(0); - - } else if (decoder.decoder_length() == 2) { - //If this is a node in the top-level chain - - return decoder.get_length(1); - - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - return decoder.get_length(node_depth); - } -} - -bool MIPayload::get_is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return false; - - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - return decoder.get_is_reversed_in_parent(1); - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the root snarl - - return false; - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - ZipCode:: code_type_t parent_type = decoder.get_code_type(node_depth-1); - if (parent_type == ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { - //If the parent is an irregular snarl - return false; - - } else if (parent_type == ZipCode::REGULAR_SNARL) { - //If the parent is a regular snarl - - //Because I'm storing "regular" and not "simple", need to check this - if 
(distance_index.is_simple_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(id))))) { - return decoder.get_is_reversed_in_parent(node_depth); - } else { - return false; - } - } else { - //If the parent is a chain - //If this was a node in a chain - return decoder.get_is_reversed_in_parent(node_depth); - } - } -} - -bool MIPayload::get_is_trivial_chain(const ZipCode& zip, const ZipCodeDecoder& decoder) { - - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return true; - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - return false; - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the root snarl - - return true; - - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - ZipCode::code_type_t parent_type = decoder.get_code_type(node_depth-1); - if (parent_type == ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { - //If the parent is an irregular snarl - return true; - - } else if (parent_type == ZipCode::REGULAR_SNARL) { - //If the parent is a regular snarl - return true; - - } else { - //If the parent is a chain - //If this was a node in a chain - return false; - } - } -} - -bool MIPayload::get_parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return true; - - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - return true; - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the 
root snarl - - return false; - - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - ZipCode::code_type_t node_type = decoder.get_code_type(node_depth); - ZipCode::code_type_t parent_type = decoder.get_code_type(node_depth-1); - if (parent_type == ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { - //If the parent is an irregular snarl - - return false; - - } else if (parent_type == ZipCode::REGULAR_SNARL) { - - if (node_type == ZipCode::CHAIN) { - net_handle_t parent = distance_index.get_parent(distance_index.get_node_net_handle(id)); - if (distance_index.is_simple_snarl(distance_index.get_parent(parent))) { - return true; - } else { - return false; - } - } else { - return true; - } - - } else { - //If the parent is a chain - //If this was a node in a chain - return true; - - } - } -} - - -bool MIPayload::get_parent_is_root(const ZipCode& zip, const ZipCodeDecoder& decoder) { - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return true; - - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - return false; - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the root snarl - - return true; - - } else { - - return false; - } -} - - -size_t MIPayload::get_prefix_sum(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - return 0; - - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - return decoder.get_offset_in_chain(1); - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - 
//If the node is the child of the root snarl - return 0; - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - ZipCode::code_type_t parent_type = decoder.get_code_type(node_depth-1); - if (parent_type == ZipCode::IRREGULAR_SNARL || parent_type == ZipCode::CYCLIC_SNARL) { - return 0; - } else if (parent_type == ZipCode::REGULAR_SNARL) { - //If the parent is a snarl - //Because I'm storing "regular" and not "simple", need to check this - if (distance_index.is_simple_snarl(distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(id))))) { - return decoder.get_offset_in_chain(node_depth-1); - } else { - return 0; - } - } else { - //If the parent is a chain - //If this was a node in a chain - return decoder.get_offset_in_chain(node_depth); - } - } -} - -size_t MIPayload::get_chain_component(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id) { - - - bool root_is_chain = decoder.get_code_type(0) != ZipCode::ROOT_SNARL; - - if (decoder.decoder_length() == 1) { - //If the root-level structure is a node - - return 0; - - } else if (decoder.decoder_length() == 2 && root_is_chain) { - //If this is a node in the top-level chain - - net_handle_t net_handle = distance_index.get_node_net_handle(id); - net_handle_t parent = distance_index.get_parent(net_handle); - return distance_index.is_multicomponent_chain(parent) - ? 
distance_index.get_chain_component(net_handle) - : 0; - - } else if (decoder.decoder_length() == 2 && !root_is_chain) { - //If the node is the child of the root snarl - - return 0; - } else { - //Otherwise, check the last thing in the zipcode to get the node values - size_t node_depth = decoder.decoder_length()-1; - - net_handle_t net_handle = distance_index.get_node_net_handle(id); - net_handle_t parent = distance_index.get_parent(net_handle); - return distance_index.is_multicomponent_chain(parent) - ? distance_index.get_chain_component(net_handle) - : 0; - } -} - MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; if (decoder_length() == 1) { - cerr << "Root node" << endl; //If the root-level structure is a node payload.parent_is_root = true; payload.parent_is_chain = true; @@ -1990,19 +1667,21 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_handle = distance_index.get_handle_from_connected_component(zip_value); - cerr << "Got node from identifier " << zip_value << " " << distance_index.net_handle_as_string(payload.node_handle) << endl; + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); //Root node length std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value; + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; payload.is_trivial_chain = true; payload.is_reversed = false; payload.parent_handle = distance_index.get_root(); + payload.parent_type = ZipCode::ROOT_NODE; + payload.parent_record_offset = 0; } else if (decoder[max_depth() - 1].first) { - cerr << "Parent is chain" << endl; //If the parent is a chain payload.node_handle = distance_index.get_node_net_handle(id); payload.parent_is_chain = true; @@ -2010,7 +1689,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //Walk through the zipcode to get values size_t zip_value; - size_t zip_index = decoder[0].second; + size_t zip_index = decoder[max_depth()-1].second; //is_chain std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -2020,10 +1699,12 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //If the node is a child of the root chain payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; + payload.parent_is_root = true; } else { - payload.parent_handle = distance_index.get_parent(payload.node_handle); + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; } + payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); //Node prefix sum std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -2043,29 +1724,34 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } else { - cerr << "Child of a snarl" << endl; //If the node is a child of a snarl - auto node_handle = distance_index.get_node_net_handle(id); - payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(node_handle), + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_handle = 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE, - distance_index.get_node_record_offset(node_handle)); + distance_index.get_node_record_offset(payload.node_handle)); payload.parent_is_chain = false; payload.parent_is_root = decoder_length() == 2; payload.is_trivial_chain = true; size_t zip_value; - size_t zip_index = decoder[0].second; + size_t zip_index; if (payload.parent_is_root) { //is_chain + zip_index = decoder[0].second; std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.node_handle = payload.parent_handle; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { + zip_index = decoder[max_depth()-1].second; //is_regular std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //If this is a non-root snarl, get as much as we can from it @@ -2080,7 +1766,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //Snarl prefix sum std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + + payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + //Snarl length std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Snarl child_count @@ -2090,15 +1778,18 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); - if (distance_index.is_simple_snarl(distance_index.get_parent(payload.parent_handle))) { - std::tie(payload.is_reversed, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); + if (distance_index.is_simple_snarl(grandparent_handle)) { + payload.is_reversed = zip_value; + payload.parent_is_chain=true; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { payload.is_reversed = false; + payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); } } else { - payload.parent_handle = distance_index.get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); payload.is_reversed = false; + payload.parent_record_offset = zip_value; } } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 1cb45dbd06e..4a30babc550 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -344,37 +344,16 @@ struct MIPayload { constexpr static gbwtgraph::Payload NO_CODE = {0, 0}; constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); - //How do decode the zipcode to get the old payload values - static size_t get_record_offset(const ZipCode& zip, const SnarlDistanceIndex& distance_index, const nid_t& id); - - static size_t get_parent_record_offset(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - - static size_t get_node_record_offset(const ZipCode& zip, const 
SnarlDistanceIndex& distance_index, const nid_t& id); - - static size_t get_node_length(const ZipCode& zip, const ZipCodeDecoder& decoder); - - static bool get_is_reversed(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - - static bool get_is_trivial_chain (const ZipCode& zip, const ZipCodeDecoder& decoder); - - static bool get_parent_is_chain(const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static bool get_parent_is_root (const ZipCode& zip, const ZipCodeDecoder& decoder); - - static size_t get_prefix_sum (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - static size_t get_chain_component (const ZipCode& zip, const ZipCodeDecoder& decoder, const SnarlDistanceIndex& distance_index, const nid_t& id); - - - - net_handle_t node_handle; net_handle_t parent_handle; size_t node_length = std::numeric_limits::max(); - size_t prefix_sum = std::numeric_limits::max(); - size_t chain_component = std::numeric_limits::max(); + size_t prefix_sum = 0; + size_t chain_component = 0; //Depth according to the distance index size_t parent_depth = 0; + size_t parent_record_offset = 0; ZipCode::code_type_t parent_type = ZipCode::EMPTY; bool is_reversed = false; From 729c32235edb83f3edc092e1f24fc6ecac59af67 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 10:41:26 +0200 Subject: [PATCH 009/124] Dont use distance index for getting is_root_snarl --- src/snarl_seed_clusterer.cpp | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 857d724212a..e7f8a467c33 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -325,7 +325,8 @@ cerr << "Add all seeds to nodes: " << endl; //This is to remember the nodes that we are going to cluster at the end of get_nodes //these will be 
the nodes that are children of the root or root snarl. //All other seeds are added directly to their parent chains as children - vector nodes_to_cluster_now; + //Bool is true if the parent of the node is a root snarl + std::vector> nodes_to_cluster_now; //Map the parent SnarlTreeNodeProblem to its depth so we don't use get_depth() as much @@ -367,8 +368,6 @@ cerr << "Add all seeds to nodes: " << endl; const ZipCode& zip_code = seed.zipcode; ZipCodeDecoder& decoder = seed.decoder; - size_t node_depth = decoder.max_depth(); - #ifdef DEBUG_CLUSTER cerr << "Using cached values for node " << id << ": " << ", " << seed.payload.record_offset @@ -403,19 +402,11 @@ cerr << "Add all seeds to nodes: " << endl; if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) -#ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; -#endif - //Add the seed to its parent //Also update the zipcode on the seed - - - //Seed payload is: - //record offset of node, record offset of parent, node record offset, node length, is_reversed, is_trivial_chain, parent is chain, parent is root, prefix sum, chain_component - #ifdef DEBUG_CLUSTER + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; //assert(prefix_sum == (is_trivial_chain ? 
std::numeric_limits::max() // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; @@ -575,7 +566,7 @@ cerr << "Add all seeds to nodes: " << endl; //Remember this seed as a child of the node if (new_node) { - nodes_to_cluster_now.emplace_back(seed.payload.node_handle); + nodes_to_cluster_now.emplace_back(seed.payload.node_handle, seed.payload.parent_type == ZipCode::ROOT_SNARL); } } } @@ -586,7 +577,8 @@ cerr << "Add all seeds to nodes: " << endl; #endif //Go through and cluster nodes that are children of the root or root snarls - for(const net_handle_t& node_net_handle : nodes_to_cluster_now) { + for(const auto& net_and_is_root : nodes_to_cluster_now) { + const net_handle_t& node_net_handle = net_and_is_root.first; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); @@ -597,7 +589,7 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t parent = node_problem.parent_net_handle; - if (distance_index.is_root_snarl(parent)) { + if (net_and_is_root.second) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, From 33b7e8757610f9bc9c78740fe7d505db0c369af6 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 16:41:00 +0200 Subject: [PATCH 010/124] Add the zipcode to the clustering problems and use to get parents --- src/snarl_seed_clusterer.cpp | 37 ++++++++++++++--------- src/snarl_seed_clusterer.hpp | 21 ++++++++++--- src/zip_code.cpp | 57 ++++++++++++++++++++++++++++++++---- 3 files changed, 92 insertions(+), 23 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index e7f8a467c33..c314d92ae2a 100644 --- 
a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -326,7 +326,7 @@ cerr << "Add all seeds to nodes: " << endl; //these will be the nodes that are children of the root or root snarl. //All other seeds are added directly to their parent chains as children //Bool is true if the parent of the node is a root snarl - std::vector> nodes_to_cluster_now; + std::vector nodes_to_cluster_now; //Map the parent SnarlTreeNodeProblem to its depth so we don't use get_depth() as much @@ -434,13 +434,15 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max()); + false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), + &seed, seed.decoder.max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + clustering_problem.seed_count_prefix_sum.back(), distance_index, + &seed, seed.decoder.max_depth() - 1); } parent_to_depth.emplace(seed.payload.parent_handle, seed.payload.parent_depth); @@ -539,7 +541,8 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), - 
std::numeric_limits::max()); + std::numeric_limits::max(), + &seed, seed.decoder.max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; @@ -566,7 +569,7 @@ cerr << "Add all seeds to nodes: " << endl; //Remember this seed as a child of the node if (new_node) { - nodes_to_cluster_now.emplace_back(seed.payload.node_handle, seed.payload.parent_type == ZipCode::ROOT_SNARL); + nodes_to_cluster_now.emplace_back(&seed); } } } @@ -577,8 +580,8 @@ cerr << "Add all seeds to nodes: " << endl; #endif //Go through and cluster nodes that are children of the root or root snarls - for(const auto& net_and_is_root : nodes_to_cluster_now) { - const net_handle_t& node_net_handle = net_and_is_root.first; + for(const SeedCache* seed : nodes_to_cluster_now) { + const net_handle_t& node_net_handle = seed->payload.node_handle; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); @@ -589,13 +592,14 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t parent = node_problem.parent_net_handle; - if (net_and_is_root.second) { + if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + clustering_problem.seed_count_prefix_sum.back(), distance_index, + seed, 0); } clustering_problem.root_children.emplace_back(parent, node_net_handle); } else { @@ -652,10 +656,11 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& 
cluster clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(snarl_parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + clustering_problem.seed_count_prefix_sum.back(), distance_index, + snarl_problem->seed, snarl_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved - SnarlTreeNodeProblem snarl_problem = clustering_problem.all_node_problems.at( + SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); if (snarl_problem.has_grandparent_handle) { SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( @@ -754,7 +759,8 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); } clustering_problem.root_children.emplace_back(parent, chain_handle); } else if (!is_top_level_chain) { @@ -808,7 +814,8 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + 
clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); @@ -2966,7 +2973,9 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro //Keep track of all clusters on the root SnarlTreeNodeProblem root_problem(distance_index.get_root(), clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index); + clustering_problem.seed_count_prefix_sum.back(), distance_index, + &clustering_problem.all_seeds->at(0)->front(), 0); + //TODO: ikd about the seed here //Remember old distances vector> child_distances (clustering_problem.seed_count_prefix_sum.back(), diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index e0ef9ea4c39..9af9d740147 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -259,6 +259,9 @@ class SnarlDistanceIndexClusterer { //The snarl tree node that the clusters are on net_handle_t containing_net_handle; + + + //The parent and grandparent of containing_net_handle, which might or might not be set //This is just to store information from the minimizer cache net_handle_t parent_net_handle; @@ -268,6 +271,10 @@ class SnarlDistanceIndexClusterer { //if it is a snarl, then this is the actual node, not the sentinel net_handle_t end_in; + //One representative seed so we can get the zipcode and stuff + const SeedCache* seed; + size_t zipcode_depth; + //Minimum length of a node or snarl //If it is a chain, then it is distance_index.chain_minimum_length(), which is //the expected length for a normal chain, and the length of the @@ -295,20 +302,26 @@ class SnarlDistanceIndexClusterer { //Constructor //read_count is the number of reads in a fragment (2 
for paired end) - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index) : + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, + const SeedCache* seed, size_t zipcode_depth) : containing_net_handle(std::move(net)), - fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()){ + fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), + seed(seed), + zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); } //Constructor for a node or trivial chain, used to remember information from the cache - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, size_t node_length, size_t prefix_sum, size_t component) : + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, + size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : containing_net_handle(net), is_reversed_in_parent(is_reversed_in_parent), node_length(node_length), prefix_sum_value(prefix_sum), chain_component_start(component), chain_component_end(component), - fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()){ + fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), + seed(seed), + zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index f8e1b3c3b5a..328bcc451b2 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -544,6 +544,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { } net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { + //get_net_handle_slow does the same thing so if this gets 
changed need to change that too if (depth == 0) { @@ -587,14 +588,60 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist } net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { - net_handle_t n = distance_index->get_node_net_handle(id); - for (size_t d = max_depth() ; d > depth ; d--) { - n = distance_index->get_parent(n); - if (distance_index->is_trivial_chain(n)){ + //This is just copying get_net_handle except adding a slower version for the things we don't remember + + if (depth == 0) { + //If this is the root chain/snarl/node + + size_t zip_value, zip_index = 0; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return distance_index->get_handle_from_connected_component(zip_value); + + } else if (decoder[depth].first) { + //If this is a chain/node + + net_handle_t n = distance_index->get_node_net_handle(id); + for (size_t d = max_depth() ; d > depth ; d--) { n = distance_index->get_parent(n); + if (distance_index->is_trivial_chain(n)){ + n = distance_index->get_parent(n); + } + } + return n; + } else { + //If this is a snarl + + size_t zip_value; + size_t zip_index = decoder[depth].second; + //zip_value is is_regular_snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { + //If this is a regular snarl + + net_handle_t n = distance_index->get_node_net_handle(id); + for (size_t d = max_depth() ; d > depth ; d--) { + n = distance_index->get_parent(n); + if (distance_index->is_trivial_chain(n)){ + n = distance_index->get_parent(n); + } + } + return n; + } else { + //Irregular snarl + + //zip_value is distance index offset + for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - + 
ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + return snarl_handle; } } - return n; } From 412dcec220862079abfa6cafc624de833cc2f51e Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 16:43:42 +0200 Subject: [PATCH 011/124] Acually use zipcode to get handle --- src/snarl_seed_clusterer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index c314d92ae2a..f41a640b632 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -649,7 +649,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(distance_index.get_parent(snarl_problem->containing_net_handle)); + : distance_index.start_end_traversal_of(snarl_problem->seed->decoder.get_net_handle_slow(id(snarl_problem->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; From 2bdcb1a45bb9b4012474d05fe926b57a725f83b0 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 17:07:31 +0200 Subject: [PATCH 012/124] Stop expecting the parent of a snarl to be a root --- src/snarl_seed_clusterer.cpp | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index f41a640b632..c55e2d29c43 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -672,27 +672,16 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster SnarlTreeNodeProblem& 
parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); - //Add the snarl to its parent - if (distance_index.is_root(snarl_parent)) { - if(distance_index.is_root_snarl(snarl_parent)) { - //If the parent is a root snarl, then remember it to be compared in the root - clustering_problem.root_children.emplace_back(snarl_parent, snarl_handle); - } else { - //Otherwise, compare it to itself using external connectivity and forget about it since we're done - compare_and_combine_cluster_on_one_child(clustering_problem, - &clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(snarl_parent))); - } - } else { - //Add the snarl to its parent chain - parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = snarl_handle; - parent_problem.children.back().is_seed = false; - parent_problem.children.back().has_chain_values = false; - if (new_parent) { - //And the parent chain to the things to be clustered next - clustering_problem.parent_chains->emplace_back(snarl_parent); - } + //Add the snarl to its parent chain + parent_problem.children.emplace_back(); + parent_problem.children.back().net_handle = snarl_handle; + parent_problem.children.back().is_seed = false; + parent_problem.children.back().has_chain_values = false; + if (new_parent) { + //And the parent chain to the things to be clustered next + clustering_problem.parent_chains->emplace_back(snarl_parent); } + } #ifdef DEBUG_CLUSTER From ea7a0edc84eb138f568a818ddeea4ea815f3b11a Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 13 Jul 2024 18:44:09 +0200 Subject: [PATCH 013/124] Use zipcodes for getting chain parent --- src/snarl_seed_clusterer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index c55e2d29c43..87b991645b6 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -718,7 
+718,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster net_handle_t parent = chain_problem->has_parent_handle ? chain_problem->parent_net_handle - : distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)); + : (chain_problem->zipcode_depth == 0 + ? distance_index.get_root() + : distance_index.start_end_traversal_of(chain_problem->seed->decoder.get_net_handle_slow(id(chain_problem->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { From 6a693ad8ab7be5493b59f12c28de09756017065f Mon Sep 17 00:00:00 2001 From: Xian Date: Sun, 14 Jul 2024 11:59:49 +0200 Subject: [PATCH 014/124] Use zipcodes for distances --- src/snarl_seed_clusterer.cpp | 63 +++++++++++++++++------------------- src/zip_code.cpp | 1 + 2 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 87b991645b6..16597c9508f 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -728,8 +728,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); } #endif - bool is_root = distance_index.is_root(parent); - bool is_root_snarl = is_root ? distance_index.is_root_snarl(parent) : false; + ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 + ? ZipCode::EMPTY + : chain_problem->seed->decoder.get_code_type(chain_problem->zipcode_depth-1); + bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; + bool is_root_snarl = is_root ? 
ZipCode::ROOT_SNARL : false; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter @@ -764,38 +767,30 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //Remember the distances to the ends of the parent - chain_problem->distance_start_left = - distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)); - - chain_problem->distance_start_right = - distance_index.distance_to_parent_bound(parent, true, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)); - - chain_problem->distance_end_left = - distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)); - - chain_problem->distance_end_right = - distance_index.distance_to_parent_bound(parent, false, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? 
SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)); -#ifdef DEBUG_CLUSTER + //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle + //to the child when getting the distances + bool snarl_child_is_rev = chain_problem->seed->decoder.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->decoder.max_depth() + ? false + : chain_problem->seed->decoder.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + + chain_problem->distance_start_left = snarl_child_is_rev + ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + + chain_problem->distance_start_right = snarl_child_is_rev + ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + + chain_problem->distance_end_left = snarl_child_is_rev + ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + + chain_problem->distance_end_right = snarl_child_is_rev + ? 
chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + + #ifdef DEBUG_CLUSTER cerr << "This child has distances to end : " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; #endif diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 328bcc451b2..828ee69d35d 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -879,6 +879,7 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); + //Add 1 to values to store inf properly snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() From 536c7d7dba0bc21a51aa5bb14a68f2818065a727 Mon Sep 17 00:00:00 2001 From: Xian Date: Sun, 14 Jul 2024 12:03:40 +0200 Subject: [PATCH 015/124] Add distance check --- src/snarl_seed_clusterer.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 16597c9508f..47ec92cf74c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -791,6 +791,38 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER + assert(chain_problem->distance_start_left == + distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + 
SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); + + assert(chain_problem->distance_start_right == + distance_index.distance_to_parent_bound(parent, true, chain_handle, + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); + + assert(chain_problem->distance_end_left == + distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); + + assert(chain_problem->distance_end_right == + distance_index.distance_to_parent_bound(parent, false, chain_handle, + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? 
SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); + cerr << "This child has distances to end : " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; #endif From 7fa624df223c45e86d63217fc44c3e01f949be18 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 13:39:43 +0200 Subject: [PATCH 016/124] Don't copy seed to cluster --- src/snarl_seed_clusterer.cpp | 73 +- src/snarl_seed_clusterer.hpp | 11 +- src/unittest/snarl_seed_clusterer.cpp | 1739 ++++++++++++------------- 3 files changed, 852 insertions(+), 971 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 47ec92cf74c..feec6642b50 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -28,16 +28,13 @@ vector SnarlDistanceIndexClusterer::cluste vector seed_caches(seeds.size()); for (size_t i = 0 ; i < seeds.size() ; i++) { - seed_caches[i].pos = seeds[i].pos; - seed_caches[i].zipcode = seeds[i].zipcode; - if (seeds[i].zipcode.byte_count() == 0) { - //If the zipcode is empty - ZipCode zip; - zip.fill_in_zipcode(distance_index, seed_caches[i].pos); - seed_caches[i].zipcode = std::move(zip); - } - seed_caches[i].decoder = ZipCodeDecoder(&(seed_caches[i].zipcode)); - seed_caches[i].payload = seed_caches[i].decoder.get_payload_from_zipcode(id(seed_caches[i].pos), distance_index); +#ifdef DEBUG_CLUSTER + assert (seeds[i].zipcode.byte_count() != 0) { +#endif + seed_caches[i].seed = &(seeds[i]); + if (seeds[i].zipcode.byte_count() != 0) { + seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); + } } vector*> all_seed_caches = {&seed_caches}; @@ -74,16 +71,14 @@ vector> SnarlDistanceIndexClusterer for (size_t read_num = 0 ; read_num < all_seeds.size() ; read_num++) { 
all_seed_caches.emplace_back(all_seeds[read_num].size()); for (size_t i = 0 ; i < all_seeds[read_num].size() ; i++) { - all_seed_caches[read_num][i].pos = all_seeds[read_num][i].pos; - all_seed_caches[read_num][i].zipcode = all_seeds[read_num][i].zipcode; - if (all_seeds[read_num][i].zipcode.byte_count() == 0) { - //If the zipcode is empty - ZipCode zip; - zip.fill_in_zipcode(distance_index, all_seed_caches[read_num][i].pos); - all_seed_caches[read_num][i].zipcode = std::move(zip); +#ifdef DEBUG_CLUSTER + //The zipcode should be filled in + assert(all_seeds[read_num][i].zipcode.byte_count() != 0); +#endif + all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); + if (all_seeds[read_num][i].zipcode.byte_count() != 0) { + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } - all_seed_caches[read_num][i].decoder = ZipCodeDecoder(&(all_seed_caches[read_num][i].zipcode)); - all_seed_caches[read_num][i].payload = all_seed_caches[read_num][i].decoder.get_payload_from_zipcode(id(all_seed_caches[read_num][i].pos), distance_index); } } vector*> seed_cache_pointers; @@ -342,7 +337,7 @@ cerr << "Add all seeds to nodes: " << endl; vector* seeds = clustering_problem.all_seeds->at(read_num); for (size_t i = 0; i < seeds->size(); i++) { SeedCache& seed = seeds->at(i); - pos_t pos = seed.pos; + pos_t pos = seed.seed->pos; id_t id = get_id(pos); @@ -365,8 +360,6 @@ cerr << "Add all seeds to nodes: " << endl; //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now const MIPayload& payload = seed.payload; - const ZipCode& zip_code = seed.zipcode; - ZipCodeDecoder& decoder = seed.decoder; #ifdef DEBUG_CLUSTER cerr << "Using cached values for node " << id << ": " @@ -435,14 +428,14 @@ cerr << "Add all seeds to nodes: " << endl; 
clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.decoder.max_depth()); + &seed, seed.seed->zipcode_decoder->max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, - &seed, seed.decoder.max_depth() - 1); + &seed, seed.seed->zipcode_decoder->max_depth() - 1); } parent_to_depth.emplace(seed.payload.parent_handle, seed.payload.parent_depth); @@ -542,7 +535,7 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.decoder.max_depth()); + &seed, seed.seed->zipcode_decoder->max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; @@ -649,7 +642,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? 
snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->decoder.get_net_handle_slow(id(snarl_problem->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; @@ -720,7 +713,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->decoder.get_net_handle_slow(id(chain_problem->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -730,7 +723,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? ZipCode::EMPTY - : chain_problem->seed->decoder.get_code_type(chain_problem->zipcode_depth-1); + : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = is_root ? 
ZipCode::ROOT_SNARL : false; @@ -769,26 +762,26 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->decoder.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->decoder.max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() ? false - : chain_problem->seed->decoder.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) - : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); chain_problem->distance_start_right = snarl_child_is_rev - ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) - : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); chain_problem->distance_end_left = snarl_child_is_rev - ? 
chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) - : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); chain_problem->distance_end_right = snarl_child_is_rev - ? chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) - : chain_problem->seed->decoder.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER assert(chain_problem->distance_start_left == @@ -2126,7 +2119,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c */ #ifdef DEBUG_CLUSTER - cerr << "At child seed " << current_child_seed.pos << endl; + cerr << "At child seed " << current_child_seed->seed->pos << endl; #endif //The distance from the right side of the last child to the left side of this child //(relative to the orientation of the chain diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 9af9d740147..2fe53b82f17 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -74,10 +74,10 @@ class SnarlDistanceIndexClusterer { std::unique_ptr zipcode_decoder; //The decoder for the zipcode Seed() = default; - Seed(pos_t pos, size_t source) : pos(pos), source(source) {} Seed(pos_t pos, size_t source, ZipCode zipcode) : pos(pos), source(source), zipcode(zipcode) { ZipCodeDecoder* decoder = new ZipCodeDecoder(&this->zipcode); zipcode_decoder.reset(decoder); + 
zipcode_decoder->fill_in_full_decoder(); } Seed(pos_t pos, size_t source, ZipCode zipcode, std::unique_ptr zipcode_decoder) : pos(pos), source(source), zipcode(zipcode), zipcode_decoder(std::move(zipcode_decoder)){ @@ -116,14 +116,7 @@ class SnarlDistanceIndexClusterer { // TODO: This will copy information from the seed, since we need per-seed information anyways // and some of it needs to be mutable, it's simpler than keeping around two collections of Seeds struct SeedCache{ - - pos_t pos; - - //TODO: This gets copied because it needs to be mutable - //Cached values (zip codes) from the minimizer - ZipCode zipcode; - - ZipCodeDecoder decoder; + const Seed* seed; //TODO: I think I can skip the zipcode now since I have the payload MIPayload payload; diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index b4a31109eda..2df08b290a8 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -40,20 +40,15 @@ namespace unittest { id_t seed_nodes[] = {1, 1}; //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 1); + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 1); + } @@ -88,20 +83,14 @@ namespace unittest { positions.emplace_back(make_pos_t(2, false, 1)); positions.emplace_back(make_pos_t(2, true, 7)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (auto& pos : positions) { - 
ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 15); - REQUIRE(clusters.size() == 2); + vector seeds; + for (auto& pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 15); + REQUIRE(clusters.size() == 2); } @@ -128,20 +117,15 @@ namespace unittest { positions.emplace_back(make_pos_t(1, false, 0)); positions.emplace_back(make_pos_t(1, true, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (auto& pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0,zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 5); - REQUIRE(clusters.size() == 1); + vector seeds; + for (auto& pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0,zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 5); + REQUIRE(clusters.size() == 1); + } @@ -170,20 +154,15 @@ namespace unittest { positions.emplace_back(make_pos_t(2, false, 0)); positions.emplace_back(make_pos_t(1, false, 5)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 15); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = 
clusterer.cluster_seeds(seeds, 15); + REQUIRE(clusters.size() == 1); + } } @@ -224,20 +203,15 @@ namespace unittest { positions.emplace_back(make_pos_t(4, false, 1)); positions.emplace_back(make_pos_t(4, false, 3)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 2); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 2); + REQUIRE(clusters.size() == 1); + } @@ -245,21 +219,16 @@ namespace unittest { id_t seed_nodes[] = {2, 3, 5}; //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 1); + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 1); + } @@ -267,21 +236,16 @@ namespace unittest { id_t seed_nodes[] = {2, 3, 5}; //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0,zipcode}); - } else 
{ - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 4); - REQUIRE(clusters.size() == 3); + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0,zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 4); + REQUIRE(clusters.size() == 3); + } @@ -292,12 +256,18 @@ namespace unittest { vector> seeds (2); pos_t pos = make_pos_t(2, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode1; + zipcode1.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); - seeds[1].push_back({ pos, 0}); + ZipCode zipcode2; + zipcode2.fill_in_zipcode(dist_index, pos); + seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); REQUIRE(clusters.size() == 2); @@ -311,12 +281,18 @@ namespace unittest { vector> seeds (2); pos_t pos = make_pos_t(5, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(6, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode1; + zipcode1.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); - seeds[1].push_back({ pos, 0}); + ZipCode zipcode2; + zipcode2.fill_in_zipcode(dist_index, pos); + seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); REQUIRE(clusters.size() == 2); @@ -365,20 +341,15 @@ namespace unittest { positions.emplace_back(make_pos_t(4, false, 3)); positions.emplace_back(make_pos_t(8, false, 3)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos 
: positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 2); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 2); + REQUIRE(clusters.size() == 2); + } @@ -386,21 +357,16 @@ namespace unittest { id_t seed_nodes[] = {2, 3, 5, 8}; //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 2); + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 2); + } @@ -408,21 +374,16 @@ namespace unittest { id_t seed_nodes[] = {2, 3, 5, 8}; //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 4); - REQUIRE(clusters.size() == 4); + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + 
vector clusters = clusterer.cluster_seeds(seeds, 4); + REQUIRE(clusters.size() == 4); + } @@ -433,12 +394,18 @@ namespace unittest { vector> seeds (2); pos_t pos = make_pos_t(2, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode1; + zipcode1.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); - seeds[1].push_back({ pos, 0}); + ZipCode zipcode2; + zipcode2.fill_in_zipcode(dist_index, pos); + seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); REQUIRE(clusters.size() == 2); @@ -452,12 +419,18 @@ namespace unittest { vector> seeds (2); pos_t pos = make_pos_t(5, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(6, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode1; + zipcode1.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); - seeds[1].push_back({ pos, 0}); + ZipCode zipcode2; + zipcode2.fill_in_zipcode(dist_index, pos); + seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); REQUIRE(clusters.size() == 2); @@ -500,20 +473,15 @@ namespace unittest { positions.emplace_back(make_pos_t(3, false, 8)); positions.emplace_back(make_pos_t(5, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 5); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : 
positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 5); + REQUIRE(clusters.size() == 2); + } @@ -524,20 +492,15 @@ namespace unittest { positions.emplace_back(make_pos_t(3, false, 8)); positions.emplace_back(make_pos_t(5, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 2); - REQUIRE(clusters.size() == 3); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 2); + REQUIRE(clusters.size() == 3); + } @@ -594,120 +557,90 @@ namespace unittest { positions.emplace_back(make_pos_t(10, false, 0)); positions.emplace_back(make_pos_t(12, false, 1)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 2); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 2); + REQUIRE(clusters.size() == 1); + } SECTION("two clusters in same snarl") { vector positions; positions.emplace_back(make_pos_t(10, false, 0)); positions.emplace_back(make_pos_t(12, false, 1)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) 
{ - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 1); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 1); + REQUIRE(clusters.size() == 2); + } SECTION("one cluster in same snarl separated by one node") { vector positions; positions.emplace_back(make_pos_t(10, false, 0)); positions.emplace_back(make_pos_t(14, false, 0)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 1); + } SECTION("two clusters in same snarl separated by one node") { vector positions; positions.emplace_back(make_pos_t(10, false, 0)); positions.emplace_back(make_pos_t(14, false, 0)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 2); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : 
positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 2); + REQUIRE(clusters.size() == 2); + } SECTION("two clusters using path in different snarl") { vector positions; positions.emplace_back(make_pos_t(5, false, 0)); positions.emplace_back(make_pos_t(12, false, 0)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 9); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 9); + REQUIRE(clusters.size() == 2); + } SECTION("one cluster using path in different snarl") { vector positions; positions.emplace_back(make_pos_t(5, false, 0)); positions.emplace_back(make_pos_t(12, false, 0)); //all are in the same cluster - for (bool use_minimizers : {false, true} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 1); + } SECTION("one cluster") { vector positions; @@ -716,40 +649,30 @@ namespace unittest { positions.emplace_back(make_pos_t(9, true, 2)); 
positions.emplace_back(make_pos_t(7, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 8); - REQUIRE(clusters.size() == 1); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 8); + REQUIRE(clusters.size() == 1); + } SECTION("two clusters") { vector positions; positions.emplace_back(make_pos_t(12, false, 0)); positions.emplace_back(make_pos_t(7, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 4); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 4); + REQUIRE(clusters.size() == 2); + } } @@ -815,20 +738,15 @@ namespace unittest { positions.emplace_back(make_pos_t(11, false, 0)); positions.emplace_back(make_pos_t(8, false, 2)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 4); - REQUIRE(clusters.size() == 
3); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 4); + REQUIRE(clusters.size() == 3); + } @@ -846,20 +764,15 @@ namespace unittest { positions.emplace_back(make_pos_t(13, false, 0)); positions.emplace_back(make_pos_t(7, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 2); + } SECTION( "A bunch of nodes in the snarl on the other side" ) { @@ -873,20 +786,15 @@ namespace unittest { positions.emplace_back(make_pos_t(10, false, 2)); positions.emplace_back(make_pos_t(13, false, 0)); //all are in the same cluster - for (bool use_minimizers : {true, false} ) { - vector seeds; - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 2); + vector seeds; + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 2); + } } TEST_CASE( "Cluster looping, multicomponent", @@ -980,19 +888,14 @@ namespace unittest { positions.emplace_back(make_pos_t(10, false, 
0)); //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 5); - REQUIRE(clusters.size() == 2); + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 5); + REQUIRE(clusters.size() == 2); + } @@ -1003,19 +906,14 @@ namespace unittest { positions.emplace_back(make_pos_t(8, false, 0)); //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 5); - REQUIRE(clusters.size() == 2); + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 5); + REQUIRE(clusters.size() == 2); + } @@ -1026,20 +924,15 @@ namespace unittest { positions.emplace_back(make_pos_t(7, false, 0)); //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - seeds.clear(); - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 9); - REQUIRE(clusters.size() == 1); + seeds.clear(); + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = 
clusterer.cluster_seeds(seeds, 9); + REQUIRE(clusters.size() == 1); + } SECTION( "Two clusters" ) { @@ -1049,20 +942,15 @@ namespace unittest { positions.emplace_back(make_pos_t(11, false, 0)); //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - seeds.clear(); - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 2); + seeds.clear(); + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 2); + } SECTION( "One cluster" ) { @@ -1072,20 +960,15 @@ namespace unittest { positions.emplace_back(make_pos_t(11, false, 0)); //all are in the same cluster vector seeds; - for (bool use_minimizers : {true, false} ) { - seeds.clear(); - for (pos_t pos : positions) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 11); - REQUIRE(clusters.size() == 1); + seeds.clear(); + for (pos_t pos : positions) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 11); + REQUIRE(clusters.size() == 1); + } } @@ -1120,47 +1003,37 @@ namespace unittest { SECTION( "One cluster taking loop" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {1, 4}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - 
seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 6); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {1, 4}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 6); + REQUIRE(clusters.size() == 1); + + } SECTION( "One cluster on boundary" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {2, 4}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {2, 4}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 1); + + } SECTION( "One fragment cluster on boundary" ) { @@ -1169,10 +1042,14 @@ namespace unittest { vector> seeds (2); pos_t pos = make_pos_t(2, false, 0); - seeds[0].push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(4, false, 0); - seeds[1].push_back({ pos, 0}); + ZipCode zipcode1; + zipcode1.fill_in_zipcode(dist_index, pos); + seeds[1].push_back({ pos, 0, zipcode1}); vector> clusters = clusterer.cluster_seeds(seeds, 3, 3); REQUIRE(clusters.size() == 2); @@ -1181,25 +1058,20 @@ namespace unittest { } SECTION( "One cluster on boundary" ) { - 
for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {3, 4}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } + id_t seed_nodes[] = {3, 4}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); + } - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 1); + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 1); - } + } } TEST_CASE( "chain with loop", @@ -1238,90 +1110,70 @@ namespace unittest { SECTION( "One cluster taking loop" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {4, 5}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 11); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {4, 5}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 11); + REQUIRE(clusters.size() == 1); + + } SECTION( "One cluster not taking loop" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {4, 5, 3}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - 
zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {4, 5, 3}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 1); + } SECTION( "One cluster not taking loop" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {4, 5, 6}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 8); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {4, 5, 6}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 8); + REQUIRE(clusters.size() == 1); + + } SECTION( "Two clusters" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {4, 5, 1}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE(clusters.size() == 3); + id_t seed_nodes[] = {4, 5, 1}; + //all are in the same cluster 
+ vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + vector clusters = clusterer.cluster_seeds(seeds, 3); + REQUIRE(clusters.size() == 3); + + } } TEST_CASE( "multiple clusters in a chain", @@ -1370,71 +1222,61 @@ namespace unittest { SECTION( "One cluster with seed struct" ) { - for (bool use_minimizers : {true, false} ) { - id_t seed_nodes[] = {2, 3, 4, 7, 8, 9, 11}; - //all are in the same cluster - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - if (use_minimizers) { - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - - vector clusters = clusterer.cluster_seeds(seeds, 10); - REQUIRE(clusters.size() == 1); + id_t seed_nodes[] = {2, 3, 4, 7, 8, 9, 11}; + //all are in the same cluster + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + + vector clusters = clusterer.cluster_seeds(seeds, 10); + REQUIRE(clusters.size() == 1); + } SECTION( "Two clusters" ) { - for (bool use_minimizers : {true, false} ) { - vector seed_nodes( {2, 3, 4, 7, 8, 10, 11}); - //Clusters should be {2, 3, 4}, {7, 8, 10, 11} - //Distance from pos on 4 to pos on 7 is 8, including one position - vector seeds; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } + vector seed_nodes( {2, 3, 4, 7, 8, 10, 11}); + //Clusters should be {2, 3, 4}, {7, 8, 10, 11} + //Distance from pos on 4 to pos on 7 is 8, including one position + vector seeds; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 
0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); + } - vector clusters = clusterer.cluster_seeds(seeds, 7); - vector> cluster_sets; - for (auto& c : clusters) { - hash_set h; - for (size_t s : c.seeds) { - h.insert(s); - } - cluster_sets.push_back(h); + vector clusters = clusterer.cluster_seeds(seeds, 7); + vector> cluster_sets; + for (auto& c : clusters) { + hash_set h; + for (size_t s : c.seeds) { + h.insert(s); } - REQUIRE( clusters.size() == 2); - REQUIRE (( (cluster_sets[0].count(0) == 1 && - cluster_sets[0].count(1) == 1 && - cluster_sets[0].count(2) == 1 && - cluster_sets[1].count(3) == 1 && - cluster_sets[1].count(4) == 1 && - cluster_sets[1].count(5) == 1 && - cluster_sets[1].count(6) == 1 ) || - - ( cluster_sets[1].count(0) == 1 && - cluster_sets[1].count(1) == 1 && - cluster_sets[1].count(2) == 1 && - cluster_sets[0].count(3) == 1 && - cluster_sets[0].count(4) == 1 && - cluster_sets[0].count(5) == 1 && - cluster_sets[0].count(6) == 1 ))); - + cluster_sets.push_back(h); } + REQUIRE( clusters.size() == 2); + REQUIRE (( (cluster_sets[0].count(0) == 1 && + cluster_sets[0].count(1) == 1 && + cluster_sets[0].count(2) == 1 && + cluster_sets[1].count(3) == 1 && + cluster_sets[1].count(4) == 1 && + cluster_sets[1].count(5) == 1 && + cluster_sets[1].count(6) == 1 ) || + + ( cluster_sets[1].count(0) == 1 && + cluster_sets[1].count(1) == 1 && + cluster_sets[1].count(2) == 1 && + cluster_sets[0].count(3) == 1 && + cluster_sets[0].count(4) == 1 && + cluster_sets[0].count(5) == 1 && + cluster_sets[0].count(6) == 1 ))); + + } SECTION( "One fragment cluster of the same node" ) { @@ -1445,82 +1287,64 @@ namespace unittest { //Distance from pos on 4 to pos on 7 is 8, including one position // vector> all_seeds(2); - for (bool use_minimizers : {true, false} ) { - vector& seeds = all_seeds[0] ; - for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - if (use_minimizers) { - ZipCode zipcode; - 
zipcode.fill_in_zipcode(dist_index, pos); - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector& seeds1 = all_seeds[1]; - for (id_t n : seed_nodes1) { - pos_t pos = make_pos_t(n, false, 0); - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds1.push_back({ pos, 0, zipcode}); - } else { - seeds1.push_back({ pos, 0}); - } - } + vector& seeds = all_seeds[0] ; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); + } + vector& seeds1 = all_seeds[1]; + for (id_t n : seed_nodes1) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds1.push_back({ pos, 0, zipcode}); + } - vector> paired_clusters = clusterer.cluster_seeds(all_seeds, 7, 15); - //Should be [[<[0,1,2], 0>],[<[3,4,5,6], 0>]] - REQUIRE( paired_clusters.size() == 2); - REQUIRE( paired_clusters[0].size() == 1); - REQUIRE( paired_clusters[1].size() == 2); - REQUIRE( paired_clusters[0][0].fragment == paired_clusters[1][0].fragment); - REQUIRE( paired_clusters[1][0].fragment == paired_clusters[1][1].fragment); - } + vector> paired_clusters = clusterer.cluster_seeds(all_seeds, 7, 15); + //Should be [[<[0,1,2], 0>],[<[3,4,5,6], 0>]] + REQUIRE( paired_clusters.size() == 2); + REQUIRE( paired_clusters[0].size() == 1); + REQUIRE( paired_clusters[1].size() == 2); + REQUIRE( paired_clusters[0][0].fragment == paired_clusters[1][0].fragment); + REQUIRE( paired_clusters[1][0].fragment == paired_clusters[1][1].fragment); + } SECTION( "One fragment cluster" ) { - for (bool use_minimizers : {true, false}) { - vector seed_nodes( {2, 3, 4}); - vector seed_nodes1({7, 8, 10, 11}); - //Clusters should be {2, 3, 4}, {7, 8, 10, 11} - //One fragment cluster - //Distance from pos on 4 to pos on 7 is 8, including one position - vector> all_seeds (2); - vector& seeds = all_seeds[0] ; - 
for (id_t n : seed_nodes) { - pos_t pos = make_pos_t(n, false, 0); - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector& seeds1 = all_seeds[1]; - for (id_t n : seed_nodes1) { - pos_t pos = make_pos_t(n, false, 0); - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds1.push_back({ pos, 0, zipcode}); - } else { - seeds1.push_back({ pos, 0}); - } - } + vector seed_nodes( {2, 3, 4}); + vector seed_nodes1({7, 8, 10, 11}); + //Clusters should be {2, 3, 4}, {7, 8, 10, 11} + //One fragment cluster + //Distance from pos on 4 to pos on 7 is 8, including one position + vector> all_seeds (2); + vector& seeds = all_seeds[0] ; + for (id_t n : seed_nodes) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); + } + vector& seeds1 = all_seeds[1]; + for (id_t n : seed_nodes1) { + pos_t pos = make_pos_t(n, false, 0); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds1.push_back({ pos, 0, zipcode}); + } - vector> paired_clusters = clusterer.cluster_seeds(all_seeds, 7, 15); - //Should be [[<[0,1,2], 0>],[<[3,4,5,6], 0>]] - REQUIRE( paired_clusters.size() == 2); - REQUIRE( paired_clusters[0].size() == 1); - REQUIRE( paired_clusters[1].size() == 1); - REQUIRE( paired_clusters[0][0].seeds.size() == 3); - REQUIRE( paired_clusters[1][0].seeds.size() == 4); - REQUIRE( paired_clusters[0][0].fragment == paired_clusters[1][0].fragment); - } + vector> paired_clusters = clusterer.cluster_seeds(all_seeds, 7, 15); + //Should be [[<[0,1,2], 0>],[<[3,4,5,6], 0>]] + REQUIRE( paired_clusters.size() == 2); + REQUIRE( paired_clusters[0].size() == 1); + REQUIRE( paired_clusters[1].size() == 1); + REQUIRE( paired_clusters[0][0].seeds.size() == 3); + REQUIRE( paired_clusters[1][0].seeds.size() == 4); + REQUIRE( 
paired_clusters[0][0].fragment == paired_clusters[1][0].fragment); + } SECTION( "Two fragment clusters with seed structs" ) { @@ -1652,7 +1476,9 @@ namespace unittest { pos_ts.emplace_back(3, false, 0); pos_ts.emplace_back(11, false, 9); for (pos_t pos : pos_ts) { - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } @@ -1705,7 +1531,9 @@ namespace unittest { vector seeds; for (id_t n : seed_nodes) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 20); @@ -1978,46 +1806,41 @@ namespace unittest { pos_ts.emplace_back(6, false, 0); pos_ts.emplace_back(8, false, 0); - for (bool use_minimizers : {true, false}) { - vector seeds; - for (pos_t pos : pos_ts){ + vector seeds; + for (pos_t pos : pos_ts){ - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds.push_back({ pos, 0,zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } - vector clusters = clusterer.cluster_seeds(seeds, 3); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0,zipcode}); + } + vector clusters = clusterer.cluster_seeds(seeds, 3); - REQUIRE( clusters.size() == 2); + REQUIRE( clusters.size() == 2); - vector> cluster_sets; - for (auto& c : clusters) { - hash_set h; - for (size_t s : c.seeds) { - h.insert(s); - } - cluster_sets.push_back(h); + vector> cluster_sets; + for (auto& c : clusters) { + hash_set h; + for (size_t s : c.seeds) { + h.insert(s); } - REQUIRE (( (cluster_sets[0].count(0) == 1 && - cluster_sets[0].count(1) == 1 && - cluster_sets[0].count(2) == 1 && - cluster_sets[0].count(3) == 1 && - cluster_sets[1].count(4) == 1 && - cluster_sets[1].count(5) == 1 && - cluster_sets[1].count(6) == 1) || - - ( cluster_sets[1].count(0) == 1 && - cluster_sets[1].count(1) == 1 
&& - cluster_sets[1].count(2) == 1 && - cluster_sets[1].count(3) == 1 && - cluster_sets[0].count(4) == 1 && - cluster_sets[0].count(5) == 1 && - cluster_sets[0].count(6) == 1 ))); - } + cluster_sets.push_back(h); + } + REQUIRE (( (cluster_sets[0].count(0) == 1 && + cluster_sets[0].count(1) == 1 && + cluster_sets[0].count(2) == 1 && + cluster_sets[0].count(3) == 1 && + cluster_sets[1].count(4) == 1 && + cluster_sets[1].count(5) == 1 && + cluster_sets[1].count(6) == 1) || + + ( cluster_sets[1].count(0) == 1 && + cluster_sets[1].count(1) == 1 && + cluster_sets[1].count(2) == 1 && + cluster_sets[1].count(3) == 1 && + cluster_sets[0].count(4) == 1 && + cluster_sets[0].count(5) == 1 && + cluster_sets[0].count(6) == 1 ))); + } SECTION( "Four clusters" ) { vector> all_seeds(1); @@ -2038,7 +1861,9 @@ namespace unittest { pos_ts.emplace_back(15, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2068,7 +1893,9 @@ namespace unittest { pos_ts.emplace_back(6, false, 0); pos_ts.emplace_back(8, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; pos_ts.clear(); @@ -2079,7 +1906,9 @@ namespace unittest { pos_ts.emplace_back(14, false, 0); pos_ts.emplace_back(15, false, 0); for (pos_t pos : pos_ts){ - seeds1.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds1.push_back({ pos, 0, zipcode}); } vector> paired_clusters = clusterer.cluster_seeds(all_seeds, 3, 3); @@ -2115,7 +1944,9 @@ namespace unittest { pos_ts.emplace_back(5, false, 5); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 7); @@ -2155,7 +1986,9 @@ namespace unittest { pos_ts.emplace_back(3, false, 3); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2210,7 +2043,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters= clusterer.cluster_seeds(seeds, 10); @@ -2227,7 +2062,9 @@ namespace unittest { pos_ts.emplace_back(4, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2241,7 +2078,9 @@ namespace unittest { pos_ts.emplace_back(4, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2257,7 +2096,9 @@ namespace unittest { pos_ts.emplace_back(6, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2325,7 +2166,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2340,7 +2183,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + 
zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2355,7 +2200,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2371,13 +2218,17 @@ namespace unittest { vector& seeds = all_seeds[0]; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector ids1({8, 12}); vector& seeds1 = all_seeds[1]; for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); - seeds1.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds1.push_back({ pos, 0, zipcode}); } @@ -2397,7 +2248,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -2412,7 +2265,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 6); @@ -2475,7 +2330,9 @@ namespace unittest { pos_ts.emplace_back(9, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2489,7 +2346,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + 
ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2504,7 +2363,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2520,13 +2381,17 @@ namespace unittest { vector& seeds = all_seeds[0]; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos);; + seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); vector& seeds1 = all_seeds[1]; for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); - seeds1.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos);; + seeds1.push_back({ pos, 0, zipcode}); } //Clusters are //Read 1: {1, 3} in a fragment cluster with Read 2: {5} @@ -2554,13 +2419,17 @@ namespace unittest { vector& seeds = all_seeds[0]; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); vector& seeds1 = all_seeds[1]; for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); - seeds1.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds1.push_back({ pos, 0, zipcode}); } //Clusters are //Read 1: {1, 3} in a fragment cluster with Read 2: {5} @@ -2625,7 +2494,9 @@ namespace unittest { pos_ts.emplace_back(7, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2641,7 +2512,9 @@ namespace unittest { pos_ts.emplace_back(7, false, 0); 
for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2659,7 +2532,9 @@ namespace unittest { pos_ts.emplace_back(8, true, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2679,27 +2554,22 @@ namespace unittest { pos_ts[1].emplace_back(7, false, 0); pos_ts[1].emplace_back(8, true, 0); - for (bool use_minimizers : {true, false}) { - vector> seeds(2); - for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num ++) { - for (pos_t pos : pos_ts[read_num]){ - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - seeds[read_num].push_back({ pos, 0, zipcode}); - } else { - seeds[read_num].push_back({ pos, 0}); - } - } + vector> seeds(2); + for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num ++) { + for (pos_t pos : pos_ts[read_num]){ + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[read_num].push_back({ pos, 0, zipcode}); } - - vector> clusters = clusterer.cluster_seeds(seeds, 4, 10); - - REQUIRE( clusters.size() == 2); - REQUIRE(clusters[0].size() == 1); - REQUIRE(clusters[1].size() == 1); - REQUIRE(clusters[0][0].fragment == clusters[1][0].fragment); } + + vector> clusters = clusterer.cluster_seeds(seeds, 4, 10); + + REQUIRE( clusters.size() == 2); + REQUIRE(clusters[0].size() == 1); + REQUIRE(clusters[1].size() == 1); + REQUIRE(clusters[0][0].fragment == clusters[1][0].fragment); + } @@ -2713,18 +2583,13 @@ namespace unittest { pos_ts.emplace_back(7, false, 0); pos_ts.emplace_back(8, true, 0); - for (bool use_minimizers : {true, false}) { - vector seeds; - for (pos_t pos : pos_ts){ - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - 
seeds.push_back({ pos, 0, zipcode}); - } else { - seeds.push_back({ pos, 0}); - } - } + vector seeds; + for (pos_t pos : pos_ts){ + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } + } @@ -2777,7 +2642,9 @@ namespace unittest { pos_ts.emplace_back(8, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2793,7 +2660,9 @@ namespace unittest { pos_ts.emplace_back(7, false, 0); for (pos_t pos : pos_ts){ - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 6); @@ -2806,7 +2675,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2858,7 +2729,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2871,7 +2744,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2884,7 +2759,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 3); @@ -2898,7 +2775,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -2935,7 +2814,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2978,7 +2859,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2992,7 +2875,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -3006,7 +2891,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 18); @@ -3040,7 +2927,9 @@ namespace unittest { positions.emplace_back(make_pos_t(3, false, 1)); vector seeds; for (auto pos : positions) { - seeds.push_back({pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -3082,7 +2971,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - 
seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -3096,7 +2987,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -3109,7 +3002,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -3122,7 +3017,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -3161,7 +3058,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -3175,7 +3074,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -3188,7 +3089,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ 
-3201,7 +3104,9 @@ namespace unittest { vector seeds; for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); - seeds.push_back({ pos, 0}); + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -3271,26 +3176,21 @@ namespace unittest { size_t dist = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), true, &graph); cerr << "DISTANCE BETWEEN " << pos1 << " and " << pos2 << " = " << dist << endl; - //for (bool use_minimizers : {true, false}) { - // vector> seeds(2); - // for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { - // for (pos_t pos : pos_ts[read_num]) { + //vector> seeds(2); + //for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { + // for (pos_t pos : pos_ts[read_num]) { - // if (use_minimizers) { - // ZipCode zipcode; - // zipcode.fill_in_zipcode(dist_index, pos); - // seeds[read_num].push_back({ pos, 0, zipcode}); - // } else { - // seeds[read_num].push_back({ pos, 0}); - // } - // } - // } + // ZipCode zipcode; + // zipcode.fill_in_zipcode(dist_index, pos); + // seeds[read_num].push_back({ pos, 0, zipcode}); + // } + //} - // vector> clusters = clusterer.cluster_seeds(seeds, 15, 35); + //vector> clusters = clusterer.cluster_seeds(seeds, 15, 35); - // REQUIRE(clusters.size() == 1); - //} + //REQUIRE(clusters.size() == 1); + // REQUIRE(false); } */ @@ -3328,204 +3228,199 @@ namespace unittest { uniform_int_distribution randPosIndex(0, all_nodes.size()-1); - for (bool use_minimizers : {true, false}) { - for (size_t k = 0; k < 10 ; k++) { + for (size_t k = 0; k < 10 ; k++) { - vector> all_seeds(2); - size_t read_lim = 15;// Distance between read clusters - size_t fragment_lim = 35;// Distance between fragment clusters - for (size_t read = 0 ; read < 2 ; read ++) { - uniform_int_distribution randPosCount(3, 70); - for (int j = 0; j < 
randPosCount(generator); j++) { - //Check clusters of j random positions + vector> all_seeds(2); + size_t read_lim = 15;// Distance between read clusters + size_t fragment_lim = 35;// Distance between fragment clusters + for (size_t read = 0 ; read < 2 ; read ++) { + uniform_int_distribution randPosCount(3, 70); + for (int j = 0; j < randPosCount(generator); j++) { + //Check clusters of j random positions - id_t nodeID1 = all_nodes[randPosIndex(generator)]; - handle_t node1 = graph.get_handle(nodeID1); + id_t nodeID1 = all_nodes[randPosIndex(generator)]; + handle_t node1 = graph.get_handle(nodeID1); - offset_t offset1 = uniform_int_distribution(0,graph.get_length(node1) - 1)(generator); + offset_t offset1 = uniform_int_distribution(0,graph.get_length(node1) - 1)(generator); - pos_t pos = make_pos_t(nodeID1, - uniform_int_distribution(0,1)(generator) == 0,offset1 ); + pos_t pos = make_pos_t(nodeID1, + uniform_int_distribution(0,1)(generator) == 0,offset1 ); - - if (use_minimizers) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - all_seeds[read].push_back({ pos, 0, zipcode}); - } else { - all_seeds[read].push_back({ pos, 0}); - } + + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + all_seeds[read].push_back({ pos, 0, zipcode}); - } } - vector> paired_clusters = clusterer.cluster_seeds(all_seeds, read_lim, fragment_lim); - - vector> fragment_clusters; - - for (size_t read_num = 0 ; read_num < 2 ; read_num ++) { - auto& one_read_clusters = paired_clusters[read_num]; - if (one_read_clusters.size() > 0) { - for (size_t a = 0; a < one_read_clusters.size(); a++) { - // For each cluster -cluster this cluster to ensure that - // there is only one - vector clust = one_read_clusters[a].seeds; - size_t fragment_cluster = one_read_clusters[a].fragment; - if (fragment_cluster >= fragment_clusters.size()) { - fragment_clusters.resize(fragment_cluster+1); - } - - structures::UnionFind new_clusters (clust.size(), false); - - for (size_t i1 = 0 ; i1 < 
clust.size() ; i1++) { - pos_t pos1 = all_seeds[read_num][clust[i1]].pos; - fragment_clusters[fragment_cluster].emplace_back(pos1); - size_t len1 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos1)));; - pos_t rev1 = make_pos_t(get_id(pos1), !is_rev(pos1),len1 - get_offset(pos1)-1); - - for (size_t b = 0 ; b < one_read_clusters.size() ; b++) { - if (b != a) { - //For each other cluster - vector clust2 = one_read_clusters[b].seeds; - for (size_t i2 = 0 ; i2 < clust2.size() ; i2++) { - //And each position in each other cluster, - //make sure that this position is far away from i1 - pos_t pos2 = all_seeds[read_num][clust2[i2]].pos; - size_t len2 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos2))); - pos_t rev2 = make_pos_t(get_id(pos2), - !is_rev(pos2), - len2 - get_offset(pos2)-1); - - size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist = std::min(std::min(dist1, - dist2), std::min( dist3, dist4)); - if ( dist != -1 && dist <= read_lim) { - dist_index.print_self(); - graph.serialize("testGraph.hg"); - cerr << "These should have been in the same read cluster: " ; - cerr << pos1 << " and " << pos2 << endl; - cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; - REQUIRE(false); - } - - } - } - } - for (size_t i2 = 0 ; i2 < clust.size() ; i2++) { - //For each position in the same cluster - pos_t pos2 = 
all_seeds[read_num][clust[i2]].pos; - size_t len2 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos2))); - pos_t rev2 = make_pos_t(get_id(pos2), + } + vector> paired_clusters = clusterer.cluster_seeds(all_seeds, read_lim, fragment_lim); + + vector> fragment_clusters; + + for (size_t read_num = 0 ; read_num < 2 ; read_num ++) { + auto& one_read_clusters = paired_clusters[read_num]; + if (one_read_clusters.size() > 0) { + for (size_t a = 0; a < one_read_clusters.size(); a++) { + // For each cluster -cluster this cluster to ensure that + // there is only one + vector clust = one_read_clusters[a].seeds; + size_t fragment_cluster = one_read_clusters[a].fragment; + if (fragment_cluster >= fragment_clusters.size()) { + fragment_clusters.resize(fragment_cluster+1); + } + + structures::UnionFind new_clusters (clust.size(), false); + + for (size_t i1 = 0 ; i1 < clust.size() ; i1++) { + pos_t pos1 = all_seeds[read_num][clust[i1]].pos; + fragment_clusters[fragment_cluster].emplace_back(pos1); + size_t len1 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos1)));; + pos_t rev1 = make_pos_t(get_id(pos1), !is_rev(pos1),len1 - get_offset(pos1)-1); + + for (size_t b = 0 ; b < one_read_clusters.size() ; b++) { + if (b != a) { + //For each other cluster + vector clust2 = one_read_clusters[b].seeds; + for (size_t i2 = 0 ; i2 < clust2.size() ; i2++) { + //And each position in each other cluster, + //make sure that this position is far away from i1 + pos_t pos2 = all_seeds[read_num][clust2[i2]].pos; + size_t len2 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos2))); + pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), len2 - get_offset(pos2)-1); - size_t dist = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), true, &graph); - if ( dist != -1 && dist <= read_lim) { - new_clusters.union_groups(i1, i2); - } - } - } - auto actual_clusters = 
new_clusters.all_groups(); - if (actual_clusters.size() != 1) { - dist_index.print_self(); - graph.serialize("testGraph.hg"); - cerr << "These should be different read clusters: " << endl; - for (auto c : actual_clusters) { - cerr << "cluster: " ; - for (size_t i1 : c) { - cerr << all_seeds[read_num][clust[i1]].pos << " "; + size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist = std::min(std::min(dist1, + dist2), std::min( dist3, dist4)); + if ( dist != -1 && dist <= read_lim) { + dist_index.print_self(); + graph.serialize("testGraph.hg"); + cerr << "These should have been in the same read cluster: " ; + cerr << pos1 << " and " << pos2 << endl; + cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; + REQUIRE(false); + } + } - cerr << endl; } } - REQUIRE(actual_clusters.size() == 1); - } - } - } - for (size_t a = 0; a < fragment_clusters.size(); a++) { - // For each cluster -cluster this cluster to ensure that - // there is only one - vector clust = fragment_clusters[a]; - - structures::UnionFind new_clusters (clust.size(), false); - - for (size_t i1 = 0 ; i1 < clust.size() ; i1++) { - pos_t pos1 = clust[i1]; - size_t len1 = graph.get_length(graph.get_handle(get_id(pos1), false)); - pos_t rev1 = make_pos_t(get_id(pos1), - !is_rev(pos1), - len1 - get_offset(pos1)-1); - - for (size_t b = 0 ; b < fragment_clusters.size() ; b++) { - if (b != a) { - //For each 
other cluster - vector clust2 = fragment_clusters[b]; - for (size_t i2 = 0 ; i2 < clust2.size() ; i2++) { - //And each position in each other cluster, - //make sure that this position is far away from i1 - pos_t pos2 = clust2[i2]; - size_t len2 = graph.get_length(graph.get_handle(get_id(pos2), false)); - pos_t rev2 = make_pos_t(get_id(pos2), + for (size_t i2 = 0 ; i2 < clust.size() ; i2++) { + //For each position in the same cluster + pos_t pos2 = all_seeds[read_num][clust[i2]].pos; + size_t len2 = dist_index.minimum_length(dist_index.get_node_net_handle(get_id(pos2))); + pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), len2 - get_offset(pos2)-1); + size_t dist = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), true, &graph); + if ( dist != -1 && dist <= read_lim) { + new_clusters.union_groups(i1, i2); + } - size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist = std::min(std::min(dist1, dist2), std::min( dist3, dist4)); - if ( dist != -1 && dist <= fragment_lim) { - dist_index.print_self(); - graph.serialize("testGraph.hg"); - cerr << "These should have been in the same fragment cluster: " ; - cerr << pos1 << " and " << pos2 << endl; - cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; - REQUIRE(false); - } - + } + } + auto actual_clusters = new_clusters.all_groups(); + if 
(actual_clusters.size() != 1) { + dist_index.print_self(); + graph.serialize("testGraph.hg"); + cerr << "These should be different read clusters: " << endl; + for (auto c : actual_clusters) { + cerr << "cluster: " ; + for (size_t i1 : c) { + cerr << all_seeds[read_num][clust[i1]].pos << " "; } + cerr << endl; } } - for (size_t i2 = 0 ; i2 < clust.size() ; i2++) { - //For each position in the same cluster - pos_t pos2 = clust[i2]; - size_t len2 = graph.get_length(graph.get_handle(get_id(pos2), false)); - pos_t rev2 = make_pos_t(get_id(pos2), + REQUIRE(actual_clusters.size() == 1); + } + } + } + for (size_t a = 0; a < fragment_clusters.size(); a++) { + // For each cluster -cluster this cluster to ensure that + // there is only one + vector clust = fragment_clusters[a]; + + structures::UnionFind new_clusters (clust.size(), false); + + for (size_t i1 = 0 ; i1 < clust.size() ; i1++) { + pos_t pos1 = clust[i1]; + size_t len1 = graph.get_length(graph.get_handle(get_id(pos1), false)); + pos_t rev1 = make_pos_t(get_id(pos1), + !is_rev(pos1), + len1 - get_offset(pos1)-1); + + for (size_t b = 0 ; b < fragment_clusters.size() ; b++) { + if (b != a) { + //For each other cluster + vector clust2 = fragment_clusters[b]; + for (size_t i2 = 0 ; i2 < clust2.size() ; i2++) { + //And each position in each other cluster, + //make sure that this position is far away from i1 + pos_t pos2 = clust2[i2]; + size_t len2 = graph.get_length(graph.get_handle(get_id(pos2), false)); + pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), len2 - get_offset(pos2)-1); - size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), 
get_is_rev(pos2), get_offset(pos2), false, &graph); - size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); - size_t dist = std::min(std::min(dist1, - dist2), std::min( dist3, dist4)); - if ( dist != -1 && dist <= fragment_lim) { - new_clusters.union_groups(i1, i2); - } + size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist = std::min(std::min(dist1, dist2), std::min( dist3, dist4)); + if ( dist != -1 && dist <= fragment_lim) { + dist_index.print_self(); + graph.serialize("testGraph.hg"); + cerr << "These should have been in the same fragment cluster: " ; + cerr << pos1 << " and " << pos2 << endl; + cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; + REQUIRE(false); + } + + } } } - auto actual_clusters = new_clusters.all_groups(); - if (actual_clusters.size() != 1) { - dist_index.print_self(); - graph.serialize("testGraph.hg"); - cerr << "These should be different fragment clusters: " << endl; - for (auto c : actual_clusters) { - cerr << "cluster: " ; - for (size_t i1 : c) { - cerr << clust[i1] << " "; - } - cerr << endl; + for (size_t i2 = 0 ; i2 < clust.size() ; i2++) { + //For each position in the same cluster + pos_t pos2 = clust[i2]; + size_t len2 = graph.get_length(graph.get_handle(get_id(pos2), false)); + pos_t rev2 = make_pos_t(get_id(pos2), 
+ !is_rev(pos2), + len2 - get_offset(pos2)-1); + size_t dist1 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist2 = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist3 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), false, &graph); + size_t dist4 = dist_index.minimum_distance(get_id(rev1), get_is_rev(rev1), get_offset(rev1), get_id(rev2), get_is_rev(rev2), get_offset(rev2), false, &graph); + size_t dist = std::min(std::min(dist1, + dist2), std::min( dist3, dist4)); + if ( dist != -1 && dist <= fragment_lim) { + new_clusters.union_groups(i1, i2); + } + + } + } + auto actual_clusters = new_clusters.all_groups(); + if (actual_clusters.size() != 1) { + dist_index.print_self(); + graph.serialize("testGraph.hg"); + cerr << "These should be different fragment clusters: " << endl; + for (auto c : actual_clusters) { + cerr << "cluster: " ; + for (size_t i1 : c) { + cerr << clust[i1] << " "; } + cerr << endl; } - REQUIRE(actual_clusters.size() == 1); } + REQUIRE(actual_clusters.size() == 1); } } + } } //end test case } From 83d1ffaac433fa50d348849b4f2bd57f4ebc267c Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 14:59:48 +0200 Subject: [PATCH 017/124] Fix is_root_snarl --- src/snarl_seed_clusterer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index feec6642b50..22f96c7673c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -725,7 +725,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? 
ZipCode::EMPTY : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; - bool is_root_snarl = is_root ? ZipCode::ROOT_SNARL : false; + bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter From f79ad097832713487efc6fd8c0d2e5cb0d7beb90 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 16:16:20 +0200 Subject: [PATCH 018/124] Reserve space in some vectors --- src/snarl_seed_clusterer.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 22f96c7673c..7b9d10e0f27 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -992,6 +992,7 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure //The cluster heads that will be removed from the parent's read_cluster_heads vector> to_erase; + to_erase.reserve(parent_problem->read_cluster_heads.size()); //Helper function that will compare two clusters //Given the read num and seed_num of the cluster head, the distance to the other node side we're looking at, @@ -2279,6 +2280,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Cluster heads to remove because they got combined with the current seed vector> to_remove; + to_remove.reserve(chain_problem->read_cluster_heads.size()); //And the new cluster containing the current seed, and possibly anything that gets combined with it ClusterHead new_cluster = {read_num, cluster_num, new_distances.first, new_distances.second}; @@ -2448,6 +2450,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t combined_fragment_left = std::numeric_limits::max(); size_t combined_fragment_right = std::numeric_limits::max(); vector> to_erase; + 
to_erase.reserve(child_problem.read_cluster_heads.size()); for (auto& child_cluster_head : child_problem.read_cluster_heads) { //Go through each of the clusters on this child @@ -2678,6 +2681,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //Clusters to remove from the chain because they got combined vector> to_erase; + to_erase.reserve(chain_problem->read_cluster_heads.size()); //And new clusters to add that didn't get combined vector, pair>> to_add; From 276d60b55c60873474504346a2fb95971e870cf8 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 16:16:37 +0200 Subject: [PATCH 019/124] Take out unused map --- src/snarl_seed_clusterer.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 7b9d10e0f27..1d4d6401b4e 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -323,12 +323,6 @@ cerr << "Add all seeds to nodes: " << endl; //Bool is true if the parent of the node is a root snarl std::vector nodes_to_cluster_now; - - //Map the parent SnarlTreeNodeProblem to its depth so we don't use get_depth() as much - hash_map parent_to_depth; - parent_to_depth.reserve(clustering_problem.seed_count_prefix_sum.back()); - - //All nodes we've already assigned hash_set seen_nodes; seen_nodes.reserve(clustering_problem.seed_count_prefix_sum.back()); @@ -438,7 +432,6 @@ cerr << "Add all seeds to nodes: " << endl; &seed, seed.seed->zipcode_decoder->max_depth() - 1); } - parent_to_depth.emplace(seed.payload.parent_handle, seed.payload.parent_depth); new_parent = true; } #ifdef DEBUG_CLUSTER From 5e4fea4a38279fd9f9bfa501a5b4e85959b7e24c Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 16:21:15 +0200 Subject: [PATCH 020/124] Take out another unused hash set --- src/snarl_seed_clusterer.cpp | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 
1d4d6401b4e..f39db91fe79 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -323,10 +323,6 @@ cerr << "Add all seeds to nodes: " << endl; //Bool is true if the parent of the node is a root snarl std::vector nodes_to_cluster_now; - //All nodes we've already assigned - hash_set seen_nodes; - seen_nodes.reserve(clustering_problem.seed_count_prefix_sum.back()); - for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++){ vector* seeds = clustering_problem.all_seeds->at(read_num); for (size_t i = 0; i < seeds->size(); i++) { @@ -520,7 +516,7 @@ cerr << "Add all seeds to nodes: " << endl; //Create a new SnarlTreeNodeProblem for this node bool new_node = false; - if (seen_nodes.count(id) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.node_handle) == 0) { new_node = true; clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, clustering_problem.all_node_problems.size()); @@ -533,8 +529,6 @@ cerr << "Add all seeds to nodes: " << endl; //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; - seen_nodes.insert(id); - } seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? 
seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; From acff6f6516489aee61969250d3367f5e7eb8299c Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 15 Jul 2024 18:43:16 +0200 Subject: [PATCH 021/124] Reserve more and fix indenting --- src/snarl_seed_clusterer.cpp | 5 +++-- src/zip_code.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index f39db91fe79..478f229bf19 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -1865,6 +1865,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small vector cluster_heads_to_add_again; + cluster_heads_to_add_again.reserve(chain_problem->read_cluster_heads.size()); //For remembering the best left distances of the chain, we only need to check for the smallest chain distance left //for the children up to the first node @@ -2097,8 +2098,8 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const SnarlTreeNodeProblem::SnarlTreeChild& current_child, bool is_first_child, bool is_last_child, bool skip_distances_to_ends) const { - size_t read_num = current_child.seed_indices.first; - size_t cluster_num = current_child.seed_indices.second; + size_t& read_num = current_child.seed_indices.first; + size_t& cluster_num = current_child.seed_indices.second; net_handle_t& chain_handle = chain_problem->containing_net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 828ee69d35d..cc19192783e 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1827,13 +1827,14 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_type == 
ZipCode::REGULAR_SNARL) { //Snarl is reversed net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); + //Simple and regular snarls are different for clustering if (distance_index.is_simple_snarl(grandparent_handle)) { payload.is_reversed = zip_value; payload.parent_is_chain=true; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { payload.is_reversed = false; - payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); + payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); } } else { payload.is_reversed = false; From 310beb7c998c3e8339f6d754cde7ce7a398240ab Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 16 Jul 2024 10:40:29 +0200 Subject: [PATCH 022/124] Reserve memory for zipcode --- src/snarl_seed_clusterer.cpp | 4 ++-- src/zip_code.cpp | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 478f229bf19..a1dd76528f4 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2098,8 +2098,8 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const SnarlTreeNodeProblem::SnarlTreeChild& current_child, bool is_first_child, bool is_last_child, bool skip_distances_to_ends) const { - size_t& read_num = current_child.seed_indices.first; - size_t& cluster_num = current_child.seed_indices.second; + const size_t& read_num = current_child.seed_indices.first; + const size_t& cluster_num = current_child.seed_indices.second; net_handle_t& chain_handle = chain_problem->containing_net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* diff --git a/src/zip_code.cpp b/src/zip_code.cpp index cc19192783e..7257c9c631c 100644 --- 
a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1567,6 +1567,7 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { assert(payload != MIPayload::NO_CODE); + zipcode.data.reserve(16); //get one byte at a time from the payload and add it to the zip code size_t bit_mask = (1 << 8) - 1; From 73477be1caaf8002f8f8b516a5e6235cf260aaaf Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 16 Jul 2024 11:14:28 +0200 Subject: [PATCH 023/124] Reserve memory for decoders --- src/zip_code.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 7257c9c631c..407adee50a4 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -102,6 +102,7 @@ void ZipCode::from_vector(const std::vector& values) { ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : zipcode(zipcode), decoder(0), finished_decoding(false) { if (zipcode != nullptr) { + decoder.reserve(zipcode->byte_count() / 4); fill_in_full_decoder(); } } From f5d0c4b6ee8bb29c19af5d999dfd17b85cea35fb Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 16 Jul 2024 11:53:12 +0200 Subject: [PATCH 024/124] Use zipcode for snarl length --- src/snarl_seed_clusterer.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 2fe53b82f17..d6ba3639fc1 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -328,9 +328,9 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = distance_index.minimum_length(containing_net_handle); + node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); - end_in = 
distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); + end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); chain_component_start = distance_index.get_chain_component(start_in); chain_component_end = distance_index.get_chain_component(end_in); prefix_sum_value = SnarlDistanceIndex::sum( From a2dc51fc2d7156da323962a0ed54dfee7980c585 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 16 Jul 2024 12:36:43 +0200 Subject: [PATCH 025/124] Reserve more memory --- src/snarl_seed_clusterer.cpp | 3 +++ src/snarl_seed_clusterer.hpp | 1 + 2 files changed, 4 insertions(+) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index a1dd76528f4..fc8f91e27c1 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -322,6 +322,7 @@ cerr << "Add all seeds to nodes: " << endl; //All other seeds are added directly to their parent chains as children //Bool is true if the parent of the node is a root snarl std::vector nodes_to_cluster_now; + nodes_to_cluster_now.reserve(clustering_problem.all_seeds->size()); for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++){ vector* seeds = clustering_problem.all_seeds->at(read_num); @@ -2673,6 +2674,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //And new clusters to add that didn't get combined vector, pair>> to_add; + to_add.reserve(chain_problem->read_cluster_heads.size()); //There is at most one new cluster per read pair new_cluster_by_read; @@ -2995,6 +2997,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro //Go through the list of parent child pairs. 
Once we reach a new parent, cluster all children found up to this point net_handle_t current_parent = clustering_problem.root_children.front().first; vector children; + children.reserve(clustering_problem.root_children.size()); for (size_t root_child_i = 0 ; root_child_i < clustering_problem.root_children.size() ; root_child_i++) { pair& parent_to_child = clustering_problem.root_children[root_child_i]; net_handle_t& parent = parent_to_child.first; diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index d6ba3639fc1..8b27c6d1cca 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -458,6 +458,7 @@ class SnarlDistanceIndexClusterer { net_handle_to_node_problem_index.reserve(5*seed_count); all_node_problems.reserve(5*seed_count); + parent_snarls.reserve(seed_count); root_children.reserve(seed_count); } }; From c912afb2914737c9884f0a285fc2394de289e60e Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 17 Jul 2024 10:23:40 +0200 Subject: [PATCH 026/124] Add cluster checking --- src/snarl_seed_clusterer.cpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index fc8f91e27c1..57079e9241e 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -4,6 +4,8 @@ //#define DEBUG_CLUSTER //#define debug_distances +//#define EXHAUSTIVE_CLUSTER_CHECK + namespace vg { SnarlDistanceIndexClusterer::SnarlDistanceIndexClusterer( const SnarlDistanceIndex& distance_index, const HandleGraph* graph) : @@ -238,7 +240,10 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { cerr << endl; } -/* + + +#endif +#ifdef EXHAUSTIVE_CLUSTER_CHECK //CHeck read clusters for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { auto all_groups = clustering_problem.read_union_find[read_num].all_groups(); @@ -298,9 +303,6 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { assert 
(uf.all_groups().size() == 1); } } - */ - - #endif return make_tuple(std::move(clustering_problem.read_union_find), std::move(clustering_problem.fragment_union_find)); From ee5b6b66ee36a73ea1c3c72efbf716588c7ab0ed Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 17 Jul 2024 15:55:15 +0200 Subject: [PATCH 027/124] Find minimizer hit count by walking through minimziers ordered by read instead of score --- src/minimizer_mapper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index f240b2f6a1b..59cd8df8a0b 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3674,9 +3674,9 @@ std::vector MinimizerMapper::find_seeds(const std::vector // We are starting a new run start = i; limit = i + 1; run_hits = minimizers[i].hits; - for (size_t j = i + 1; j < minimizers.size() && minimizers[j].value.key == minimizers[i].value.key; j++) { + for (size_t j = i + 1; j < minimizers_in_read_order.size() && minimizers_in_read_order[j].value.key == minimizers_in_read_order[i].value.key; j++) { limit++; - run_hits += minimizers[j].hits; + run_hits += minimizers_in_read_order[j].hits; } // We haven't taken the first thing in the run yet. taking_run = false; From a7bb77c5d4c4b781b3b9218a78cc97f6b29a428b Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 17 Jul 2024 17:38:38 +0200 Subject: [PATCH 028/124] Revert "Find minimizer hit count by walking through minimziers ordered by read instead of score" This reverts commit ee5b6b66ee36a73ea1c3c72efbf716588c7ab0ed. 
--- src/minimizer_mapper.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index 59cd8df8a0b..f240b2f6a1b 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3674,9 +3674,9 @@ std::vector MinimizerMapper::find_seeds(const std::vector // We are starting a new run start = i; limit = i + 1; run_hits = minimizers[i].hits; - for (size_t j = i + 1; j < minimizers_in_read_order.size() && minimizers_in_read_order[j].value.key == minimizers_in_read_order[i].value.key; j++) { + for (size_t j = i + 1; j < minimizers.size() && minimizers[j].value.key == minimizers[i].value.key; j++) { limit++; - run_hits += minimizers_in_read_order[j].hits; + run_hits += minimizers[j].hits; } // We haven't taken the first thing in the run yet. taking_run = false; From 02a33699702961279610eef957b3f4e26d04058b Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 18 Jul 2024 17:24:55 +0200 Subject: [PATCH 029/124] Add hacky way of dealing with multicomponent chains --- src/zip_code.cpp | 18 ++++++++++++++++++ src/zip_code.hpp | 6 ++++++ 2 files changed, 24 insertions(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 407adee50a4..4ff1164827a 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -799,6 +799,12 @@ vector ZipCode::get_node_code(const net_handle_t& node, const SnarlDista vector node_code; //Assume this node is in a regular chain size_t prefix_sum = distance_index.get_prefix_sum_value(node); + if (distance_index.is_multicomponent_chain(distance_index.get_parent(node))) { + //TODO: This isn't great, should really use some better value than the length of the chain, + //which is just the length of the last component + prefix_sum += distance_index.get_chain_component(node) * + distance_index.chain_minimum_length(distance_index.get_parent(node)); + } node_code.emplace_back(prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); node_code.emplace_back(distance_index.minimum_length(node)+1); node_code.emplace_back(distance_index.is_reversed_in_parent(node)); @@ -831,6 +837,12 @@ vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); + if (distance_index.is_multicomponent_chain(distance_index.get_parent(snarl))) { + //TODO: This isn't great, should really use some better value than the length of the chain, + //which is just the length of the last component + prefix_sum += distance_index.get_chain_component(start_node) * + distance_index.chain_minimum_length(distance_index.get_parent(snarl)); + } snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); //Length of the snarl @@ -865,6 +877,12 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); + if (distance_index.is_multicomponent_chain(distance_index.get_parent(snarl))) { + //TODO: This isn't great, should really use some better value than the length of the chain, + //which is just the length of the last component + prefix_sum += distance_index.get_chain_component(start_node) * + distance_index.chain_minimum_length(distance_index.get_parent(snarl)); + } snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); //Length of the snarl diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4a30babc550..ab7599d75f8 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -176,6 +176,12 @@ class ZipCode { const static size_t NODE_LENGTH_OFFSET = 1; const static size_t NODE_IS_REVERSED_OFFSET = 2; + //To deal with multicomponent chains, the prefix sum value for nodes and snarls is actually + // the prefix sum + (component # * chain length) + // TODO: This is kinda hacky but it will prevent anything in a different + // component from being clustered together, assuming that the distance + // limit is smaller than the chain length + /* Functions for getting the code for each snarl/chain/node * Distances will be stored as distance+1, 0 will be reserved for inf From a4103219434e7bd90005e2eb15e612a4ace42ce3 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 19 Jul 2024 11:11:44 +0200 Subject: [PATCH 030/124] Revert "Add hacky way of dealing with multicomponent chains" This reverts commit 
02a33699702961279610eef957b3f4e26d04058b. --- src/zip_code.cpp | 18 ------------------ src/zip_code.hpp | 6 ------ 2 files changed, 24 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 4ff1164827a..407adee50a4 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -799,12 +799,6 @@ vector ZipCode::get_node_code(const net_handle_t& node, const SnarlDista vector node_code; //Assume this node is in a regular chain size_t prefix_sum = distance_index.get_prefix_sum_value(node); - if (distance_index.is_multicomponent_chain(distance_index.get_parent(node))) { - //TODO: This isn't great, should really use some better value than the length of the chain, - //which is just the length of the last component - prefix_sum += distance_index.get_chain_component(node) * - distance_index.chain_minimum_length(distance_index.get_parent(node)); - } node_code.emplace_back(prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); node_code.emplace_back(distance_index.minimum_length(node)+1); node_code.emplace_back(distance_index.is_reversed_in_parent(node)); @@ -837,12 +831,6 @@ vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - if (distance_index.is_multicomponent_chain(distance_index.get_parent(snarl))) { - //TODO: This isn't great, should really use some better value than the length of the chain, - //which is just the length of the last component - prefix_sum += distance_index.get_chain_component(start_node) * - distance_index.chain_minimum_length(distance_index.get_parent(snarl)); - } snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); //Length of the snarl @@ -877,12 +865,6 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - if (distance_index.is_multicomponent_chain(distance_index.get_parent(snarl))) { - //TODO: This isn't great, should really use some better value than the length of the chain, - //which is just the length of the last component - prefix_sum += distance_index.get_chain_component(start_node) * - distance_index.chain_minimum_length(distance_index.get_parent(snarl)); - } snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); //Length of the snarl diff --git a/src/zip_code.hpp b/src/zip_code.hpp index ab7599d75f8..4a30babc550 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -176,12 +176,6 @@ class ZipCode { const static size_t NODE_LENGTH_OFFSET = 1; const static size_t NODE_IS_REVERSED_OFFSET = 2; - //To deal with multicomponent chains, the prefix sum value for nodes and snarls is actually - // the prefix sum + (component # * chain length) - // TODO: This is kinda hacky but it will prevent anything in a different - // component from being clustered together, assuming that the distance - // limit is smaller than the chain length - /* Functions for getting the code for each snarl/chain/node * Distances will be stored as distance+1, 0 will be reserved for inf From eb1e3024c6a4a286fe1a0116d87b8d5b1c408a5a Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 19 Jul 2024 19:45:35 +0200 Subject: [PATCH 031/124] Make a string identifier for a snarl tree node --- src/zip_code.cpp | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 
src/zip_code.hpp | 18 ++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 407adee50a4..215b8799d61 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1868,5 +1868,51 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance return payload; } +net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { + string result = "" + for (size_t d = 0 ; d < depth ; d++) { + result += (decoder[i].first ? "1" : "0"); + if (d == 0) { + //Root structure + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + result += string(zip_value); + } + } else if (decoder[d].first) { + //is_chain so could be a chain or a node + if (decoder[d-1].first) { + //If the thing before this was also a chain, then it is a node + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OR_RANK_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + result += string(zip_value); + } + } else { + //Otherwise it's a chain + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + result += string(zip_value); + } + } + } else { + //Definitely a snarl + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + result += string(zip_value); + } + } + result += "." 
+ + } + return result; +} + } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4a30babc550..fd1dc02d2a1 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -38,6 +38,13 @@ class ZipCodeDecoder; ///This can interpret zipcodes to format them as the old payload struct MIPayload; + +/// A struct to be used as a unique identifier for a snarl tree node (node/snarl/chain) +/// using information from the zipcodes. +/// It should be unique and hashable +typedef std::string net_identifier_t; + + /* Zip codes store the snarl decomposition location and distance information for a position on a graph * A zip code will contain all the information necessary to compute the minimum distance between two * positions, with minimal queries to the distance index @@ -326,6 +333,17 @@ class ZipCodeDecoder { ///Fill in a payload with values from the zipcode MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth + /// would be the node, also include the node id + net_identifier_t get_identifier(size_t depth) const; + +}; + +template<> +struct wang_hash { + size_t operator()(const net_identifier_t& id) const { + return wang_hash()(id); + } }; std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); From b177d079783bc64447afd6a2369b8b292775cdc7 Mon Sep 17 00:00:00 2001 From: Xian Date: Sun, 21 Jul 2024 15:52:49 +0200 Subject: [PATCH 032/124] Fix zipcode identifiers --- src/zip_code.cpp | 41 +++++++++++++++++++++++++++++++++-------- src/zip_code.hpp | 2 ++ 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 215b8799d61..27490cbf8b5 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1869,16 +1869,20 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { - string result = "" + if (depth == std::numeric_limits::max()) { + //This is equivalent to distance_index.get_root() + return "ROOT"; + } + string result = ""; for (size_t d = 0 ; d < depth ; d++) { - result += (decoder[i].first ? "1" : "0"); + result += (decoder[d].first ? 
"1" : "0"); if (d == 0) { //Root structure size_t zip_value; size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += string(zip_value); + result += std::to_string(zip_value); } } else if (decoder[d].first) { //is_chain so could be a chain or a node @@ -1888,7 +1892,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OR_RANK_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += string(zip_value); + result += std::to_string(zip_value); } } else { //Otherwise it's a chain @@ -1896,23 +1900,44 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += string(zip_value); + result += std::to_string(zip_value); } } } else { //Definitely a snarl size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN; i++) { + for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += string(zip_value); + result += std::to_string(zip_value); } } - result += "." 
+ if (d < std::min(depth, max_depth())) { + result += "."; + } } + if (depth > max_depth()) { + //If this was node that's in a trivial chain + result += ".n"; + } + return result; } +const net_identifier_t ZipCodeDecoder::get_parent_identifier(const net_identifier_t& child) { + if (child == "ROOT") { + throw std::runtime_error("error: trying to get the parent of the root net_identifier_t"); + } + for (int i = child.size()-1 ; i >= 0 ; i--) { + if (child[i] == '.') { + return (net_identifier_t) string(child, 0, i); + } + } + //If we didn't find a '.', then the parent is just the root + return "ROOT"; +} + + } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index fd1dc02d2a1..074c404d378 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -336,6 +336,8 @@ class ZipCodeDecoder { /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth /// would be the node, also include the node id net_identifier_t get_identifier(size_t depth) const; + const static net_identifier_t get_parent_identifier(const net_identifier_t& child); + }; From 636abe8d4fa8656830fb38eed0a06de337ef2b4a Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 11:34:21 +0200 Subject: [PATCH 033/124] Add chain component to zipcodes --- src/unittest/zip_code.cpp | 45 +++++++++++++++++++++++++++++++++++++-- src/zip_code.cpp | 23 ++++++++++++-------- src/zip_code.hpp | 24 ++++++++++++--------- 3 files changed, 71 insertions(+), 21 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 103bac8eb1d..185733a4531 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -136,6 +136,10 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); + //The component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //That's it REQUIRE(value_and_index.second == 
std::numeric_limits::max()); @@ -195,6 +199,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2); + //Chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //node is reversed in the snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); @@ -424,6 +432,11 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); + //component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component( + distance_index.get_node_net_handle(n1->id()))); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); @@ -484,6 +497,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); + //chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + //Is the chain is reversed in the snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); @@ -518,6 +535,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()))); + //chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first 
== distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); @@ -593,6 +614,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); + //Chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + //Is the chain is reversed in the snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); @@ -637,6 +662,9 @@ using namespace std; distance_index.flip(distance_index.canonical(chain3))) != 0; REQUIRE(value_and_index.first == is_rev); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + //Chain code for chain 3-5 REQUIRE(decoder.decoder[4] == std::make_pair(true, value_and_index.second)); //Rank in parent @@ -664,10 +692,14 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); - //is_reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); snarl = distance_index.get_parent(chain4); + + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + + //is_reversed + value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; REQUIRE(value_and_index.first == is_rev); @@ -993,6 +1025,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == child_count); + //component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, false)))); + //Snarl record offset value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_record_offset(irregular_snarl)); @@ -1514,6 +1550,11 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); + //Chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component( + distance_index.get_node_net_handle(n1->id()))); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 27490cbf8b5..ab47992f670 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -481,7 +481,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista } size_t zip_value; size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OR_RANK_OFFSET ; i++) { + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } @@ -796,21 +796,23 @@ vector ZipCode::get_node_code(const net_handle_t& node, const SnarlDista 
assert((distance_index.is_chain(distance_index.get_parent(node)) || distance_index.is_root(distance_index.get_parent(node)))); #endif //Node code is: offset in chain, length, is reversed - vector node_code; + vector node_code(NODE_SIZE); //Assume this node is in a regular chain size_t prefix_sum = distance_index.get_prefix_sum_value(node); - node_code.emplace_back(prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); - node_code.emplace_back(distance_index.minimum_length(node)+1); - node_code.emplace_back(distance_index.is_reversed_in_parent(node)); + node_code[NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1; + node_code[NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; + node_code[NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); + size_t component = distance_index.get_chain_component(node); + node_code[NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; return node_code; } vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { //Chain code is: rank in snarl, length - vector chain_code; - chain_code.emplace_back(distance_index.get_rank_in_parent(chain)); + vector chain_code (CHAIN_SIZE); + chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); size_t len = distance_index.minimum_length(chain); - chain_code.emplace_back(len == std::numeric_limits::max() ? 0 : len+1); + chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; return chain_code; } @@ -833,6 +835,9 @@ vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); + size_t component = distance_index.get_chain_component(start_node); + snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + //Length of the snarl size_t len = distance_index.minimum_length(snarl); snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); @@ -1890,7 +1895,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { //If the thing before this was also a chain, then it is a node size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OR_RANK_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 074c404d378..e29fa811bd5 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -154,8 +154,8 @@ class ZipCode { const static size_t CHAIN_LENGTH_OFFSET = 1; ///Offsets for snarl codes - const static size_t REGULAR_SNARL_SIZE = 5; - const static size_t IRREGULAR_SNARL_SIZE = 9; + const static size_t REGULAR_SNARL_SIZE = 6; + const static size_t IRREGULAR_SNARL_SIZE = 10; //Both regular and irregular snarls have these @@ -165,23 +165,27 @@ class ZipCode { const static size_t SNARL_OFFSET_IN_CHAIN_OFFSET = 1; const static size_t SNARL_LENGTH_OFFSET = 2; const static size_t SNARL_CHILD_COUNT_OFFSET = 3; + //THis will be the lower of the two component numbers if the snarl spans two + //This only happens if the snarl is not start-end connected, which we'll know from the length + const static size_t SNARL_CHAIN_COMPONENT_OFFSET = 4; //Only for regular snarls - const static size_t REGULAR_SNARL_IS_REVERSED_OFFSET = 4; + const static size_t REGULAR_SNARL_IS_REVERSED_OFFSET = 5; //Only for irregular snarls - const static size_t IRREGULAR_SNARL_RECORD_OFFSET = 4; + const static size_t IRREGULAR_SNARL_RECORD_OFFSET = 
5; //Distance from the left side of the child to the start of the snarl - const static size_t IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET = 5; - const static size_t IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET = 6; - const static size_t IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET = 7; - const static size_t IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET = 8; + const static size_t IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET = 6; + const static size_t IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET = 7; + const static size_t IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET = 8; + const static size_t IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET = 9; ///Offsets for nodes - const static size_t NODE_SIZE = 3; - const static size_t NODE_OFFSET_OR_RANK_OFFSET = 0; + const static size_t NODE_SIZE = 4; + const static size_t NODE_OFFSET_OFFSET = 0; const static size_t NODE_LENGTH_OFFSET = 1; const static size_t NODE_IS_REVERSED_OFFSET = 2; + const static size_t NODE_CHAIN_COMPONENT_OFFSET = 3; /* Functions for getting the code for each snarl/chain/node From a4da073a55166a340a7c090cb57419ded89d94e9 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 11:38:51 +0200 Subject: [PATCH 034/124] Use zipcodes to get chain component for nodes --- src/zip_code.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index ab47992f670..6dfe0bc8fa2 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1772,9 +1772,8 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //TODO: For top-level chains we got this from the distance index payload.is_reversed = zip_value; - payload.chain_component = distance_index.is_multicomponent_chain(payload.parent_handle) - ? 
distance_index.get_chain_component(payload.node_handle) - : 0; + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; From 6e287f56232d0b7a1e07ccdffe99545954fed210 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 13:25:28 +0200 Subject: [PATCH 035/124] Add get_chain_component to zipcodes --- src/zip_code.cpp | 34 +++++++++++++++++++++++++++++++++- src/zip_code.hpp | 4 ++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 6dfe0bc8fa2..72c27a07887 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -485,7 +485,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } - return zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl @@ -498,6 +498,38 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } +size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { + + + if (depth == 0) { + //If this is the root chain/snarl/node + throw std::runtime_error("zipcodes don't have chain offsets for roots"); + + } else if (decoder[depth].first) { + //If this is a chain/node + + if (!decoder[depth-1].first) { + throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); + } + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + + return zip_value; + } else { + //If this is a snarl + + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + + return zip_value; + } +} bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index e29fa811bd5..eedae882804 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -299,6 +299,10 @@ class ZipCodeDecoder { ///Doesn't use a given distance index if it isn't needed size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + ///Get the chain component of a chain child. 
+ ///For snarls, this will be the component of the start node + size_t get_chain_component(const size_t& depth) const ; + ///Is the snarl tree node backwards relative to its parent bool get_is_reversed_in_parent(const size_t& depth) const; From 6565acb7f0a8effb3301136b9a411fb3ed033bb4 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 13:41:55 +0200 Subject: [PATCH 036/124] Get chain component from zipcodes for payload --- src/zip_code.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 72c27a07887..27358910d53 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1859,6 +1859,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Snarl child_count std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Chain component + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; //is_reversed for regular snarl and record offset for irregular/cyclic snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); From c476d12cd0ac7e04c8f496aeb91f18ab3f79fac8 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 14:07:13 +0200 Subject: [PATCH 037/124] Use zipcode chain component --- src/snarl_seed_clusterer.cpp | 8 -------- src/snarl_seed_clusterer.hpp | 5 +++-- 2 files changed, 3 insertions(+), 10 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 57079e9241e..b8391972d2f 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2148,14 +2148,6 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if 
(SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { - //If this is the last node in the chain - if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { - //If they aren't in the same component - distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); - } else { - distance_from_current_end_to_end_of_chain = 0; - } } else if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 8b27c6d1cca..b592fbc15cd 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -331,8 +331,9 @@ class SnarlDistanceIndexClusterer { node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = distance_index.get_chain_component(start_in); - chain_component_end = distance_index.get_chain_component(end_in); + chain_component_start = seed->seed->zipcode_decoder->get_chain_component(zipcode_depth); + chain_component_end = node_length == std::numeric_limits::max() ? 
chain_component_start+1 + : chain_component_start; prefix_sum_value = SnarlDistanceIndex::sum( distance_index.get_prefix_sum_value(start_in), distance_index.minimum_length(start_in)); From 9e2153fbbdd25b8611a5109eafb17f31bb2ef5ac Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 16:26:31 +0200 Subject: [PATCH 038/124] Add failing unit test --- src/unittest/snarl_seed_clusterer.cpp | 105 +++++++++++++++++++------- 1 file changed, 79 insertions(+), 26 deletions(-) diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index 2df08b290a8..6ef11d3426f 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -796,6 +796,64 @@ namespace unittest { REQUIRE(clusters.size() == 2); } + } + TEST_CASE( "Top-level looping chain", + "[cluster][bug]" ) { + VG graph; + + Node* n1 = graph.create_node("AGCGTGTAGAGAA"); + Node* n2 = graph.create_node("ATGCGTGCTGAGCA"); + Node* n3 = graph.create_node("G"); + Node* n4 = graph.create_node("C"); + Node* n5 = graph.create_node("ATGCGTGCTGAGCA"); + Node* n6 = graph.create_node("GCTTAC"); + + Edge* e1 = graph.create_edge(n1, n2); + Edge* e2 = graph.create_edge(n1, n5, false, true); + Edge* e3 = graph.create_edge(n2, n3); + Edge* e4 = graph.create_edge(n2, n4); + Edge* e5 = graph.create_edge(n2, n6, true, false); + Edge* e6 = graph.create_edge(n3, n4); + Edge* e7 = graph.create_edge(n3, n5); + Edge* e8 = graph.create_edge(n4, n5); + Edge* e9 = graph.create_edge(n5, n6); + + IntegratedSnarlFinder snarl_finder(graph); + SnarlDistanceIndex dist_index; + fill_in_distance_index(&dist_index, &graph, &snarl_finder); + SnarlDistanceIndexClusterer clusterer(dist_index, &graph); + + ofstream out ("bug_graph.vg"); + graph.serialize(out); + + SECTION( "Two clusters" ) { + + vector> pos_ts(2); + pos_ts[0].emplace_back(1, false, 12); + pos_ts[0].emplace_back(3, true, 0); + pos_ts[0].emplace_back(6, true, 2); + pos_ts[1].emplace_back(4, false,0); + 
pos_ts[1].emplace_back(6,false, 5); + pos_ts[1].emplace_back(5,false, 9); + pos_ts[1].emplace_back(3,true, 0); + vector> seeds(2); + for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { + for (pos_t pos : pos_ts[read_num]) { + + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + seeds[read_num].push_back({ pos, 0, zipcode}); + } + } + vector> clusters = clusterer.cluster_seeds(seeds, 15, 35); + REQUIRE(clusters.size() == 2); + REQUIRE(clusters[0].size() == 2); + + + + } + + } TEST_CASE( "Cluster looping, multicomponent", "[cluster]" ) { @@ -3150,7 +3208,6 @@ namespace unittest { // REQUIRE(clusters.size() == 1); //}//end test case - /* TEST_CASE("Failed graph", "[failed_cluster]"){ HashGraph graph; @@ -3167,41 +3224,37 @@ namespace unittest { vector> pos_ts(2); - pos_ts[0].emplace_back(30, false, 0); - pos_ts[0].emplace_back(22, false, 0); - pos_t pos1 = pos_ts[0][0]; - pos_t pos2 = pos_ts[0][1]; - net_handle_t node31 = dist_index.get_node_net_handle(30); - - size_t dist = dist_index.minimum_distance(get_id(pos1), get_is_rev(pos1), get_offset(pos1), get_id(pos2), get_is_rev(pos2), get_offset(pos2), true, &graph); - cerr << "DISTANCE BETWEEN " << pos1 << " and " << pos2 << " = " << dist << endl; - + pos_ts[0].emplace_back(6, false, 12); + pos_ts[0].emplace_back(9, true, 0); + pos_ts[0].emplace_back(11, true, 2); + pos_ts[1].emplace_back(7, false,0); + pos_ts[1].emplace_back(11,false, 5); + pos_ts[1].emplace_back(8,false, 9); + pos_ts[1].emplace_back(9,true, 0); + vector> seeds(2); + for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { + for (pos_t pos : pos_ts[read_num]) { - //vector> seeds(2); - //for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { - // for (pos_t pos : pos_ts[read_num]) { - - // ZipCode zipcode; - // zipcode.fill_in_zipcode(dist_index, pos); - // seeds[read_num].push_back({ pos, 0, zipcode}); - // } - //} + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, pos); + 
seeds[read_num].push_back({ pos, 0, zipcode}); + } + } - //vector> clusters = clusterer.cluster_seeds(seeds, 15, 35); + vector> clusters = clusterer.cluster_seeds(seeds, 15, 35); - //REQUIRE(clusters.size() == 1); - // + REQUIRE(clusters.size() == 2); + REQUIRE(false); } - */ - TEST_CASE("Random graphs", "[cluster_random]"){ + TEST_CASE("Random graphs", "[cluster][cluster_random]"){ - for (int i = 0; i < 0; i++) { + for (int i = 0; i < 1000; i++) { // For each random graph default_random_engine generator(time(NULL)); - uniform_int_distribution variant_count(1, 70); + uniform_int_distribution variant_count(1, 10); uniform_int_distribution chrom_len(10, 200); //Make a random graph with three chromosomes of random lengths From 2c49a235e294009dd36c91a91fbb79e37a0b9179 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 17:19:24 +0200 Subject: [PATCH 039/124] Get chain component for irregular snarls --- src/zip_code.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 27358910d53..375611627cb 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -904,6 +904,9 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); + size_t component = distance_index.get_chain_component(start_node); + snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + //Length of the snarl size_t len = distance_index.minimum_length(snarl); snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 
0 : len+1); @@ -1859,9 +1862,10 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Snarl child_count std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Chain component + //Chain component of the snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.chain_component = zip_value; + //TODO: SHould use this somehow + payload.chain_component = 0; //is_reversed for regular snarl and record offset for irregular/cyclic snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); From ce9b0adbfd9f2d82f56446dac143377ddf123b18 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 22 Jul 2024 20:27:07 +0200 Subject: [PATCH 040/124] Fix debug code --- src/snarl_seed_clusterer.cpp | 120 +++++++++++++++++++++-------------- 1 file changed, 73 insertions(+), 47 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index b8391972d2f..8ee59128821 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -31,7 +31,7 @@ vector SnarlDistanceIndexClusterer::cluste vector seed_caches(seeds.size()); for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER - assert (seeds[i].zipcode.byte_count() != 0) { + assert (seeds[i].zipcode.byte_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { @@ -218,7 +218,7 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { for (auto group : clustering_problem.read_union_find[read_num].all_groups()){ cerr << "\t\t"; for (size_t c : group) { - cerr << clustering_problem.all_seeds->at(read_num)->at(c).pos << " "; + cerr << clustering_problem.all_seeds->at(read_num)->at(c).seed->pos << " "; } cerr << endl; } @@ -235,7 +235,7 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { for 
(auto group : clustering_problem.fragment_union_find.all_groups()){ cerr << "\t"; for (size_t c : group) { - cerr << ordered_seeds[c].pos << " "; + cerr << ordered_seeds[c].seed->pos << " "; } cerr << endl; } @@ -252,19 +252,19 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { structures::UnionFind uf(group.size(), false); for (size_t i1 = 0 ; i1 < group.size() ; i1++) { size_t c = group[i1]; - pos_t pos1 = clustering_problem.all_seeds->at(read_num)->at(c).pos; - pos_t rev1 = make_pos_t(get_id(pos1), !is_rev(pos1), distance_index.node_length(get_id(pos1)) - get_offset(pos1) - 1); + pos_t pos1 = clustering_problem.all_seeds->at(read_num)->at(c).seed->pos; + pos_t rev1 = make_pos_t(get_id(pos1), !is_rev(pos1), distance_index.node_length(distance_index.get_node_net_handle(get_id(pos1))) - get_offset(pos1) - 1); for (size_t i2 = 0 ; i2 < i1 ; i2++) { size_t d = group[i2]; - pos_t pos2 = clustering_problem.all_seeds->at(read_num)->at(d).pos; - pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), distance_index.node_length(get_id(pos2))- get_offset(pos2) - 1); - size_t d1 = distance_index.min_distance(pos1, pos2); - size_t d2 = std::min(d1, distance_index.min_distance(pos1, rev2)); - size_t d3 = std::min(d2, distance_index.min_distance(rev1, rev2)); - size_t d4 = std::min(d3, distance_index.min_distance(rev1, pos2)); + pos_t pos2 = clustering_problem.all_seeds->at(read_num)->at(d).seed->pos; + pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), distance_index.node_length(distance_index.get_node_net_handle(get_id(pos2)))- get_offset(pos2) - 1); + size_t d1 = distance_index.minimum_distance(pos1, pos2); + size_t d2 = std::min(d1, distance_index.minimum_distance(pos1, rev2)); + size_t d3 = std::min(d2, distance_index.minimum_distance(rev1, rev2)); + size_t d4 = std::min(d3, distance_index.minimum_distance(rev1, pos2)); if (d4 != -1 && d4 <= clustering_problem.read_distance_limit) { uf.union_groups(i1, i2); @@ -275,12 +275,12 @@ for (size_t i = 1 ; i 
< clustering_problem.all_seeds->size() ; i++) { auto group2 = all_groups[g2]; for (size_t d : group2) { pos_t pos2 = clustering_problem.all_seeds->at(read_num)->at(d).pos; - pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), distance_index.node_length(get_id(pos2)) - get_offset(pos2) - 1); + pos_t rev2 = make_pos_t(get_id(pos2), !is_rev(pos2), distance_index.node_length(distance_index.get_node_net_handle(get_id(pos2))) - get_offset(pos2) - 1); size_t d1 = distance_index.min_distance(pos1, pos2); - size_t d2 = std::min(d1, distance_index.min_distance(pos1, rev2)); - size_t d3 = std::min(d2, distance_index.min_distance(rev1, rev2)); - size_t d4 = std::min(d3, distance_index.min_distance(rev1, pos2)); + size_t d2 = std::min(d1, distance_index.minimum_distance(pos1, rev2)); + size_t d3 = std::min(d2, distance_index.minimum_distance(rev1, rev2)); + size_t d4 = std::min(d3, distance_index.minimum_distance(rev1, pos2)); assert (d4 == -1 || d4 > clustering_problem.read_distance_limit); } @@ -355,30 +355,32 @@ cerr << "Add all seeds to nodes: " << endl; const MIPayload& payload = seed.payload; #ifdef DEBUG_CLUSTER - cerr << "Using cached values for node " << id << ": " - << ", " << seed.payload.record_offset - << ", " << seed.payload.parent_record_offset - << ", " << seed.payload.node_record_offset - << ", " << seed.payload.node_length - << ", " << seed.payload.prefix_sum - << ", " << seed.payload.chain_component << endl; + //cerr << "Using cached values for node " << id << ": " + // << ", " << seed.payload.record_offset + // << ", " << seed.payload.parent_record_offset + // << ", " << seed.payload.node_length + // << ", " << seed.payload.prefix_sum + // << ", " << seed.payload.chain_component << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); - assert(seed.payload.record_offset == distance_index.get_record_offset(handle)); //assert(seed.payload.parent_record_offset == // 
(distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) // :distance_index.get_record_offset(parent_handle))); - assert(seed.payload.node_record_offset == distance_index.get_node_record_offset(handle)); assert(seed.payload.node_length == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) // ? std::numeric_limits::max() // : distance_index.get_prefix_sum_value(handle); //assert(seed.payload.prefix_sum == prefix_sum); - assert(seed.payload.chain_component == (distance_index.is_multicomponent_chain(parent_handle) + + size_t chain_component = (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) - : 0)); + : 0); + chain_component = chain_component == std::numeric_limits::max() ? 0 : chain_component; + cerr << "For nod " << distance_index.net_handle_as_string(handle) << endl; + cerr << "Chain compoentn: " << chain_component << " was " << seed.payload.chain_component << endl; + assert(seed.payload.chain_component == chain_component); if (!distance_index.is_root(seed.payload.parent_handle)) { cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; @@ -708,7 +710,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); + //assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); } #endif 
ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 @@ -774,6 +776,40 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER + cerr << "For child type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + ke<< distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; + cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; + cerr << "\t should be " + << distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " + + << distance_index.distance_to_parent_bound(parent, true, chain_handle, + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? 
SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " + + << distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " + + << distance_index.distance_to_parent_bound(parent, false, chain_handle, + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << endl; assert(chain_problem->distance_start_left == distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, @@ -806,8 +842,6 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster : SnarlDistanceIndex::CHAIN_HANDLE), SnarlDistanceIndex::CHAIN_HANDLE))); - cerr << "This child has distances to end : " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right - << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; #endif //And add it to its parent snarl bool new_parent = false; @@ -880,7 +914,7 @@ void SnarlDistanceIndexClusterer::cluster_one_node( bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -1036,7 +1070,7 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure combined = true; #ifdef DEBUG_CLUSTER - cerr << 
"\t\t\tCombining read/cluster " << read_num << "/" << cluster_num << "... new cluster head:" << clustering_problem.all_seeds->at(read_num)->at(new_cluster_head_and_distances.cluster_num).pos << endl; + cerr << "\t\t\tCombining read/cluster " << read_num << "/" << cluster_num << "... new cluster head:" << clustering_problem.all_seeds->at(read_num)->at(new_cluster_head_and_distances.cluster_num).seed->pos << endl; cerr << "\t\t\t\t Best distances for this cluster: " << old_distances.first << " and " << old_distances.second << endl; cerr << "\t\t\t\t New best distances for combined cluster: " << new_cluster_head_and_distances.distance_left << " and " << new_cluster_head_and_distances.distance_right << endl; #endif @@ -1665,7 +1699,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -1788,7 +1822,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -1918,7 +1952,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr 
<< clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -1937,7 +1971,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin for (auto group : clustering_problem.fragment_union_find.all_groups()){ cerr << "\t"; for (size_t c : group) { - cerr << ordered_seeds[c].pos << " "; + cerr << ordered_seeds[c].seed->pos << " "; } cerr << endl; } @@ -1982,7 +2016,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -2066,7 +2100,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin bool has_seeds = false; for (size_t x = 0 ; x < clustering_problem.all_seeds->at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; has_seeds = true; } } @@ -2111,7 +2145,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c */ #ifdef DEBUG_CLUSTER - cerr << "At child seed " << current_child_seed->seed->pos << endl; + cerr << "At child seed " << current_child_seed.seed->pos << endl; #endif //The distance from the right side of the last child to the left side of this child //(relative to the orientation of the chain @@ -2626,14 +2660,6 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if 
(SnarlDistanceIndex::get_record_offset(current_child.net_handle) == SnarlDistanceIndex::get_record_offset(chain_problem->end_in)) { - //If this is the last node in the chain - if (chain_problem->chain_component_end != child_problem.chain_component_end) { - //If they aren't in the same component - distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); - } else { - distance_from_current_end_to_end_of_chain = 0; - } } else if (chain_problem->is_looping_chain) { //TODO: I think I should be able to do this without the distance index but none of our graphs so far have loops // so I'm not going to bother @@ -3047,7 +3073,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro cerr << "\t\t" << c.first << ":"<at(c.first)->size() ; x++) { if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).pos << " "; + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; } } cerr << endl; From da5f8913ab2b066bc7e6aba3a9495fa3e7c58e29 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 23 Jul 2024 15:06:44 +0200 Subject: [PATCH 041/124] Add chain component count to chains zipcodes --- src/unittest/zip_code.cpp | 96 ++++++++++++++++++++++++++++++++------- src/zip_code.cpp | 69 +++++++++++++++++++++------- src/zip_code.hpp | 16 ++++++- 3 files changed, 146 insertions(+), 35 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 185733a4531..96978bf8658 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -119,6 +119,10 @@ using namespace std; //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + + //Component count of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the 
node code //Third value is the prefix sum of the node @@ -180,6 +184,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the snarl code //1 for a regular snarl @@ -222,6 +230,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2+1); + //chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); @@ -415,6 +427,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Third value is the chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the node code //Third value is the prefix sum of the node @@ -478,6 +494,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Third value is the chain component count of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the regular snarl code REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); @@ -521,6 +541,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 3+1); + //chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next 
is the node code REQUIRE(decoder.decoder[3] == std::make_pair(true, value_and_index.second)); //Offset of the node in the chain @@ -595,6 +619,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Second value is the chain component count of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the regular snarl code for snarl 1-8 REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); @@ -636,6 +664,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 3+1); + //chain component_count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the regular snarl code for snarl 2-7 REQUIRE(decoder.decoder[3] == std::make_pair(false, value_and_index.second)); //1 as tag for regular snarl @@ -675,6 +707,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.minimum_length(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) +1); + //component_count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //REgular snarl code for snarl 3-5 REQUIRE(decoder.decoder[5] == std::make_pair(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -714,6 +750,9 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 4+1) ; + //Chain component + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0) ; 
//That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); @@ -982,8 +1021,6 @@ using namespace std; bool chain_is_reversed = distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id())); - graph.serialize_to_file("test_graph.hg"); - SECTION ("zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); @@ -1001,6 +1038,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Third is the chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Irregular snarl code for snarl 1-4 REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); //0 as tag for irregular snarl @@ -1059,6 +1100,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1+1); + //Component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); } @@ -1337,6 +1382,9 @@ using namespace std; //length value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2+1); + //component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Node 3 REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); @@ -1489,7 +1537,7 @@ using namespace std; }; } } - TEST_CASE("Top-level chain zipcode", "[zipcode]") { + TEST_CASE("Top-level chain zipcode", "[zipcode][bug]") { VG graph; @@ -1514,6 +1562,14 @@ using namespace std; IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex distance_index; 
fill_in_distance_index(&distance_index, &graph, &snarl_finder); + net_handle_t n = distance_index.get_node_net_handle(3); + while (! distance_index.is_root(n)) { + cerr << distance_index.net_handle_as_string(n) << endl; + n = distance_index.get_parent(n); + } + cerr << distance_index.net_handle_as_string(n) << endl; + + graph.serialize_to_file("test_graph.hg"); SECTION ("zip code for node on top-level chain") { net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); @@ -1534,6 +1590,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Third value is the chain component count + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the node code //Third value is the prefix sum of the node @@ -1561,19 +1621,19 @@ using namespace std; } SECTION("Distances") { ZipCode zip1; - zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - ZipCode zip2; - zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - ZipCode zip3; - zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - ZipCode zip4; - zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - ZipCode zip5; - zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - ZipCode zip6; - zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); - ZipCode zip7; - zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); + zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), false, 0)); + ZipCode zip2; + zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), false, 0)); + ZipCode zip3; + zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), false, 0)); + ZipCode zip4; + zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), false, 0)); + ZipCode zip5; + zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), false, 0)); + 
ZipCode zip6; + zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), false, 0)); + ZipCode zip7; + zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), false, 0)); ZipCodeDecoder decoder1(&zip1); ZipCodeDecoder decoder2(&zip2); @@ -1581,6 +1641,10 @@ using namespace std; decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); + ZipCodeDecoder decoder6(&zip6); + cerr << "DISTANCE: " << ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder6, make_pos_t(n6->id(), false, 0), + distance_index) << endl;; REQUIRE(ZipCode::is_farther_than(zip1, zip6, 3)); REQUIRE(!ZipCode::is_farther_than(zip1, zip6, 5)); REQUIRE(ZipCode::is_farther_than(zip1, zip7, 8)); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 375611627cb..69b5e7d63a7 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -33,13 +33,27 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p //the chain from the stack //If the root-level structure is a trivial chain, then just store the node (as a chain, which will have the //connected-component number as the rank in the snarl anyways) - if (!distance_index.is_trivial_chain(ancestors.back())) { + zipcode.add_value(distance_index.get_connected_component_number(ancestors.back())); + cerr << "Adding " << distance_index.net_handle_as_string(ancestors.back()) << endl; + if (ancestors.size() == 2 && distance_index.is_trivial_chain(ancestors.back())) { #ifdef DEBUG_ZIPCODE - cerr << "Adding code for top-level chain" << endl; + cerr << "Adding code for top-level trivial chain" << endl; #endif - zipcode.add_value(distance_index.get_connected_component_number(ancestors.back())); - ancestors.pop_back(); + zipcode.add_value(distance_index.minimum_length(ancestors.back())+1); + return; + } else { +#ifdef DEBUG_ZIPCODE + cerr << "Adding code for top-level chain" << endl; +#endif + + size_t component = distance_index.get_chain_component(distance_index.get_bound(ancestors.back(), true, false), 
true); + component = component == std::numeric_limits::max() ? 0 : component*2; + if (distance_index.is_looping_chain(ancestors.back())) { + component += 1; + } + zipcode.add_value(component); } + ancestors.pop_back(); } //Go through the ancestors top (root) down and add them to the zip code @@ -154,21 +168,19 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? "chain or node" return false; } else if (zip_length == 1) { //If there is one thing in the zipcode - - //Get the first value, which is 1 if the top-level structure is a chain - for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(previous_is_chain, zip_index) = zipcode->zipcode.get_value_and_next_index(0); - } - //The next thing is the connected-component number - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET - ZipCode::ROOT_IS_CHAIN_OFFSET -1; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - } + previous_is_chain = decoder.back().first; //If the top-level structure is a chain, it might actually be a node, in which case //the only other thing that got stored is the length if (previous_is_chain) { - if (zipcode->zipcode.get_value_and_next_index(zip_index).second == std::numeric_limits::max()) { - //If the zip code ends here, then this was a node and we're done + //Get to the end of the root chain + assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't + + for (size_t i = 0 ; i < ZipCode::ROOT_CHAIN_SIZE ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + if (zip_index == std::numeric_limits::max()) { + //If the zip code ends here (after the length), then this was a node and we're done #ifdef DEBUG_ZIPCODE cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #endif @@ -195,6 +207,9 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } } else { 
//Otherwise, the top-level thing is a snarl and the next thing is a chain + for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } decoder.emplace_back(!previous_is_chain, zip_index); return false; } @@ -845,6 +860,15 @@ vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDis chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); size_t len = distance_index.minimum_length(chain); chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; + bool is_trivial = distance_index.is_trivial_chain(chain) ; + size_t component = is_trivial + ? 0 + : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); + component = component == std::numeric_limits::max() ? 0 : component*2; + if (!is_trivial && distance_index.is_looping_chain(chain)) { + component += 1; + } + chain_code[CHAIN_COMPONENT_COUNT_OFFSET] = component; return chain_code; } @@ -1460,12 +1484,16 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //The zips now point to the children of the shared chain, so we can proceed as if the top-level //structure was a chain + } else { + //If it is a chain, get one more thing to get to the end of the chain + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); } //Both zips now point to a thing in a shared chain //Get the minimum possible distance between the structures on the chain //For a lower bound, this assumes that the positions are as close as they can be on the structure in the chain - size_t prefix_sum1, prefix_sum2, length1, length2; + size_t prefix_sum1, prefix_sum2, length1, length2, component1, component2; //The next thing could either be a snarl or a node. 
If it is a node, vector next_values; @@ -1483,6 +1511,7 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //If the last thing was a node prefix_sum1 = next_values[0]; length1 = next_values[1]; + component1 = next_values[2]; prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; } else { @@ -1494,6 +1523,8 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //If the next thing was a regular snarl prefix_sum1 = next_values[1]; length1 = next_values[2]; + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + component1 = zip_value1; prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; } else { @@ -1519,6 +1550,7 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //If the last thing was a node prefix_sum2 = next_values[0]; length2 = next_values[1]; + component2 = next_values[2]; prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; length2 = length2 == 0 ? std::numeric_limits::max() : length2-1; } else { @@ -1530,6 +1562,8 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //If the next thing was a regular snarl prefix_sum2 = next_values[1]; length2 = next_values[2]; + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + component2 = zip_value2; prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; length2 = length2 == 0 ? 
std::numeric_limits::max() : length2-1; } else { @@ -1542,7 +1576,8 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si cerr << "Finding distance in chain between " << prefix_sum1 << " " << length1 << " and " << prefix_sum2 << " and " << length2 << endl; #endif - if (prefix_sum1 == std::numeric_limits::max() || + if (component1 != component2 || + prefix_sum1 == std::numeric_limits::max() || prefix_sum2 == std::numeric_limits::max() || length1 == std::numeric_limits::max() || length2 == std::numeric_limits::max()) { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index eedae882804..b3acc9c709a 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -139,20 +139,32 @@ class ZipCode { ///Offsets of values in a root chain or snarl code ///Roots have a bool for is_chain and an identifier, which is the ///connected component number from the distance index - const static size_t ROOT_CHAIN_OR_SNARL_SIZE = 2; + const static size_t ROOT_SNARL_SIZE = 2; const static size_t ROOT_IS_CHAIN_OFFSET = 0; const static size_t ROOT_IDENTIFIER_OFFSET = 1; + //FOr a chain, also include the component count + const static size_t ROOT_CHAIN_SIZE = 3; + const static size_t ROOT_CHAIN_COMPONENT_COUNT_OFFSET = 2; + //If the zipcode is for a root-level node, then there are only three things //in the zipcode, and the last is the length of the node const static size_t ROOT_NODE_SIZE = 3; const static size_t ROOT_NODE_LENGTH_OFFSET = 2; ///Offsets for chain codes - const static size_t CHAIN_SIZE = 2; + const static size_t CHAIN_SIZE = 3; const static size_t CHAIN_RANK_IN_SNARL_OFFSET = 0; const static size_t CHAIN_LENGTH_OFFSET = 1; + //This tells us if the chain is a multicomponent chain, how many components it has, and if the chain loops + //The value is the component of the last node in the chain * 2, +1 if the chain loops + //So 0 means normal chain, 1 means one component but the chain loops, 2 means 2 components, 3 means 2 components with a loop... 
+ //This is maybe not the most efficient way of storing it but since it is pretty rare for the chains to + //be multicomponent chains and rarer for them to loop, and the multicomponent chains probably won't have + //a lot of components anyway, this is more efficient for the majority of cases when the value will be 0 + const static size_t CHAIN_COMPONENT_COUNT_OFFSET = 2; + ///Offsets for snarl codes const static size_t REGULAR_SNARL_SIZE = 6; const static size_t IRREGULAR_SNARL_SIZE = 10; From 3b4855e3418e05b24228715bf43773e9189196e6 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 23 Jul 2024 17:07:15 +0200 Subject: [PATCH 042/124] Add unit test for looping zipcodes and fix bugs --- src/unittest/zip_code.cpp | 63 +++++++++++++++++++++++++++++++++------ src/zip_code.cpp | 43 ++++++++++++++++++++++++-- src/zip_code.hpp | 7 +++++ 3 files changed, 101 insertions(+), 12 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 96978bf8658..a63141973c5 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -1537,7 +1537,7 @@ using namespace std; }; } } - TEST_CASE("Top-level chain zipcode", "[zipcode][bug]") { + TEST_CASE("Top-level chain zipcode", "[zipcode]") { VG graph; @@ -1562,14 +1562,6 @@ using namespace std; IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex distance_index; fill_in_distance_index(&distance_index, &graph, &snarl_finder); - net_handle_t n = distance_index.get_node_net_handle(3); - while (! 
distance_index.is_root(n)) { - cerr << distance_index.net_handle_as_string(n) << endl; - n = distance_index.get_parent(n); - } - cerr << distance_index.net_handle_as_string(n) << endl; - - graph.serialize_to_file("test_graph.hg"); SECTION ("zip code for node on top-level chain") { net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); @@ -1745,5 +1737,58 @@ using namespace std; } } + TEST_CASE( "Looping chain zipcode", "[zipcode][bug]" ) { + VG graph; + + Node* n1 = graph.create_node("ACACGTTGC"); + Node* n2 = graph.create_node("TCTCCACCGGCAAGTTTCACTTCACTT"); + Node* n3 = graph.create_node("A"); + Node* n4 = graph.create_node("AT"); + Node* n5 = graph.create_node("CGTGGGG"); + + Edge* e1 = graph.create_edge(n1, n2); + Edge* e2 = graph.create_edge(n1, n5); + Edge* e3 = graph.create_edge(n2, n3); + Edge* e4 = graph.create_edge(n2, n4); + Edge* e5 = graph.create_edge(n3, n4); + Edge* e6 = graph.create_edge(n4, n5); + + + IntegratedSnarlFinder snarl_finder(graph); + SnarlDistanceIndex distance_index; + fill_in_distance_index(&distance_index, &graph, &snarl_finder); + + //graph.to_dot(cerr); + + SECTION( "node2" ) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); + net_handle_t parent = distance_index.get_parent(node2); + cerr << distance_index.net_handle_as_string(parent) << endl; + net_handle_t bound = distance_index.get_bound(parent, true, false); + + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 2); + + REQUIRE(distance_index.minimum_length(node2) == decoder.get_length(1)); + REQUIRE(decoder.get_chain_component(1) == distance_index.get_chain_component(node2)); + REQUIRE(decoder.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); + REQUIRE(decoder.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); + REQUIRE(decoder.get_is_looping_chain(0)); + } 
+ + SECTION( "node5" ) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + net_handle_t node = distance_index.get_node_net_handle(n5->id()); + net_handle_t parent = distance_index.get_parent(node); + net_handle_t bound = distance_index.get_bound(parent, true, false); + + ZipCodeDecoder decoder(&zipcode); + + REQUIRE(distance_index.minimum_length(node) == decoder.get_length(decoder.max_depth())); + } + } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 69b5e7d63a7..70dd4c1d552 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -34,7 +34,6 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p //If the root-level structure is a trivial chain, then just store the node (as a chain, which will have the //connected-component number as the rank in the snarl anyways) zipcode.add_value(distance_index.get_connected_component_number(ancestors.back())); - cerr << "Adding " << distance_index.net_handle_as_string(ancestors.back()) << endl; if (ancestors.size() == 2 && distance_index.is_trivial_chain(ancestors.back())) { #ifdef DEBUG_ZIPCODE cerr << "Adding code for top-level trivial chain" << endl; @@ -69,7 +68,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p zipcode.add_value(x); } #ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::NODE_SIZE); + assert(to_add.size() == ZipCode::NODE_SIZE); #endif } else if (distance_index.is_chain(current_ancestor)) { vector to_add = get_chain_code(current_ancestor, distance_index); @@ -545,6 +544,40 @@ size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { return zip_value; } } + +size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_end) const { + + if (!decoder[depth].first) { + throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); + } + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= 
ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + if (zip_value % 2) { + if (!get_end) { + return 0; + } else { + zip_value -= 1; + } + } + + return zip_value / 2; +} + +bool ZipCodeDecoder::get_is_looping_chain(const size_t& depth) const { + + if (!decoder[depth].first) { + throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); + } + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return zip_value % 2; +} bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { @@ -1815,11 +1848,12 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //Walk through the zipcode to get values size_t zip_value; size_t zip_index = decoder[max_depth()-1].second; - //is_chain + //is_chain/rank in snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier for root, chain length for anything else std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if (decoder_length() == 2) { //If the node is a child of the root chain payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); @@ -1831,6 +1865,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); + //chain component count + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Node prefix sum std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index b3acc9c709a..1e105663e1e 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -315,6 +315,13 @@ class ZipCodeDecoder { ///For snarls, this will be the component of the start node size_t get_chain_component(const size_t& depth) const ; + ///Get the chain component of the last node in the chain + /// This behaves like the distance index get_chain_component- + /// for looping chains it returns the last component if get_end is true, + /// and 0 if it is false + size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; + bool get_is_looping_chain(const size_t& depth) const ; + ///Is the snarl tree node backwards relative to its parent bool get_is_reversed_in_parent(const size_t& depth) const; From d135aab6aef1c538e8f6e3bfeda5ae5ec1eef147 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 23 Jul 2024 17:07:33 +0200 Subject: [PATCH 043/124] Use zipcodes for chain component in clustering --- src/snarl_seed_clusterer.cpp | 5 ++++- src/snarl_seed_clusterer.hpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 8ee59128821..4782a4cf55c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -364,10 +364,12 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); + cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; //assert(seed.payload.parent_record_offset == // (distance_index.is_trivial_chain(parent_handle) ? 
distance_index.get_record_offset(distance_index.get_parent(parent_handle)) // :distance_index.get_record_offset(parent_handle))); + cerr << "Node length " << seed.payload.node_length << " should be " << distance_index.minimum_length(handle) << endl; assert(seed.payload.node_length == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) // ? std::numeric_limits::max() @@ -384,6 +386,7 @@ cerr << "Add all seeds to nodes: " << endl; if (!distance_index.is_root(seed.payload.parent_handle)) { cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; + cerr <<" Is actually " << distance_index.net_handle_as_string( distance_index.start_end_traversal_of(seed.payload.parent_handle)) << endl; assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); } #endif @@ -779,7 +782,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "For child type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth) << endl; cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " - ke<< distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), 
chain_problem->zipcode_depth-1, &distance_index))<< endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index b592fbc15cd..2b123dead8b 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -323,7 +323,7 @@ class SnarlDistanceIndexClusterer { is_looping_chain = distance_index.is_looping_chain(containing_net_handle); node_length = distance_index.chain_minimum_length(containing_net_handle); end_in = distance_index.get_bound(containing_net_handle, true, true); - chain_component_end = distance_index.get_chain_component(end_in, true); + chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); } //Set the values needed to cluster a snarl From ddef07f63b3153b79d243572b01074963bf8a6dd Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 10:24:07 +0200 Subject: [PATCH 044/124] Use regular snarls to skip finding distances --- src/snarl_seed_clusterer.cpp | 68 ++++++++++++++++++++++++++---------- src/snarl_seed_clusterer.hpp | 3 +- 2 files changed, 52 insertions(+), 19 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 4782a4cf55c..deaa595db87 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -728,7 +728,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster !distance_index.is_externally_start_start_connected(chain_handle) && !distance_index.is_externally_start_end_connected(chain_handle) && !distance_index.is_externally_end_end_connected(chain_handle) && - 
!distance_index.is_looping_chain(chain_handle); + !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(chain_problem->zipcode_depth); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -1585,7 +1585,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. Otherwise, compare the children of the snarl - if (!distance_index.is_simple_snarl(snarl_handle)) { + if (snarl_problem->seed->seed->zipcode_decoder->get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters @@ -1601,8 +1601,13 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin SnarlTreeNodeProblem& child_problem_i = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].net_handle)); - if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && - child_problem_i.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { + if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 + ? clustering_problem.read_distance_limit + : clustering_problem.fragment_distance_limit) + && + child_problem_i.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 + ? 
clustering_problem.read_distance_limit + : clustering_problem.fragment_distance_limit)) { //If everything is too far away to cluster, then skip it continue; } @@ -1652,30 +1657,57 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin clustering_problem.net_handle_to_node_problem_index.at(node_problem.net_handle)); //Add the cluster heads + //May need to flip the distances for (auto& cluster_head : child_problem.read_cluster_heads) { snarl_problem->read_cluster_heads.emplace(cluster_head); + if (child_problem.is_reversed_in_parent) { + size_t old_left = clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left; + clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left = + clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_right; + clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_right = old_left; + } } + //Update the distances - //Because the orientation of the nodes was determined by the orientation of the chain, - //the orientation relative to the snarl is correct for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { if (read_num == 0) { - snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_left.first, - child_problem.read_best_left.first); - snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_right.first, - child_problem.read_best_right.first); + if (child_problem.is_reversed_in_parent) { + snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_left.first, + child_problem.read_best_left.first); + snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_right.first, + child_problem.read_best_right.first); + } else { + snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_left.first, + child_problem.read_best_left.first); + snarl_problem->read_best_right.first = 
std::min(snarl_problem->read_best_right.first, + child_problem.read_best_right.first); + } } else { - snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_left.second, - child_problem.read_best_left.second); - snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_right.second, - child_problem.read_best_right.second); + if (child_problem.is_reversed_in_parent) { + snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_left.second, + child_problem.read_best_left.second); + snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_right.second, + child_problem.read_best_right.second); + } else { + snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_left.second, + child_problem.read_best_left.second); + snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_right.second, + child_problem.read_best_right.second); + } } } - snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_left, - child_problem.fragment_best_left); - snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_right, - child_problem.fragment_best_right); + if (child_problem.is_reversed_in_parent) { + snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_left, + child_problem.fragment_best_left); + snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_right, + child_problem.fragment_best_right); + } else { + snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_left, + child_problem.fragment_best_left); + snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_right, + child_problem.fragment_best_right); + } } diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 2b123dead8b..9fb176c0410 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -320,10 +320,11 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster 
a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = distance_index.is_looping_chain(containing_net_handle); + is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); node_length = distance_index.chain_minimum_length(containing_net_handle); end_in = distance_index.get_bound(containing_net_handle, true, true); chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); + is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } //Set the values needed to cluster a snarl From 976174857ce11771b0c6f1599c88409fddf8be40 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 12:52:15 +0200 Subject: [PATCH 045/124] Add external connectivity to zipcodes --- src/unittest/zip_code.cpp | 78 ++++++++++++++++++++++++++++++++++++--- src/zip_code.cpp | 58 ++++++++++++++++++++++++++--- src/zip_code.hpp | 9 ++++- 3 files changed, 133 insertions(+), 12 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index a63141973c5..a9ad492c6c8 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -124,6 +124,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the node code //Third value is the prefix sum of the node @@ -188,6 +192,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the snarl code //1 for a regular snarl @@ -431,6 +439,10 @@ using namespace std; value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the node code //Third value is the prefix sum of the node @@ -498,6 +510,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the regular snarl code REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); @@ -623,6 +639,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Next is the regular snarl code for snarl 1-8 REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); @@ -1042,6 +1062,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //Irregular snarl code for snarl 1-4 REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); //0 as tag for irregular snarl @@ -1586,6 +1610,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 0); + //Connectivity of the chain + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + 
//Next is the node code //Third value is the prefix sum of the node @@ -1633,10 +1661,7 @@ using namespace std; decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - ZipCodeDecoder decoder6(&zip6); - cerr << "DISTANCE: " << ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), - distance_index) << endl;; + REQUIRE(ZipCode::is_farther_than(zip1, zip6, 3)); REQUIRE(!ZipCode::is_farther_than(zip1, zip6, 5)); REQUIRE(ZipCode::is_farther_than(zip1, zip7, 8)); @@ -1737,7 +1762,7 @@ using namespace std; } } - TEST_CASE( "Looping chain zipcode", "[zipcode][bug]" ) { + TEST_CASE( "Looping chain zipcode", "[zipcode]" ) { VG graph; Node* n1 = graph.create_node("ACACGTTGC"); @@ -1765,7 +1790,6 @@ using namespace std; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t parent = distance_index.get_parent(node2); - cerr << distance_index.net_handle_as_string(parent) << endl; net_handle_t bound = distance_index.get_bound(parent, true, false); ZipCodeDecoder decoder(&zipcode); @@ -1790,5 +1814,47 @@ using namespace std; REQUIRE(distance_index.minimum_length(node) == decoder.get_length(decoder.max_depth())); } } + TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { + VG graph; + + Node* n1 = graph.create_node("GCA"); + Node* n2 = graph.create_node("T"); + Node* n3 = graph.create_node("G"); + Node* n4 = graph.create_node("CTGA"); + Node* n5 = graph.create_node("GCA"); + Node* n6 = graph.create_node("G"); + + Edge* e1 = graph.create_edge(n1, n2); + Edge* e2 = graph.create_edge(n1, n3); + Edge* e3 = graph.create_edge(n2, n4); + Edge* e4 = graph.create_edge(n3, n4); + Edge* e5 = graph.create_edge(n4, n5); + Edge* e6 = graph.create_edge(n4, n6); + Edge* e7 = graph.create_edge(n5, n6); + Edge* e8 = graph.create_edge(n1, n1, true, false); + + ofstream out ("testGraph.hg"); + 
graph.serialize(out); + + IntegratedSnarlFinder snarl_finder(graph); + SnarlDistanceIndex dist_index; + fill_in_distance_index(&dist_index, &graph, &snarl_finder); + + + SECTION( "Check connectivity" ) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); + ZipCodeDecoder decoder(&zipcode); + + REQUIRE(decoder.get_length(1) == 1); + + if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { + REQUIRE(decoder.is_externally_end_end_connected(0)); + } else { + REQUIRE(decoder.is_externally_start_start_connected(0)); + } + + } + } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 70dd4c1d552..0d0e40a5c87 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -51,6 +51,19 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p component += 1; } zipcode.add_value(component); + + size_t connectivity = 0; + if ( distance_index.is_externally_start_end_connected(ancestors.back())) { + connectivity = connectivity | 1; + } + if ( distance_index.is_externally_start_start_connected(ancestors.back())) { + connectivity = connectivity | 2; + } + if ( distance_index.is_externally_end_end_connected(ancestors.back())) { + connectivity = connectivity | 4; + } + + zipcode.add_value(connectivity); } ancestors.pop_back(); } @@ -173,9 +186,9 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? 
"chain or node" //the only other thing that got stored is the length if (previous_is_chain) { //Get to the end of the root chain - assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't + assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE+1);//This is true for now but all this will change if it isn't - for (size_t i = 0 ; i < ZipCode::ROOT_CHAIN_SIZE ; i++) { + for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_index == std::numeric_limits::max()) { @@ -186,6 +199,8 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; finished_decoding = true; return true; } else { + //Get to the end of the root chain + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Otherwise, check if this is a node or a snarl. If it is a node, then there are three things remaining size_t start_index = zip_index; @@ -812,6 +827,37 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna } } +bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { + assert(depth == 0); + assert(decoder[0].first); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return (zip_value & 1) != 0; +} +bool ZipCodeDecoder::is_externally_start_start_connected (const size_t& depth) const { + assert(depth == 0); + assert(decoder[0].first); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return (zip_value & 2) != 0; +} +bool ZipCodeDecoder::is_externally_end_end_connected 
(const size_t& depth) const { + assert(depth == 0); + assert(decoder[0].first); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return (zip_value & 4) != 0; +} + const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, const size_t& depth) { @@ -1518,9 +1564,11 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si //structure was a chain } else { - //If it is a chain, get one more thing to get to the end of the chain - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + //If it is a chain, get two more things to get to the end of the chain + for (size_t i = 0 ; i < 2 ; ++i) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } } //Both zips now point to a thing in a shared chain diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 1e105663e1e..99da795b259 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -144,8 +144,10 @@ class ZipCode { const static size_t ROOT_IDENTIFIER_OFFSET = 1; //FOr a chain, also include the component count - const static size_t ROOT_CHAIN_SIZE = 3; + const static size_t ROOT_CHAIN_SIZE = 4; const static size_t ROOT_CHAIN_COMPONENT_COUNT_OFFSET = 2; + //This is a bitvector storing if there is connectivity between the bounds of the chain + const static size_t ROOT_CHAIN_CONNECTIVITY_OFFSET = 3; //If the zipcode is for a root-level node, then there are only three things //in the zipcode, and the last is the length of the node @@ -344,6 +346,11 @@ class ZipCodeDecoder { /// The minimum distance from start or end of the snarl to the 
left or right side of the child size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; + bool is_externally_start_end_connected(const size_t& depth) const; + bool is_externally_start_start_connected(const size_t& depth) const; + bool is_externally_end_end_connected(const size_t& depth) const; + + ///Are the two decoders pointing to the same snarl tree node at the given depth ///This only checks if the values in the zipcode are the same at the given depth, ///so if the preceeding snarl tree nodes are different, From 74d4122c36c4a6040ec80a39a2104ef1225c7d88 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 12:52:36 +0200 Subject: [PATCH 046/124] Take out end_in net_handle_t --- src/snarl_seed_clusterer.cpp | 11 +++++------ src/snarl_seed_clusterer.hpp | 7 +------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index deaa595db87..4afa364b9a0 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2695,12 +2695,11 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->is_looping_chain) { - //TODO: I think I should be able to do this without the distance index but none of our graphs so far have loops - // so I'm not going to bother - //If it's a looping chain then use the distance index - distance_from_current_end_to_end_of_chain = distance_index.distance_in_parent(chain_handle, chain_problem->end_in, - current_child.net_handle); + } else if (chain_problem->chain_component_end != child_problem.chain_component_end) { + //If it's not in the same component + distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); + //TODO: Used to do this, I"m pretty sure I don't need to though + 
//distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.net_handle); } else if (child_problem.node_length == std::numeric_limits::max() ) { //If the node length is infinite, then it is a snarl that isn't start-end connected, so the start //and end of the snarl are in different components of the chain. Since it reached here, the end diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 9fb176c0410..239d1e0d182 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -260,10 +260,6 @@ class SnarlDistanceIndexClusterer { net_handle_t parent_net_handle; net_handle_t grandparent_net_handle; - //The boundary node of containing_net_handle, for a snarl or chain - //if it is a snarl, then this is the actual node, not the sentinel - net_handle_t end_in; - //One representative seed so we can get the zipcode and stuff const SeedCache* seed; size_t zipcode_depth; @@ -322,7 +318,6 @@ class SnarlDistanceIndexClusterer { void set_chain_values(const SnarlDistanceIndex& distance_index) { is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); node_length = distance_index.chain_minimum_length(containing_net_handle); - end_in = distance_index.get_bound(containing_net_handle, true, true); chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } @@ -331,7 +326,7 @@ class SnarlDistanceIndexClusterer { void set_snarl_values(const SnarlDistanceIndex& distance_index) { node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); - end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); + net_handle_t end_in = 
distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); chain_component_start = seed->seed->zipcode_decoder->get_chain_component(zipcode_depth); chain_component_end = node_length == std::numeric_limits::max() ? chain_component_start+1 : chain_component_start; From 804c44d4d7937dcc49cbd90a5c70db9f699d4fe9 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 13:49:36 +0200 Subject: [PATCH 047/124] Fix payload with new values --- src/zip_code.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 0d0e40a5c87..be1ad85166b 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1907,6 +1907,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } else { payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; From 18b4e7eae9d69536281355dcbd8ace20f8323883 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 14:34:49 +0200 Subject: [PATCH 048/124] Add external connectivity for root nodes --- src/unittest/zip_code.cpp | 4 ++++ src/zip_code.cpp | 46 ++++++++++++++++++++++++--------------- src/zip_code.hpp | 6 ++--- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index a9ad492c6c8..da72dcbdf14 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -33,6 +33,10 @@ using namespace std; value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 11+1); + //Connectivity + value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); + //That's it REQUIRE(value_and_index.second == std::numeric_limits::max()); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index be1ad85166b..1f6f1bd2ba6 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -39,6 +39,18 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p cerr << "Adding code for top-level trivial chain" << endl; #endif zipcode.add_value(distance_index.minimum_length(ancestors.back())+1); + size_t connectivity = 0; + if ( distance_index.is_externally_start_end_connected(ancestors.back())) { + connectivity = connectivity | 1; + } + if ( distance_index.is_externally_start_start_connected(ancestors.back())) { + connectivity = connectivity | 2; + } + if ( distance_index.is_externally_end_end_connected(ancestors.back())) { + connectivity = connectivity | 4; + } + + zipcode.add_value(connectivity); return; } else { #ifdef DEBUG_ZIPCODE @@ -52,19 +64,19 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p } zipcode.add_value(component); - size_t connectivity = 0; - if ( distance_index.is_externally_start_end_connected(ancestors.back())) { - connectivity = connectivity | 1; - } - if ( distance_index.is_externally_start_start_connected(ancestors.back())) { - connectivity = connectivity | 2; - } - if ( distance_index.is_externally_end_end_connected(ancestors.back())) { - connectivity = connectivity | 4; - } - - zipcode.add_value(connectivity); } + size_t connectivity = 0; + if ( distance_index.is_externally_start_end_connected(ancestors.back())) { + connectivity = connectivity | 1; + } + if ( distance_index.is_externally_start_start_connected(ancestors.back())) { + connectivity = connectivity | 2; + } + if ( distance_index.is_externally_end_end_connected(ancestors.back())) { + connectivity = connectivity | 4; + } + + zipcode.add_value(connectivity); ancestors.pop_back(); } @@ -186,7 
+198,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? "chain or node" //the only other thing that got stored is the length if (previous_is_chain) { //Get to the end of the root chain - assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE+1);//This is true for now but all this will change if it isn't + assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -199,8 +211,6 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; finished_decoding = true; return true; } else { - //Get to the end of the root chain - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Otherwise, check if this is a node or a snarl. If it is a node, then there are three things remaining size_t start_index = zip_index; @@ -832,7 +842,7 @@ bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) con assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 1) != 0; @@ -842,7 +852,7 @@ bool ZipCodeDecoder::is_externally_start_start_connected (const size_t& depth) c assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 2) != 0; @@ -852,7 +862,7 @@ bool ZipCodeDecoder::is_externally_end_end_connected 
(const size_t& depth) const assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_CHAIN_CONNECTIVITY_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 4) != 0; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 99da795b259..376d7d1483e 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -146,12 +146,12 @@ class ZipCode { //FOr a chain, also include the component count const static size_t ROOT_CHAIN_SIZE = 4; const static size_t ROOT_CHAIN_COMPONENT_COUNT_OFFSET = 2; - //This is a bitvector storing if there is connectivity between the bounds of the chain - const static size_t ROOT_CHAIN_CONNECTIVITY_OFFSET = 3; + //This is a bitvector storing if there is connectivity between the bounds of the node/chain + const static size_t ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET = 3; //If the zipcode is for a root-level node, then there are only three things //in the zipcode, and the last is the length of the node - const static size_t ROOT_NODE_SIZE = 3; + const static size_t ROOT_NODE_SIZE = 4; const static size_t ROOT_NODE_LENGTH_OFFSET = 2; ///Offsets for chain codes From 56fbd522577fc608345a8ee8b7f700c890724952 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 14:59:17 +0200 Subject: [PATCH 049/124] Use zipcode for external connectivity --- src/snarl_seed_clusterer.cpp | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 4afa364b9a0..8b46776bc2c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -725,10 +725,10 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top 
level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - !distance_index.is_externally_start_start_connected(chain_handle) && - !distance_index.is_externally_start_end_connected(chain_handle) && - !distance_index.is_externally_end_end_connected(chain_handle) && - !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(chain_problem->zipcode_depth); + !chain_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(0); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -1439,9 +1439,18 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust //Get the distances between the two sides of the child - size_t distance_left_left = distance_index.is_externally_start_start_connected(handle) ? 0 : std::numeric_limits::max(); - size_t distance_left_right = distance_index.is_externally_start_end_connected(handle) ? 0 : std::numeric_limits::max(); - size_t distance_right_right = distance_index.is_externally_end_end_connected(handle) ? 0 : std::numeric_limits::max(); + size_t distance_left_left = + child_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(child_problem->zipcode_depth) + ? 0 + : std::numeric_limits::max(); + size_t distance_left_right = + child_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(child_problem->zipcode_depth) + ? 0 + : std::numeric_limits::max(); + size_t distance_right_right = + child_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(child_problem->zipcode_depth) + ? 
0 + : std::numeric_limits::max(); if (distance_left_left == std::numeric_limits::max() && distance_left_right == std::numeric_limits::max() && distance_right_right == std::numeric_limits::max()) { From f2bd83a596cfbd486a46078e1fc6081345256317 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 17:36:09 +0200 Subject: [PATCH 050/124] Don't use distance index for ordering children in chains --- src/snarl_seed_clusterer.cpp | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 8b46776bc2c..93b6cd34bc6 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -1794,7 +1794,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //First, sort the children of the chain //If there is only one child, check if it's a seeed - bool only_seeds=chain_problem->children.size() == 1 ? distance_index.is_node(chain_problem->children.front().net_handle) + bool only_seeds=chain_problem->children.size() == 1 ? chain_problem->children.front().is_seed : true; std::sort(chain_problem->children.begin(), chain_problem->children.end(), @@ -1819,7 +1819,18 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin if (child1.chain_component != child2.chain_component) { return child1.chain_component < child2.chain_component; } else if (child1.prefix_sum == child2.prefix_sum) { - return distance_index.is_ordered_in_chain(child1.net_handle, child2.net_handle); + //Get the prefix sum values not including the offset in the positions + size_t prefix_sum1 = child1.is_seed + ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).payload.prefix_sum + : child1.prefix_sum; + size_t prefix_sum2 = child2.is_seed + ? 
clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).payload.prefix_sum + : child2.prefix_sum; + if (prefix_sum1 == prefix_sum2){ + return child2.is_seed; + } else { + return prefix_sum1 < prefix_sum2; + } } else { return child1.prefix_sum < child2.prefix_sum; } @@ -1844,7 +1855,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //This also does the work of clustering a trivial chain (which is just a node), which should be the same amount of work as using cluster_one_node cluster_seeds_on_linear_structure(clustering_problem, chain_problem, chain_problem->node_length, - !distance_index.is_trivial_chain(chain_handle), is_top_level_chain); + !chain_problem->is_trivial_chain, is_top_level_chain); #ifdef DEBUG_CLUSTER cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; From 07de5dec59f7f8dcf0d5f782def19d88d448ebe8 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 21:31:23 +0200 Subject: [PATCH 051/124] Fix orientation for simple snarls since we took them out --- src/zip_code.cpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 1f6f1bd2ba6..9ecabafc196 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2003,15 +2003,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); - //Simple and regular snarls are different for clustering - if (distance_index.is_simple_snarl(grandparent_handle)) { - payload.is_reversed = zip_value; - payload.parent_is_chain=true; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); - } else { - payload.is_reversed = false; - payload.parent_record_offset = 
distance_index.get_record_offset(grandparent_handle); - } + payload.is_reversed = zip_value; + payload.parent_is_chain=true; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { payload.is_reversed = false; payload.parent_record_offset = zip_value; From 52034134d502c69223b32ffc2da8f1d96cff05b7 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 21:40:21 +0200 Subject: [PATCH 052/124] Start taking out net_handle_t's from the payload --- src/snarl_seed_clusterer.cpp | 49 ++---------------------------------- src/zip_code.cpp | 13 +++------- 2 files changed, 6 insertions(+), 56 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 93b6cd34bc6..32cd20bdc8e 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -356,8 +356,6 @@ cerr << "Add all seeds to nodes: " << endl; #ifdef DEBUG_CLUSTER //cerr << "Using cached values for node " << id << ": " - // << ", " << seed.payload.record_offset - // << ", " << seed.payload.parent_record_offset // << ", " << seed.payload.node_length // << ", " << seed.payload.prefix_sum // << ", " << seed.payload.chain_component << endl; @@ -366,9 +364,6 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; - //assert(seed.payload.parent_record_offset == - // (distance_index.is_trivial_chain(parent_handle) ? 
distance_index.get_record_offset(distance_index.get_parent(parent_handle)) - // :distance_index.get_record_offset(parent_handle))); cerr << "Node length " << seed.payload.node_length << " should be " << distance_index.minimum_length(handle) << endl; assert(seed.payload.node_length == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) @@ -409,8 +404,8 @@ cerr << "Add all seeds to nodes: " << endl; } cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; - assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? distance_index.is_reversed_in_parent(seed.payload.parent_handle) - : distance_index.is_reversed_in_parent(seed.payload.node_handle))); + //assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? distance_index.is_reversed_in_parent(seed.payload.parent_handle) + // : distance_index.is_reversed_in_parent(seed.payload.node_handle))); #endif //Add the parent chain or trivial chain @@ -476,46 +471,6 @@ cerr << "Add all seeds to nodes: " << endl; } - //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too - if ( new_parent) { - if (seed.payload.is_trivial_chain && !seed.payload.parent_is_root) { - bool grandparent_is_simple_snarl = seed.payload.parent_is_chain; - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = grandparent_is_simple_snarl - ? 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(seed.payload.node_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE, - 1) - : distance_index.get_net_handle_from_values(seed.payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; -#endif - - if (grandparent_is_simple_snarl) { - //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too - parent_problem.has_grandparent_handle = true; - parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( - seed.payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; -#endif - } - } else if (seed.payload.parent_is_root && seed.payload.parent_is_chain && !seed.payload.is_trivial_chain) { - //The parent chain is a child of the root - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( - 0, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; -#endif - } - } - - } else { //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 9ecabafc196..99cde7252fc 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1895,7 +1895,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.is_reversed = false; payload.parent_handle = distance_index.get_root(); payload.parent_type = ZipCode::ROOT_NODE; - payload.parent_record_offset = 0; } else if 
(decoder[max_depth() - 1].first) { //If the parent is a chain @@ -1922,7 +1921,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; } - payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); //chain component count std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -1965,10 +1963,10 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); - payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); + payload.parent_handle = distance_index.get_net_handle_from_values( + distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { zip_index = decoder[max_depth()-1].second; @@ -2002,13 +2000,10 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed - net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); payload.is_reversed = zip_value; payload.parent_is_chain=true; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { payload.is_reversed = false; - payload.parent_record_offset = zip_value; } } From 461de6450304b9cbe25e8e4a6e6252d8c22989ad Mon Sep 17 
00:00:00 2001 From: Xian Date: Wed, 24 Jul 2024 21:46:07 +0200 Subject: [PATCH 053/124] Take out grandparent handle --- src/snarl_seed_clusterer.cpp | 12 ------------ src/snarl_seed_clusterer.hpp | 4 +--- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 32cd20bdc8e..00752565999 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -605,12 +605,6 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); - if (snarl_problem.has_grandparent_handle) { - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = snarl_problem.grandparent_net_handle; - } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); @@ -812,12 +806,6 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); - if (chain_problem.has_grandparent_handle) { - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(parent)); - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = chain_problem.grandparent_net_handle; - } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( 
clustering_problem.net_handle_to_node_problem_index.at(parent)); diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 239d1e0d182..012a4a4c952 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -255,10 +255,9 @@ class SnarlDistanceIndexClusterer { - //The parent and grandparent of containing_net_handle, which might or might not be set + //The parent of containing_net_handle, which might or might not be set //This is just to store information from the minimizer cache net_handle_t parent_net_handle; - net_handle_t grandparent_net_handle; //One representative seed so we can get the zipcode and stuff const SeedCache* seed; @@ -278,7 +277,6 @@ class SnarlDistanceIndexClusterer { //These are sometimes set if the value was in the cache bool has_parent_handle = false; - bool has_grandparent_handle = false; //Only set this for nodes or snarls in chains bool is_reversed_in_parent = false; From 86555139d7640e3cb7664654c6856b45cde61e62 Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 25 Jul 2024 10:30:16 +0200 Subject: [PATCH 054/124] Add net identifier strings but don't actually use them --- src/snarl_seed_clusterer.cpp | 34 ++++++++++++++++++++++++---------- src/snarl_seed_clusterer.hpp | 18 ++++++++++++------ src/zip_code.cpp | 1 + src/zip_code.hpp | 2 ++ 4 files changed, 39 insertions(+), 16 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 00752565999..60dd318ef88 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -418,7 +418,9 @@ cerr << "Add all seeds to nodes: " << endl; new_parent = true; if (seed.payload.is_trivial_chain ) { clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), + 
clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, + ZipCodeDecoder::get_parent_identifier(seed.payload.identifier), + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); @@ -426,7 +428,9 @@ cerr << "Add all seeds to nodes: " << endl; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, + ZipCodeDecoder::get_parent_identifier(seed.payload.identifier), + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &seed, seed.seed->zipcode_decoder->max_depth() - 1); } @@ -483,7 +487,9 @@ cerr << "Add all seeds to nodes: " << endl; new_node = true; clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, + seed.payload.identifier, + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), @@ -540,7 +546,9 @@ cerr << "Add all seeds to nodes: " << endl; if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), 
+ clustering_problem.all_node_problems.emplace_back(parent, + ZipCodeDecoder::get_parent_identifier(node_problem.containing_net_id), + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, seed, 0); } @@ -598,7 +606,9 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(snarl_parent, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(snarl_parent, + ZipCodeDecoder::get_parent_identifier(snarl_problem->containing_net_id), + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, snarl_problem->seed, snarl_problem->zipcode_depth-1); @@ -689,7 +699,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the parent is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(parent, + ZipCodeDecoder::get_parent_identifier(chain_problem->containing_net_id), + clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); } @@ -800,9 +812,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - 
clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - chain_problem->seed, chain_problem->zipcode_depth-1); + clustering_problem.all_node_problems.emplace_back(parent, + ZipCodeDecoder::get_parent_identifier(chain_problem->containing_net_id), + clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); @@ -2994,7 +3008,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro } //Keep track of all clusters on the root - SnarlTreeNodeProblem root_problem(distance_index.get_root(), clustering_problem.all_seeds->size(), + SnarlTreeNodeProblem root_problem(distance_index.get_root(), ZipCodeDecoder::get_root_identifier(), clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &clustering_problem.all_seeds->at(0)->front(), 0); //TODO: ikd about the seed here diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 012a4a4c952..8ad5f993cdb 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -216,6 +216,7 @@ class SnarlDistanceIndexClusterer { //If the net_handle is a node, then the child is a seed, otherwise the handle //is used to find the problem net_handle_t net_handle; + net_identifier_t net_id; pair seed_indices; //The values used to sort the children of a chain @@ -249,12 +250,12 @@ class SnarlDistanceIndexClusterer { size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); + net_identifier_t containing_net_id; + net_identifier_t 
parent_net_id; + //The snarl tree node that the clusters are on net_handle_t containing_net_handle; - - - //The parent of containing_net_handle, which might or might not be set //This is just to store information from the minimizer cache net_handle_t parent_net_handle; @@ -276,6 +277,7 @@ class SnarlDistanceIndexClusterer { size_t loop_right = std::numeric_limits::max(); //These are sometimes set if the value was in the cache + bool has_net_handle = false; bool has_parent_handle = false; //Only set this for nodes or snarls in chains @@ -289,18 +291,21 @@ class SnarlDistanceIndexClusterer { //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, + SnarlTreeNodeProblem( net_handle_t net, net_identifier_t id, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, const SeedCache* seed, size_t zipcode_depth) : containing_net_handle(std::move(net)), + containing_net_id(std::move(id)), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); + parent_net_id = ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Constructor for a node or trivial chain, used to remember information from the cache - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, + SnarlTreeNodeProblem( net_handle_t net, net_identifier_t id, size_t read_count, size_t seed_count, bool is_reversed_in_parent, size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : containing_net_handle(net), + containing_net_id(std::move(id)), is_reversed_in_parent(is_reversed_in_parent), node_length(node_length), prefix_sum_value(prefix_sum), @@ -309,7 +314,8 @@ class SnarlDistanceIndexClusterer { 
fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), seed(seed), zipcode_depth(zipcode_depth) { - read_cluster_heads.reserve(seed_count); + read_cluster_heads.reserve(seed_count); + parent_net_id = ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Set the values needed to cluster a chain diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 99cde7252fc..d8121c93fad 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1870,6 +1870,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; + payload.identifier = get_identifier(max_depth()); if (decoder_length() == 1) { //If the root-level structure is a node diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 376d7d1483e..aa936bd7d09 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -370,6 +370,7 @@ class ZipCodeDecoder { /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth /// would be the node, also include the node id net_identifier_t get_identifier(size_t depth) const; + const static net_identifier_t get_root_identifier() { return "ROOT"; }; const static net_identifier_t get_parent_identifier(const net_identifier_t& child); @@ -401,6 +402,7 @@ struct MIPayload { net_handle_t node_handle; net_handle_t parent_handle; + net_identifier_t identifier; size_t node_length = std::numeric_limits::max(); size_t prefix_sum = 0; From 5170df04cc400d89f8e9159a6c6c3bfe217af5c7 Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 25 Jul 2024 11:15:49 +0200 Subject: [PATCH 055/124] Use net identifiers as keys for all the lookups --- src/snarl_seed_clusterer.cpp | 127 ++++++++++++++++++----------------- src/snarl_seed_clusterer.hpp | 16 ++--- 2 files changed, 74 insertions(+), 69 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 60dd318ef88..8ffcc618a4d 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -155,7 +155,7 @@ cerr << "\tread distance limit: " << read_distance_limit << " and fragment dista //Initially populated by get_nodes(), which adds chains whose nodes contain seeds //Chains are added when the child snarls are found //A ClusteringProblem will have pointers to the current and next level of the snarl tree - vector> chains_by_level; + vector> chains_by_level; chains_by_level.reserve(distance_index.get_max_tree_depth()+1); @@ -314,7 +314,7 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { //chain to chains_by_level //If a node is a child of the root or of a root snarl, then add cluster it and //remember to cluster the root snarl -void SnarlDistanceIndexClusterer::get_nodes( ClusteringProblem& clustering_problem, vector>& chains_by_level) const { +void SnarlDistanceIndexClusterer::get_nodes( ClusteringProblem& clustering_problem, vector>& chains_by_level) const { #ifdef DEBUG_CLUSTER cerr << "Add all seeds to nodes: " 
<< endl; #endif @@ -413,13 +413,14 @@ cerr << "Add all seeds to nodes: " << endl; new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.parent_handle) == 0) { + net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(seed.payload.identifier); + if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; if (seed.payload.is_trivial_chain ) { - clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, - ZipCodeDecoder::get_parent_identifier(seed.payload.identifier), + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), @@ -427,9 +428,9 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain - clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, - ZipCodeDecoder::get_parent_identifier(seed.payload.identifier), + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &seed, seed.seed->zipcode_decoder->max_depth() - 1); @@ -458,7 +459,7 @@ cerr << "Add all seeds to nodes: " << endl; : seed.payload.node_length- get_offset(pos); //Add this seed to its 
parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.parent_handle)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); parent_problem.children.back().net_handle = seed.payload.node_handle; parent_problem.children.back().seed_indices = {read_num, i}; @@ -471,7 +472,7 @@ cerr << "Add all seeds to nodes: " << endl; //And the parent to chains_by_level if (new_parent) { - chains_by_level[seed.payload.parent_depth].emplace_back(seed.payload.parent_handle); + chains_by_level[seed.payload.parent_depth].emplace_back(parent_id); } @@ -483,9 +484,9 @@ cerr << "Add all seeds to nodes: " << endl; //Create a new SnarlTreeNodeProblem for this node bool new_node = false; - if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.node_handle) == 0) { + if (clustering_problem.net_identifier_to_node_problem_index.count(seed.payload.identifier) == 0) { new_node = true; - clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, + clustering_problem.net_identifier_to_node_problem_index.emplace(seed.payload.identifier, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, seed.payload.identifier, @@ -503,7 +504,7 @@ cerr << "Add all seeds to nodes: " << endl; seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); - SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.node_handle)); + SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(seed.payload.identifier)); node_problem.children.emplace_back(); node_problem.children.back().net_handle = seed.payload.node_handle; @@ -530,29 +531,28 @@ cerr << "Add all seeds to nodes: " << endl; //Go through and cluster nodes that are children of the root or root snarls for(const SeedCache* seed : nodes_to_cluster_now) { - const net_handle_t& node_net_handle = seed->payload.node_handle; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(seed->payload.identifier)); //Cluster the node. 
Give it the range in node_to_seeds, which is from seed_range_start //to either current_iterator (if current_iterator is a different node), or the end of node_to_seeds //if current_iterator is the last thing in the list and the same node cluster_one_node(clustering_problem, &node_problem); - net_handle_t parent = node_problem.parent_net_handle; + net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(seed->payload.identifier); if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root - if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { - clustering_problem.net_handle_to_node_problem_index.emplace(parent, + if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, - ZipCodeDecoder::get_parent_identifier(node_problem.containing_net_id), + clustering_problem.all_node_problems.emplace_back(node_problem.parent_net_handle, + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, seed, 0); } - clustering_problem.root_children.emplace_back(parent, node_net_handle); + clustering_problem.root_children.emplace_back(parent_id, seed->payload.identifier); } else { //Otherwise, just compare the single child's external connectivity compare_and_combine_cluster_on_one_child(clustering_problem, &node_problem); @@ -571,11 +571,11 @@ cerr << "Add all seeds to nodes: " << endl; //Assumes that all the children of the snarls have been clustered already and are present in clustering_problem.snarls_to_children void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& clustering_problem) const { - for (const net_handle_t& snarl_handle : clustering_problem.parent_snarls) { + for (const net_identifier_t& snarl_id 
: clustering_problem.parent_snarls) { //Go through each of the snarls at this level, cluster them, //and find which chains they belong to, if any SnarlTreeNodeProblem* snarl_problem = &clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(snarl_id)); #ifdef DEBUG_CLUSTER cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; @@ -601,39 +601,41 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? snarl_problem->parent_net_handle : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(snarl_id); bool new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { + if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { new_parent = true; - clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(snarl_parent, - ZipCodeDecoder::get_parent_identifier(snarl_problem->containing_net_id), + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, snarl_problem->seed, snarl_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); + 
clustering_problem.net_identifier_to_node_problem_index.at(snarl_id)); } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); + clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); //Add the snarl to its parent chain parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = snarl_handle; + parent_problem.children.back().net_handle = snarl_problem->containing_net_handle; + parent_problem.children.back().identifier = snarl_id; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = false; if (new_parent) { //And the parent chain to the things to be clustered next - clustering_problem.parent_chains->emplace_back(snarl_parent); + clustering_problem.parent_chains->emplace_back(parent_id); } } #ifdef DEBUG_CLUSTER - cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_handle) << " as a child of " - << distance_index.net_handle_as_string(distance_index.get_parent(snarl_handle)) << endl; + cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->net_handle) << " as a child of " + << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->net_handle)) << endl; #endif } @@ -649,10 +651,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster } - for (const net_handle_t& chain_handle : *(clustering_problem.current_chains)) { + for (const net_identifier_t& chain_id : *(clustering_problem.current_chains)) { SnarlTreeNodeProblem* chain_problem = &clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); + net_handle_t chain_handle = chain_problem->containing_net_handle; #ifdef DEBUG_CLUSTER @@ -668,6 +671,7 @@ void 
SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(chain_id); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -697,15 +701,15 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the parent is the root, remember to cluster it if (is_root_snarl) { //If the parent is a root snarl, then remember it to cluster in the root - if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { - clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); + if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent, - ZipCodeDecoder::get_parent_identifier(chain_problem->containing_net_id), + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); } - clustering_problem.root_children.emplace_back(parent, chain_handle); + clustering_problem.root_children.emplace_back(parent_id, chain_id); } else if (!is_top_level_chain) { //Otherwise, cluster it with itself using external connectivity only //is_top_level_chain also includes external connectivity, so if it's true we don't need to check this @@ -809,28 +813,29 @@ void 
SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif //And add it to its parent snarl bool new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { + if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { new_parent = true; - clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); + clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent, - ZipCodeDecoder::get_parent_identifier(chain_problem->containing_net_id), + parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(parent)); + clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); parent_problem.children.back().net_handle = chain_handle; + parent_problem.children.back().identifier = chain_id; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = false; if (new_parent) { - clustering_problem.parent_snarls.emplace_back(parent); + clustering_problem.parent_snarls.emplace_back(parent_id); } } @@ -1565,7 +1570,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Go through each child node of the netgraph SnarlTreeNodeProblem& child_problem_i = 
clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].net_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(snarl_problem->children[i].identifier)); if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit @@ -1593,7 +1598,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the other node and its clusters SnarlTreeNodeProblem& child_problem_j = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].net_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(snarl_problem->children[j].identifier)); if (child_problem_j.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && child_problem_j.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { @@ -1620,7 +1625,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (SnarlTreeNodeProblem::SnarlTreeChild& node_problem : snarl_problem->children) { //Go through each child node of the netgraph and add its clusters to the snarl SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(node_problem.net_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(node_problem.identifier)); //Add the cluster heads //May need to flip the distances @@ -1762,16 +1767,16 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet child1.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; + clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).chain_component_start; child1.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; + clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).prefix_sum_value; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet child2.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).chain_component_start; + clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).chain_component_start; child2.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).prefix_sum_value; + 
clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).prefix_sum_value; } if (child1.chain_component != child2.chain_component) { return child1.chain_component < child2.chain_component; @@ -1901,15 +1906,15 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin size_t last_prefix_sum = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; + clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).chain_component_start; size_t last_length = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.node_length : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; + clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).node_length; size_t last_chain_component_end = last_child.is_seed ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.chain_component : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; + clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).chain_component_start; //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small @@ -2607,7 +2612,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& net_handle_t& chain_handle = chain_problem->containing_net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); + clustering_problem.net_identifier_to_node_problem_index.at(current_child.identifier)); //Skip this child if its seeds are all too far away bool skip_snarl = false; @@ -3026,19 +3031,19 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro }); //Go through the list of parent child pairs. 
Once we reach a new parent, cluster all children found up to this point - net_handle_t current_parent = clustering_problem.root_children.front().first; - vector children; + net_identifier_t current_parent = clustering_problem.root_children.front().first; + vector children; children.reserve(clustering_problem.root_children.size()); for (size_t root_child_i = 0 ; root_child_i < clustering_problem.root_children.size() ; root_child_i++) { - pair& parent_to_child = clustering_problem.root_children[root_child_i]; - net_handle_t& parent = parent_to_child.first; + pair& parent_to_child = clustering_problem.root_children[root_child_i]; + net_identifier_t& parent = parent_to_child.first; if (current_parent == parent || root_child_i == 0) { children.emplace_back(parent_to_child.second); } if (current_parent != parent || root_child_i == clustering_problem.root_children.size()-1) { #ifdef DEBUG_CLUSTER - cerr << "Clustering root snarl " << distance_index.net_handle_as_string(parent) << " with " << children.size() << " chidlren" << endl; + cerr << "Clustering root snarl with " << children.size() << " chidlren" << endl; #endif if (children.size() > 0) { @@ -3047,7 +3052,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro //Go through each child node of the netgraph SnarlTreeNodeProblem* child_problem_i = &clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(children[i])); + clustering_problem.net_identifier_to_node_problem_index.at(children[i])); for (const pair& head : child_problem_i->read_cluster_heads) { child_distances[head.second + clustering_problem.seed_count_prefix_sum[head.first]] = make_pair(clustering_problem.all_seeds->at(head.first)->at(head.second).distance_left, @@ -3059,7 +3064,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro //Get the other node and its clusters SnarlTreeNodeProblem* child_problem_j = &clustering_problem.all_node_problems.at( - 
clustering_problem.net_handle_to_node_problem_index.at(children[j])); + clustering_problem.net_identifier_to_node_problem_index.at(children[j])); diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 8ad5f993cdb..67c231b9fac 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -216,7 +216,7 @@ class SnarlDistanceIndexClusterer { //If the net_handle is a node, then the child is a seed, otherwise the handle //is used to find the problem net_handle_t net_handle; - net_identifier_t net_id; + net_identifier_t identifier; pair seed_indices; //The values used to sort the children of a chain @@ -408,14 +408,14 @@ class SnarlDistanceIndexClusterer { //The snarls and chains get updated as we move up the snarl tree //Maps each net_handle_t to an index to its node problem, in all_node_problems - hash_map net_handle_to_node_problem_index; + hash_map net_identifier_to_node_problem_index; //This stores all the snarl tree nodes and their clustering scratch work vector all_node_problems; //All chains for the current level of the snarl tree and gets updated as the algorithm //moves up the snarl tree. At one iteration, the algorithm will go through each chain //in chain to children and cluster the chain using clusters on the children - vector* current_chains; + vector* current_chains; //Same as current_chains but for the level of the snarl @@ -423,18 +423,18 @@ class SnarlDistanceIndexClusterer { //This gets updated as the current level is processed - the snarls from this level //are added as children to parent_chain_to_children. 
//After processing one level, this becomes the next chain_to_children - vector* parent_chains; + vector* parent_chains; //All snarls for the current level of the snarl tree //(chains from chain_to_children get added to their parent snarls, snarls get added to parent_snarls //then all snarls in snarl_to_children are clustered and added to parent_chain_to_children) - vector parent_snarls; + vector parent_snarls; //This holds all the child problems of the root //Each pair is the parent and the child. This will be sorted by parent before //clustering - vector> root_children; + vector> root_children; ///////////////////////////////////////////////////////// @@ -457,7 +457,7 @@ class SnarlDistanceIndexClusterer { } - net_handle_to_node_problem_index.reserve(5*seed_count); + net_identifier_to_node_problem_index.reserve(5*seed_count); all_node_problems.reserve(5*seed_count); parent_snarls.reserve(seed_count); root_children.reserve(seed_count); @@ -470,7 +470,7 @@ class SnarlDistanceIndexClusterer { //If a node is a child of the root or of a root snarl, then add cluster it and //remember to cluster the root snarl void get_nodes( ClusteringProblem& clustering_problem, - vector>& chains_by_level) const; + vector>& chains_by_level) const; //Cluster all the snarls at the current level From e541b480c9ad479c8bff9b313c3e1eb9eb132db1 Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 25 Jul 2024 16:44:02 +0200 Subject: [PATCH 056/124] Mostly take out finding net handles until they're needed --- src/snarl_seed_clusterer.cpp | 119 ++++++++++++++++++++--------------- src/snarl_seed_clusterer.hpp | 23 ++++--- src/zip_code.cpp | 17 ----- src/zip_code.hpp | 3 - 4 files changed, 85 insertions(+), 77 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 8ffcc618a4d..e15553d13eb 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -419,8 +419,7 @@ cerr << "Add all seeds to nodes: " << endl; new_parent = true; if 
(seed.payload.is_trivial_chain ) { clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), @@ -429,8 +428,7 @@ cerr << "Add all seeds to nodes: " << endl; } else { //The parent is an actual chain clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &seed, seed.seed->zipcode_decoder->max_depth() - 1); @@ -461,7 +459,7 @@ cerr << "Add all seeds to nodes: " << endl; //Add this seed to its parent cluster SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = seed.payload.node_handle; + parent_problem.children.back().has_net_handle = false; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; parent_problem.children.back().has_chain_values = true; @@ -488,17 +486,12 @@ cerr << "Add all seeds to nodes: " << endl; new_node = true; clustering_problem.net_identifier_to_node_problem_index.emplace(seed.payload.identifier, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, - seed.payload.identifier, + 
clustering_problem.all_node_problems.emplace_back(seed.payload.identifier, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); - - //Remember the parent of this node, since it will be needed to remember the root snarl later - clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; - } seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; @@ -507,7 +500,7 @@ cerr << "Add all seeds to nodes: " << endl; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(seed.payload.identifier)); node_problem.children.emplace_back(); - node_problem.children.back().net_handle = seed.payload.node_handle; + node_problem.children.back().has_net_handle = false; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; node_problem.children.back().has_chain_values = true; @@ -546,11 +539,15 @@ cerr << "Add all seeds to nodes: " << endl; if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(node_problem.parent_net_handle, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, seed, 0); + if (node_problem.has_parent_handle) { + clustering_problem.all_node_problems.back().containing_net_handle = node_problem.parent_net_handle; + clustering_problem.all_node_problems.back().has_net_handle = true; + + } } clustering_problem.root_children.emplace_back(parent_id, 
seed->payload.identifier); } else { @@ -577,9 +574,6 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster SnarlTreeNodeProblem* snarl_problem = &clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(snarl_id)); -#ifdef DEBUG_CLUSTER - cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; -#endif //Cluster the snarlindex]; cluster_one_snarl(clustering_problem, snarl_problem); @@ -598,20 +592,20 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Make a new SnarlTreeNodeProblem for the parent - net_handle_t snarl_parent = snarl_problem->has_parent_handle - ? snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(snarl_id); bool new_parent = false; if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { new_parent = true; clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(snarl_parent, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, snarl_problem->seed, snarl_problem->zipcode_depth-1); + if (snarl_problem->has_parent_handle) { + clustering_problem.all_node_problems.back().containing_net_handle = snarl_problem->parent_net_handle; + clustering_problem.all_node_problems.back().has_net_handle = true; + } //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( @@ -622,6 +616,7 @@ 
void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Add the snarl to its parent chain parent_problem.children.emplace_back(); + parent_problem.children.back().has_net_handle = true; parent_problem.children.back().net_handle = snarl_problem->containing_net_handle; parent_problem.children.back().identifier = snarl_id; parent_problem.children.back().is_seed = false; @@ -655,10 +650,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster SnarlTreeNodeProblem* chain_problem = &clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); - net_handle_t chain_handle = chain_problem->containing_net_handle; - #ifdef DEBUG_CLUSTER + net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; @@ -703,11 +697,14 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the parent is a root snarl, then remember it to cluster in the root if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); + if (chain_problem->has_parent_handle) { + clustering_problem.all_node_problems.back().containing_net_handle = 
chain_problem->parent_net_handle; + clustering_problem.all_node_problems.back().has_net_handle = true; + } } clustering_problem.root_children.emplace_back(parent_id, chain_id); } else if (!is_top_level_chain) { @@ -816,19 +813,25 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { new_parent = true; clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, - parent_id, + clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); + //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved - SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); + chain_problem = &(clustering_problem.all_node_problems.at( + clustering_problem.net_identifier_to_node_problem_index.at(chain_id))); + + if (chain_problem->has_parent_handle) { + clustering_problem.all_node_problems.back().containing_net_handle = chain_problem->parent_net_handle; + clustering_problem.all_node_problems.back().has_net_handle = true; + } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = chain_handle; + parent_problem.children.back().has_net_handle = true; + parent_problem.children.back().net_handle = chain_problem->containing_net_handle; parent_problem.children.back().identifier = chain_id; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = false; @@ 
-846,7 +849,8 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster void SnarlDistanceIndexClusterer::cluster_one_node( ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* node_problem) const { #ifdef DEBUG_CLUSTER - cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; + net_handle_t node_handle = distance_index.get_node_net_handle(id(node_problem->seed->seed->pos)); + cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_handle) << endl; #endif size_t node_length = node_problem->node_length; @@ -864,7 +868,7 @@ void SnarlDistanceIndexClusterer::cluster_one_node( #ifdef DEBUG_CLUSTER - cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; + cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_handle) << endl; bool got_left = false; bool got_right = false; @@ -920,16 +924,21 @@ void SnarlDistanceIndexClusterer::cluster_one_node( void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structures(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem1, SnarlTreeNodeProblem* child_problem2, SnarlTreeNodeProblem* parent_problem, const vector> & child_distances, bool is_root, bool first_child) const { + + assert(child_problem1->has_net_handle); + assert(child_problem2->has_net_handle); + assert(parent_problem->has_net_handle); + + net_handle_t& child_handle1 =child_problem1->containing_net_handle; + net_handle_t& child_handle2 =child_problem2->containing_net_handle; + net_handle_t& parent_handle =parent_problem->containing_net_handle; + #ifdef DEBUG_CLUSTER cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem1->containing_net_handle) << " and " << distance_index.net_handle_as_string(child_problem2->containing_net_handle) << " which are children of " << 
distance_index.net_handle_as_string(parent_problem->containing_net_handle) << endl; #endif - net_handle_t& parent_handle = parent_problem->containing_net_handle; - net_handle_t& child_handle1 = child_problem1->containing_net_handle; - net_handle_t& child_handle2 = child_problem2->containing_net_handle; - //Get the distances between the two sides of the children in the parent @@ -1397,8 +1406,6 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust << " to itself in the root" << endl; #endif - net_handle_t& handle = child_problem->containing_net_handle; - //Get the distances between the two sides of the child size_t distance_left_left = @@ -1764,6 +1771,16 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin if (!child1.is_seed || !child2.is_seed) { only_seeds = false; } + //Since the parent is a chain, the fastest way to get the handle is from the distance index so check here if we can do that + if (!chain_problem->has_net_handle) { + if (child1.has_net_handle) { + chain_problem->containing_net_handle = distance_index.get_parent(child1.net_handle); + chain_problem->has_net_handle = true; + } else if (child2.has_net_handle) { + chain_problem->containing_net_handle = distance_index.get_parent(child2.net_handle); + chain_problem->has_net_handle = true; + } + } if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet child1.chain_component = clustering_problem.all_node_problems.at( @@ -1798,14 +1815,17 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } }); - net_handle_t& chain_handle = chain_problem->containing_net_handle; - if (!chain_problem->is_trivial_chain && ! 
is_top_level_chain) { //If we need it, get the values from the distance index: //is_looping_chain, node_length, the end boundary node, and the end component //THese only get used if we need the distances to the ends of the chain chain_problem->set_chain_values(distance_index); + } else if (!chain_problem->has_net_handle) { + //If we haven't gotten the chain handle yet, then we need to get it now + //If one of the children already had a net handle, then it would have been best to get it from the distance index + //but if it doesn't have a handle yet then just get it from the zipcode + chain_problem->set_net_handle(distance_index); } @@ -1820,6 +1840,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin !chain_problem->is_trivial_chain, is_top_level_chain); #ifdef DEBUG_CLUSTER + net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; cerr << "\t with best left and right values: " << chain_problem->fragment_best_left << " " << chain_problem->fragment_best_right << endl; @@ -2154,7 +2175,6 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const size_t& read_num = current_child.seed_indices.first; const size_t& cluster_num = current_child.seed_indices.second; - net_handle_t& chain_handle = chain_problem->containing_net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* Get a bunch of distances from the current child that will be used to calculate distance @@ -2169,7 +2189,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if (!is_first_child) { //If this isn't the first child we're looking at - if (last_child.net_handle == 
current_child.net_handle) { + if (last_child.identifier == current_child.identifier) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; } else if ( last_chain_component_end == current_child_seed.payload.chain_component) { @@ -2218,7 +2238,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #endif - if (last_child.net_handle != current_child.net_handle && + if (last_child.identifier != current_child.identifier && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, chain_problem->fragment_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -2298,7 +2318,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.net_handle == current_child.net_handle ? 0 + (last_child.identifier == current_child.identifier ? 
0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload.node_length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) @@ -2339,7 +2359,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child), current_child_seed.distance_left), 1); - if (!is_first_child && last_child.net_handle == current_child.net_handle) { + if (!is_first_child && last_child.identifier == current_child.identifier) { //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node @@ -2610,7 +2630,6 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& }; - net_handle_t& chain_handle = chain_problem->containing_net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(current_child.identifier)); @@ -2664,7 +2683,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.net_handle == current_child.net_handle ? 0 + (last_child.identifier == current_child.identifier ? 
0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, child_problem.node_length)); @@ -2726,7 +2745,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e chain_problem->read_best_right = std::make_pair(std::numeric_limits::max(), std::numeric_limits::max()); - if (last_child.net_handle != current_child.net_handle && + if (last_child.identifier != current_child.identifier && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, old_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -3013,7 +3032,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro } //Keep track of all clusters on the root - SnarlTreeNodeProblem root_problem(distance_index.get_root(), ZipCodeDecoder::get_root_identifier(), clustering_problem.all_seeds->size(), + SnarlTreeNodeProblem root_problem(ZipCodeDecoder::get_root_identifier(), clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &clustering_problem.all_seeds->at(0)->front(), 0); //TODO: ikd about the seed here diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 67c231b9fac..8eecd7f2b1a 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -213,9 +213,10 @@ class SnarlDistanceIndexClusterer { //Struct to store one child, which may be a seed, node, snarl, or chain struct SnarlTreeChild { - //If the net_handle is a node, then the child is a seed, otherwise the handle - //is used to find the problem + //This may or may not be set net_handle_t net_handle; + + //Used as an identifier net_identifier_t identifier; pair seed_indices; @@ -231,6 +232,7 @@ class SnarlDistanceIndexClusterer { //For a seed, it gets set when the child is made, otherwise the first time this //child is seen when sorting bool has_chain_values; + bool 
has_net_handle; }; //The children of this snarl tree node //Initially unsorted, sort before clustering for chains @@ -291,9 +293,8 @@ class SnarlDistanceIndexClusterer { //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem( net_handle_t net, net_identifier_t id, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, + SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, const SeedCache* seed, size_t zipcode_depth) : - containing_net_handle(std::move(net)), containing_net_id(std::move(id)), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), seed(seed), @@ -302,9 +303,8 @@ class SnarlDistanceIndexClusterer { parent_net_id = ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Constructor for a node or trivial chain, used to remember information from the cache - SnarlTreeNodeProblem( net_handle_t net, net_identifier_t id, size_t read_count, size_t seed_count, bool is_reversed_in_parent, + SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, bool is_reversed_in_parent, size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : - containing_net_handle(net), containing_net_id(std::move(id)), is_reversed_in_parent(is_reversed_in_parent), node_length(node_length), @@ -328,6 +328,10 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { + if (!has_net_handle) { + containing_net_handle = seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index); + has_net_handle = true; + } node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, 
false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); @@ -342,8 +346,13 @@ class SnarlDistanceIndexClusterer { //Distance to go backward in the chain and back loop_left = SnarlDistanceIndex::sum(distance_index.get_reverse_loop_value(start_in), 2*distance_index.minimum_length(start_in)); + } - + void set_net_handle(const SnarlDistanceIndex& distance_index) { + if (!has_net_handle) { + has_net_handle = true; + containing_net_handle = seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index); + } } }; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index d8121c93fad..952ba7024d5 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1884,9 +1884,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); //Root node length std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -1894,12 +1891,10 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; payload.is_trivial_chain = true; payload.is_reversed = false; - payload.parent_handle = distance_index.get_root(); payload.parent_type = ZipCode::ROOT_NODE; } else if (decoder[max_depth() - 1].first) { //If the parent is a chain - payload.node_handle = distance_index.get_node_net_handle(id); payload.parent_is_chain = true; payload.parent_is_root = false; @@ -1914,12 +1909,10 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (decoder_length() == 2) { //If the node is a child of the root chain - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } else { - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; } @@ -1945,11 +1938,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } else { //If the node is a child of a snarl - payload.node_handle = distance_index.get_node_net_handle(id); - payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE, - distance_index.get_node_record_offset(payload.node_handle)); payload.parent_is_chain = false; payload.parent_is_root = decoder_length() == 2; payload.is_trivial_chain = true; @@ -1963,11 +1951,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_handle = payload.parent_handle; - payload.parent_handle = 
distance_index.get_net_handle_from_values( - distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { zip_index = decoder[max_depth()-1].second; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index aa936bd7d09..60bdf8900fd 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -400,8 +400,6 @@ struct MIPayload { constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); - net_handle_t node_handle; - net_handle_t parent_handle; net_identifier_t identifier; size_t node_length = std::numeric_limits::max(); @@ -409,7 +407,6 @@ struct MIPayload { size_t chain_component = 0; //Depth according to the distance index size_t parent_depth = 0; - size_t parent_record_offset = 0; ZipCode::code_type_t parent_type = ZipCode::EMPTY; bool is_reversed = false; From 985ea328bd23807b9f4f710d5ee2eda7ec53336a Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 25 Jul 2024 19:19:28 +0200 Subject: [PATCH 057/124] Fix identifiers but its not working --- src/snarl_seed_clusterer.cpp | 44 +++++++++++++++++++----------------- src/zip_code.cpp | 8 +++++-- src/zip_code.hpp | 5 +++- 3 files changed, 33 insertions(+), 24 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index e15553d13eb..4613de8bbba 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2,7 +2,7 @@ #include -//#define DEBUG_CLUSTER +#define DEBUG_CLUSTER //#define debug_distances //#define EXHAUSTIVE_CLUSTER_CHECK @@ -379,11 +379,6 @@ cerr << "Add all seeds to nodes: " << endl; cerr << "Chain compoentn: " << chain_component << " was " << seed.payload.chain_component << endl; assert(seed.payload.chain_component == chain_component); - if (!distance_index.is_root(seed.payload.parent_handle)) { - cerr << "Parent should be " << 
distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; - cerr <<" Is actually " << distance_index.net_handle_as_string( distance_index.start_end_traversal_of(seed.payload.parent_handle)) << endl; - assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); - } #endif if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) @@ -392,20 +387,20 @@ cerr << "Add all seeds to nodes: " << endl; //Also update the zipcode on the seed #ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(handle) << endl; //assert(prefix_sum == (is_trivial_chain ? std::numeric_limits::max() - // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); - cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; - assert(seed.payload.node_length == distance_index.minimum_length(seed.payload.node_handle)); - cerr << "Reversed in parent? " << distance_index.net_handle_as_string(seed.payload.node_handle) << " " << distance_index.net_handle_as_string(seed.payload.parent_handle) << " " << seed.payload.is_reversed << endl; + // : distance_index.get_prefix_sum_value(handle))); + cerr << "Node length should be " << distance_index.minimum_length(handle) << " actually " << seed.payload.node_length << endl; + assert(seed.payload.node_length == distance_index.minimum_length(handle)); + cerr << "Reversed in parent? 
" << distance_index.net_handle_as_string(handle) << " " << distance_index.net_handle_as_string(parent_handle) << " " << seed.payload.is_reversed << endl; cerr << "is trivial? " << seed.payload.is_trivial_chain << endl; - if (!distance_index.is_root(seed.payload.parent_handle)) { - cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(seed.payload.parent_handle)) << endl; + if (!distance_index.is_root(parent_handle)) { + cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(parent_handle)) << endl; } - cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; + cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(parent_handle) << endl; //assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? distance_index.is_reversed_in_parent(seed.payload.parent_handle) - // : distance_index.is_reversed_in_parent(seed.payload.node_handle))); + // : distance_index.is_reversed_in_parent(handle))); #endif //Add the parent chain or trivial chain @@ -437,7 +432,7 @@ cerr << "Add all seeds to nodes: " << endl; new_parent = true; } #ifdef DEBUG_CLUSTER - assert(seed.payload.parent_depth == distance_index.get_depth(seed.payload.parent_handle)); + assert(seed.payload.parent_depth == distance_index.get_depth(parent_handle)); #endif @@ -477,12 +472,12 @@ cerr << "Add all seeds to nodes: " << endl; } else { //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node - - +cerr <<" Check identifier " << seed.payload.identifier << endl; //Create a new SnarlTreeNodeProblem for this node bool new_node = false; if (clustering_problem.net_identifier_to_node_problem_index.count(seed.payload.identifier) == 0) { + cerr << "Mak ea new node problem" << endl; new_node = true; clustering_problem.net_identifier_to_node_problem_index.emplace(seed.payload.identifier, clustering_problem.all_node_problems.size()); @@ 
-493,9 +488,11 @@ cerr << "Add all seeds to nodes: " << endl; std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); } + cerr << "Ad distances" << endl; seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); + cerr << "Add nodes node problem" << endl; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(seed.payload.identifier)); @@ -629,8 +626,8 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster } #ifdef DEBUG_CLUSTER - cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->net_handle) << " as a child of " - << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->net_handle)) << endl; + cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << " as a child of " + << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->containing_net_handle)) << endl; #endif } @@ -655,7 +652,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { - cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; + if (x.has_net_handle) { + cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; + } else { + cerr << "\t(didn't store the net handle)" << endl; + } } #endif @@ -1892,6 +1893,7 @@ void 
SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin #ifdef DEBUG_CLUSTER + net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster chain " << distance_index.net_handle_as_string(chain_handle) << endl; cerr << "\t chain has " << chain_problem->children.size() << " children" << endl; #endif diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 952ba7024d5..afe42fda2ed 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1871,6 +1871,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; payload.identifier = get_identifier(max_depth()); + cerr << "Found identifier " << payload.identifier << endl; if (decoder_length() == 1) { //If the root-level structure is a node @@ -2016,12 +2017,13 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { + cerr << "Get identifier at ddepth " << depth << endl; if (depth == std::numeric_limits::max()) { //This is equivalent to distance_index.get_root() return "ROOT"; } string result = ""; - for (size_t d = 0 ; d < depth ; d++) { + for (size_t d = 0 ; d <= depth ; d++) { result += (decoder[d].first ? 
"1" : "0"); if (d == 0) { //Root structure @@ -2030,6 +2032,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); + cerr << "Add identifier " << zip_value << endl; } } else if (decoder[d].first) { //is_chain so could be a chain or a node @@ -2062,7 +2065,8 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { if (d < std::min(depth, max_depth())) { result += "."; } - + + cerr << "At depth " << d << " result is " << result << endl; } if (depth > max_depth()) { //If this was node that's in a trivial chain diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 60bdf8900fd..67f98d1ca02 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -376,10 +376,13 @@ class ZipCodeDecoder { }; +//How to hash a net_identifier_t template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { - return wang_hash()(id); + cerr <<" Get hash of " << id << endl; + string id_string = static_cast(id); + return std::hash{}(id_string); } }; From 9b36204824338920be7070107ca85b2d281ec8e4 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 26 Jul 2024 10:22:23 +0200 Subject: [PATCH 058/124] Fix parent --- src/snarl_seed_clusterer.hpp | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 8eecd7f2b1a..40d9484d2bd 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -300,7 +300,7 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); - parent_net_id = ZipCodeDecoder::get_parent_identifier(containing_net_id); + parent_net_id =containing_net_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Constructor for a node or trivial chain, used to remember information from the cache SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, bool is_reversed_in_parent, @@ -315,7 +315,7 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); - parent_net_id = ZipCodeDecoder::get_parent_identifier(containing_net_id); + parent_net_id = containing_net_id == "ROOT" ? "ROOT" : ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Set the values needed to cluster a chain @@ -328,10 +328,6 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - if (!has_net_handle) { - containing_net_handle = seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index); - has_net_handle = true; - } node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); From d23653a6d84dba36524f04df4e867986e50816c4 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 26 Jul 2024 11:35:04 +0200 Subject: [PATCH 059/124] Fix getting the node identifier --- src/zip_code.cpp | 12 ++++++------ src/zip_code.hpp | 1 - 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index afe42fda2ed..77d179c3aec 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1870,7 +1870,6 @@ void ZipCodeCollection::deserialize(std::istream& in) { } MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; - payload.identifier = 
get_identifier(max_depth()); cerr << "Found identifier " << payload.identifier << endl; if (decoder_length() == 1) { @@ -1893,6 +1892,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.is_trivial_chain = true; payload.is_reversed = false; payload.parent_type = ZipCode::ROOT_NODE; + payload.identifier = get_identifier(max_depth()); } else if (decoder[max_depth() - 1].first) { //If the parent is a chain @@ -1934,7 +1934,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.chain_component = zip_value; - + payload.identifier = get_identifier(max_depth()); } else { //If the node is a child of a snarl @@ -2000,6 +2000,8 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //Since the node is technically in a trivial chain, get the node identifier not the chain + payload.identifier = get_identifier(max_depth()+1); //Get the rest as default values } @@ -2017,13 +2019,13 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { - cerr << "Get identifier at ddepth " << depth << endl; if (depth == std::numeric_limits::max()) { //This is equivalent to distance_index.get_root() return "ROOT"; } string result = ""; - for (size_t d = 0 ; d <= depth ; d++) { + for (size_t d = 0 ; d <= std::min(max_depth(), depth) ; d++) { + cerr << " at depth " << d << " with max depth " << max_depth() << " and dep th " << depth << endl; result += (decoder[d].first ? 
"1" : "0"); if (d == 0) { //Root structure @@ -2032,7 +2034,6 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); - cerr << "Add identifier " << zip_value << endl; } } else if (decoder[d].first) { //is_chain so could be a chain or a node @@ -2066,7 +2067,6 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { result += "."; } - cerr << "At depth " << d << " result is " << result << endl; } if (depth > max_depth()) { //If this was node that's in a trivial chain diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 67f98d1ca02..a1d9786bac8 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -380,7 +380,6 @@ class ZipCodeDecoder { template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { - cerr <<" Get hash of " << id << endl; string id_string = static_cast(id); return std::hash{}(id_string); } From 6e0974357af3ec4f03fc0ce1afb471f1ef2e4897 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 26 Jul 2024 14:43:43 +0200 Subject: [PATCH 060/124] Fix getting handle to trivial chain --- src/zip_code.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 77d179c3aec..d597aac2570 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -709,12 +709,18 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, //If this is a chain/node net_handle_t n = distance_index->get_node_net_handle(id); - for (size_t d = max_depth() ; d > depth ; d--) { - n = distance_index->get_parent(n); - if (distance_index->is_trivial_chain(n)){ + size_t max = max_depth(); + if (max > 1 && decoder[max].first && !decoder[max-1].first) { + //If the last thing is a trivial chain + if (depth == max+1) { + return n; + } else { n = distance_index->get_parent(n); } 
} + for (size_t d = max ; d > depth ; d--) { + n = distance_index->get_parent(n); + } return n; } else { //If this is a snarl From e433831015ddb830d7039caca1a1780338753321 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 26 Jul 2024 16:31:32 +0200 Subject: [PATCH 061/124] Include chain component in identifier --- src/zip_code.cpp | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index d597aac2570..f9449c7b5ec 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -703,7 +703,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } - return distance_index->get_handle_from_connected_component(zip_value); + return distance_index->start_end_traversal_of(distance_index->get_handle_from_connected_component(zip_value)); } else if (decoder[depth].first) { //If this is a chain/node @@ -713,7 +713,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, if (max > 1 && decoder[max].first && !decoder[max-1].first) { //If the last thing is a trivial chain if (depth == max+1) { - return n; + return distance_index->start_end_traversal_of(n); } else { n = distance_index->get_parent(n); } @@ -721,7 +721,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, for (size_t d = max ; d > depth ; d--) { n = distance_index->get_parent(n); } - return n; + return distance_index->start_end_traversal_of(n); } else { //If this is a snarl @@ -741,7 +741,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, n = distance_index->get_parent(n); } } - return n; + return distance_index->start_end_traversal_of(n); } else { //Irregular snarl @@ -1876,7 +1876,6 @@ void ZipCodeCollection::deserialize(std::istream& in) { } MIPayload 
ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; - cerr << "Found identifier " << payload.identifier << endl; if (decoder_length() == 1) { //If the root-level structure is a node @@ -2047,9 +2046,14 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { //If the thing before this was also a chain, then it is a node size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); + if (i == ZipCode::NODE_OFFSET_OFFSET) { + result += std::to_string(zip_value); + } else if (i == ZipCode::NODE_CHAIN_COMPONENT_OFFSET) { + result += "\\"; + result += std::to_string(zip_value); + } } } else { //Otherwise it's a chain @@ -2064,9 +2068,13 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { //Definitely a snarl size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); + if (i == ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET) { + result += std::to_string(zip_value); + } else if (i == ZipCode::SNARL_CHAIN_COMPONENT_OFFSET) { + result += std::to_string(zip_value); + } } } if (d < std::min(depth, max_depth())) { From fd9a049b458f94e7918ecea1017c4ebb8dd6eb47 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 26 Jul 2024 22:27:02 +0200 Subject: [PATCH 062/124] Fix getting net handle for child of root snarl --- src/zip_code.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index f9449c7b5ec..01a0f9f7079 100644 
--- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -710,7 +710,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, net_handle_t n = distance_index->get_node_net_handle(id); size_t max = max_depth(); - if (max > 1 && decoder[max].first && !decoder[max-1].first) { + if (max >= 1 && decoder[max].first && !decoder[max-1].first) { //If the last thing is a trivial chain if (depth == max+1) { return distance_index->start_end_traversal_of(n); @@ -1958,6 +1958,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.parent_type = ZipCode::ROOT_SNARL; + payload.identifier = get_identifier(max_depth()); } else { zip_index = decoder[max_depth()-1].second; //is_regular @@ -1995,6 +1996,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } else { payload.is_reversed = false; } + payload.identifier = get_identifier(max_depth()+1); } //We should be at the node/trivial chain now @@ -2005,8 +2007,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - //Since the node is technically in a trivial chain, get the node identifier not the chain - payload.identifier = get_identifier(max_depth()+1); + //This will be the node of the trivial chain //Get the rest as default values } From 27f47cfaca5b5ca1477beb955fd2f4fc6c81e42f Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 27 Jul 2024 20:35:38 +0200 Subject: [PATCH 063/124] Fix clustering the root snarl --- src/snarl_seed_clusterer.cpp | 127 ++++++++++++++++---------- src/unittest/snarl_seed_clusterer.cpp | 84 +++++++++++++---- src/unittest/zip_code.cpp | 53 ++++++++++- src/zip_code.cpp | 1 - 4 files changed, 197 insertions(+), 68 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 4613de8bbba..80260d23fd3 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2,7 +2,7 @@ #include -#define DEBUG_CLUSTER +//#define DEBUG_CLUSTER //#define debug_distances //#define EXHAUSTIVE_CLUSTER_CHECK @@ -331,7 +331,7 @@ cerr << "Add all seeds to nodes: " << endl; for (size_t i = 0; i < seeds->size(); i++) { SeedCache& seed = seeds->at(i); pos_t pos = seed.seed->pos; - id_t id = get_id(pos); + id_t node_id = get_id(pos); #ifdef DEBUG_CLUSTER @@ -355,12 +355,13 @@ cerr << "Add all seeds to nodes: " << endl; const MIPayload& payload = seed.payload; #ifdef DEBUG_CLUSTER - //cerr << "Using cached values for node " << id << ": " +cerr << "Node has identifier " << seed.payload.identifier << endl; + //cerr << "Using cached values for node " << node_id << ": " // << ", " << seed.payload.node_length // << ", " << seed.payload.prefix_sum // << ", " << seed.payload.chain_component << endl; - net_handle_t handle = distance_index.get_node_net_handle(id); + net_handle_t handle = distance_index.get_node_net_handle(node_id); net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << 
distance_index.net_handle_as_string(parent_handle) << endl; @@ -454,6 +455,7 @@ cerr << "Add all seeds to nodes: " << endl; //Add this seed to its parent cluster SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); + parent_problem.children.back().identifier = seed.payload.identifier; parent_problem.children.back().has_net_handle = false; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; @@ -472,12 +474,10 @@ cerr << "Add all seeds to nodes: " << endl; } else { //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node -cerr <<" Check identifier " << seed.payload.identifier << endl; //Create a new SnarlTreeNodeProblem for this node bool new_node = false; if (clustering_problem.net_identifier_to_node_problem_index.count(seed.payload.identifier) == 0) { - cerr << "Mak ea new node problem" << endl; new_node = true; clustering_problem.net_identifier_to_node_problem_index.emplace(seed.payload.identifier, clustering_problem.all_node_problems.size()); @@ -488,15 +488,14 @@ cerr <<" Check identifier " << seed.payload.identifier << endl; std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); } - cerr << "Ad distances" << endl; seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); - cerr << "Add nodes node problem" << endl; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(seed.payload.identifier)); node_problem.children.emplace_back(); + node_problem.children.back().identifier = seed.payload.identifier; node_problem.children.back().has_net_handle = false; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; @@ -527,10 +526,11 @@ cerr <<" Check identifier " << seed.payload.identifier << endl; //Cluster the node. Give it the range in node_to_seeds, which is from seed_range_start //to either current_iterator (if current_iterator is a different node), or the end of node_to_seeds //if current_iterator is the last thing in the list and the same node - cluster_one_node(clustering_problem, &node_problem); + cluster_one_node(clustering_problem, &node_problem); net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(seed->payload.identifier); + if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { @@ -587,11 +587,11 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster if (reachable_left || reachable_right) { - //Make a new SnarlTreeNodeProblem for the parent net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(snarl_id); bool new_parent = false; if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { + //Make a new SnarlTreeNodeProblem for the parent new_parent = true; clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); @@ -613,6 +613,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Add the snarl to its parent 
chain parent_problem.children.emplace_back(); + parent_problem.children.back().identifier = snarl_problem->containing_net_id; parent_problem.children.back().has_net_handle = true; parent_problem.children.back().net_handle = snarl_problem->containing_net_handle; parent_problem.children.back().identifier = snarl_id; @@ -623,13 +624,11 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster clustering_problem.parent_chains->emplace_back(parent_id); } - } - #ifdef DEBUG_CLUSTER - cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << " as a child of " - << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->containing_net_handle)) << endl; + cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << " as a child of " + << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->containing_net_handle)) << endl; #endif - + } } clustering_problem.parent_snarls.clear(); } @@ -666,9 +665,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); - net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(chain_id); + net_identifier_t parent_id = chain_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(chain_id); #ifdef DEBUG_CLUSTER - cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; + cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << " with id " << parent_id << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; //assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); @@ -720,11 +719,13 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() ? false : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + chain_problem->distance_start_left = snarl_child_is_rev ? 
chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); @@ -746,6 +747,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + cerr << "Is reversed? " << snarl_child_is_rev << endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " @@ -831,6 +833,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); parent_problem.children.emplace_back(); + parent_problem.children.back().identifier = chain_problem->containing_net_id; parent_problem.children.back().has_net_handle = true; parent_problem.children.back().net_handle = chain_problem->containing_net_handle; parent_problem.children.back().identifier = chain_id; @@ -850,7 +853,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& 
cluster void SnarlDistanceIndexClusterer::cluster_one_node( ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* node_problem) const { #ifdef DEBUG_CLUSTER - net_handle_t node_handle = distance_index.get_node_net_handle(id(node_problem->seed->seed->pos)); + net_handle_t node_handle = node_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(node_problem->seed->seed->pos), node_problem->zipcode_depth, &distance_index); cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_handle) << endl; #endif @@ -926,9 +929,19 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure SnarlTreeNodeProblem* child_problem2, SnarlTreeNodeProblem* parent_problem, const vector> & child_distances, bool is_root, bool first_child) const { - assert(child_problem1->has_net_handle); - assert(child_problem2->has_net_handle); - assert(parent_problem->has_net_handle); + if (!child_problem1->has_net_handle) { + child_problem1->set_net_handle(distance_index); + } + if (!child_problem2->has_net_handle) { + child_problem2->set_net_handle(distance_index); + } + //I'm pretty sure this will only not have been set for a root snarl, in which case its fastest to get it from the zipcode + //instead of distance_index.get_parent + if (!parent_problem->has_net_handle) { + parent_problem->set_net_handle(distance_index); + + } + net_handle_t& child_handle1 =child_problem1->containing_net_handle; net_handle_t& child_handle2 =child_problem2->containing_net_handle; @@ -938,6 +951,9 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem1->containing_net_handle) << " and " << distance_index.net_handle_as_string(child_problem2->containing_net_handle) << " which are children of " << distance_index.net_handle_as_string(parent_problem->containing_net_handle) << endl; + cerr << "parent should be " << 
distance_index.net_handle_as_string(distance_index.get_parent(child_problem1->containing_net_handle )) << endl; + assert(distance_index.start_end_traversal_of(distance_index.get_parent(child_problem1->containing_net_handle )) == distance_index.start_end_traversal_of(parent_problem->containing_net_handle)); + assert(distance_index.start_end_traversal_of(distance_index.get_parent(child_problem2->containing_net_handle )) == distance_index.start_end_traversal_of(parent_problem->containing_net_handle)); #endif @@ -1403,7 +1419,10 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem) const { #ifdef DEBUG_CLUSTER - cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->containing_net_handle) + net_handle_t child_handle = child_problem->zipcode_depth == child_problem->seed->seed->zipcode_decoder->max_depth() + ? distance_index.get_node_net_handle(id(child_problem->seed->seed->pos)) + : child_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(child_problem->seed->seed->pos), child_problem->zipcode_depth, &distance_index); + cerr << "\tCompare " << distance_index.net_handle_as_string(child_handle) << " to itself in the root" << endl; #endif @@ -1554,6 +1573,9 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the clusters on this snarl, assumes that all of the snarls children have been clustered already. 
+ if (!snarl_problem->has_net_handle) { + snarl_problem->set_net_handle(distance_index); + } snarl_problem->set_snarl_values(distance_index); net_handle_t& snarl_handle = snarl_problem->containing_net_handle; @@ -1775,10 +1797,10 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //Since the parent is a chain, the fastest way to get the handle is from the distance index so check here if we can do that if (!chain_problem->has_net_handle) { if (child1.has_net_handle) { - chain_problem->containing_net_handle = distance_index.get_parent(child1.net_handle); + chain_problem->containing_net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(child1.net_handle)); chain_problem->has_net_handle = true; } else if (child2.has_net_handle) { - chain_problem->containing_net_handle = distance_index.get_parent(child2.net_handle); + chain_problem->containing_net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(child2.net_handle)); chain_problem->has_net_handle = true; } } @@ -1816,17 +1838,17 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } }); - + if (!chain_problem->has_net_handle) { + //If we haven't gotten the chain handle yet, then we need to get it now + //If one of the children already had a net handle, then it would have been best to get it from the distance index + //but if it doesn't have a handle yet then just get it from the zipcode + chain_problem->set_net_handle(distance_index); + } if (!chain_problem->is_trivial_chain && ! 
is_top_level_chain) { //If we need it, get the values from the distance index: //is_looping_chain, node_length, the end boundary node, and the end component //THese only get used if we need the distances to the ends of the chain chain_problem->set_chain_values(distance_index); - } else if (!chain_problem->has_net_handle) { - //If we haven't gotten the chain handle yet, then we need to get it now - //If one of the children already had a net handle, then it would have been best to get it from the distance index - //but if it doesn't have a handle yet then just get it from the zipcode - chain_problem->set_net_handle(distance_index); } @@ -2185,6 +2207,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #ifdef DEBUG_CLUSTER cerr << "At child seed " << current_child_seed.seed->pos << endl; + cerr << "Component: " << current_child_seed.payload.chain_component << endl; #endif //The distance from the right side of the last child to the left side of this child //(relative to the orientation of the chain @@ -3033,10 +3056,6 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro return; } - //Keep track of all clusters on the root - SnarlTreeNodeProblem root_problem(ZipCodeDecoder::get_root_identifier(), clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - &clustering_problem.all_seeds->at(0)->front(), 0); //TODO: ikd about the seed here //Remember old distances @@ -3064,10 +3083,12 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro } if (current_parent != parent || root_child_i == clustering_problem.root_children.size()-1) { #ifdef DEBUG_CLUSTER - cerr << "Clustering root snarl with " << children.size() << " chidlren" << endl; + cerr << "Clustering root snarl " << parent << " with " << children.size() << " chidlren" << endl; #endif if (children.size() > 0) { + //Keep track of all clusters on the root + SnarlTreeNodeProblem* 
root_problem = &clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent)); for (size_t i = 0; i < children.size() ; i++) { //Go through each child node of the netgraph @@ -3090,7 +3111,7 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro compare_and_combine_cluster_on_child_structures(clustering_problem, child_problem_i, - child_problem_j, &root_problem, child_distances, true, false); + child_problem_j, root_problem, child_distances, true, false); } } @@ -3105,22 +3126,25 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro cerr << "\tFound clusters on the root" << endl; for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { cerr << "\t for read num " << read_num << endl; - for (pair c : root_problem.read_cluster_heads) { - if (c.first == read_num) { - cerr << "\t\t" << c.first << ":"<at(c.first)->size() ; x++) { - if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; + for (std::pair& parent_child_pair : clustering_problem.root_children) { + auto& root_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_child_pair.first)); + for (pair c : root_problem.read_cluster_heads) { + if (c.first == read_num) { + cerr << "\t\t" << c.first << ":"<at(c.first)->size() ; x++) { + if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; + } } + cerr << endl; } - cerr << endl; } } } - for (pair group_id : root_problem.read_cluster_heads) { - assert (group_id.second == clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); - } + //for (pair group_id : root_problem.read_cluster_heads) { + // assert (group_id.second == 
clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); + //} #endif } @@ -3134,7 +3158,10 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr return; } #ifdef DEBUG_CLUSTER - cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; + net_handle_t node_handle = node_problem->zipcode_depth == node_problem->seed->seed->zipcode_decoder->max_depth() + ? distance_index.get_node_net_handle(id(node_problem->seed->seed->pos)) + : node_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(node_problem->seed->seed->pos), node_problem->zipcode_depth, &distance_index); + cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_handle) << endl; cerr << "\t with node length " << structure_length << endl; #endif @@ -3208,7 +3235,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr } #ifdef DEBUG_CLUSTER - cerr << "\t" << distance_index.net_handle_as_string(node_problem->containing_net_handle) << " is shorter than the distance limit so just one cluster" << endl; + cerr << "\t" << distance_index.net_handle_as_string(node_handle) << " is shorter than the distance limit so just one cluster" << endl; #endif return; diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index 6ef11d3426f..63e02a7551a 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -728,7 +728,7 @@ namespace unittest { SnarlDistanceIndex dist_index; fill_in_distance_index(&dist_index, &graph, &snarl_finder); SnarlDistanceIndexClusterer clusterer(dist_index, &graph); - + //graph.to_dot(cerr); SECTION( "Three clusters going across snarl" ) { @@ -798,7 +798,7 @@ namespace unittest { } } TEST_CASE( "Top-level looping chain", - "[cluster][bug]" ) { + 
"[cluster]" ) { VG graph; Node* n1 = graph.create_node("AGCGTGTAGAGAA"); @@ -823,8 +823,6 @@ namespace unittest { fill_in_distance_index(&dist_index, &graph, &snarl_finder); SnarlDistanceIndexClusterer clusterer(dist_index, &graph); - ofstream out ("bug_graph.vg"); - graph.serialize(out); SECTION( "Two clusters" ) { @@ -1674,7 +1672,7 @@ namespace unittest { REQUIRE( clusters.size() == 1); } } - TEST_CASE( "Loop on first node in a top-level chain","[cluster]" ) { + TEST_CASE( "Loop on first node in a top-level chain","[cluster][bug]" ) { VG graph; Node* n1 = graph.create_node("GCA"); @@ -1702,6 +1700,10 @@ namespace unittest { IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; fill_in_distance_index(&dist_index, &graph, &snarl_finder); + ofstream out ("testGraph.hg"); + graph.serialize(out); + + SnarlDistanceIndexClusterer clusterer(dist_index, &graph); @@ -3224,13 +3226,31 @@ namespace unittest { vector> pos_ts(2); - pos_ts[0].emplace_back(6, false, 12); - pos_ts[0].emplace_back(9, true, 0); - pos_ts[0].emplace_back(11, true, 2); - pos_ts[1].emplace_back(7, false,0); - pos_ts[1].emplace_back(11,false, 5); - pos_ts[1].emplace_back(8,false, 9); - pos_ts[1].emplace_back(9,true, 0); + pos_ts[0].emplace_back(1, false, 57); + pos_ts[0].emplace_back(1, true, 15); + pos_ts[0].emplace_back(2, false, 25); + pos_ts[0].emplace_back(1, false, 36); + pos_ts[0].emplace_back(5, true, 16); + pos_ts[0].emplace_back(1, false, 46); + pos_ts[0].emplace_back(2, true, 21); + pos_ts[0].emplace_back(1, true, 10); + + pos_ts[1].emplace_back(2, false, 0); + pos_ts[1].emplace_back(2, true, 2); + pos_ts[1].emplace_back(6, true, 24); + pos_ts[1].emplace_back(6, true, 44); + pos_ts[1].emplace_back(1, false, 42); + pos_ts[1].emplace_back(2, false, 19); + pos_ts[1].emplace_back(2, false, 23); + pos_ts[1].emplace_back(5, true, 19); + pos_ts[1].emplace_back(4, false, 73); + pos_ts[1].emplace_back(4, true, 57); + pos_ts[1].emplace_back(3, false, 23); + 
pos_ts[1].emplace_back(6, true, 10); + pos_ts[1].emplace_back(5, false, 19); + + + vector> seeds(2); for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { for (pos_t pos : pos_ts[read_num]) { @@ -3265,7 +3285,7 @@ namespace unittest { IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; - fill_in_distance_index(&dist_index, &graph, &snarl_finder, 5); + fill_in_distance_index(&dist_index, &graph, &snarl_finder); @@ -3296,9 +3316,9 @@ namespace unittest { handle_t node1 = graph.get_handle(nodeID1); offset_t offset1 = uniform_int_distribution(0,graph.get_length(node1) - 1)(generator); + bool rev = uniform_int_distribution(0,1)(generator) == 0; - pos_t pos = make_pos_t(nodeID1, - uniform_int_distribution(0,1)(generator) == 0,offset1 ); + pos_t pos = make_pos_t(nodeID1, rev,offset1 ); @@ -3354,12 +3374,20 @@ namespace unittest { if ( dist != -1 && dist <= read_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); + + cerr << "Failed with positions" << endl; + + for (size_t read = 0 ; read < 2 ; read ++) { + cerr << "read: " << read << endl; + for (auto& seed : all_seeds[i]) { + cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << endl; + } + } cerr << "These should have been in the same read cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; REQUIRE(false); } - } } } @@ -3381,6 +3409,14 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); + cerr << "Failed with positions" << endl; + + for (size_t read = 0 ; read < 2 ; read ++) { + cerr << "read: " << read << endl; + for (auto& seed : all_seeds[i]) { + cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << endl; + } + } cerr << "These should be different read clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; @@ -3429,6 +3465,14 @@ namespace unittest { if ( dist != -1 && dist <= fragment_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); + cerr << "Failed with positions" << endl; + + for (size_t read = 0 ; read < 2 ; read ++) { + cerr << "read: " << read << endl; + for (auto& seed : all_seeds[i]) { + cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << endl; + } + } cerr << "These should have been in the same fragment cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; @@ -3461,6 +3505,14 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); + cerr << "Failed with positions" << endl; + + for (size_t read = 0 ; read < 2 ; read ++) { + cerr << "read: " << read << endl; + for (auto& seed : all_seeds[i]) { + cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << endl; + } + } cerr << "These should be different fragment clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index da72dcbdf14..8259e81b686 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -81,6 +81,23 @@ using namespace std; distance_index) == 3); } + SECTION("get net handle") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + ZipCodeDecoder decoder(&zipcode); + net_handle_t n = distance_index.get_node_net_handle(n1->id()); + net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); + for (int i = decoder.max_depth()+1 ; i >= 0 ; --i) { + assert(distance_index.start_end_traversal_of(n) == + distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n1->id(), i , &distance_index))); + if (i != 0) { + assert(decoder.get_identifier(i-1) == decoder.get_parent_identifier(id)); + n = distance_index.get_parent(n); + id = decoder.get_parent_identifier(id); + } + } + + } } TEST_CASE("Simple chain zipcode", "[zipcode]") { //Snarl 1-3, snarl 3-6 @@ -1312,7 +1329,7 @@ using namespace std; } } - TEST_CASE("Top-level snarl zipcode", "[zipcode]") { + TEST_CASE("Top-level snarl zipcode", "[zipcode][test]") { VG graph; @@ -1564,6 +1581,40 @@ using namespace std; REQUIRE(zipcode == decoded); }; } + SECTION("get net handle node 1") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + ZipCodeDecoder decoder(&zipcode); + net_handle_t n = distance_index.get_node_net_handle(n1->id()); + net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); + for (int i = decoder.max_depth()+1 ; i >= 0 ; --i) { + assert(distance_index.start_end_traversal_of(n) == + distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n1->id(), i , &distance_index))); + if (i != 0) { + assert(decoder.get_identifier(i-1) == 
decoder.get_parent_identifier(id)); + n = distance_index.get_parent(n); + id = decoder.get_parent_identifier(id); + } + } + + } + SECTION("get net handle") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + ZipCodeDecoder decoder(&zipcode); + net_handle_t n = distance_index.get_node_net_handle(n4->id()); + net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); + for (int i = decoder.max_depth() +1 ; i >= 0 ; --i) { + assert(distance_index.start_end_traversal_of(n) == + distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n4->id(), i , &distance_index))); + if (i != 0) { + assert(decoder.get_identifier(i-1) == decoder.get_parent_identifier(id)); + n = distance_index.get_parent(n); + id = decoder.get_parent_identifier(id); + } + } + + } } TEST_CASE("Top-level chain zipcode", "[zipcode]") { diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 01a0f9f7079..a284425372c 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2031,7 +2031,6 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { } string result = ""; for (size_t d = 0 ; d <= std::min(max_depth(), depth) ; d++) { - cerr << " at depth " << d << " with max depth " << max_depth() << " and dep th " << depth << endl; result += (decoder[d].first ? 
"1" : "0"); if (d == 0) { //Root structure From 7e74771b853752d5203d70fe394aef6b9338e2f3 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 13:40:25 +0200 Subject: [PATCH 064/124] Get chain sorting values for snarls earlier and sort properly for two seeds --- src/snarl_seed_clusterer.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 80260d23fd3..03d2c491935 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -618,7 +618,10 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster parent_problem.children.back().net_handle = snarl_problem->containing_net_handle; parent_problem.children.back().identifier = snarl_id; parent_problem.children.back().is_seed = false; - parent_problem.children.back().has_chain_values = false; + parent_problem.children.back().chain_component = snarl_problem->chain_component_start; + parent_problem.children.back().prefix_sum = snarl_problem->prefix_sum_value; + + parent_problem.children.back().has_chain_values = true; if (new_parent) { //And the parent chain to the things to be clustered next clustering_problem.parent_chains->emplace_back(parent_id); @@ -1806,10 +1809,12 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet + //TODO: I think this should never happen child1.chain_component = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).chain_component_start; child1.prefix_sum = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).prefix_sum_value; + child1.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet @@ -1817,10 +1822,11 
@@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).chain_component_start; child2.prefix_sum = clustering_problem.all_node_problems.at( clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).prefix_sum_value; + child2.has_chain_values = true; } if (child1.chain_component != child2.chain_component) { return child1.chain_component < child2.chain_component; - } else if (child1.prefix_sum == child2.prefix_sum) { + } else if (child1.prefix_sum == child2.prefix_sum && !(child1.is_seed && child2.is_seed)) { //Get the prefix sum values not including the offset in the positions size_t prefix_sum1 = child1.is_seed ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).payload.prefix_sum From a4eadd3723658e3737f074f76b2ea5121e59356a Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 15:13:04 +0200 Subject: [PATCH 065/124] Get parent from children --- src/snarl_seed_clusterer.cpp | 39 ++++++++++++++++++++++-------------- src/zip_code.cpp | 9 ++++++++- src/zip_code.hpp | 2 +- 3 files changed, 33 insertions(+), 17 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 03d2c491935..1c2cdba78af 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -620,6 +620,9 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster parent_problem.children.back().is_seed = false; parent_problem.children.back().chain_component = snarl_problem->chain_component_start; parent_problem.children.back().prefix_sum = snarl_problem->prefix_sum_value; + if (snarl_problem->has_parent_handle && ! 
parent_problem.has_net_handle) { + parent_problem.containing_net_handle = snarl_problem->parent_net_handle; + } parent_problem.children.back().has_chain_values = true; if (new_parent) { @@ -651,7 +654,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); #ifdef DEBUG_CLUSTER - net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); + net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index, &chain_problem->containing_net_handle); cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { if (x.has_net_handle) { @@ -663,19 +666,6 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif - net_handle_t parent = chain_problem->has_parent_handle - ? chain_problem->parent_net_handle - : (chain_problem->zipcode_depth == 0 - ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); - net_identifier_t parent_id = chain_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(chain_id); -#ifdef DEBUG_CLUSTER - cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << " with id " << parent_id << endl; - if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { - cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - //assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); - } -#endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? ZipCode::EMPTY : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); @@ -692,6 +682,19 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); + net_handle_t parent = chain_problem->has_parent_handle + ? chain_problem->parent_net_handle + : (chain_problem->zipcode_depth == 0 + ? distance_index.get_root() + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index, &chain_problem->containing_net_handle))); + net_identifier_t parent_id = chain_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(chain_id); +#ifdef DEBUG_CLUSTER + cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << " with id " << parent_id << endl; + if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { + cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; + //assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); + } +#endif //Add the chain to its parent if (is_root) { @@ -842,6 +845,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster parent_problem.children.back().identifier = chain_id; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = false; + if (chain_problem->has_parent_handle && ! parent_problem.has_net_handle) { + parent_problem.containing_net_handle = chain_problem->parent_net_handle; + } if (new_parent) { @@ -941,7 +947,10 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure //I'm pretty sure this will only not have been set for a root snarl, in which case its fastest to get it from the zipcode //instead of distance_index.get_parent if (!parent_problem->has_net_handle) { - parent_problem->set_net_handle(distance_index); + parent_problem->containing_net_handle = parent_problem->seed->seed->zipcode_decoder->get_net_handle_slow( + id(parent_problem->seed->seed->pos), parent_problem->zipcode_depth, + &distance_index, &child_problem1->containing_net_handle); + parent_problem->has_net_handle = true; } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a284425372c..65d682fa05b 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -693,7 +693,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist } } -net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const 
SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -707,6 +707,9 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, } else if (decoder[depth].first) { //If this is a chain/node + if (child != nullptr) { + return distance_index->get_parent(*child); + } net_handle_t n = distance_index->get_node_net_handle(id); size_t max = max_depth(); @@ -734,6 +737,10 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, if (zip_value == 1) { //If this is a regular snarl + if (child != nullptr) { + return distance_index->get_parent(*child); + } + net_handle_t n = distance_index->get_node_net_handle(id); for (size_t d = max_depth() ; d > depth ; d--) { n = distance_index->get_parent(n); diff --git a/src/zip_code.hpp b/src/zip_code.hpp index a1d9786bac8..f81d6fcf831 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -334,7 +334,7 @@ class ZipCodeDecoder { ///Get the handle of the thing at the given depth. This can be used for anything but is slow, /// even for roots and irregular/cyclic snarls. 
It's a separate function to make sure I /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child = nullptr) const; ///Get the information that was stored to get the address in the distance index ///This is the connected component number for a root structure, or the address of From bc642aa5d207fb00e39d60af60d74adbc8a807c1 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 15:24:13 +0200 Subject: [PATCH 066/124] Take out distance index when not used --- src/snarl_seed_clusterer.cpp | 4 ++-- src/zip_code.cpp | 2 +- src/zip_code.hpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 1c2cdba78af..bd0fdcca6b9 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos)); } } vector*> all_seed_caches = {&seed_caches}; @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos)); } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 65d682fa05b..ddc7fa1efa6 100644 --- a/src/zip_code.cpp +++ 
b/src/zip_code.cpp @@ -1881,7 +1881,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { +MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { MIPayload payload; if (decoder_length() == 1) { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index f81d6fcf831..333f47c2fab 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -365,7 +365,7 @@ class ZipCodeDecoder { //TODO: I want to make a struct for holding all values of a code as real values ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + MIPayload get_payload_from_zipcode(nid_t id) const; /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth /// would be the node, also include the node id From 1a32937cb55ddcf2b4746b4675b11eae0a1e087c Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 16:39:19 +0200 Subject: [PATCH 067/124] Take the distance index out of more functions that don't use it --- src/snarl_seed_clusterer.cpp | 15 +++++++++------ src/snarl_seed_clusterer.hpp | 2 +- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index bd0fdcca6b9..ed963195ca3 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -426,7 +426,7 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, + clustering_problem.seed_count_prefix_sum.back(), &seed, seed.seed->zipcode_decoder->max_depth() - 1); } @@ -538,7 +538,7 @@ cerr << "Node has identifier " << 
seed.payload.identifier << endl; clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, + clustering_problem.seed_count_prefix_sum.back(), seed, 0); if (node_problem.has_parent_handle) { clustering_problem.all_node_problems.back().containing_net_handle = node_problem.parent_net_handle; @@ -597,7 +597,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, + clustering_problem.seed_count_prefix_sum.back(), snarl_problem->seed, snarl_problem->zipcode_depth-1); if (snarl_problem->has_parent_handle) { clustering_problem.all_node_problems.back().containing_net_handle = snarl_problem->parent_net_handle; @@ -686,8 +686,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index, &chain_problem->containing_net_handle))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow( + id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, + &distance_index, &chain_problem->containing_net_handle))); net_identifier_t parent_id = chain_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(chain_id); + #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << " with id " << parent_id << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -705,7 +708,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, + clustering_problem.seed_count_prefix_sum.back(), chain_problem->seed, chain_problem->zipcode_depth-1); if (chain_problem->has_parent_handle) { clustering_problem.all_node_problems.back().containing_net_handle = chain_problem->parent_net_handle; @@ -824,7 +827,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(parent_id, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, + clustering_problem.seed_count_prefix_sum.back(), chain_problem->seed, chain_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 40d9484d2bd..7b67f16a89a 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -293,7 +293,7 @@ class SnarlDistanceIndexClusterer { //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, + SnarlTreeNodeProblem(net_identifier_t id, 
size_t read_count, size_t seed_count, const SeedCache* seed, size_t zipcode_depth) : containing_net_id(std::move(id)), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), From 2a77f667ad2e4c1e22951f7588cf96669c8b3e09 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 17:39:28 +0200 Subject: [PATCH 068/124] Use distance index for chain values less --- src/snarl_seed_clusterer.hpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 7b67f16a89a..7e1d80c5460 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -321,7 +321,15 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); - node_length = distance_index.chain_minimum_length(containing_net_handle); + if (zipcode_depth == 0 || is_looping_chain || seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true) != 0) { + node_length = distance_index.chain_minimum_length(containing_net_handle); + } else { + node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); + } + if (distance_index.chain_minimum_length(containing_net_handle) != node_length) { + cerr << "Got wrong length for chain " << distance_index.net_handle_as_string(seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index)) << " at depth " << zipcode_depth << endl; + cerr << "distances: " << distance_index.chain_minimum_length(containing_net_handle) << " and " << node_length << endl; + } chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } From d2fc0a3b2117ad4c545d63cda90355dc87e09890 Mon Sep 
17 00:00:00 2001 From: Xian Date: Mon, 29 Jul 2024 17:44:47 +0200 Subject: [PATCH 069/124] turn off debug --- src/snarl_seed_clusterer.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 7e1d80c5460..e52fc48b7f1 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -326,10 +326,6 @@ class SnarlDistanceIndexClusterer { } else { node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); } - if (distance_index.chain_minimum_length(containing_net_handle) != node_length) { - cerr << "Got wrong length for chain " << distance_index.net_handle_as_string(seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index)) << " at depth " << zipcode_depth << endl; - cerr << "distances: " << distance_index.chain_minimum_length(containing_net_handle) << " and " << node_length << endl; - } chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } From 82f714efc32c14acbbede57a494a3c9971d7eba6 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 30 Jul 2024 14:57:17 +0200 Subject: [PATCH 070/124] Get identifier at the same time as payload --- src/snarl_seed_clusterer.cpp | 2 +- src/zip_code.cpp | 292 ++++++++++++++++++++++------------- src/zip_code.hpp | 1 + 3 files changed, 186 insertions(+), 109 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index ed963195ca3..2f080a70f38 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -654,7 +654,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); #ifdef DEBUG_CLUSTER - net_handle_t chain_handle = 
chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index, &chain_problem->containing_net_handle); + net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { if (x.has_net_handle) { diff --git a/src/zip_code.cpp b/src/zip_code.cpp index ddc7fa1efa6..a8ff1570f55 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -850,6 +850,16 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna } } +bool ZipCodeDecoder::is_externally_connected (const size_t& depth) const { + assert(depth == 0); + assert(decoder[0].first); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } + return zip_value != 0; +} bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); @@ -1883,12 +1893,15 @@ void ZipCodeCollection::deserialize(std::istream& in) { } MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { MIPayload payload; + //TODO: This is basically copying what get_identifier does but it's faster to have it here instead of running through the zipcode a second time if (decoder_length() == 1) { //If the root-level structure is a node payload.parent_is_root = true; payload.parent_is_chain = true; + payload.identifier = "1"; + //Walk through the zipcode to get values size_t zip_value; size_t zip_index = decoder[0].second; @@ -1896,6 +1909,7 @@ MIPayload 
ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier+= std::to_string(zip_value); //Root node length std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -1904,120 +1918,179 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { payload.is_trivial_chain = true; payload.is_reversed = false; payload.parent_type = ZipCode::ROOT_NODE; - payload.identifier = get_identifier(max_depth()); - - } else if (decoder[max_depth() - 1].first) { - //If the parent is a chain - payload.parent_is_chain = true; - payload.parent_is_root = false; - - //Walk through the zipcode to get values - size_t zip_value; - size_t zip_index = decoder[max_depth()-1].second; - //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - if (decoder_length() == 2) { - //If the node is a child of the root chain - payload.parent_type = ZipCode::ROOT_CHAIN; - payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - } else { - payload.parent_type = ZipCode::CHAIN; - } - - //chain component count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - //Node prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - //Node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - //is_reversed - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //TODO: For top-level chains we got this from the distance index - payload.is_reversed = zip_value; - - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.chain_component = zip_value; - - payload.identifier = get_identifier(max_depth()); } else { - //If the node is a child of a snarl - - payload.parent_is_chain = false; - payload.parent_is_root = decoder_length() == 2; - payload.is_trivial_chain = true; - - - size_t zip_value; - size_t zip_index; - if (payload.parent_is_root) { - //is_chain - zip_index = decoder[0].second; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.parent_type = ZipCode::ROOT_SNARL; - payload.identifier = get_identifier(max_depth()); - } else { - zip_index = decoder[max_depth()-1].second; - //is_regular - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //If this is a non-root snarl, get as much as we can from it - payload.parent_type = ZipCode::EMPTY; - if (zip_value == 0) { - payload.parent_type = ZipCode::IRREGULAR_SNARL; - } else if (zip_value == 1) { - payload.parent_type = ZipCode::REGULAR_SNARL; + //If the node is nested + payload.identifier = ""; + for (size_t d = 0 ; d <= max_depth()-1 ; d++) { + payload.identifier += (decoder[d].first ? 
"1" : "0"); + bool at_parent = d == max_depth() - 1; + if (d == 0 && !at_parent) { + //Root structure that isn't the parent of the node + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if (i == ZipCode::ROOT_IDENTIFIER_OFFSET) { + payload.identifier += std::to_string(zip_value); + } + } } else { - payload.parent_type = ZipCode::CYCLIC_SNARL; - } + size_t zip_value; + size_t zip_index = decoder[d].second; - //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if (decoder[d].first) { + //is_chain so could be a chain or a node, but I'm not going to let it get to the node child of a chain + //in the loop- if that happens, then it will be handled if at_parent is true + if (at_parent) { + payload.parent_is_chain = true; + payload.is_trivial_chain = false; + if (decoder_length() == 2) { + //If the node is a child of the root chain + payload.parent_is_root = true; + payload.parent_type = ZipCode::ROOT_CHAIN; + //is chain for root + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + //Root identifier + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier += std::to_string(zip_value); + } else { + payload.parent_is_root = false; + payload.parent_type = ZipCode::CHAIN; + //rank in snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + //Remember the rank for the identifier + payload.identifier += std::to_string(zip_value); + } + + + //Now get the node info + payload.identifier += ".1"; + zip_index = decoder[d+1].second; + + //Node prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; + payload.identifier += std::to_string(zip_value); + + //Node length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = 0 ? zip_value == std::numeric_limits::max() : zip_value-1; + + //is_reversed + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //TODO: For top-level chains we got this from the distance index + payload.is_reversed = zip_value; + + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; + payload.identifier += "\\"; + payload.identifier += std::to_string(zip_value); + } else { + //Otherwise, this is just a chain + for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if ( i == ZipCode::CHAIN_RANK_IN_SNARL_OFFSET) { + payload.identifier += std::to_string(zip_value); + } + } + } + } else { + //Definitely a snarl + if (at_parent) { + payload.parent_is_chain = false; + payload.parent_is_root = decoder_length() == 2; + payload.is_trivial_chain = true; + + if (payload.parent_is_root) { + assert(d == 0); + //is_chain + zip_index = decoder[0].second; + + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Identifier for root snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier += std::to_string(zip_value); + + payload.parent_type = ZipCode::ROOT_SNARL; + } else { + zip_index = decoder[max_depth()-1].second; + //is_regular + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //If this is a non-root snarl, get as much as we can from it + if (zip_value == 0) { + payload.parent_type = ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + payload.parent_type = ZipCode::REGULAR_SNARL; + } else { + 
payload.parent_type = ZipCode::CYCLIC_SNARL; + } + + //Snarl prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier += std::to_string(zip_value); + + payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + + //Snarl length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Snarl child_count + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Chain component of the snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier += "\\"; + payload.identifier += std::to_string(zip_value); + //TODO: SHould use this somehow + payload.chain_component = 0; + //is_reversed for regular snarl and record offset for irregular/cyclic snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + if (payload.parent_type == ZipCode::REGULAR_SNARL) { + //Snarl is reversed + payload.is_reversed = zip_value; + payload.parent_is_chain=true; + } else { + payload.is_reversed = false; + } - payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + } - //Snarl length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Snarl child_count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //TODO: SHould use this somehow - payload.chain_component = 0; - //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //We should be at the node/trivial chain now + zip_index = decoder[max_depth()].second; + //Chain rank in snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.identifier += ".1"; + payload.identifier += std::to_string(zip_value); + if (!payload.parent_is_root) { + payload.identifier += ".n"; + } + //Chain length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - if (payload.parent_type == ZipCode::REGULAR_SNARL) { - //Snarl is reversed - payload.is_reversed = zip_value; - payload.parent_is_chain=true; - } else { - payload.is_reversed = false; + //This will be the node of the trivial chain + //Get the rest as default values + } else { + for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + if (i == ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET) { + payload.identifier += std::to_string(zip_value); + } else if (i == ZipCode::SNARL_CHAIN_COMPONENT_OFFSET) { + payload.identifier += "\\"; + payload.identifier += std::to_string(zip_value); + } + } + } + } + } + if (d < (max_depth() - 1)) { + payload.identifier += "."; } - payload.identifier = get_identifier(max_depth()+1); - } - //We should be at the node/trivial chain now - zip_index = decoder[max_depth()].second; - //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Chain length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - - //This will be the node of the trivial chain - //Get the rest as default values - } + payload.parent_depth = 0; for (size_t d = 0 ; d <= max_depth() ; d++) { auto type = get_code_type(d); @@ -2026,8 +2099,6 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { } } - - return payload; } @@ -2045,7 +2116,9 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); + if (i == ZipCode::ROOT_IDENTIFIER_OFFSET) { + result += std::to_string(zip_value); + } } } else if (decoder[d].first) { //is_chain so could be a chain or a node @@ -2068,7 +2141,9 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); + if ( i == ZipCode::CHAIN_RANK_IN_SNARL_OFFSET) { + result += std::to_string(zip_value); + } } } } else { @@ -2080,6 +2155,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { if (i == ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET) { result += std::to_string(zip_value); } else if (i == ZipCode::SNARL_CHAIN_COMPONENT_OFFSET) { + result += "\\"; result += std::to_string(zip_value); } } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 333f47c2fab..f6f6eb28305 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -346,6 +346,7 @@ class ZipCodeDecoder { /// The minimum distance from start or end of the snarl to the left or right side of the child size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; + bool is_externally_connected(const size_t& depth) const; bool 
is_externally_start_end_connected(const size_t& depth) const; bool is_externally_start_start_connected(const size_t& depth) const; bool is_externally_end_end_connected(const size_t& depth) const; From aa87f36a8aab8ee924ec264510865ae1f5eda573 Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Wed, 31 Jul 2024 00:23:30 -0700 Subject: [PATCH 071/124] Undo using identifier strings as keys and fix some bugs --- src/snarl_seed_clusterer.cpp | 471 ++++++++++++-------------- src/snarl_seed_clusterer.hpp | 57 ++-- src/unittest/snarl_seed_clusterer.cpp | 84 +---- src/unittest/zip_code.cpp | 53 +-- src/zip_code.cpp | 365 ++++++++------------ src/zip_code.hpp | 14 +- 6 files changed, 415 insertions(+), 629 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 2f080a70f38..6dbb291b647 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos)); + seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos)); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -155,7 +155,7 @@ cerr << "\tread distance limit: " << read_distance_limit << " and fragment dista //Initially populated by get_nodes(), which adds chains whose nodes contain seeds //Chains are added when the child 
snarls are found //A ClusteringProblem will have pointers to the current and next level of the snarl tree - vector> chains_by_level; + vector> chains_by_level; chains_by_level.reserve(distance_index.get_max_tree_depth()+1); @@ -314,7 +314,7 @@ for (size_t i = 1 ; i < clustering_problem.all_seeds->size() ; i++) { //chain to chains_by_level //If a node is a child of the root or of a root snarl, then add cluster it and //remember to cluster the root snarl -void SnarlDistanceIndexClusterer::get_nodes( ClusteringProblem& clustering_problem, vector>& chains_by_level) const { +void SnarlDistanceIndexClusterer::get_nodes( ClusteringProblem& clustering_problem, vector>& chains_by_level) const { #ifdef DEBUG_CLUSTER cerr << "Add all seeds to nodes: " << endl; #endif @@ -331,7 +331,7 @@ cerr << "Add all seeds to nodes: " << endl; for (size_t i = 0; i < seeds->size(); i++) { SeedCache& seed = seeds->at(i); pos_t pos = seed.seed->pos; - id_t node_id = get_id(pos); + id_t id = get_id(pos); #ifdef DEBUG_CLUSTER @@ -355,16 +355,20 @@ cerr << "Add all seeds to nodes: " << endl; const MIPayload& payload = seed.payload; #ifdef DEBUG_CLUSTER -cerr << "Node has identifier " << seed.payload.identifier << endl; - //cerr << "Using cached values for node " << node_id << ": " + //cerr << "Using cached values for node " << id << ": " + // << ", " << seed.payload.record_offset + // << ", " << seed.payload.parent_record_offset // << ", " << seed.payload.node_length // << ", " << seed.payload.prefix_sum // << ", " << seed.payload.chain_component << endl; - net_handle_t handle = distance_index.get_node_net_handle(node_id); + net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; + //assert(seed.payload.parent_record_offset == + // 
(distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) + // :distance_index.get_record_offset(parent_handle))); cerr << "Node length " << seed.payload.node_length << " should be " << distance_index.minimum_length(handle) << endl; assert(seed.payload.node_length == distance_index.minimum_length(handle)); //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) @@ -380,6 +384,11 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; cerr << "Chain compoentn: " << chain_component << " was " << seed.payload.chain_component << endl; assert(seed.payload.chain_component == chain_component); + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; + cerr <<" Is actually " << distance_index.net_handle_as_string( distance_index.start_end_traversal_of(seed.payload.parent_handle)) << endl; + assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); + } #endif if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) @@ -388,20 +397,20 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; //Also update the zipcode on the seed #ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(handle) << endl; + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; //assert(prefix_sum == (is_trivial_chain ? 
std::numeric_limits::max() - // : distance_index.get_prefix_sum_value(handle))); - cerr << "Node length should be " << distance_index.minimum_length(handle) << " actually " << seed.payload.node_length << endl; - assert(seed.payload.node_length == distance_index.minimum_length(handle)); - cerr << "Reversed in parent? " << distance_index.net_handle_as_string(handle) << " " << distance_index.net_handle_as_string(parent_handle) << " " << seed.payload.is_reversed << endl; + // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); + cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; + assert(seed.payload.node_length == distance_index.minimum_length(seed.payload.node_handle)); + cerr << "Reversed in parent? " << distance_index.net_handle_as_string(seed.payload.node_handle) << " " << distance_index.net_handle_as_string(seed.payload.parent_handle) << " " << seed.payload.is_reversed << endl; cerr << "is trivial? " << seed.payload.is_trivial_chain << endl; - if (!distance_index.is_root(parent_handle)) { - cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(parent_handle)) << endl; + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(seed.payload.parent_handle)) << endl; } - cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(parent_handle) << endl; + cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; - //assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? distance_index.is_reversed_in_parent(seed.payload.parent_handle) - // : distance_index.is_reversed_in_parent(handle))); + assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? 
distance_index.is_reversed_in_parent(seed.payload.parent_handle) + : distance_index.is_reversed_in_parent(seed.payload.node_handle))); #endif //Add the parent chain or trivial chain @@ -409,31 +418,28 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; new_parent = false; - net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(seed.payload.identifier); - if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.parent_handle) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; if (seed.payload.is_trivial_chain ) { - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, 
clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, &seed, seed.seed->zipcode_decoder->max_depth() - 1); } new_parent = true; } #ifdef DEBUG_CLUSTER - assert(seed.payload.parent_depth == distance_index.get_depth(parent_handle)); + assert(seed.payload.parent_depth == distance_index.get_depth(seed.payload.parent_handle)); #endif @@ -453,10 +459,9 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; : seed.payload.node_length- get_offset(pos); //Add this seed to its parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.parent_handle)); parent_problem.children.emplace_back(); - parent_problem.children.back().identifier = seed.payload.identifier; - parent_problem.children.back().has_net_handle = false; + parent_problem.children.back().net_handle = seed.payload.node_handle; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; parent_problem.children.back().has_chain_values = true; @@ -467,7 +472,47 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; //And the parent to chains_by_level if (new_parent) { - chains_by_level[seed.payload.parent_depth].emplace_back(parent_id); + chains_by_level[seed.payload.parent_depth].emplace_back(seed.payload.parent_handle); + } + + + //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too + if ( new_parent) { + if (seed.payload.is_trivial_chain && !seed.payload.parent_is_root) { + bool grandparent_is_simple_snarl = seed.payload.parent_is_chain; + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = grandparent_is_simple_snarl + ? 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(seed.payload.node_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE, + 1) + : distance_index.get_net_handle_from_values(seed.payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; +#endif + + if (grandparent_is_simple_snarl) { + //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too + parent_problem.has_grandparent_handle = true; + parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( + seed.payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; +#endif + } + } else if (seed.payload.parent_is_root && seed.payload.parent_is_chain && !seed.payload.is_trivial_chain) { + //The parent chain is a child of the root + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( + 0, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; +#endif + } } @@ -475,28 +520,32 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node + + //Create a new SnarlTreeNodeProblem for this node bool new_node = false; - if (clustering_problem.net_identifier_to_node_problem_index.count(seed.payload.identifier) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.node_handle) == 0) { new_node = true; - 
clustering_problem.net_identifier_to_node_problem_index.emplace(seed.payload.identifier, + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.identifier, - clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode_decoder->max_depth()); + + //Remember the parent of this node, since it will be needed to remember the root snarl later + clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; + } seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); - SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(seed.payload.identifier)); + SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.node_handle)); node_problem.children.emplace_back(); - node_problem.children.back().identifier = seed.payload.identifier; - node_problem.children.back().has_net_handle = false; + node_problem.children.back().net_handle = seed.payload.node_handle; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; node_problem.children.back().has_chain_values = true; @@ -520,33 +569,27 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; //Go through and cluster nodes that are children of the root or root snarls for(const SeedCache* seed : nodes_to_cluster_now) { + const net_handle_t& node_net_handle = seed->payload.node_handle; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(seed->payload.identifier)); + clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); //Cluster the node. 
Give it the range in node_to_seeds, which is from seed_range_start //to either current_iterator (if current_iterator is a different node), or the end of node_to_seeds //if current_iterator is the last thing in the list and the same node - cluster_one_node(clustering_problem, &node_problem); - - net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(seed->payload.identifier); + cluster_one_node(clustering_problem, &node_problem); + net_handle_t parent = node_problem.parent_net_handle; if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root - if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, + if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { + clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, seed, 0); - if (node_problem.has_parent_handle) { - clustering_problem.all_node_problems.back().containing_net_handle = node_problem.parent_net_handle; - clustering_problem.all_node_problems.back().has_net_handle = true; - - } } - clustering_problem.root_children.emplace_back(parent_id, seed->payload.identifier); + clustering_problem.root_children.emplace_back(parent, node_net_handle); } else { //Otherwise, just compare the single child's external connectivity compare_and_combine_cluster_on_one_child(clustering_problem, &node_problem); @@ -565,12 +608,15 @@ cerr << "Node has identifier " << seed.payload.identifier << endl; //Assumes that all the children of the snarls have been clustered 
already and are present in clustering_problem.snarls_to_children void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& clustering_problem) const { - for (const net_identifier_t& snarl_id : clustering_problem.parent_snarls) { + for (const net_handle_t& snarl_handle : clustering_problem.parent_snarls) { //Go through each of the snarls at this level, cluster them, //and find which chains they belong to, if any SnarlTreeNodeProblem* snarl_problem = &clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(snarl_id)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); +#ifdef DEBUG_CLUSTER + cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; +#endif //Cluster the snarlindex]; cluster_one_snarl(clustering_problem, snarl_problem); @@ -587,54 +633,53 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster if (reachable_left || reachable_right) { + //Make a new SnarlTreeNodeProblem for the parent - net_identifier_t parent_id = ZipCodeDecoder::get_parent_identifier(snarl_id); + net_handle_t snarl_parent = snarl_problem->has_parent_handle + ? 
snarl_problem->parent_net_handle + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; - if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { - //Make a new SnarlTreeNodeProblem for the parent + if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, + clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), + clustering_problem.all_node_problems.emplace_back(snarl_parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, snarl_problem->seed, snarl_problem->zipcode_depth-1); - if (snarl_problem->has_parent_handle) { - clustering_problem.all_node_problems.back().containing_net_handle = snarl_problem->parent_net_handle; - clustering_problem.all_node_problems.back().has_net_handle = true; - } //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(snarl_id)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); + if (snarl_problem.has_grandparent_handle) { + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = snarl_problem.grandparent_net_handle; + } } SnarlTreeNodeProblem& parent_problem = 
clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); //Add the snarl to its parent chain parent_problem.children.emplace_back(); - parent_problem.children.back().identifier = snarl_problem->containing_net_id; - parent_problem.children.back().has_net_handle = true; - parent_problem.children.back().net_handle = snarl_problem->containing_net_handle; - parent_problem.children.back().identifier = snarl_id; + parent_problem.children.back().net_handle = snarl_handle; parent_problem.children.back().is_seed = false; + parent_problem.children.back().has_chain_values = true; parent_problem.children.back().chain_component = snarl_problem->chain_component_start; parent_problem.children.back().prefix_sum = snarl_problem->prefix_sum_value; - if (snarl_problem->has_parent_handle && ! parent_problem.has_net_handle) { - parent_problem.containing_net_handle = snarl_problem->parent_net_handle; - } - parent_problem.children.back().has_chain_values = true; if (new_parent) { //And the parent chain to the things to be clustered next - clustering_problem.parent_chains->emplace_back(parent_id); + clustering_problem.parent_chains->emplace_back(snarl_parent); } + } + #ifdef DEBUG_CLUSTER - cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << " as a child of " - << distance_index.net_handle_as_string(distance_index.get_parent(snarl_problem->containing_net_handle)) << endl; + cerr << "\tRecording snarl " << distance_index.net_handle_as_string(snarl_handle) << " as a child of " + << distance_index.net_handle_as_string(distance_index.get_parent(snarl_handle)) << endl; #endif - } + } clustering_problem.parent_snarls.clear(); } @@ -648,24 +693,32 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster } - for (const net_identifier_t& chain_id : *(clustering_problem.current_chains)) { 
+ for (const net_handle_t& chain_handle : *(clustering_problem.current_chains)) { SnarlTreeNodeProblem* chain_problem = &clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(chain_id)); + clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); + #ifdef DEBUG_CLUSTER - net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { - if (x.has_net_handle) { - cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; - } else { - cerr << "\t(didn't store the net handle)" << endl; - } + cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; } #endif + net_handle_t parent = chain_problem->has_parent_handle + ? chain_problem->parent_net_handle + : (chain_problem->zipcode_depth == 0 + ? distance_index.get_root() + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); +#ifdef DEBUG_CLUSTER + cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; + if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { + cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; + assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); + } +#endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? 
ZipCode::EMPTY : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); @@ -682,40 +735,19 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); - net_handle_t parent = chain_problem->has_parent_handle - ? chain_problem->parent_net_handle - : (chain_problem->zipcode_depth == 0 - ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow( - id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, - &distance_index, &chain_problem->containing_net_handle))); - net_identifier_t parent_id = chain_id == "ROOT" ? "ROOT" : ZipCodeDecoder::get_parent_identifier(chain_id); - -#ifdef DEBUG_CLUSTER - cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << " with id " << parent_id << endl; - if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { - cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - //assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); - } -#endif //Add the chain to its parent if (is_root) { //If the parent is the root, remember to cluster it if (is_root_snarl) { //If the parent is a root snarl, then remember it to cluster in the root - if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), + if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { + 
clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, chain_problem->seed, chain_problem->zipcode_depth-1); - if (chain_problem->has_parent_handle) { - clustering_problem.all_node_problems.back().containing_net_handle = chain_problem->parent_net_handle; - clustering_problem.all_node_problems.back().has_net_handle = true; - } } - clustering_problem.root_children.emplace_back(parent_id, chain_id); + clustering_problem.root_children.emplace_back(parent, chain_handle); } else if (!is_top_level_chain) { //Otherwise, cluster it with itself using external connectivity only //is_top_level_chain also includes external connectivity, so if it's true we don't need to check this @@ -728,13 +760,11 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() ? false : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); - chain_problem->distance_start_left = snarl_child_is_rev ? 
chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); @@ -756,7 +786,6 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; - cerr << "Is reversed? " << snarl_child_is_rev << endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " @@ -822,39 +851,32 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif //And add it to its parent snarl bool new_parent = false; - if (clustering_problem.net_identifier_to_node_problem_index.count(parent_id) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { new_parent = true; - clustering_problem.net_identifier_to_node_problem_index.emplace(parent_id, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_id, - clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - chain_problem->seed, 
chain_problem->zipcode_depth-1); - + clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved - chain_problem = &(clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(chain_id))); - - if (chain_problem->has_parent_handle) { - clustering_problem.all_node_problems.back().containing_net_handle = chain_problem->parent_net_handle; - clustering_problem.all_node_problems.back().has_net_handle = true; + SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); + if (chain_problem.has_grandparent_handle) { + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(parent)); + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = chain_problem.grandparent_net_handle; } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(parent_id)); + clustering_problem.net_handle_to_node_problem_index.at(parent)); parent_problem.children.emplace_back(); - parent_problem.children.back().identifier = chain_problem->containing_net_id; - parent_problem.children.back().has_net_handle = true; - parent_problem.children.back().net_handle = chain_problem->containing_net_handle; - parent_problem.children.back().identifier = chain_id; + parent_problem.children.back().net_handle = chain_handle; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = false; - if 
(chain_problem->has_parent_handle && ! parent_problem.has_net_handle) { - parent_problem.containing_net_handle = chain_problem->parent_net_handle; - } if (new_parent) { - clustering_problem.parent_snarls.emplace_back(parent_id); + clustering_problem.parent_snarls.emplace_back(parent); } } @@ -865,8 +887,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster void SnarlDistanceIndexClusterer::cluster_one_node( ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* node_problem) const { #ifdef DEBUG_CLUSTER - net_handle_t node_handle = node_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(node_problem->seed->seed->pos), node_problem->zipcode_depth, &distance_index); - cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_handle) << endl; + cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; #endif size_t node_length = node_problem->node_length; @@ -884,7 +905,7 @@ void SnarlDistanceIndexClusterer::cluster_one_node( #ifdef DEBUG_CLUSTER - cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_handle) << endl; + cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; bool got_left = false; bool got_right = false; @@ -940,37 +961,16 @@ void SnarlDistanceIndexClusterer::cluster_one_node( void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structures(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem1, SnarlTreeNodeProblem* child_problem2, SnarlTreeNodeProblem* parent_problem, const vector> & child_distances, bool is_root, bool first_child) const { - - if (!child_problem1->has_net_handle) { - child_problem1->set_net_handle(distance_index); - } - if (!child_problem2->has_net_handle) { - child_problem2->set_net_handle(distance_index); - } - //I'm pretty sure this will only not have been set 
for a root snarl, in which case its fastest to get it from the zipcode - //instead of distance_index.get_parent - if (!parent_problem->has_net_handle) { - parent_problem->containing_net_handle = parent_problem->seed->seed->zipcode_decoder->get_net_handle_slow( - id(parent_problem->seed->seed->pos), parent_problem->zipcode_depth, - &distance_index, &child_problem1->containing_net_handle); - parent_problem->has_net_handle = true; - - } - - - net_handle_t& child_handle1 =child_problem1->containing_net_handle; - net_handle_t& child_handle2 =child_problem2->containing_net_handle; - net_handle_t& parent_handle =parent_problem->containing_net_handle; - #ifdef DEBUG_CLUSTER cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem1->containing_net_handle) << " and " << distance_index.net_handle_as_string(child_problem2->containing_net_handle) << " which are children of " << distance_index.net_handle_as_string(parent_problem->containing_net_handle) << endl; - cerr << "parent should be " << distance_index.net_handle_as_string(distance_index.get_parent(child_problem1->containing_net_handle )) << endl; - assert(distance_index.start_end_traversal_of(distance_index.get_parent(child_problem1->containing_net_handle )) == distance_index.start_end_traversal_of(parent_problem->containing_net_handle)); - assert(distance_index.start_end_traversal_of(distance_index.get_parent(child_problem2->containing_net_handle )) == distance_index.start_end_traversal_of(parent_problem->containing_net_handle)); #endif + net_handle_t& parent_handle = parent_problem->containing_net_handle; + net_handle_t& child_handle1 = child_problem1->containing_net_handle; + net_handle_t& child_handle2 = child_problem2->containing_net_handle; + //Get the distances between the two sides of the children in the parent @@ -1434,13 +1434,12 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure void 
SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem) const { #ifdef DEBUG_CLUSTER - net_handle_t child_handle = child_problem->zipcode_depth == child_problem->seed->seed->zipcode_decoder->max_depth() - ? distance_index.get_node_net_handle(id(child_problem->seed->seed->pos)) - : child_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(child_problem->seed->seed->pos), child_problem->zipcode_depth, &distance_index); - cerr << "\tCompare " << distance_index.net_handle_as_string(child_handle) + cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->containing_net_handle) << " to itself in the root" << endl; #endif + net_handle_t& handle = child_problem->containing_net_handle; + //Get the distances between the two sides of the child size_t distance_left_left = @@ -1586,17 +1585,14 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* snarl_problem) const { //Get the clusters on this snarl, assumes that all of the snarls children have been clustered already. 
- + +#ifdef DEBUG_CLUSTER + cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; +#endif - if (!snarl_problem->has_net_handle) { - snarl_problem->set_net_handle(distance_index); - } snarl_problem->set_snarl_values(distance_index); net_handle_t& snarl_handle = snarl_problem->containing_net_handle; -#ifdef DEBUG_CLUSTER - cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_handle) << endl; -#endif //If the snarl is a simple snarl, then there is no clustering to do because there is no path between @@ -1615,7 +1611,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Go through each child node of the netgraph SnarlTreeNodeProblem& child_problem_i = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(snarl_problem->children[i].identifier)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].net_handle)); if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit @@ -1643,7 +1639,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the other node and its clusters SnarlTreeNodeProblem& child_problem_j = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(snarl_problem->children[j].identifier)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].net_handle)); if (child_problem_j.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && child_problem_j.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { @@ -1670,7 +1666,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (SnarlTreeNodeProblem::SnarlTreeChild& node_problem : snarl_problem->children) { //Go through each child node of the netgraph and add its clusters to the snarl SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(node_problem.identifier)); + clustering_problem.net_handle_to_node_problem_index.at(node_problem.net_handle)); //Add the cluster heads //May need to flip the distances @@ -1809,31 +1805,20 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin if (!child1.is_seed || !child2.is_seed) { only_seeds = false; } - //Since the parent is a chain, the fastest way to get the handle is from the distance index so check here if we can do that - if (!chain_problem->has_net_handle) { - if (child1.has_net_handle) { - chain_problem->containing_net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(child1.net_handle)); - chain_problem->has_net_handle = true; - } else if (child2.has_net_handle) { - chain_problem->containing_net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(child2.net_handle)); - chain_problem->has_net_handle = true; - } - } if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet - //TODO: I think this should never happen child1.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).chain_component_start; + clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; child1.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(child1.identifier)).prefix_sum_value; - 
child1.has_chain_values = true; + clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; + child2.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet child2.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).chain_component_start; + clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).chain_component_start; child2.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(child2.identifier)).prefix_sum_value; + clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).prefix_sum_value; child2.has_chain_values = true; } if (child1.chain_component != child2.chain_component) { @@ -1856,12 +1841,9 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } }); - if (!chain_problem->has_net_handle) { - //If we haven't gotten the chain handle yet, then we need to get it now - //If one of the children already had a net handle, then it would have been best to get it from the distance index - //but if it doesn't have a handle yet then just get it from the zipcode - chain_problem->set_net_handle(distance_index); - } + net_handle_t& chain_handle = chain_problem->containing_net_handle; + + if (!chain_problem->is_trivial_chain && ! 
is_top_level_chain) { //If we need it, get the values from the distance index: //is_looping_chain, node_length, the end boundary node, and the end component @@ -1881,7 +1863,6 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin !chain_problem->is_trivial_chain, is_top_level_chain); #ifdef DEBUG_CLUSTER - net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; cerr << "\t with best left and right values: " << chain_problem->fragment_best_left << " " << chain_problem->fragment_best_right << endl; @@ -1933,7 +1914,6 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin #ifdef DEBUG_CLUSTER - net_handle_t chain_handle = chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index); cerr << "Cluster chain " << distance_index.net_handle_as_string(chain_handle) << endl; cerr << "\t chain has " << chain_problem->children.size() << " children" << endl; #endif @@ -1969,15 +1949,15 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin size_t last_prefix_sum = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left : clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).chain_component_start; + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; size_t last_length = last_child.is_seed ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.node_length : clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).node_length; + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.chain_component : clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(last_child.identifier)).chain_component_start; + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small @@ -2217,6 +2197,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const size_t& read_num = current_child.seed_indices.first; const size_t& cluster_num = current_child.seed_indices.second; + net_handle_t& chain_handle = chain_problem->containing_net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* Get a bunch of distances from the current child that will be used to calculate distance @@ -2225,14 +2206,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #ifdef DEBUG_CLUSTER cerr << "At child seed " << current_child_seed.seed->pos << endl; - cerr << "Component: " << current_child_seed.payload.chain_component << endl; #endif //The distance from the right side of the last child to the left side of this child //(relative to the orientation of the chain size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if (!is_first_child) { //If this isn't the first child we're looking 
at - if (last_child.identifier == current_child.identifier) { + if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; } else if ( last_chain_component_end == current_child_seed.payload.chain_component) { @@ -2281,7 +2261,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #endif - if (last_child.identifier != current_child.identifier && + if (last_child.net_handle != current_child.net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, chain_problem->fragment_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -2361,7 +2341,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.identifier == current_child.identifier ? 0 + (last_child.net_handle == current_child.net_handle ? 
0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload.node_length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) @@ -2402,7 +2382,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child), current_child_seed.distance_left), 1); - if (!is_first_child && last_child.identifier == current_child.identifier) { + if (!is_first_child && last_child.net_handle == current_child.net_handle) { //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node @@ -2673,8 +2653,9 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& }; + net_handle_t& chain_handle = chain_problem->containing_net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(current_child.identifier)); + clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); //Skip this child if its seeds are all too far away bool skip_snarl = false; @@ -2726,7 +2707,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.identifier == current_child.identifier ? 0 + (last_child.net_handle == current_child.net_handle ? 
0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, child_problem.node_length)); @@ -2788,7 +2769,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e chain_problem->read_best_right = std::make_pair(std::numeric_limits::max(), std::numeric_limits::max()); - if (last_child.identifier != current_child.identifier && + if (last_child.net_handle != current_child.net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, old_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -3074,6 +3055,10 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro return; } + //Keep track of all clusters on the root + SnarlTreeNodeProblem root_problem(distance_index.get_root(), clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + &clustering_problem.all_seeds->at(0)->front(), 0); //TODO: ikd about the seed here //Remember old distances @@ -3089,30 +3074,28 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro }); //Go through the list of parent child pairs. 
Once we reach a new parent, cluster all children found up to this point - net_identifier_t current_parent = clustering_problem.root_children.front().first; - vector children; + net_handle_t current_parent = clustering_problem.root_children.front().first; + vector children; children.reserve(clustering_problem.root_children.size()); for (size_t root_child_i = 0 ; root_child_i < clustering_problem.root_children.size() ; root_child_i++) { - pair& parent_to_child = clustering_problem.root_children[root_child_i]; - net_identifier_t& parent = parent_to_child.first; + pair& parent_to_child = clustering_problem.root_children[root_child_i]; + net_handle_t& parent = parent_to_child.first; if (current_parent == parent || root_child_i == 0) { children.emplace_back(parent_to_child.second); } if (current_parent != parent || root_child_i == clustering_problem.root_children.size()-1) { #ifdef DEBUG_CLUSTER - cerr << "Clustering root snarl " << parent << " with " << children.size() << " chidlren" << endl; + cerr << "Clustering root snarl " << distance_index.net_handle_as_string(parent) << " with " << children.size() << " chidlren" << endl; #endif if (children.size() > 0) { - //Keep track of all clusters on the root - SnarlTreeNodeProblem* root_problem = &clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent)); for (size_t i = 0; i < children.size() ; i++) { //Go through each child node of the netgraph SnarlTreeNodeProblem* child_problem_i = &clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(children[i])); + clustering_problem.net_handle_to_node_problem_index.at(children[i])); for (const pair& head : child_problem_i->read_cluster_heads) { child_distances[head.second + clustering_problem.seed_count_prefix_sum[head.first]] = make_pair(clustering_problem.all_seeds->at(head.first)->at(head.second).distance_left, @@ -3124,12 +3107,12 @@ void 
SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro //Get the other node and its clusters SnarlTreeNodeProblem* child_problem_j = &clustering_problem.all_node_problems.at( - clustering_problem.net_identifier_to_node_problem_index.at(children[j])); + clustering_problem.net_handle_to_node_problem_index.at(children[j])); compare_and_combine_cluster_on_child_structures(clustering_problem, child_problem_i, - child_problem_j, root_problem, child_distances, true, false); + child_problem_j, &root_problem, child_distances, true, false); } } @@ -3144,25 +3127,22 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro cerr << "\tFound clusters on the root" << endl; for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { cerr << "\t for read num " << read_num << endl; - for (std::pair& parent_child_pair : clustering_problem.root_children) { - auto& root_problem = clustering_problem.all_node_problems.at(clustering_problem.net_identifier_to_node_problem_index.at(parent_child_pair.first)); - for (pair c : root_problem.read_cluster_heads) { - if (c.first == read_num) { - cerr << "\t\t" << c.first << ":"<at(c.first)->size() ; x++) { - if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { - cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; - } + for (pair c : root_problem.read_cluster_heads) { + if (c.first == read_num) { + cerr << "\t\t" << c.first << ":"<at(c.first)->size() ; x++) { + if (clustering_problem.read_union_find[c.first].find_group(x) == c.second) { + cerr << clustering_problem.all_seeds->at(c.first)->at(x).seed->pos << " "; } - cerr << endl; } + cerr << endl; } } } - //for (pair group_id : root_problem.read_cluster_heads) { - // assert (group_id.second == clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); - //} + for (pair group_id : root_problem.read_cluster_heads) { + assert (group_id.second == 
clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); + } #endif } @@ -3176,10 +3156,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr return; } #ifdef DEBUG_CLUSTER - net_handle_t node_handle = node_problem->zipcode_depth == node_problem->seed->seed->zipcode_decoder->max_depth() - ? distance_index.get_node_net_handle(id(node_problem->seed->seed->pos)) - : node_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(node_problem->seed->seed->pos), node_problem->zipcode_depth, &distance_index); - cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_handle) << endl; + cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; cerr << "\t with node length " << structure_length << endl; #endif @@ -3253,7 +3230,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr } #ifdef DEBUG_CLUSTER - cerr << "\t" << distance_index.net_handle_as_string(node_handle) << " is shorter than the distance limit so just one cluster" << endl; + cerr << "\t" << distance_index.net_handle_as_string(node_problem->containing_net_handle) << " is shorter than the distance limit so just one cluster" << endl; #endif return; diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index e52fc48b7f1..239d1e0d182 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -213,11 +213,9 @@ class SnarlDistanceIndexClusterer { //Struct to store one child, which may be a seed, node, snarl, or chain struct SnarlTreeChild { - //This may or may not be set + //If the net_handle is a node, then the child is a seed, otherwise the handle + //is used to find the problem net_handle_t net_handle; - - //Used as an identifier - net_identifier_t identifier; pair seed_indices; //The values used to 
sort the children of a chain @@ -232,7 +230,6 @@ class SnarlDistanceIndexClusterer { //For a seed, it gets set when the child is made, otherwise the first time this //child is seen when sorting bool has_chain_values; - bool has_net_handle; }; //The children of this snarl tree node //Initially unsorted, sort before clustering for chains @@ -252,15 +249,16 @@ class SnarlDistanceIndexClusterer { size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - net_identifier_t containing_net_id; - net_identifier_t parent_net_id; - //The snarl tree node that the clusters are on net_handle_t containing_net_handle; - //The parent of containing_net_handle, which might or might not be set + + + + //The parent and grandparent of containing_net_handle, which might or might not be set //This is just to store information from the minimizer cache net_handle_t parent_net_handle; + net_handle_t grandparent_net_handle; //One representative seed so we can get the zipcode and stuff const SeedCache* seed; @@ -279,8 +277,8 @@ class SnarlDistanceIndexClusterer { size_t loop_right = std::numeric_limits::max(); //These are sometimes set if the value was in the cache - bool has_net_handle = false; bool has_parent_handle = false; + bool has_grandparent_handle = false; //Only set this for nodes or snarls in chains bool is_reversed_in_parent = false; @@ -293,19 +291,18 @@ class SnarlDistanceIndexClusterer { //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, const SeedCache* seed, size_t zipcode_depth) : - containing_net_id(std::move(id)), + containing_net_handle(std::move(net)), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), seed(seed), zipcode_depth(zipcode_depth) 
{ read_cluster_heads.reserve(seed_count); - parent_net_id =containing_net_id == "ROOT" ? "ROOT" : ZipCodeDecoder::get_parent_identifier(containing_net_id); } //Constructor for a node or trivial chain, used to remember information from the cache - SnarlTreeNodeProblem(net_identifier_t id, size_t read_count, size_t seed_count, bool is_reversed_in_parent, + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : - containing_net_id(std::move(id)), + containing_net_handle(net), is_reversed_in_parent(is_reversed_in_parent), node_length(node_length), prefix_sum_value(prefix_sum), @@ -314,18 +311,13 @@ class SnarlDistanceIndexClusterer { fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), seed(seed), zipcode_depth(zipcode_depth) { - read_cluster_heads.reserve(seed_count); - parent_net_id = containing_net_id == "ROOT" ? 
"ROOT" : ZipCodeDecoder::get_parent_identifier(containing_net_id); + read_cluster_heads.reserve(seed_count); } //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); - if (zipcode_depth == 0 || is_looping_chain || seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true) != 0) { - node_length = distance_index.chain_minimum_length(containing_net_handle); - } else { - node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); - } + node_length = distance_index.chain_minimum_length(containing_net_handle); chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } @@ -346,13 +338,8 @@ class SnarlDistanceIndexClusterer { //Distance to go backward in the chain and back loop_left = SnarlDistanceIndex::sum(distance_index.get_reverse_loop_value(start_in), 2*distance_index.minimum_length(start_in)); - } - void set_net_handle(const SnarlDistanceIndex& distance_index) { - if (!has_net_handle) { - has_net_handle = true; - containing_net_handle = seed->seed->zipcode_decoder->get_net_handle_slow(id(seed->seed->pos), zipcode_depth, &distance_index); - } + } }; @@ -417,14 +404,14 @@ class SnarlDistanceIndexClusterer { //The snarls and chains get updated as we move up the snarl tree //Maps each net_handle_t to an index to its node problem, in all_node_problems - hash_map net_identifier_to_node_problem_index; + hash_map net_handle_to_node_problem_index; //This stores all the snarl tree nodes and their clustering scratch work vector all_node_problems; //All chains for the current level of the snarl tree and gets updated as the algorithm //moves up the snarl tree. 
At one iteration, the algorithm will go through each chain //in chain to children and cluster the chain using clusters on the children - vector* current_chains; + vector* current_chains; //Same as current_chains but for the level of the snarl @@ -432,18 +419,18 @@ class SnarlDistanceIndexClusterer { //This gets updated as the current level is processed - the snarls from this level //are added as children to parent_chain_to_children. //After processing one level, this becomes the next chain_to_children - vector* parent_chains; + vector* parent_chains; //All snarls for the current level of the snarl tree //(chains from chain_to_children get added to their parent snarls, snarls get added to parent_snarls //then all snarls in snarl_to_children are clustered and added to parent_chain_to_children) - vector parent_snarls; + vector parent_snarls; //This holds all the child problems of the root //Each pair is the parent and the child. This will be sorted by parent before //clustering - vector> root_children; + vector> root_children; ///////////////////////////////////////////////////////// @@ -466,7 +453,7 @@ class SnarlDistanceIndexClusterer { } - net_identifier_to_node_problem_index.reserve(5*seed_count); + net_handle_to_node_problem_index.reserve(5*seed_count); all_node_problems.reserve(5*seed_count); parent_snarls.reserve(seed_count); root_children.reserve(seed_count); @@ -479,7 +466,7 @@ class SnarlDistanceIndexClusterer { //If a node is a child of the root or of a root snarl, then add cluster it and //remember to cluster the root snarl void get_nodes( ClusteringProblem& clustering_problem, - vector>& chains_by_level) const; + vector>& chains_by_level) const; //Cluster all the snarls at the current level diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index 63e02a7551a..6ef11d3426f 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -728,7 +728,7 @@ namespace unittest { 
SnarlDistanceIndex dist_index; fill_in_distance_index(&dist_index, &graph, &snarl_finder); SnarlDistanceIndexClusterer clusterer(dist_index, &graph); - + //graph.to_dot(cerr); SECTION( "Three clusters going across snarl" ) { @@ -798,7 +798,7 @@ namespace unittest { } } TEST_CASE( "Top-level looping chain", - "[cluster]" ) { + "[cluster][bug]" ) { VG graph; Node* n1 = graph.create_node("AGCGTGTAGAGAA"); @@ -823,6 +823,8 @@ namespace unittest { fill_in_distance_index(&dist_index, &graph, &snarl_finder); SnarlDistanceIndexClusterer clusterer(dist_index, &graph); + ofstream out ("bug_graph.vg"); + graph.serialize(out); SECTION( "Two clusters" ) { @@ -1672,7 +1674,7 @@ namespace unittest { REQUIRE( clusters.size() == 1); } } - TEST_CASE( "Loop on first node in a top-level chain","[cluster][bug]" ) { + TEST_CASE( "Loop on first node in a top-level chain","[cluster]" ) { VG graph; Node* n1 = graph.create_node("GCA"); @@ -1700,10 +1702,6 @@ namespace unittest { IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; fill_in_distance_index(&dist_index, &graph, &snarl_finder); - ofstream out ("testGraph.hg"); - graph.serialize(out); - - SnarlDistanceIndexClusterer clusterer(dist_index, &graph); @@ -3226,31 +3224,13 @@ namespace unittest { vector> pos_ts(2); - pos_ts[0].emplace_back(1, false, 57); - pos_ts[0].emplace_back(1, true, 15); - pos_ts[0].emplace_back(2, false, 25); - pos_ts[0].emplace_back(1, false, 36); - pos_ts[0].emplace_back(5, true, 16); - pos_ts[0].emplace_back(1, false, 46); - pos_ts[0].emplace_back(2, true, 21); - pos_ts[0].emplace_back(1, true, 10); - - pos_ts[1].emplace_back(2, false, 0); - pos_ts[1].emplace_back(2, true, 2); - pos_ts[1].emplace_back(6, true, 24); - pos_ts[1].emplace_back(6, true, 44); - pos_ts[1].emplace_back(1, false, 42); - pos_ts[1].emplace_back(2, false, 19); - pos_ts[1].emplace_back(2, false, 23); - pos_ts[1].emplace_back(5, true, 19); - pos_ts[1].emplace_back(4, false, 73); - pos_ts[1].emplace_back(4, true, 57); - 
pos_ts[1].emplace_back(3, false, 23); - pos_ts[1].emplace_back(6, true, 10); - pos_ts[1].emplace_back(5, false, 19); - - - + pos_ts[0].emplace_back(6, false, 12); + pos_ts[0].emplace_back(9, true, 0); + pos_ts[0].emplace_back(11, true, 2); + pos_ts[1].emplace_back(7, false,0); + pos_ts[1].emplace_back(11,false, 5); + pos_ts[1].emplace_back(8,false, 9); + pos_ts[1].emplace_back(9,true, 0); vector> seeds(2); for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { for (pos_t pos : pos_ts[read_num]) { @@ -3285,7 +3265,7 @@ namespace unittest { IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; - fill_in_distance_index(&dist_index, &graph, &snarl_finder); + fill_in_distance_index(&dist_index, &graph, &snarl_finder, 5); @@ -3316,9 +3296,9 @@ namespace unittest { handle_t node1 = graph.get_handle(nodeID1); offset_t offset1 = uniform_int_distribution(0,graph.get_length(node1) - 1)(generator); - bool rev = uniform_int_distribution(0,1)(generator) == 0; - pos_t pos = make_pos_t(nodeID1, rev,offset1 ); + pos_t pos = make_pos_t(nodeID1, + uniform_int_distribution(0,1)(generator) == 0,offset1 ); @@ -3374,20 +3354,12 @@ namespace unittest { if ( dist != -1 && dist <= read_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); - - cerr << "Failed with positions" << endl; - - for (size_t read = 0 ; read < 2 ; read ++) { - cerr << "read: " << read << endl; - for (auto& seed : all_seeds[i]) { - cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << endl; - } - } cerr << "These should have been in the same read cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; REQUIRE(false); } + } } } @@ -3409,14 +3381,6 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); - cerr << "Failed with positions" << endl; - - for (size_t read = 0 ; read < 2 ; read ++) { - cerr << "read: " << read << endl; - for (auto& seed : all_seeds[i]) { - cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << endl; - } - } cerr << "These should be different read clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; @@ -3465,14 +3429,6 @@ namespace unittest { if ( dist != -1 && dist <= fragment_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); - cerr << "Failed with positions" << endl; - - for (size_t read = 0 ; read < 2 ; read ++) { - cerr << "read: " << read << endl; - for (auto& seed : all_seeds[i]) { - cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << endl; - } - } cerr << "These should have been in the same fragment cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; @@ -3505,14 +3461,6 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); - cerr << "Failed with positions" << endl; - - for (size_t read = 0 ; read < 2 ; read ++) { - cerr << "read: " << read << endl; - for (auto& seed : all_seeds[i]) { - cerr << "\t" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << endl; - } - } cerr << "These should be different fragment clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 8259e81b686..da72dcbdf14 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -81,23 +81,6 @@ using namespace std; distance_index) == 3); } - SECTION("get net handle") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - net_handle_t n = distance_index.get_node_net_handle(n1->id()); - net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); - for (int i = decoder.max_depth()+1 ; i >= 0 ; --i) { - assert(distance_index.start_end_traversal_of(n) == - distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n1->id(), i , &distance_index))); - if (i != 0) { - assert(decoder.get_identifier(i-1) == decoder.get_parent_identifier(id)); - n = distance_index.get_parent(n); - id = decoder.get_parent_identifier(id); - } - } - - } } TEST_CASE("Simple chain zipcode", "[zipcode]") { //Snarl 1-3, snarl 3-6 @@ -1329,7 +1312,7 @@ using namespace std; } } - TEST_CASE("Top-level snarl zipcode", "[zipcode][test]") { + TEST_CASE("Top-level snarl zipcode", "[zipcode]") { VG graph; @@ -1581,40 +1564,6 @@ using namespace std; REQUIRE(zipcode == decoded); }; } - SECTION("get net handle node 1") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - net_handle_t n = distance_index.get_node_net_handle(n1->id()); - net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); - for (int i = decoder.max_depth()+1 ; i >= 0 ; --i) { - assert(distance_index.start_end_traversal_of(n) == - distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n1->id(), i , &distance_index))); - if (i != 0) { - assert(decoder.get_identifier(i-1) == 
decoder.get_parent_identifier(id)); - n = distance_index.get_parent(n); - id = decoder.get_parent_identifier(id); - } - } - - } - SECTION("get net handle") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - net_handle_t n = distance_index.get_node_net_handle(n4->id()); - net_identifier_t id = decoder.get_identifier(decoder.max_depth()+1); - for (int i = decoder.max_depth() +1 ; i >= 0 ; --i) { - assert(distance_index.start_end_traversal_of(n) == - distance_index.start_end_traversal_of(decoder.get_net_handle_slow(n4->id(), i , &distance_index))); - if (i != 0) { - assert(decoder.get_identifier(i-1) == decoder.get_parent_identifier(id)); - n = distance_index.get_parent(n); - id = decoder.get_parent_identifier(id); - } - } - - } } TEST_CASE("Top-level chain zipcode", "[zipcode]") { diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a8ff1570f55..60a764bca2c 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -693,7 +693,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist } } -net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child) const { +net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -703,28 +703,19 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } - return distance_index->start_end_traversal_of(distance_index->get_handle_from_connected_component(zip_value)); + return distance_index->get_handle_from_connected_component(zip_value); } else if (decoder[depth].first) { 
//If this is a chain/node - if (child != nullptr) { - return distance_index->get_parent(*child); - } net_handle_t n = distance_index->get_node_net_handle(id); - size_t max = max_depth(); - if (max >= 1 && decoder[max].first && !decoder[max-1].first) { - //If the last thing is a trivial chain - if (depth == max+1) { - return distance_index->start_end_traversal_of(n); - } else { + for (size_t d = max_depth() ; d > depth ; d--) { + n = distance_index->get_parent(n); + if (distance_index->is_trivial_chain(n)){ n = distance_index->get_parent(n); } } - for (size_t d = max ; d > depth ; d--) { - n = distance_index->get_parent(n); - } - return distance_index->start_end_traversal_of(n); + return n; } else { //If this is a snarl @@ -737,10 +728,6 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, if (zip_value == 1) { //If this is a regular snarl - if (child != nullptr) { - return distance_index->get_parent(*child); - } - net_handle_t n = distance_index->get_node_net_handle(id); for (size_t d = max_depth() ; d > depth ; d--) { n = distance_index->get_parent(n); @@ -748,7 +735,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, n = distance_index->get_parent(n); } } - return distance_index->start_end_traversal_of(n); + return n; } else { //Irregular snarl @@ -850,16 +837,6 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna } } -bool ZipCodeDecoder::is_externally_connected (const size_t& depth) const { - assert(depth == 0); - assert(decoder[0].first); - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - } - return zip_value != 0; -} bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); @@ -1891,17 +1868,14 @@ void 
ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { +MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; - //TODO: This is basically copying what get_identifier does but it's faster to have it here instead of running through the zipcode a second time if (decoder_length() == 1) { //If the root-level structure is a node payload.parent_is_root = true; payload.parent_is_chain = true; - payload.identifier = "1"; - //Walk through the zipcode to get values size_t zip_value; size_t zip_index = decoder[0].second; @@ -1909,7 +1883,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier+= std::to_string(zip_value); + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); //Root node length std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); @@ -1917,180 +1893,143 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; payload.is_trivial_chain = true; payload.is_reversed = false; + payload.parent_handle = distance_index.get_root(); payload.parent_type = ZipCode::ROOT_NODE; + payload.parent_record_offset = 0; + + } else if (decoder[max_depth() - 1].first) { + //If the parent is a chain + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_is_chain = true; + payload.parent_is_root = false; + + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[max_depth()-1].second; + //is_chain/rank in snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + //root_identifier for root, chain length for anything else + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + if (decoder_length() == 2) { + //If the node is a child of the root chain + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_type = ZipCode::ROOT_CHAIN; + payload.parent_is_root = true; + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + } else { + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); + payload.parent_type = ZipCode::CHAIN; + } + payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); + + //chain component count + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + + //Node prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //Node length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + //is_reversed + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //TODO: For top-level chains we got this from the distance index + payload.is_reversed = zip_value; + + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; + + } else { - //If the node is nested - payload.identifier = ""; - for (size_t d = 0 ; d <= max_depth()-1 ; d++) { - payload.identifier += (decoder[d].first ? "1" : "0"); - bool at_parent = d == max_depth() - 1; - if (d == 0 && !at_parent) { - //Root structure that isn't the parent of the node - size_t zip_value; - size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if (i == ZipCode::ROOT_IDENTIFIER_OFFSET) { - payload.identifier += std::to_string(zip_value); - } - } + //If the node is a child of a snarl + + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE, + distance_index.get_node_record_offset(payload.node_handle)); + payload.parent_is_chain = false; + payload.parent_is_root = decoder_length() == 2; + payload.is_trivial_chain = true; + + + size_t zip_value; + size_t zip_index; + if (payload.parent_is_root) { + //is_chain + zip_index = decoder[0].second; + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Identifier for root snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_handle = payload.parent_handle; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_handle = 
distance_index.get_net_handle_from_values(payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::ROOT_HANDLE); + payload.parent_type = ZipCode::ROOT_SNARL; + } else { + zip_index = decoder[max_depth()-1].second; + //is_regular + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //If this is a non-root snarl, get as much as we can from it + payload.parent_type = ZipCode::EMPTY; + if (zip_value == 0) { + payload.parent_type = ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + payload.parent_type = ZipCode::REGULAR_SNARL; } else { - size_t zip_value; - size_t zip_index = decoder[d].second; + payload.parent_type = ZipCode::CYCLIC_SNARL; + } - if (decoder[d].first) { - //is_chain so could be a chain or a node, but I'm not going to let it get to the node child of a chain - //in the loop- if that happens, then it will be handled if at_parent is true - if (at_parent) { - payload.parent_is_chain = true; - payload.is_trivial_chain = false; - if (decoder_length() == 2) { - //If the node is a child of the root chain - payload.parent_is_root = true; - payload.parent_type = ZipCode::ROOT_CHAIN; - //is chain for root - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - //Root identifier - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier += std::to_string(zip_value); - } else { - payload.parent_is_root = false; - payload.parent_type = ZipCode::CHAIN; - //rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - //Remember the rank for the identifier - payload.identifier += std::to_string(zip_value); - } - - - //Now get the node info - payload.identifier += ".1"; - zip_index = decoder[d+1].second; - - //Node prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.prefix_sum = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; - payload.identifier += std::to_string(zip_value); - - //Node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_length = 0 ? zip_value == std::numeric_limits::max() : zip_value-1; - - //is_reversed - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //TODO: For top-level chains we got this from the distance index - payload.is_reversed = zip_value; - - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.chain_component = zip_value; - payload.identifier += "\\"; - payload.identifier += std::to_string(zip_value); - } else { - //Otherwise, this is just a chain - for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if ( i == ZipCode::CHAIN_RANK_IN_SNARL_OFFSET) { - payload.identifier += std::to_string(zip_value); - } - } - } - } else { - //Definitely a snarl - if (at_parent) { - payload.parent_is_chain = false; - payload.parent_is_root = decoder_length() == 2; - payload.is_trivial_chain = true; - - if (payload.parent_is_root) { - assert(d == 0); - //is_chain - zip_index = decoder[0].second; - - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier += std::to_string(zip_value); - - payload.parent_type = ZipCode::ROOT_SNARL; - } else { - zip_index = decoder[max_depth()-1].second; - //is_regular - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //If this is a non-root snarl, get as much as we can from it - if (zip_value == 0) { - payload.parent_type = ZipCode::IRREGULAR_SNARL; - } else if (zip_value == 1) { - payload.parent_type = ZipCode::REGULAR_SNARL; - } else { - 
payload.parent_type = ZipCode::CYCLIC_SNARL; - } - - //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier += std::to_string(zip_value); - - payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - - //Snarl length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Snarl child_count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier += "\\"; - payload.identifier += std::to_string(zip_value); - //TODO: SHould use this somehow - payload.chain_component = 0; - //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - - if (payload.parent_type == ZipCode::REGULAR_SNARL) { - //Snarl is reversed - payload.is_reversed = zip_value; - payload.parent_is_chain=true; - } else { - payload.is_reversed = false; - } + //Snarl prefix sum + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - } + payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - //We should be at the node/trivial chain now - zip_index = decoder[max_depth()].second; - //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.identifier += ".1"; - payload.identifier += std::to_string(zip_value); - if (!payload.parent_is_root) { - payload.identifier += ".n"; - } - //Chain length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + //Snarl length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Snarl child_count + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Chain component of the snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //TODO: SHould use this somehow + payload.chain_component = 0; + //is_reversed for regular snarl and record offset for irregular/cyclic snarl + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - //This will be the node of the trivial chain - //Get the rest as default values - } else { - for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if (i == ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET) { - payload.identifier += std::to_string(zip_value); - } else if (i == ZipCode::SNARL_CHAIN_COMPONENT_OFFSET) { - payload.identifier += "\\"; - payload.identifier += std::to_string(zip_value); - } - } - } + if (payload.parent_type == ZipCode::REGULAR_SNARL) { + //Snarl is reversed + net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); + //Simple and regular snarls are different for clustering + if (distance_index.is_simple_snarl(grandparent_handle)) { + payload.is_reversed = zip_value; + payload.parent_is_chain=true; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); + } else { + payload.is_reversed = false; + payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); } + + } else { + payload.is_reversed = false; + payload.parent_record_offset = zip_value; } - if (d < (max_depth() - 1)) { - payload.identifier += "."; - } + } - } + //We should be at the node/trivial chain now + zip_index = decoder[max_depth()].second; + //Chain rank in snarl + std::tie(zip_value, 
zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + //Chain length + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + + //Get the rest as default values + } payload.parent_depth = 0; for (size_t d = 0 ; d <= max_depth() ; d++) { auto type = get_code_type(d); @@ -2099,6 +2038,8 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id) const { } } + + return payload; } @@ -2108,7 +2049,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { return "ROOT"; } string result = ""; - for (size_t d = 0 ; d <= std::min(max_depth(), depth) ; d++) { + for (size_t d = 0 ; d < depth ; d++) { result += (decoder[d].first ? "1" : "0"); if (d == 0) { //Root structure @@ -2116,9 +2057,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if (i == ZipCode::ROOT_IDENTIFIER_OFFSET) { - result += std::to_string(zip_value); - } + result += std::to_string(zip_value); } } else if (decoder[d].first) { //is_chain so could be a chain or a node @@ -2126,14 +2065,9 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { //If the thing before this was also a chain, then it is a node size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if (i == ZipCode::NODE_OFFSET_OFFSET) { - result += std::to_string(zip_value); - } else if (i == ZipCode::NODE_CHAIN_COMPONENT_OFFSET) { - result += "\\"; - result += std::to_string(zip_value); - } + result += std::to_string(zip_value); } } else { 
//Otherwise it's a chain @@ -2141,29 +2075,22 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if ( i == ZipCode::CHAIN_RANK_IN_SNARL_OFFSET) { - result += std::to_string(zip_value); - } + result += std::to_string(zip_value); } } } else { //Definitely a snarl size_t zip_value; size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET; i++) { + for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); - if (i == ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET) { - result += std::to_string(zip_value); - } else if (i == ZipCode::SNARL_CHAIN_COMPONENT_OFFSET) { - result += "\\"; - result += std::to_string(zip_value); - } + result += std::to_string(zip_value); } } if (d < std::min(depth, max_depth())) { result += "."; } - + } if (depth > max_depth()) { //If this was node that's in a trivial chain diff --git a/src/zip_code.hpp b/src/zip_code.hpp index f6f6eb28305..376d7d1483e 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -334,7 +334,7 @@ class ZipCodeDecoder { ///Get the handle of the thing at the given depth. This can be used for anything but is slow, /// even for roots and irregular/cyclic snarls. 
It's a separate function to make sure I /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child = nullptr) const; + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; ///Get the information that was stored to get the address in the distance index ///This is the connected component number for a root structure, or the address of @@ -346,7 +346,6 @@ class ZipCodeDecoder { /// The minimum distance from start or end of the snarl to the left or right side of the child size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; - bool is_externally_connected(const size_t& depth) const; bool is_externally_start_end_connected(const size_t& depth) const; bool is_externally_start_start_connected(const size_t& depth) const; bool is_externally_end_end_connected(const size_t& depth) const; @@ -366,23 +365,20 @@ class ZipCodeDecoder { //TODO: I want to make a struct for holding all values of a code as real values ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id) const; + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth /// would be the node, also include the node id net_identifier_t get_identifier(size_t depth) const; - const static net_identifier_t get_root_identifier() { return "ROOT"; }; const static net_identifier_t get_parent_identifier(const net_identifier_t& child); }; -//How to hash a net_identifier_t template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { - string id_string = static_cast(id); - return std::hash{}(id_string); + return wang_hash()(id); } }; @@ -403,13 +399,15 @@ struct MIPayload { constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); - net_identifier_t identifier; + net_handle_t node_handle; + net_handle_t parent_handle; size_t node_length = std::numeric_limits::max(); size_t prefix_sum = 0; size_t chain_component = 0; //Depth according to the distance index size_t parent_depth = 0; + size_t parent_record_offset = 0; ZipCode::code_type_t parent_type = ZipCode::EMPTY; bool is_reversed = false; From b9a2010c6740c8caeb2800abfb731e6ad78b5a6d Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 31 Jul 2024 13:59:14 +0200 Subject: [PATCH 072/124] Put zipcode and decoder together --- src/algorithms/chain_items.hpp | 16 +- src/minimizer_mapper.cpp | 3 +- src/minimizer_mapper.hpp | 8 +- src/minimizer_mapper_from_chains.cpp | 46 +-- src/snarl_seed_clusterer.cpp | 62 +-- src/snarl_seed_clusterer.hpp | 36 +- src/subcommand/zipcode_main.cpp | 6 +- src/unittest/snarl_seed_clusterer.cpp | 126 ++++++- src/unittest/zip_code.cpp | 517 +++++++++++++------------- src/unittest/zip_code_tree.cpp | 56 +++ src/zip_code.cpp | 375 +++++++++---------- src/zip_code.hpp | 246 ++++++------ src/zip_code_tree.cpp | 160 ++++---- 13 files changed, 895 insertions(+), 762 deletions(-) diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index 387be2f7806..9511487034d 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -107,8 +107,8 @@ class Anchor { /// Get the 
distance-finding hint information (i.e. "zip code") for /// accelerating distance queries to the start of this anchor, or null if /// none is set. - inline ZipCodeDecoder* start_hint() const { - return start_decoder; + inline ZipCode* start_hint() const { + return start_zip; } /// Get the graph distance from wherever the start hint is positioned back @@ -120,8 +120,8 @@ class Anchor { /// Get the distance-finding hint information (i.e. "zip code") for /// accelerating distance queries from the end of this anchor, or null if /// none is set. - inline ZipCodeDecoder* end_hint() const { - return end_decoder; + inline ZipCode* end_hint() const { + return end_zip; } /// Get the graph distance from wherever the end hint is positioned forward @@ -142,14 +142,14 @@ class Anchor { /// Compose a read start position, graph start position, and match length into an Anchor. /// Can also bring along a distance hint and a seed number. - inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCodeDecoder* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_decoder(hint), end_decoder(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { + inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCode* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), 
start_zip(hint), end_zip(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { // Nothing to do! } /// Compose two Anchors into an Anchor that represents coming in through /// the first one and going out through the second, like a tunnel. Useful /// for representing chains as chainable items. - inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_decoder(first.start_hint()), end_decoder(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { + inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_zip(first.start_hint()), end_zip(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { // Nothing to do! 
} @@ -170,8 +170,8 @@ class Anchor { int points; size_t start_seed; size_t end_seed; - ZipCodeDecoder* start_decoder; - ZipCodeDecoder* end_decoder; + ZipCode* start_zip; + ZipCode* end_zip; size_t start_offset; size_t end_offset; size_t seed_length; diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index f240b2f6a1b..c70d26f3cbf 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3757,8 +3757,7 @@ std::vector MinimizerMapper::find_seeds(const std::vector //If the zipcode was saved in the payload seeds.back().zipcode.fill_in_zipcode_from_payload(minimizer.occs[j].payload); } - ZipCodeDecoder* decoder = new ZipCodeDecoder(&seeds.back().zipcode); - seeds.back().zipcode_decoder.reset(decoder); + seeds.back().zipcode.fill_in_full_decoder(); } diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index 502f442543b..117e9b624bf 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -601,15 +601,15 @@ class MinimizerMapper : public AlignerClient { /// How do we convert chain info to an actual seed of the type we are using? /// Also needs to know the hit position, and the minimizer number. - inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip, ZipCodeDecoder* decoder) { - return { hit, minimizer, zip, std::unique_ptr(decoder)}; + inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip) { + return { hit, minimizer, zip}; } /// Convert a collection of seeds to a collection of chaining anchors. - std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const; + std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const; /// Convert a single seed to a single chaining anchor. 
- static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); + static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); /// Convert a read region, and the seeds that that region covers the /// stapled bases of (sorted by stapled base), into a single chaining diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index 4da269028eb..00823cb63a0 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -91,26 +91,26 @@ static pos_t forward_pos(const MinimizerMapper::Seed& seed, const VectorViewget_distance_index_address(0) == - end_seed1.zipcode_decoder->get_distance_index_address(0)); - assert(start_seed2.zipcode_decoder->get_distance_index_address(0) == - end_seed2.zipcode_decoder->get_distance_index_address(0)); + assert(start_seed1.zipcode.get_distance_index_address(0) == + end_seed1.zipcode.get_distance_index_address(0)); + assert(start_seed2.zipcode.get_distance_index_address(0) == + end_seed2.zipcode.get_distance_index_address(0)); #endif - if (start_seed1.zipcode_decoder->get_distance_index_address(0) != - start_seed2.zipcode_decoder->get_distance_index_address(0)) { + if (start_seed1.zipcode.get_distance_index_address(0) != + start_seed2.zipcode.get_distance_index_address(0)) { //If the two ranges are on different connected components return false; } - if (start_seed1.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_SNARL) { + if (start_seed1.zipcode.get_code_type(0) == ZipCode::ROOT_SNARL) { //If this is in a root snarl - if (start_seed1.zipcode_decoder->get_rank_in_snarl(1) != - start_seed2.zipcode_decoder->get_rank_in_snarl(1) + if (start_seed1.zipcode.get_rank_in_snarl(1) != + start_seed2.zipcode.get_rank_in_snarl(1) || - 
start_seed1.zipcode_decoder->get_rank_in_snarl(1) != - end_seed1.zipcode_decoder->get_rank_in_snarl(1) + start_seed1.zipcode.get_rank_in_snarl(1) != + end_seed1.zipcode.get_rank_in_snarl(1) || - start_seed2.zipcode_decoder->get_rank_in_snarl(1) != - end_seed2.zipcode_decoder->get_rank_in_snarl(1)) { + start_seed2.zipcode.get_rank_in_snarl(1) != + end_seed2.zipcode.get_rank_in_snarl(1)) { //If the two ranges are on different children of the snarl return false; } @@ -119,20 +119,20 @@ static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1 //Get the offset used for determining the range //On the top-level chain, node, or child of the top-level snarl auto get_seed_offset = [&] (const MinimizerMapper::Seed& seed) { - if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_CHAIN) { - return seed.zipcode_decoder->get_offset_in_chain(1); - } else if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_NODE) { - return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(0) - offset(seed.pos) + if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN) { + return seed.zipcode.get_offset_in_chain(1); + } else if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_NODE) { + return is_rev(seed.pos) ? seed.zipcode.get_length(0) - offset(seed.pos) : offset(seed.pos); } else { //Otherwise, this is a top-level snarl, and we've already made sure that it's on the //same child chain/node - if (seed.zipcode_decoder->get_code_type(1) == ZipCode::CHAIN) { + if (seed.zipcode.get_code_type(1) == ZipCode::CHAIN) { //On a chain - return seed.zipcode_decoder->get_offset_in_chain(2); + return seed.zipcode.get_offset_in_chain(2); } else { //On a node - return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(1) - offset(seed.pos) + return is_rev(seed.pos) ? 
seed.zipcode.get_length(1) - offset(seed.pos) : offset(seed.pos); } } @@ -3861,7 +3861,7 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l return to_return; } -std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const { +std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const { std::vector to_return; to_return.reserve(seeds.size()); for (size_t i = 0; i < seeds.size(); i++) { @@ -3870,7 +3870,7 @@ std::vector MinimizerMapper::to_anchors(const Alignment& aln return to_return; } -algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { +algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { // Turn each seed into the part of its match on the node where the // anchoring end (start for forward-strand minimizers, end for // reverse-strand minimizers) falls. @@ -3928,7 +3928,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector // TODO: Always make sequence and quality available for scoring! // We're going to score the anchor as the full minimizer, and rely on the margins to stop us from taking overlapping anchors. 
int score = aligner->score_exact_match(aln, read_start - margin_left, length + margin_right); - return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, seed.zipcode_decoder.get(), hint_start); + return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, &(seed.zipcode), hint_start); } algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, size_t read_start, size_t read_end, const std::vector& sorted_seeds, const std::vector& seed_anchors, const std::vector::const_iterator& mismatch_begin, const std::vector::const_iterator& mismatch_end, const HandleGraph& graph, const Aligner* aligner) { diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 6dbb291b647..31579b53103 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -426,14 +426,14 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), 
clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode_decoder->max_depth()); + &seed, seed.seed->zipcode.max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, - &seed, seed.seed->zipcode_decoder->max_depth() - 1); + &seed, seed.seed->zipcode.max_depth() - 1); } new_parent = true; @@ -532,7 +532,7 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode_decoder->max_depth()); + &seed, seed.seed->zipcode.max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; @@ -637,7 +637,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? 
snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; @@ -711,7 +711,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -721,17 +721,17 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? 
ZipCode::EMPTY - : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); + : chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - !chain_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(0); + !chain_problem->seed->seed->zipcode.is_externally_start_start_connected(0) && + !chain_problem->seed->seed->zipcode.is_externally_start_end_connected(0) && + !chain_problem->seed->seed->zipcode.is_externally_end_end_connected(0) && + !chain_problem->seed->seed->zipcode.get_is_looping_chain(0); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -760,32 +760,32 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode.max_depth() ? 
false - : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + : chain_problem->seed->seed->zipcode.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); chain_problem->distance_start_right = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); chain_problem->distance_end_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); chain_problem->distance_end_right = snarl_child_is_rev - ? 
chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER - cerr << "For child type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth) << endl; - cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; - cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " - << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + cerr << "For child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "For parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << 
endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " @@ -1443,15 +1443,15 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust //Get the distances between the two sides of the child size_t distance_left_left = - child_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_start_start_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_left_right = - child_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_start_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_right_right = - child_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_end_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); if (distance_left_left == std::numeric_limits::max() && @@ -1597,7 +1597,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. 
Otherwise, compare the children of the snarl - if (snarl_problem->seed->seed->zipcode_decoder->get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { + if (snarl_problem->seed->seed->zipcode.get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 239d1e0d182..22f8478e6ff 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -70,42 +70,23 @@ class SnarlDistanceIndexClusterer { pos_t pos; size_t source; // Source minimizer. ZipCode zipcode; //zipcode for distance information, optionally stored in the minimizer payload - //TODO: unique_ptr? - std::unique_ptr zipcode_decoder; //The decoder for the zipcode Seed() = default; Seed(pos_t pos, size_t source, ZipCode zipcode) : pos(pos), source(source), zipcode(zipcode) { - ZipCodeDecoder* decoder = new ZipCodeDecoder(&this->zipcode); - zipcode_decoder.reset(decoder); - zipcode_decoder->fill_in_full_decoder(); - } - Seed(pos_t pos, size_t source, ZipCode zipcode, std::unique_ptr zipcode_decoder) : - pos(pos), source(source), zipcode(zipcode), zipcode_decoder(std::move(zipcode_decoder)){ - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } + zipcode.fill_in_full_decoder(); } //Move constructor Seed (Seed&& other) : pos(std::move(other.pos)), source(std::move(other.source)), - zipcode(std::move(other.zipcode)), - zipcode_decoder(std::move(other.zipcode_decoder)) { - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } - } + zipcode(std::move(other.zipcode)){} //Move assignment operator Seed& operator=(Seed&& other) { pos = std::move(other.pos); source = std::move(other.source); zipcode = std::move(other.zipcode); - zipcode_decoder = std::move(other.zipcode_decoder); - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } return *this; } }; @@ -121,9 +102,6 @@ class 
SnarlDistanceIndexClusterer { //TODO: I think I can skip the zipcode now since I have the payload MIPayload payload; - //TODO: This doesn't actually get used but I'll use it if I use the zipcodes properly - //std::unique_ptr zipcode_decoder; - //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds //For a seed in a chain, distance_left is the left of the chain, right is the distance @@ -316,18 +294,18 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); + is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); node_length = distance_index.chain_minimum_length(containing_net_handle); - chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); - is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); + chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); + is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); + node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = seed->seed->zipcode_decoder->get_chain_component(zipcode_depth); + chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); chain_component_end = node_length == std::numeric_limits::max() 
? chain_component_start+1 : chain_component_start; prefix_sum_value = SnarlDistanceIndex::sum( diff --git a/src/subcommand/zipcode_main.cpp b/src/subcommand/zipcode_main.cpp index a4649cb5808..4e61724c04a 100644 --- a/src/subcommand/zipcode_main.cpp +++ b/src/subcommand/zipcode_main.cpp @@ -260,14 +260,14 @@ int main_zipcode(int argc, char** argv) { //Get zip codes ZipCode zip1; zip1.fill_in_zipcode(*distance_index, pos1); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(*distance_index, pos2); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); + zip2.fill_in_full_decoder(); //Time finding distance with the zip codes std::chrono::time_point start = std::chrono::system_clock::now(); - size_t zip_distance = ZipCode::minimum_distance_between(decoder1, pos1, decoder2, pos2, *distance_index); + size_t zip_distance = ZipCode::minimum_distance_between(zip1, pos1, zip2, pos2, *distance_index); std::chrono::time_point end = std::chrono::system_clock::now(); std::chrono::duration elapsed_seconds = end-start; elapsed_seconds_zip.emplace_back(elapsed_seconds.count()); diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index 6ef11d3426f..41c6212d9e1 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -44,6 +44,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -87,6 +88,7 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -121,6 +123,7 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -158,6 +161,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -207,6 +211,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -224,6 +229,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -241,6 +247,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -258,15 +265,18 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -283,15 +293,18 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = 
make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -345,6 +358,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -362,6 +376,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -379,6 +394,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -396,15 +412,18 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -421,15 +440,18 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = 
make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -477,6 +499,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -496,6 +519,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -561,6 +585,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -576,6 +601,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 1); @@ -591,6 +617,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -606,6 +633,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -621,6 +649,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -636,6 +665,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -653,6 +683,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 8); @@ -668,6 +699,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -742,6 +774,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -768,6 +801,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -790,6 +824,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -842,6 +877,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -949,6 +985,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -967,6 +1004,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -986,6 +1024,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -1004,6 +1043,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -1022,6 +1062,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 11); @@ -1068,6 +1109,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1085,6 +1127,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1102,11 +1145,13 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(4, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); 
seeds[1].push_back({ pos, 0, zipcode1}); vector> clusters = clusterer.cluster_seeds(seeds, 3, 3); @@ -1123,6 +1168,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1175,6 +1221,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1192,6 +1239,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1208,6 +1256,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1225,6 +1274,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1287,6 +1337,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1304,6 +1355,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1350,6 +1402,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1357,6 +1410,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds1.push_back({ pos, 0, zipcode}); } @@ -1383,6 +1437,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1390,6 +1445,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1416,6 +1472,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1423,6 +1480,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1450,6 +1508,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1457,6 +1516,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1519,6 +1579,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1536,6 +1597,7 @@ namespace unittest { for (pos_t pos : pos_ts) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1591,6 +1653,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds.push_back({ pos, 0, zipcode}); } @@ -1606,6 +1669,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1620,6 +1684,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1667,6 +1732,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1715,6 +1781,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -1731,6 +1798,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1775,6 +1843,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 20); @@ -1791,6 +1860,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1869,6 +1939,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1921,6 +1992,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1953,6 +2025,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1966,6 +2039,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2004,6 +2078,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2046,6 +2121,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2103,6 +2179,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2122,6 +2199,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2138,6 +2216,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2156,6 +2235,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 5); @@ -2226,6 +2306,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2243,6 +2324,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2260,6 +2342,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2278,6 +2361,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({8, 12}); @@ -2286,6 +2370,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2308,6 +2393,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2325,6 +2411,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2390,6 +2477,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2406,6 +2494,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, 
zipcode}); } @@ -2423,6 +2512,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2440,7 +2530,8 @@ namespace unittest { for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos);; + zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2448,7 +2539,8 @@ namespace unittest { for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos);; + zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2479,6 +2571,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2487,6 +2580,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2554,6 +2648,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2572,6 +2667,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2592,6 +2688,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } 
vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2617,6 +2714,7 @@ namespace unittest { for (pos_t pos : pos_ts[read_num]){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -2645,6 +2743,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2702,6 +2801,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2720,6 +2820,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 6); @@ -2735,6 +2836,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2789,6 +2891,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2804,6 +2907,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2819,6 +2923,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2835,6 +2940,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2874,6 +2980,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2919,6 +3026,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2935,6 +3043,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2951,6 +3060,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2987,6 +3097,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({pos, 0, zipcode}); } @@ -3031,6 +3142,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3047,6 +3159,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3062,6 +3175,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3077,6 +3191,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3118,6 +3233,7 @@ 
namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3134,6 +3250,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3149,6 +3266,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3164,6 +3282,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3202,6 +3321,7 @@ namespace unittest { // for (pos_t pos : pos_ts) { // ZipCode zipcode; // zipcode.fill_in_zipcode(dist_index, pos); + // zipcode.fill_in_full_decoder(); // seeds.push_back({ pos, 0, zipcode}); // } // vector clusters = clusterer.cluster_seeds(seeds, read_lim); @@ -3237,6 +3357,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -3304,6 +3425,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); all_seeds[read].push_back({ pos, 0, zipcode}); } diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index da72dcbdf14..22bd68ac308 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -45,22 +45,22 @@ using namespace std; SECTION("decoder") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 1); - REQUIRE(decoder.decoder.front().first == 1); - 
REQUIRE(decoder.decoder.front().second == 0); + REQUIRE(zipcode.decoder_length() == 1); + REQUIRE(zipcode.decoder.front().first == 1); + REQUIRE(zipcode.decoder.front().second == 0); } SECTION("decoded code") { + cerr << "New code" << endl; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); - ZipCodeDecoder decoder(&zipcode); - - REQUIRE(decoder.get_length(0) == distance_index.minimum_length(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_NODE); + REQUIRE(zipcode.get_length(0) == distance_index.minimum_length(chain1)); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_NODE); } SECTION("n1 as payload") { ZipCode zipcode; @@ -75,9 +75,9 @@ using namespace std; SECTION("Distances within one node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(ZipCode::minimum_distance_between(decoder, make_pos_t(n1->id(), false, 0), - decoder, make_pos_t(n1->id(), false, 3), + zipcode.fill_in_full_decoder(); + REQUIRE(ZipCode::minimum_distance_between(zipcode, make_pos_t(n1->id(), false, 0), + zipcode, make_pos_t(n1->id(), false, 3), distance_index) == 3); } @@ -111,14 +111,14 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component 
number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -135,7 +135,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -159,34 +159,34 @@ using namespace std; SECTION ("decoded zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Next is the node code - REQUIRE(decoder.get_code_type( 1) == ZipCode::NODE); - REQUIRE(decoder.get_length( 1) == distance_index.minimum_length(node1)); - REQUIRE(decoder.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(zipcode.get_code_type( 1) == ZipCode::NODE); + REQUIRE(zipcode.get_length( 1) == distance_index.minimum_length(node1)); + REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); + REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node in simple snarl") { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -203,7 +203,7 @@ using namespace std; //Next is the snarl code //1 for a regular snarl - REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -233,7 +233,7 @@ using namespace std; //Next is the chain code //rank of the chain in the snarl - REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); @@ -254,78 +254,78 @@ using namespace std; SECTION ("decoded zip code for node in simple snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl36 = distance_index.get_parent(chain4); net_handle_t chain1 = distance_index.get_parent(snarl36); - 
REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //values for the snarl - REQUIRE(decoder.get_length(1) == distance_index.minimum_length(snarl36)); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 5 : 6)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(snarl36)); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 5 : 6)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), distance_index.flip(chain4)) != 0; //values for the chain - REQUIRE(decoder.get_length(2) == distance_index.minimum_length(chain4)); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == distance_index.minimum_length(chain4)); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; 
zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - ZipCodeDecoder decoder3(&zip3); - ZipCodeDecoder decoder4(&zip4); - ZipCodeDecoder decoder5(&zip5); - ZipCodeDecoder decoder6(&zip6); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 2), - decoder1, make_pos_t(n1->id(), true, 2), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 2), + zip1, make_pos_t(n1->id(), true, 2), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 6); - REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 1), + REQUIRE(ZipCode::minimum_distance_between(zip4, 
make_pos_t(n4->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 1), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 7); } @@ -426,11 +426,11 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -450,7 +450,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -477,31 +477,31 @@ using namespace std; SECTION ("decode zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, 
&distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); - REQUIRE(decoder.get_length(1) == distance_index.minimum_length(node1)); - REQUIRE(decoder.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(decoder.get_code_type(1) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(node1)); + REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 4); + REQUIRE(zipcode.decoder_length() == 4); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -519,7 +519,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code - REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -550,7 +550,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Next is the chain code - REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == 
std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -566,7 +566,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the node code - REQUIRE(decoder.decoder[3] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == std::make_pair(true, value_and_index.second)); //Offset of the node in the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); @@ -591,45 +591,45 @@ using namespace std; SECTION ("decode zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t chain2 = distance_index.get_parent(node2); net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(decoder.get_length(1) == 0); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == 0); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl1, distance_index.get_bound(snarl1, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(decoder.get_length(2) == 3); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == 3); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); //Node at depth 3 - REQUIRE(decoder.get_length(3) == 1); - REQUIRE(decoder.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); - REQUIRE(decoder.get_code_type(3) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); + REQUIRE(zipcode.get_length(3) == 1); + REQUIRE(zipcode.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); + REQUIRE(zipcode.get_code_type(3) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); } SECTION ("zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 7); + zipcode.fill_in_full_decoder(); + REQUIRE(zipcode.decoder_length() == 7); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -648,7 +648,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 1-8 - 
REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -678,7 +678,7 @@ using namespace std; distance_index.flip(distance_index.canonical(chain2))) != 0; REQUIRE(value_and_index.first == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( @@ -693,7 +693,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 2-7 - REQUIRE(decoder.decoder[3] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == std::make_pair(false, value_and_index.second)); //1 as tag for regular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -722,7 +722,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //Chain code for chain 3-5 - REQUIRE(decoder.decoder[4] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[4] == std::make_pair(true, value_and_index.second)); //Rank in parent value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); @@ -736,7 +736,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //REgular snarl code for snarl 3-5 - REQUIRE(decoder.decoder[5] == 
std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[5] == std::make_pair(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -765,7 +765,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Chain code for node 4 - REQUIRE(decoder.decoder[6] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[6] == std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; @@ -787,6 +787,7 @@ using namespace std; SECTION ("decoded zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl3 = distance_index.get_parent(chain4); @@ -796,119 +797,118 @@ using namespace std; net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(decoder.get_length(1) == 0); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == 0); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); - REQUIRE(decoder.get_length(2) == 3); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == 3); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); //Snarl at depth 3 - REQUIRE(decoder.get_length(3) == 1); - REQUIRE(decoder.get_offset_in_chain(3) == 1); - REQUIRE(decoder.get_code_type(3) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(3) == 1); + REQUIRE(zipcode.get_offset_in_chain(3) == 1); + REQUIRE(zipcode.get_code_type(3) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain3); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain3))) != 0; //Chain at depth 4 - REQUIRE(decoder.get_is_reversed_in_parent(4) == is_rev); - REQUIRE(decoder.get_length(4) == distance_index.minimum_length(chain3)); - REQUIRE(decoder.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(decoder.get_code_type(4) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(4) == is_rev); + REQUIRE(zipcode.get_length(4) == distance_index.minimum_length(chain3)); + REQUIRE(zipcode.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(zipcode.get_code_type(4) == ZipCode::CHAIN); //Snarl3 at depth 5 - REQUIRE(decoder.get_length(5) == 0); - REQUIRE(decoder.get_offset_in_chain(5) == 
(distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); - REQUIRE(decoder.get_code_type(5) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(5) == 0); + REQUIRE(zipcode.get_offset_in_chain(5) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); + REQUIRE(zipcode.get_code_type(5) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain4); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; //node/chain at depth 6 - REQUIRE(decoder.get_is_reversed_in_parent(6) == is_rev); - REQUIRE(decoder.get_length(6) == 4); - REQUIRE(decoder.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(decoder.get_code_type(6) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(6) == is_rev); + REQUIRE(zipcode.get_length(6) == 4); + REQUIRE(zipcode.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(zipcode.get_code_type(6) == ZipCode::CHAIN); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); + zip7.fill_in_full_decoder(); ZipCode zip8; zip8.fill_in_zipcode(distance_index, 
make_pos_t(n8->id(), 0, false)); + zip8.fill_in_full_decoder(); - ZipCodeDecoder decoder1 (&zip1); - ZipCodeDecoder decoder2 (&zip2); - ZipCodeDecoder decoder3 (&zip3); - ZipCodeDecoder decoder4 (&zip4); - ZipCodeDecoder decoder5 (&zip5); - ZipCodeDecoder decoder6 (&zip6); - ZipCodeDecoder decoder7 (&zip7); - ZipCodeDecoder decoder8 (&zip8); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder8, make_pos_t(n8->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip8, make_pos_t(n8->id(), false, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 
std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder8, make_pos_t(n8->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip8, make_pos_t(n8->id(), true, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder7, make_pos_t(n7->id(), true, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip7, make_pos_t(n7->id(), true, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 2); } @@ -1048,11 +1048,11 @@ using namespace std; SECTION ("zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1071,7 +1071,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(decoder.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //0 as tag for irregular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2); @@ -1119,7 +1119,7 @@ using namespace std; //REQUIRE(value_and_index.first == 
(distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //Rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1138,105 +1138,108 @@ using namespace std; SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); net_handle_t snarl1 = distance_index.get_parent(chain3); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl1 at depth 1 - REQUIRE(decoder.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 6 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::CYCLIC_SNARL); + REQUIRE(zipcode.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
6 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CYCLIC_SNARL); //chain3 at depth 3 - REQUIRE(decoder.get_length(2) == 1); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(2) == 1); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); bool chain_is_rev = distance_index.is_reversed_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))); //node1 to left side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); //Node 1 to right side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); //node4 to left side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); //Node 4 to right side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; 
zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); + zip7.fill_in_full_decoder(); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - ZipCodeDecoder decoder3(&zip3); - ZipCodeDecoder decoder4(&zip4); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder1, make_pos_t(n1->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip1, make_pos_t(n1->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 3); //Shouldn't take the loop in the chain - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), - decoder1, make_pos_t(n1->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), + zip1, make_pos_t(n1->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), - 
decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 1), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 1), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); } @@ -1341,11 +1344,11 @@ using namespace std; SECTION ("zip code for node in top-level snarl") { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(decoder.decoder[0] == std::make_pair(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1356,7 +1359,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(decoder.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); @@ -1367,32 +1370,32 @@ using namespace std; SECTION ("decoded zip code for node in top-level snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); net_handle_t root_snarl = distance_index.get_parent(chain1); //Root snarl - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(distance_index.get_parent(chain1))); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); //Chain1 at depth 1 - REQUIRE(decoder.get_length(1) == 3); - REQUIRE(decoder.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); - 
REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(1) == 3); + REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); } SECTION ("zip code for node in chain in top-level snarl") { net_handle_t node1 = distance_index.get_node_net_handle(n3->id()); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(decoder.decoder[0] == std::make_pair(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1403,7 +1406,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(decoder.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1415,7 +1418,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Node 3 - REQUIRE(decoder.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); @@ -1430,67 +1433,69 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); //Root snarl - REQUIRE(decoder.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(zipcode.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); //chain2 at depth 1 - REQUIRE(decoder.get_length(1) == 2); - REQUIRE(decoder.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(1) == 2); + REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); //node3 at depth 2 - REQUIRE(decoder.get_length(2) == 1); - REQUIRE(decoder.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)); - REQUIRE(decoder.get_code_type(2) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); + REQUIRE(zipcode.get_length(2) == 1); + REQUIRE(zipcode.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); - ZipCodeDecoder zip_decoder1(&zip1); - ZipCodeDecoder zip_decoder2(&zip2); - ZipCodeDecoder zip_decoder3(&zip3); - ZipCodeDecoder zip_decoder6(&zip6); - ZipCodeDecoder zip_decoder7(&zip7); - - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder2, make_pos_t(n2->id(), false, 0), + zip7.fill_in_full_decoder(); + + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), true, 0), - zip_decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), true, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), 
false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder3, make_pos_t(n3->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), true, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder6, make_pos_t(n6->id(), false, 0), - zip_decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip6, make_pos_t(n6->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 1); } @@ -1597,14 +1602,14 @@ using namespace std; net_handle_t grandparent = distance_index.get_parent(parent); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -1621,7 +1626,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -1646,8 +1651,10 @@ using namespace std; SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), false, 0)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), false, 0)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), false, 0)); ZipCode zip4; @@ -1659,10 +1666,8 @@ using namespace std; ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), false, 0)); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); @@ -1792,30 +1797,30 @@ using namespace std; SECTION( "node2" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t parent = distance_index.get_parent(node2); net_handle_t bound = distance_index.get_bound(parent, true, false); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(distance_index.minimum_length(node2) == decoder.get_length(1)); - REQUIRE(decoder.get_chain_component(1) == distance_index.get_chain_component(node2)); - REQUIRE(decoder.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); - REQUIRE(decoder.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); - REQUIRE(decoder.get_is_looping_chain(0)); + 
REQUIRE(distance_index.minimum_length(node2) == zipcode.get_length(1)); + REQUIRE(zipcode.get_chain_component(1) == distance_index.get_chain_component(node2)); + REQUIRE(zipcode.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); + REQUIRE(zipcode.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); + REQUIRE(zipcode.get_is_looping_chain(0)); } SECTION( "node5" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node = distance_index.get_node_net_handle(n5->id()); net_handle_t parent = distance_index.get_parent(node); net_handle_t bound = distance_index.get_bound(parent, true, false); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.minimum_length(node) == decoder.get_length(decoder.max_depth())); + REQUIRE(distance_index.minimum_length(node) == zipcode.get_length(zipcode.max_depth())); } } TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { @@ -1848,14 +1853,14 @@ using namespace std; SECTION( "Check connectivity" ) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); - ZipCodeDecoder decoder(&zipcode); + zipcode.fill_in_full_decoder(); - REQUIRE(decoder.get_length(1) == 1); + REQUIRE(zipcode.get_length(1) == 1); if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { - REQUIRE(decoder.is_externally_end_end_connected(0)); + REQUIRE(zipcode.is_externally_end_end_connected(0)); } else { - REQUIRE(decoder.is_externally_start_start_connected(0)); + REQUIRE(zipcode.is_externally_start_start_connected(0)); } } diff --git a/src/unittest/zip_code_tree.cpp b/src/unittest/zip_code_tree.cpp index 409f386a50d..3e3765948df 100644 --- a/src/unittest/zip_code_tree.cpp +++ b/src/unittest/zip_code_tree.cpp @@ -40,6 +40,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -84,6 +85,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -154,6 +156,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -264,6 +267,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -386,6 +390,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -432,6 +437,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -494,6 +500,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -578,6 +585,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -627,6 +635,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -760,6 +769,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -834,6 
+844,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -871,6 +882,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -908,6 +920,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -944,6 +957,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -978,6 +992,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1003,6 +1018,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1029,6 +1045,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1055,6 +1072,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1081,6 +1099,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1138,6 +1157,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1195,6 +1215,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1250,6 +1271,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1351,6 +1373,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1415,6 +1438,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1506,6 +1530,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1538,6 +1563,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1568,6 +1594,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1593,6 +1620,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1620,6 +1648,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1647,6 +1676,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1673,6 +1703,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1775,6 +1806,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1806,6 +1838,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1835,6 +1868,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1866,6 +1900,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1923,6 +1958,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -1993,6 +2029,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2063,6 +2100,7 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2106,6 +2144,7 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2184,6 +2223,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2238,6 +2278,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2282,6 +2323,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2324,6 +2366,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2373,6 +2416,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2423,6 +2467,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2488,6 +2533,7 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2552,6 +2598,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2572,6 +2619,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2614,6 +2662,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2633,6 +2682,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2677,6 +2727,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2696,6 +2747,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2715,6 +2767,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2779,6 +2832,7 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2824,6 +2878,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); } distance_index.for_each_child(distance_index.get_root(), [&](net_handle_t child) { @@ -2890,6 +2945,7 @@ namespace unittest { 
ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, (size_t)j, zipcode}); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 60a764bca2c..9e5debeb7c9 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -137,16 +137,9 @@ void ZipCode::from_vector(const std::vector& values) { zipcode.from_vector(values); } -ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : - zipcode(zipcode), decoder(0), finished_decoding(false) { - if (zipcode != nullptr) { - decoder.reserve(zipcode->byte_count() / 4); - fill_in_full_decoder(); - } -} -void ZipCodeDecoder::fill_in_full_decoder() { - if (zipcode->byte_count() == 0 || finished_decoding) { +void ZipCode::fill_in_full_decoder() { + if (byte_count() == 0 || finished_decoding) { //If the zipcode is empty return; } @@ -157,7 +150,7 @@ void ZipCodeDecoder::fill_in_full_decoder() { finished_decoding = true; } -bool ZipCodeDecoder::fill_in_next_decoder() { +bool ZipCode::fill_in_next_decoder() { #ifdef DEBUG_ZIPCODE cerr << "Decode one more thing in the zipcode. Currently decoded " << decoder_length() << " things" << endl; #endif @@ -178,7 +171,7 @@ bool ZipCodeDecoder::fill_in_next_decoder() { if (zip_length == 0) { //If there is nothing in the decoder yet, then the first thing will start at 0 for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //Is the root a chain/node? @@ -201,7 +194,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? 
"chain or node" assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_index == std::numeric_limits::max()) { //If the zip code ends here (after the length), then this was a node and we're done @@ -217,7 +210,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //If it's a node, then there are three remaining things in the index //If it were a snarl, then there are more than three things for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -232,7 +225,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } else { //Otherwise, the top-level thing is a snarl and the next thing is a chain for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -264,7 +257,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //chain size_t check_zip_index = zip_index; for (size_t i = 0 ; i < std::min(ZipCode::CHAIN_SIZE, ZipCode::NODE_SIZE) ; i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //If the zipcode ends after a chain if (check_zip_index == std::numeric_limits::max()) { @@ -277,7 +270,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Now check 
if it was actually a real node for (size_t i = 0 ; i < std::max(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE) - std::min(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE); i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //This might be a node that is a child of the chain, in which case there is one @@ -297,7 +290,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Otherwise, the last thing was a chain //Get to the end of the chain for (size_t i = 0 ; i < ZipCode::CHAIN_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //zip_index is now the start of the current thing that we want to add - the thing after the chain @@ -312,7 +305,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Check if the current thing is a node check_zip_index = zip_index; for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //Return the start of this thing, and true if it was a node @@ -328,7 +321,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //The regular/irregular snarl tag for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { @@ -337,7 +330,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #endif //Regular snarl, so 2 remaining things in the code for (size_t i = 0 ; i < ZipCode::REGULAR_SNARL_SIZE - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - 
std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -349,7 +342,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //is a top-level irregular snarl. Otherwise a normal irregular snarl size_t code_size = ZipCode::IRREGULAR_SNARL_SIZE; for (size_t i = 0 ; i < code_size - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -358,12 +351,12 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } } -size_t ZipCodeDecoder::max_depth() const { +size_t ZipCode::max_depth() const { return decoder_length()-1; } -ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { +ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { //Now get the code type //A snarl is always a snarl. 
A chain could actually be a node @@ -396,7 +389,7 @@ ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 0) { return ZipCode::IRREGULAR_SNARL; @@ -409,7 +402,7 @@ ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { } } -size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -419,7 +412,7 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -435,7 +428,7 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { @@ -445,14 +438,14 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } } -size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { +size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { if (depth == 0) { @@ -469,7 +462,7 @@ size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -478,7 +471,7 @@ size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { } } -size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -496,7 +489,7 @@ size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDis size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -505,7 +498,7 @@ size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDis } } -size_t ZipCodeDecoder::get_offset_in_chain(const size_t& 
depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -521,7 +514,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -531,13 +524,13 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } -size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { +size_t ZipCode::get_chain_component(const size_t& depth) const { if (depth == 0) { @@ -553,7 +546,7 @@ size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -563,14 +556,14 @@ size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } } -size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_end) const { +size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) const { if (!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -578,7 +571,7 @@ size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_en size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value % 2) { if (!get_end) { @@ -591,7 +584,7 @@ size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_en return zip_value / 2; } -bool ZipCodeDecoder::get_is_looping_chain(const size_t& depth) const { +bool 
ZipCode::get_is_looping_chain(const size_t& depth) const { if (!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -599,11 +592,11 @@ bool ZipCodeDecoder::get_is_looping_chain(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value % 2; } -bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { +bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { if (depth == 0) { @@ -619,7 +612,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -628,14 +621,14 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_index = decoder[depth-1].second; //zip_value is true if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return 
zip_value; } else { @@ -649,7 +642,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { } } -net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { //get_net_handle_slow does the same thing so if this gets changed need to change that too @@ -658,7 +651,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -673,7 +666,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist size_t zip_index = decoder[depth].second; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -685,7 +678,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -693,7 +686,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const 
size_t& depth, const SnarlDist } } -net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -701,7 +694,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -723,7 +716,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, size_t zip_index = decoder[depth].second; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -742,7 +735,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -751,7 +744,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& 
depth, } -size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { +size_t ZipCode::get_distance_index_address(const size_t& depth) const { if (depth == 0) { @@ -759,7 +752,7 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -774,7 +767,7 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { size_t zip_index = decoder[depth].second; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -786,13 +779,13 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } } } -size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { +size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { #ifdef DEBUG_ZIPCODE assert(depth > 0); @@ -802,13 +795,13 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna size_t zip_index = decoder[depth-1].second; //zip_value is 1 if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { 
- std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //Zip value is true if the child is reversed @@ -831,53 +824,53 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna distance_offset = ZipCode::IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET; } for (size_t i = 0 ; i <= distance_offset - ZipCode::SNARL_IS_REGULAR_OFFSET -1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value - 1; } } -bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { +bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 1) != 0; } -bool ZipCodeDecoder::is_externally_start_start_connected (const size_t& depth) const { +bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 2) != 0; } -bool ZipCodeDecoder::is_externally_end_end_connected (const size_t& depth) const { +bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); size_t zip_value; size_t zip_index = decoder[depth].second; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 4) != 0; } -const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, +const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, const size_t& depth) { - if (decoder1.max_depth() < depth && decoder2.max_depth() < depth ) { + if (zip1.max_depth() 
< depth && zip2.max_depth() < depth ) { return false; } //First, check if the code types are the same - ZipCode::code_type_t type1 = decoder1.get_code_type(depth); - ZipCode::code_type_t type2 = decoder2.get_code_type(depth); + ZipCode::code_type_t type1 = zip1.get_code_type(depth); + ZipCode::code_type_t type2 = zip2.get_code_type(depth); if (type1 != type2) { return false; } @@ -885,44 +878,39 @@ const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCod if (type1 == ZipCode::ROOT_NODE || type1 == ZipCode::ROOT_CHAIN || type1 == ZipCode::ROOT_SNARL || type1 == ZipCode::IRREGULAR_SNARL || type1 == ZipCode::CYCLIC_SNARL ) { //If the codes are for root-structures or irregular/cyclic snarls, just check if the //connected component numbers are the same - return decoder1.get_distance_index_address(depth) == decoder2.get_distance_index_address(depth); + return zip1.get_distance_index_address(depth) == zip2.get_distance_index_address(depth); } else { //Check the parent type. If the parent is a snarl, then check rank. 
If it's a chain, //then check the prefix sum - if (decoder1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { + if (zip1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || + zip1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || + zip1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || + zip1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { //If the parent is a snarl, then check the rank - return decoder1.get_rank_in_snarl(depth) == decoder2.get_rank_in_snarl(depth); + return zip1.get_rank_in_snarl(depth) == zip2.get_rank_in_snarl(depth); } else { //Otherwise, check the offset in the chain //Since the type is the same, this is sufficient - return decoder1.get_offset_in_chain(depth) == decoder2.get_offset_in_chain(depth); + return zip1.get_offset_in_chain(depth) == zip2.get_offset_in_chain(depth); } } } -void ZipCodeDecoder::dump(std::ostream& out) const { - if (!zipcode) { - // We're decoding nothing - out << *this; - } else { - std::vector numbers = zipcode->to_vector(); - // Print out the numbers in a way that is easy to copy-paste as a vector literal. - out << " numbers = to_vector(); + // Print out the numbers in a way that is easy to copy-paste as a vector literal. 
+ out << ""; } + out << "}>"; } -std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder) { - return out << ""; +std::ostream& operator<<(std::ostream& out, const ZipCode& zip) { + return out << ""; } @@ -1056,8 +1044,8 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons } -size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos_t& pos1, - ZipCodeDecoder& zip2_decoder, const pos_t& pos2, const SnarlDistanceIndex& distance_index, +size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, + ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit, bool undirected_distance, const HandleGraph* graph){ @@ -1065,11 +1053,11 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Make sure that the zip codes actually correspond to the positions ZipCode check_zip1; check_zip1.fill_in_zipcode(distance_index, pos1); - assert(*zip1_decoder.zipcode == check_zip1); + assert(zip1 == check_zip1); ZipCode check_zip2; check_zip2.fill_in_zipcode(distance_index, pos2); - assert(*zip2_decoder.zipcode == check_zip2); + assert(zip2 == check_zip2); cerr << endl << "Minimum distance between " << pos1 << " and " << pos2 << " using zipcodes" << endl; cerr << "Ancestors for " << pos1 << endl; @@ -1090,7 +1078,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Helper function to update the distances to the ends of the parent //distance_start and distance_end get updated - auto update_distances_to_ends_of_parent = [&] (ZipCodeDecoder& decoder, const size_t& child_depth, + auto update_distances_to_ends_of_parent = [&] (ZipCode& zip, const size_t& child_depth, size_t& distance_to_start, size_t& distance_to_end) { #ifdef DEBUG_ZIPCODE cerr << "Update distance to ends of parent at depth " << child_depth << endl; @@ -1101,12 +1089,12 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, 
const pos size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - code_type_t parent_type = decoder.get_code_type(child_depth-1); + code_type_t parent_type = zip.get_code_type(child_depth-1); if (parent_type == IRREGULAR_SNARL || parent_type == CYCLIC_SNARL) { //If the parent is an irregular snarl - net_handle_t parent_handle = decoder.get_net_handle(child_depth-1, &distance_index); - size_t child_rank = decoder.get_rank_in_snarl(child_depth); + net_handle_t parent_handle = zip.get_net_handle(child_depth-1, &distance_index); + size_t child_rank = zip.get_rank_in_snarl(child_depth); distance_start_left = distance_index.distance_in_snarl(parent_handle, child_rank, false, 0, false, graph); distance_start_right = distance_index.distance_in_snarl(parent_handle, @@ -1121,7 +1109,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos } else if (parent_type == REGULAR_SNARL) { //If its a regular snarl, then the distances to the ends are either 0 or inf //For a regular snarl, the snarl stores if the child was reversed, rather than the child - if (decoder.get_is_reversed_in_parent(child_depth)) { + if (zip.get_is_reversed_in_parent(child_depth)) { distance_start_left = std::numeric_limits::max(); distance_start_right = 0; distance_end_right = std::numeric_limits::max(); @@ -1136,30 +1124,30 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos cerr << "Distances to parent regular snarl: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; #endif } else if (parent_type == CHAIN) { - if (decoder.get_code_type(child_depth) == NODE && - decoder.get_is_reversed_in_parent(child_depth)){ + if (zip.get_code_type(child_depth) == NODE && + zip.get_is_reversed_in_parent(child_depth)){ //If this is reversed in the chain distance_start_left = std::numeric_limits::max(); distance_end_right = 
std::numeric_limits::max(); //Prefix sum of the child - distance_end_left = decoder.get_offset_in_chain(child_depth, &distance_index); + distance_end_left = zip.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_start_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - decoder.get_length(child_depth-1, &distance_index), - decoder.get_offset_in_chain(child_depth, &distance_index)), - decoder.get_length(child_depth, &distance_index)); + zip.get_length(child_depth-1, &distance_index), + zip.get_offset_in_chain(child_depth, &distance_index)), + zip.get_length(child_depth, &distance_index)); } else { //If it is a node that isn't reversed in the chain, or it's a snarl which is never reversed distance_end_left = std::numeric_limits::max(); distance_start_right = std::numeric_limits::max(); //Prefix sum of the child - distance_start_left = decoder.get_offset_in_chain(child_depth, &distance_index); + distance_start_left = zip.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_end_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - decoder.get_length(child_depth-1, &distance_index), - decoder.get_offset_in_chain(child_depth, &distance_index)), - decoder.get_length(child_depth, &distance_index)); + zip.get_length(child_depth-1, &distance_index), + zip.get_offset_in_chain(child_depth, &distance_index)), + zip.get_length(child_depth, &distance_index)); } #ifdef DEBUG_ZIPCODE cerr << "Distances to parent chain: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; @@ -1177,7 +1165,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos }; - if (zip1_decoder.get_distance_index_address(0) != zip2_decoder.get_distance_index_address(0)) { + if (zip1.get_distance_index_address(0) != 
zip2.get_distance_index_address(0)) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif @@ -1186,18 +1174,17 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //The two positions are in the same connected component so now fill in the rest //of the decoder and try to find the distance - zip1_decoder.fill_in_full_decoder(); - zip2_decoder.fill_in_full_decoder(); + zip1.fill_in_full_decoder(); + zip2.fill_in_full_decoder(); //Now find the lowest common ancestor of the two zipcodes size_t lowest_common_ancestor_depth = 0; bool still_equal = true; while (still_equal) { - if (lowest_common_ancestor_depth == zip1_decoder.decoder_length()-1 || - lowest_common_ancestor_depth == zip2_decoder.decoder_length()-1 || - !ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, - lowest_common_ancestor_depth+1)) { + if (lowest_common_ancestor_depth == zip1.decoder_length()-1 || + lowest_common_ancestor_depth == zip2.decoder_length()-1 || + !ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth+1)) { //If we've hit the end of either decoder or if they are no longer equal, //Then break the loop and keep the current lowest_common_ancestor_depth still_equal = false; @@ -1221,26 +1208,26 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos if (distance_limit != std::numeric_limits::max() && - lowest_common_ancestor_depth < zip1_decoder.decoder_length()-1){ + lowest_common_ancestor_depth < zip1.decoder_length()-1){ //If we're aborting when the distance is definitely too far, - code_type_t ancestor_type = zip1_decoder.get_code_type(lowest_common_ancestor_depth); + code_type_t ancestor_type = zip1.get_code_type(lowest_common_ancestor_depth); if (ancestor_type == CHAIN || ancestor_type == ROOT_CHAIN) { //If the current ancestor is a chain, then check the distance - size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); - size_t 
prefix_sum2 = zip2_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + size_t prefix_sum1 = zip1.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + size_t prefix_sum2 = zip2.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); size_t distance_in_chain; if (prefix_sum1 < prefix_sum2) { //zip1 comes before zip2 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum2, SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip1.get_length(lowest_common_ancestor_depth+1, &distance_index))); } else { //zip2 comes before zip1 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum1, SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip2.get_length(lowest_common_ancestor_depth+1, &distance_index))); } if (distance_in_chain > distance_limit) { return std::numeric_limits::max(); @@ -1250,15 +1237,15 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Start from the nodes size_t distance_to_start1 = is_rev(pos1) - ? zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1) + ? zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1) : offset(pos1) + 1; size_t distance_to_end1 = is_rev(pos1) ? offset(pos1) + 1 - : zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1); + : zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1); size_t distance_to_start2 = is_rev(pos2) - ? zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2) + ? zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2) : offset(pos2) + 1; size_t distance_to_end2 = is_rev(pos2) ? 
offset(pos2) + 1 - : zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2); + : zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2); if (!undirected_distance) { //These are directed distances so set backwards distances to inf @@ -1281,22 +1268,22 @@ cerr << "Finding distances to ancestors of first position" << endl; //Now walk up the snarl tree from each position to one level below the lowest common ancestor - for (int i = zip1_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip1.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip1_decoder, i+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip1, i+1, distance_to_start1, distance_to_end1); } #ifdef DEBUG_ZIPCODE cerr << "Finding distances to ancestors of second position" << endl; #endif //The same thing for the second position - for (int i = zip2_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip2.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip2_decoder, i+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip2, i+1, distance_to_start2, distance_to_end2); } @@ -1305,7 +1292,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "Distances in children of common ancestor: " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; //Check that the current nodes are actually children of 
the lca - assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, lowest_common_ancestor_depth)); + assert(ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth)); #endif //Find the distance between them in the lowest common ancestor @@ -1320,18 +1307,18 @@ cerr << "Finding distances to ancestors of second position" << endl; cerr << "At " << depth << "st/th ancestor" << endl; cerr << "\tdistances are " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; #endif - if (depth == zip1_decoder.decoder_length()-1) { + if (depth == zip1.decoder_length()-1) { //If the lca is a node that both positions are on #ifdef DEBUG_ZIPCODE //If the lca is a node, then both the zipcode nodes should be the same node - assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth)); - assert(depth == zip2_decoder.decoder_length()-1); + assert(ZipCode::is_equal(zip1, zip2, depth)); + assert(depth == zip2.decoder_length()-1); cerr << "\tAncestor should be a node" << endl; #endif size_t d1 = SnarlDistanceIndex::sum(distance_to_end1, distance_to_start2); size_t d2 = SnarlDistanceIndex::sum(distance_to_end2, distance_to_start1); - size_t node_length = zip1_decoder.get_length(depth, &distance_index); + size_t node_length = zip1.get_length(depth, &distance_index); if (d1 > node_length) { distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d1, node_length),1)); @@ -1340,31 +1327,31 @@ cerr << "Finding distances to ancestors of second position" << endl; distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d2, node_length),1)); } - } else if ( zip1_decoder.decoder[depth].first) { + } else if ( zip1.decoder[depth].first) { #ifdef DEBUG_ZIPCODE cerr << "\tancestor should be a chain" << endl; #endif //If this ancestor is a chain //If the children are reversed in the chain, then flip their distances - bool rev1 = 
(zip1_decoder.get_code_type(depth+1) == NODE && - zip1_decoder.get_is_reversed_in_parent(depth+1)); + bool rev1 = (zip1.get_code_type(depth+1) == NODE && + zip1.get_is_reversed_in_parent(depth+1)); size_t dist_start1 = rev1 ? distance_to_end1 : distance_to_start1; size_t dist_end1 = rev1 ? distance_to_start1 : distance_to_end1; - bool rev2 = zip2_decoder.get_code_type(depth+1) == NODE && - zip2_decoder.get_is_reversed_in_parent(depth+1); + bool rev2 = zip2.get_code_type(depth+1) == NODE && + zip2.get_is_reversed_in_parent(depth+1); size_t dist_start2 = rev2 ? distance_to_end2 : distance_to_start2; size_t dist_end2 = rev2 ? distance_to_start2 : distance_to_end2; //If they are the same child, then there is no path between them in the chain because we don't allow loops //So first check that they aren't the same - if (!(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth+1) - )){//TODO: I think this is unnecessary || (zip1_decoder.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) - size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(depth+1, &distance_index); - size_t prefix_sum2 = zip2_decoder.get_offset_in_chain(depth+1, &distance_index); - code_type_t code_type1 = zip1_decoder.get_code_type(depth+1); - code_type_t code_type2 = zip2_decoder.get_code_type(depth+1); + if (!(ZipCode::is_equal(zip1, zip2, depth+1) + )){//TODO: I think this is unnecessary || (zip1.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) + size_t prefix_sum1 = zip1.get_offset_in_chain(depth+1, &distance_index); + size_t prefix_sum2 = zip2.get_offset_in_chain(depth+1, &distance_index); + code_type_t code_type1 = zip1.get_code_type(depth+1); + code_type_t code_type2 = zip2.get_code_type(depth+1); if (prefix_sum1 < prefix_sum2 || (prefix_sum1 == prefix_sum2 && @@ -1378,7 +1365,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it is a snarl" << endl; - cerr << "Find distances from : 
" << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1388,7 +1375,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(depth+1, &distance_index))), + zip1.get_length(depth+1, &distance_index))), dist_end1),1)); } } else { @@ -1396,7 +1383,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(Prefix sum 2 + distance left 2) - (prefix sum1+ length 1) + distance right 1 #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it isn't a snarl" << endl; - cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1.get_length(depth+1, &distance_index) << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1407,7 +1394,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(depth+1, &distance_index))), + zip1.get_length(depth+1, &distance_index))), dist_end1),1) ); } @@ -1419,7 +1406,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(prefix sum 1 + distance left 1) - (prefix sum 2 + length 2) + distance right 2 #ifdef DEBUG_ZIPCODE cerr << "Second child comes first in the chain 
and it is a snarl" << endl; - cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2_decoder.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; + cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; #endif if (dist_start1 != std::numeric_limits::max() && dist_end2 != std::numeric_limits::max() ){ @@ -1429,7 +1416,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(depth+1, &distance_index))), + zip2.get_length(depth+1, &distance_index))), dist_end2), 1)); } } else { @@ -1448,7 +1435,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(depth+1, &distance_index))), + zip2.get_length(depth+1, &distance_index))), dist_end2),1) ); } @@ -1456,8 +1443,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } } //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); } else { #ifdef DEBUG_ZIPCODE @@ -1467,11 +1454,11 @@ cerr << "Finding distances to ancestors of second position" << endl; //If the parent is a regular snarl, then there is no path between them so //just update the distances to the ends of the parent - if (zip1_decoder.get_code_type(depth) != REGULAR_SNARL) { + if (zip1.get_code_type(depth) != 
REGULAR_SNARL) { //Parent may be an irregular snarl or a root snarl (which is also irregular) - net_handle_t parent_handle = zip1_decoder.get_net_handle(depth, &distance_index); - size_t rank1 = zip1_decoder.get_rank_in_snarl(depth+1); - size_t rank2 = zip2_decoder.get_rank_in_snarl(depth+1); + net_handle_t parent_handle = zip1.get_net_handle(depth, &distance_index); + size_t rank1 = zip1.get_rank_in_snarl(depth+1); + size_t rank2 = zip2.get_rank_in_snarl(depth+1); #ifdef DEBUG_ZIPCODE cerr << "irregular snarl so find distances in the distance index: " << distance_index.net_handle_as_string(parent_handle) << endl; cerr << "\t at offset " << distance_index.get_record_offset(parent_handle) << endl; @@ -1504,8 +1491,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } #endif //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); } #ifdef DEBUG_ZIPCODE cerr << "distance in ancestor: " << distance_between << endl; @@ -1868,7 +1855,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; if (decoder_length() == 1) { @@ -1880,15 +1867,15 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance size_t zip_value; size_t zip_index = decoder[0].second; //Root is chain - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); //root_identifier - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); //Root node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; payload.is_trivial_chain = true; @@ -1907,17 +1894,17 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance size_t zip_value; size_t zip_index = decoder[max_depth()-1].second; //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (decoder_length() == 2) { //If the node is a child of the root chain payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } else { payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; @@ -1925,20 +1912,20 @@ MIPayload 
ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); //chain component count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Node prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //Node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //is_reversed - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: For top-level chains we got this from the distance index payload.is_reversed = zip_value; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.chain_component = zip_value; @@ -1961,9 +1948,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_is_root) { //is_chain zip_index = decoder[0].second; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; payload.parent_record_offset = 
distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, @@ -1973,7 +1960,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } else { zip_index = decoder[max_depth()-1].second; //is_regular - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //If this is a non-root snarl, get as much as we can from it payload.parent_type = ZipCode::EMPTY; if (zip_value == 0) { @@ -1985,20 +1972,20 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Snarl length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Snarl child_count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: SHould use this somehow payload.chain_component = 0; //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed @@ -2022,9 +2009,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //We should be at the node/trivial chain now zip_index = decoder[max_depth()].second; //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Get the rest as default values @@ -2043,7 +2030,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance return payload; } -net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { +net_identifier_t ZipCode::get_identifier(size_t depth) const { if (depth == std::numeric_limits::max()) { //This is equivalent to distance_index.get_root() return "ROOT"; @@ -2056,7 +2043,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else if (decoder[d].first) { @@ -2066,7 +2053,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else { @@ -2074,7 +2061,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2083,7 +2070,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].second; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { - std::tie(zip_value, zip_index) = 
zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2100,7 +2087,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { return result; } -const net_identifier_t ZipCodeDecoder::get_parent_identifier(const net_identifier_t& child) { +const net_identifier_t ZipCode::get_parent_identifier(const net_identifier_t& child) { if (child == "ROOT") { throw std::runtime_error("error: trying to get the parent of the root net_identifier_t"); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 376d7d1483e..992a8e27dc3 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -19,18 +19,14 @@ using namespace std; * A ZipCode stores the information and can be used to create a zipcode. It can be used * to calculate the distance between zipcodes * - * A ZipCodeDecoder is used for interpreting zipcodes to find specific values that were - * stored in the ZipCode. A ZipCodeDecoder must be constructed from a specific zipcode. + * A decoder is used for interpreting zipcodes to find specific values that were + * stored in the ZipCode. * Construction of a decoder occurs one code at a time, starting from the root snarl or chain, - * so it is possible to have a partially constructed ZipCodeDecoder, to avoid having to + * so it is possible to have a partially constructed decoder, to avoid having to * walk through the entire ZipCode to get the values for things higher in the snarl tree. * The full decoder must be constructed to get values for the node. */ -///A decoder for interpreting a zipcode -///Can interpret the values for a snarl tree node given the depth -///(depth in the snarl tree, also the index into the zipcode vector) -class ZipCodeDecoder; ///A struct to interpret the minimizer payload @@ -59,7 +55,8 @@ class ZipCode { /// Regular snarls are bubbles. 
Irregular snarls are snarls that aren't bubbles but are dags /// Cyclic snarls are non-dags. They are stored the same as irregular snarls. Only the type is different public: - enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; + enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; + public: //Fill in an empty zipcode given a position @@ -83,8 +80,8 @@ class ZipCode { //The decoders may or may not be filled in, and may be filled in when this is run //If distance_limit is set, return std::numeric_limits::max() if the distance //will be greater than the distance limit - static size_t minimum_distance_between(ZipCodeDecoder& zip_decoder1, const pos_t& pos1, - ZipCodeDecoder& zip_decoder2, const pos_t& pos2, + static size_t minimum_distance_between(ZipCode& zip1, const pos_t& pos1, + ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit = std::numeric_limits::max(), bool undirected_distance=false, @@ -214,167 +211,156 @@ class ZipCode { const SnarlDistanceIndex& distance_index); //Return a vector of size_ts that will represent the snarl in the zip code inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); - friend class ZipCodeDecoder; -}; -/// Print a code type to a stream -std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type); - -//A structure for holding a vector of zipcodes -//This is really just used for serializing -class ZipCodeCollection { - private: - vector zipcodes; + //////////////////////////////// Stuff for decoding the zipcode public: - ZipCodeCollection () {} - - void serialize(std::ostream& out) const; - void deserialize(std::istream& in); - bool empty() const {return zipcodes.empty();} - ZipCode at(size_t i) const {return zipcodes.at(i);} - void 
emplace_back(ZipCode zip) {zipcodes.emplace_back(zip);} - size_t size() const { return zipcodes.size();} + //TODO: Make the decoder and zipcode private, still need it for unit testing + ///The decoder as a vector of pair, one for each snarl tree node in the zip + ///where is_chain indicates whether it's a chain/node, and index + ///is the index of the node/snarl/chain code in the varint_vector_t + std::vector> decoder; - private: + ///Did we fill in the entire decoder + ///TODO: I'm making it fill in the decoder automatically because it seems to be faster that way, instead of + /// waiting to see which parts are actually needed + bool finished_decoding = false; - //magic number to identify the file - const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t version = 2; - - public: - const static std::uint32_t get_magic_number() {return magic_number;} - const static std::string get_magic_number_as_string() { - std::uint32_t num = get_magic_number(); - return std::string(reinterpret_cast(&num), sizeof(num)); - } + public: + ///Go through the entire zipcode and fill in the decoder + void fill_in_full_decoder(); -}; + ///Fill in one more item in the decoder + ///Returns true if this is the last thing in the zipcode and false if there is more to decode + bool fill_in_next_decoder(); + ///What is the maximum depth of this zipcode? + size_t max_depth() const; -/* - * Struct for interpreting a ZipCode - */ -class ZipCodeDecoder { + ///How many codes in the zipcode have been decoded? 
+ size_t decoder_length() const {return decoder.size();} - public: - //TODO: Make the decoder and zipcode private, still need it for unit testing - ///The decoder as a vector of pair, one for each snarl tree node in the zip - ///where is_chain indicates whether it's a chain/node, and index - ///is the index of the node/snarl/chain code in the varint_vector_t - std::vector> decoder; + ///What type of snarl tree node is at the given depth (index into the zipcode) + ZipCode::code_type_t get_code_type(const size_t& depth) const ; - ///The zipcode that this is decoding - const ZipCode* zipcode; + ///Get the length of a snarl tree node given the depth in the snarl tree + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - ///Did we fill in the entire decoder - bool finished_decoding; + ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl + size_t get_rank_in_snarl(const size_t& depth) const ; - public: + ///Get the number of children in a snarl. 
Throw an exception if it isn't a snarl + size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - ///Constructor that goes through the zipcode and decodes it to fill in decoder - ///If a depth is given, then only fill in up to depth snarl tree nodes - ///Otherwise, fill in the whole zipcode - ZipCodeDecoder(const ZipCode* zipcode = nullptr); + ///Get the prefix sum of a child of a chain + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - ///Go through the entire zipcode and fill in the decoder - void fill_in_full_decoder(); + ///Get the chain component of a chain child. + ///For snarls, this will be the component of the start node + size_t get_chain_component(const size_t& depth) const ; - ///Fill in one more item in the decoder - ///Returns true if this is the last thing in the zipcode and false if there is more to decode - bool fill_in_next_decoder(); + ///Get the chain component of the last node in the chain + /// This behaves like the distance index get_chain_component- + /// for looping chains it returns the last component if get_end is true, + /// and 0 if it is false + size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; + bool get_is_looping_chain(const size_t& depth) const ; - ///What is the maximum depth of this zipcode? - size_t max_depth() const; + ///Is the snarl tree node backwards relative to its parent + bool get_is_reversed_in_parent(const size_t& depth) const; - ///How many codes in the zipcode have been decoded? - size_t decoder_length() const {return decoder.size();} + ///Get the handle of the thing at the given depth. 
This can only be used for + ///Root-level structures or irregular snarls + net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; - ///What type of snarl tree node is at the given depth (index into the zipcode) - ZipCode::code_type_t get_code_type(const size_t& depth) const ; + ///Get the handle of the thing at the given depth. This can be used for anything but is slow, + /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I + /// remember that it's slow + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; - ///Get the length of a snarl tree node given the depth in the snarl tree - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + ///Get the information that was stored to get the address in the distance index + ///This is the connected component number for a root structure, or the address of + ///an irregular snarl. Throws an error for anything else + ///This is used for checking equality without looking at the distance index. + ///Use get_net_handle for getting the actual handle + size_t get_distance_index_address(const size_t& depth) const; - ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl - size_t get_rank_in_snarl(const size_t& depth) const ; + /// The minimum distance from start or end of the snarl to the left or right side of the child + size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; - ///Get the number of children in a snarl. 
Throw an exception if it isn't a snarl - size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + bool is_externally_start_end_connected(const size_t& depth) const; + bool is_externally_start_start_connected(const size_t& depth) const; + bool is_externally_end_end_connected(const size_t& depth) const; - ///Get the prefix sum of a child of a chain - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - ///Get the chain component of a chain child. - ///For snarls, this will be the component of the start node - size_t get_chain_component(const size_t& depth) const ; + ///Are the two decoders pointing to the same snarl tree node at the given depth + ///This only checks if the values in the zipcode are the same at the given depth, + ///so if the preceeding snarl tree nodes are different, + ///then this might actually refer to different things + const static bool is_equal(const ZipCode& zip1, const ZipCode& zip2, + const size_t& depth); - ///Get the chain component of the last node in the chain - /// This behaves like the distance index get_chain_component- - /// for looping chains it returns the last component if get_end is true, - /// and 0 if it is false - size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; - bool get_is_looping_chain(const size_t& depth) const ; + /// Dump a ZipCode to a stream so that it can be reconstructed for a + /// unit test from the resulting information. 
+ void dump(std::ostream& out) const; - ///Is the snarl tree node backwards relative to its parent - bool get_is_reversed_in_parent(const size_t& depth) const; + //TODO: I want to make a struct for holding all values of a code as real values - ///Get the handle of the thing at the given depth. This can only be used for - ///Root-level structures or irregular snarls - net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; + ///Fill in a payload with values from the zipcode + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; - ///Get the handle of the thing at the given depth. This can be used for anything but is slow, - /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I - /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; + /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth + /// would be the node, also include the node id + net_identifier_t get_identifier(size_t depth) const; + const static net_identifier_t get_parent_identifier(const net_identifier_t& child); - ///Get the information that was stored to get the address in the distance index - ///This is the connected component number for a root structure, or the address of - ///an irregular snarl. Throws an error for anything else - ///This is used for checking equality without looking at the distance index. 
- ///Use get_net_handle for getting the actual handle - size_t get_distance_index_address(const size_t& depth) const; +}; - /// The minimum distance from start or end of the snarl to the left or right side of the child - size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; +/// Print a code type to a stream +std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type); - bool is_externally_start_end_connected(const size_t& depth) const; - bool is_externally_start_start_connected(const size_t& depth) const; - bool is_externally_end_end_connected(const size_t& depth) const; +//A structure for holding a vector of zipcodes +//This is really just used for serializing +class ZipCodeCollection { + private: + vector zipcodes; - ///Are the two decoders pointing to the same snarl tree node at the given depth - ///This only checks if the values in the zipcode are the same at the given depth, - ///so if the preceeding snarl tree nodes are different, - ///then this might actually refer to different things - const static bool is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, - const size_t& depth); + public: + ZipCodeCollection () {} - /// Dump a ZipCodeDecoder to a stream so that it can be reconstructed for a - /// unit test from the resulting information. 
- void dump(std::ostream& out) const; + void serialize(std::ostream& out) const; + void deserialize(std::istream& in); + bool empty() const {return zipcodes.empty();} + ZipCode at(size_t i) const {return zipcodes.at(i);} + void emplace_back(ZipCode zip) {zipcodes.emplace_back(zip);} + size_t size() const { return zipcodes.size();} - //TODO: I want to make a struct for holding all values of a code as real values + private: - ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + //magic number to identify the file + const static uint32_t magic_number = 0x5a495053; //ZIPS + const static uint32_t version = 2; - /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth - /// would be the node, also include the node id - net_identifier_t get_identifier(size_t depth) const; - const static net_identifier_t get_parent_identifier(const net_identifier_t& child); + public: + const static std::uint32_t get_magic_number() {return magic_number;} + const static std::string get_magic_number_as_string() { + std::uint32_t num = get_magic_number(); + return std::string(reinterpret_cast(&num), sizeof(num)); + } }; + template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { @@ -382,7 +368,7 @@ struct wang_hash { } }; -std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); +std::ostream& operator<<(std::ostream& out, const ZipCode& decoder); /** diff --git a/src/zip_code_tree.cpp b/src/zip_code_tree.cpp index 1055949af1b..1ed2bc13afd 100644 --- a/src/zip_code_tree.cpp +++ b/src/zip_code_tree.cpp @@ -55,7 +55,7 @@ void ZipCodeForest::open_chain(forest_growing_state_t& forest_state, #endif const Seed& current_seed = forest_state.seeds->at(seed_index); - size_t current_max_depth = current_seed.zipcode_decoder->max_depth(); + size_t current_max_depth = current_seed.zipcode.max_depth(); if (depth == 0) { //If this 
is the start of a new top-level chain, make a new tree, which will be the new active tree @@ -177,7 +177,7 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, //The value that got stored in forest_state.sibling_indices_at_depth was the prefix sum //traversing the chain according to its orientation in the tree, so either way //the distance is the length of the chain - the prefix sum - size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode_decoder->get_length(depth), + size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode.get_length(depth), forest_state.sibling_indices_at_depth[depth].back().value); bool add_distances = true; if (distance_to_chain_end > forest_state.distance_limit && forest_state.open_chains.back().second) { @@ -260,9 +260,9 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, std::numeric_limits::max(), false); //Update the distance to the end of the chain to be the distance from the previous child - size_t last_length = depth == last_seed.zipcode_decoder->max_depth() + size_t last_length = depth == last_seed.zipcode.max_depth() ? 
0 - : last_seed.zipcode_decoder->get_length(depth+1); + : last_seed.zipcode.get_length(depth+1); distance_to_chain_end = SnarlDistanceIndex::sum(distance_to_chain_end, SnarlDistanceIndex::sum(last_edge, @@ -299,10 +299,10 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, bool chain_is_reversed) { const Seed& current_seed = forest_state.seeds->at(seed_index); - ZipCode::code_type_t current_type = current_seed.zipcode_decoder->get_code_type(depth); + ZipCode::code_type_t current_type = current_seed.zipcode.get_code_type(depth); //Is this chain actually a node pretending to be a chain - bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode_decoder->max_depth(); + bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode.max_depth(); //For a root node or trivial chain, the "chain" is actually just the node, so the depth // of the chain we're working on is the same depth. Otherwise, the depth is depth-1 @@ -320,11 +320,11 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //Otherwise, get the distance to the start or end of the chain current_offset = chain_is_reversed - ? SnarlDistanceIndex::minus(current_seed.zipcode_decoder->get_length(chain_depth) , + ? 
SnarlDistanceIndex::minus(current_seed.zipcode.get_length(chain_depth) , SnarlDistanceIndex::sum( - current_seed.zipcode_decoder->get_offset_in_chain(depth), - current_seed.zipcode_decoder->get_length(depth))) - : current_seed.zipcode_decoder->get_offset_in_chain(depth); + current_seed.zipcode.get_offset_in_chain(depth), + current_seed.zipcode.get_length(depth))) + : current_seed.zipcode.get_offset_in_chain(depth); } @@ -537,7 +537,7 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //stored should be the offset of the end bound of the snarl, so add the //length of the snarl current_offset = SnarlDistanceIndex::sum(current_offset, - current_seed.zipcode_decoder->get_length(depth)); + current_seed.zipcode.get_length(depth)); } @@ -614,7 +614,7 @@ void ZipCodeForest::close_snarl(forest_growing_state_t& forest_state, forest_state.sibling_indices_at_depth[depth-1].pop_back(); //Snarl prefix sum is now the distance from the start of the chain to the start of the snarl - snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode_decoder->get_length(depth)); + snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode.get_length(depth)); //Now update forest_state.sibling_indices_at_depth to be the previous thing in the chain forest_state.sibling_indices_at_depth[depth-1].push_back({ @@ -745,9 +745,9 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //If we're getting the distance to the end of the snarl, then this is the length of the snarl // otherwise, it is the distance from the seed to the start (or end) of the snarl - size_t snarl_distance = to_snarl_end ? seed.zipcode_decoder->get_length(depth) + size_t snarl_distance = to_snarl_end ? 
seed.zipcode.get_length(depth) : SnarlDistanceIndex::sum (distance_to_chain_start, - seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); + seed.zipcode.get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); //Add the edge trees[forest_state.active_tree_index].zip_code_tree.at(last_child_index - 1 - sibling_i) = @@ -757,7 +757,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //Otherwise, the previous thing was another child of the snarl //and we need to record the distance between these two size_t distance; - if (seed.zipcode_decoder->get_code_type(depth) == ZipCode::REGULAR_SNARL) { + if (seed.zipcode.get_code_type(depth) == ZipCode::REGULAR_SNARL) { //If this is the child of a regular snarl, then the distance between //any two chains is inf, and the distance to any bound is 0 distance = to_snarl_end ? sibling.distances.second : std::numeric_limits::max(); @@ -771,19 +771,19 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co if (to_snarl_end && !is_cyclic_snarl) { distance = SnarlDistanceIndex::sum(sibling.distances.second, - sibling_seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); + sibling_seed.zipcode.get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); } else { //If to_snarl_end is true, then we want the distance to the end (or start if snarl_is_reversed) // Rank is 0 and the orientation doesn't matter size_t rank2 = to_snarl_end ? (snarl_is_reversed ? 0 : 1) - : seed.zipcode_decoder->get_rank_in_snarl(depth+1); + : seed.zipcode.get_rank_in_snarl(depth+1); bool right_side2 = child_is_reversed; //If the sibling is the start, then get the distance to the appropriate bound size_t rank1 = sibling.type == ZipCodeTree::SNARL_START ? (snarl_is_reversed ? 
1 : 0) - : sibling_seed.zipcode_decoder->get_rank_in_snarl(depth+1); + : sibling_seed.zipcode.get_rank_in_snarl(depth+1); bool right_side1 = !sibling.is_reversed; size_t distance_to_end_of_last_child = sibling.type == ZipCodeTree::SNARL_START ? 0 @@ -791,7 +791,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //The bools for this are true if the distance is to/from the right side of the child //We want the right side of 1 (which comes first in the dag ordering) to the left side of 2 //relative to the orientation of the snarl - net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth, forest_state.distance_index); + net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth, forest_state.distance_index); distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum( forest_state.distance_index->distance_in_snarl(snarl_handle, rank1, right_side1, rank2, right_side2), distance_to_chain_start), @@ -938,7 +938,7 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< } else if (current_item.get_type() == ZipCodeTree::SEED) { //If this is a seed, check the snarls we've seen previously for (const size_t& snarl_depth : snarl_depths) { - if (seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) + if (seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::REGULAR_SNARL) { //If this is a regular snarl, then it must be a DAG too dag_count++; @@ -946,11 +946,11 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< //If this is an irregular snarl //Check the snarl in the distance index - net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode_decoder->get_net_handle(snarl_depth, &distance_index); + net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode.get_net_handle(snarl_depth, &distance_index); #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == 
ZipCode::IRREGULAR_SNARL || - seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || - seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); + assert(seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::IRREGULAR_SNARL || + seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || + seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); assert(distance_index.is_snarl(snarl_handle)); #endif if (distance_index.is_dag(snarl_handle)) { @@ -976,13 +976,13 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< return std::make_pair(dag_count, non_dag_count); } bool ZipCodeTree::seed_is_reversed_at_depth (const Seed& seed, size_t depth, const SnarlDistanceIndex& distance_index){ - if (seed.zipcode_decoder->get_is_reversed_in_parent(depth)) { + if (seed.zipcode.get_is_reversed_in_parent(depth)) { return true; - } else if (depth > 0 && (seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL - || seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { + } else if (depth > 0 && (seed.zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL + || seed.zipcode.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { //If the parent is an irregular snarl, then check the orientation of the child in the snarl - net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth-1, &distance_index); - size_t rank = seed.zipcode_decoder->get_rank_in_snarl(depth); + net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth-1, &distance_index); + size_t rank = seed.zipcode.get_rank_in_snarl(depth); if (distance_index.distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() && @@ -1109,10 +1109,10 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //so if things are traversed 
backwards, reverse the orientation bool a_is_reversed = false; bool b_is_reversed = false; - while (depth < seeds->at(previous_seed_index).zipcode_decoder->max_depth() && - depth < seeds->at(current_item.get_value()).zipcode_decoder->max_depth() && - ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, - *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { + while (depth < seeds->at(previous_seed_index).zipcode.max_depth() && + depth < seeds->at(current_item.get_value()).zipcode.max_depth() && + ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, + seeds->at(current_item.get_value()).zipcode, depth)) { //Remember the orientation if (ZipCodeTree::seed_is_reversed_at_depth(seeds->at(previous_seed_index), depth, distance_index)) { @@ -1142,19 +1142,19 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //Either depth is the last thing in previous_seed_index or current_item.value, or they are different at this depth - if ( ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, - *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { + if ( ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, + seeds->at(current_item.get_value()).zipcode, depth)) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthey are on the same node" << endl; #endif //If they are equal, then they must be on the same node size_t offset1 = is_rev(seeds->at(previous_seed_index).pos) - ? seeds->at(previous_seed_index).zipcode_decoder->get_length(depth) + ? seeds->at(previous_seed_index).zipcode.get_length(depth) - offset(seeds->at(previous_seed_index).pos) : offset(seeds->at(previous_seed_index).pos); size_t offset2 = is_rev(seeds->at(current_item.get_value()).pos) - ? seeds->at(current_item.get_value()).zipcode_decoder->get_length(depth) + ? 
seeds->at(current_item.get_value()).zipcode.get_length(depth) - offset(seeds->at(current_item.get_value()).pos) : offset(seeds->at(current_item.get_value()).pos); if (!current_is_in_cyclic_snarl) { @@ -1172,28 +1172,28 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, cerr << "\tThey are on different connected components" << endl; #endif //If they are on different connected components, sort by connected component - assert( seeds->at(previous_seed_index).zipcode_decoder->get_distance_index_address(0) <= - seeds->at(current_item.get_value()).zipcode_decoder->get_distance_index_address(0)); + assert( seeds->at(previous_seed_index).zipcode.get_distance_index_address(0) <= + seeds->at(current_item.get_value()).zipcode.get_distance_index_address(0)); - } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::CHAIN - || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { + } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::CHAIN + || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common chain" << endl; #endif //If previous_seed_index and current_item.value are both children of a chain - size_t offset_a = seeds->at(previous_seed_index).zipcode_decoder->get_offset_in_chain(depth); - size_t offset_b = seeds->at(current_item.get_value()).zipcode_decoder->get_offset_in_chain(depth); + size_t offset_a = seeds->at(previous_seed_index).zipcode.get_offset_in_chain(depth); + size_t offset_b = seeds->at(current_item.get_value()).zipcode.get_offset_in_chain(depth); if (!current_is_in_cyclic_snarl) { if ( offset_a == offset_b) { //If they have the same prefix sum, then the snarl comes first //They will never be on the same child at this depth if (parent_of_a_is_reversed) { - 
assert(seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && - seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); + assert(seeds->at(current_item.get_value()).zipcode.get_code_type(depth) != ZipCode::NODE && + seeds->at(previous_seed_index).zipcode.get_code_type(depth) == ZipCode::NODE); } else { - assert( seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && - seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); + assert( seeds->at(previous_seed_index).zipcode.get_code_type(depth) != ZipCode::NODE && + seeds->at(current_item.get_value()).zipcode.get_code_type(depth) == ZipCode::NODE); } } else { //Check if the parent chain is reversed and if so, then the order should be reversed @@ -1205,8 +1205,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, } } } - } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::REGULAR_SNARL - || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { + } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::REGULAR_SNARL + || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common dag snarl" << endl; #endif @@ -1215,8 +1215,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, // The ranks of children in snarls are in a topological order, so // sort on the ranks if (!current_is_in_cyclic_snarl) { - assert( seeds->at(previous_seed_index).zipcode_decoder->get_rank_in_snarl(depth) <= - seeds->at(current_item.get_value()).zipcode_decoder->get_rank_in_snarl(depth)); + assert( seeds->at(previous_seed_index).zipcode.get_rank_in_snarl(depth) <= + seeds->at(current_item.get_value()).zipcode.get_rank_in_snarl(depth)); 
} } @@ -2031,20 +2031,20 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\tThis is the root snarl so sort by connected component: " - << seed.zipcode_decoder->get_distance_index_address(0) << endl; + << seed.zipcode.get_distance_index_address(0) << endl; #endif - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode_decoder->get_distance_index_address(0)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode.get_distance_index_address(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(0)); } else if (interval.code_type == ZipCode::NODE || interval.code_type == ZipCode::ROOT_NODE - || seed.zipcode_decoder->max_depth() == interval.depth) { + || seed.zipcode.max_depth() == interval.depth) { #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\t this is a node: offset: " << ( is_rev(seed.pos) - ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) + ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)) << endl;; #endif sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( - is_rev(seed.pos) != order_is_reversed ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) + is_rev(seed.pos) != order_is_reversed ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(ZipCode::NODE); @@ -2058,12 +2058,12 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, // and 2 will be added to the node with an offset in the node of 0 (node 3 if the chain is traversed forward) // See sort_value_t for more details - size_t prefix_sum = order_is_reversed ? 
SnarlDistanceIndex::minus(seed.zipcode_decoder->get_length(interval.depth), - SnarlDistanceIndex::sum( seed.zipcode_decoder->get_offset_in_chain(interval.depth+1), - seed.zipcode_decoder->get_length(interval.depth+1))) - : seed.zipcode_decoder->get_offset_in_chain(interval.depth+1); + size_t prefix_sum = order_is_reversed ? SnarlDistanceIndex::minus(seed.zipcode.get_length(interval.depth), + SnarlDistanceIndex::sum( seed.zipcode.get_offset_in_chain(interval.depth+1), + seed.zipcode.get_length(interval.depth+1))) + : seed.zipcode.get_offset_in_chain(interval.depth+1); - ZipCode::code_type_t child_type = seed.zipcode_decoder->get_code_type(interval.depth+1); + ZipCode::code_type_t child_type = seed.zipcode.get_code_type(interval.depth+1); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(child_type); if (child_type == ZipCode::REGULAR_SNARL @@ -2075,9 +2075,9 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, sort_values_by_seed[zipcode_sort_order[i]].set_chain_order(1); } else { //If this is a node, then the order depends on where the position falls in the node - bool node_is_rev = seed.zipcode_decoder->get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); + bool node_is_rev = seed.zipcode.get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); node_is_rev = order_is_reversed ? !node_is_rev : node_is_rev; - size_t node_offset = node_is_rev ? seed.zipcode_decoder->get_length(interval.depth+1) - offset(seed.pos) + size_t node_offset = node_is_rev ? 
seed.zipcode.get_length(interval.depth+1) - offset(seed.pos) : offset(seed.pos); sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(SnarlDistanceIndex::sum(prefix_sum, node_offset)); @@ -2093,13 +2093,13 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #endif } else { #ifdef DEBUG_ZIP_CODE_SORTING - cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1) << endl; + cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode.get_rank_in_snarl(interval.depth+1) << endl; #endif // The ranks of children in irregular snarls are in a topological order, so // sort on the ranks // The rank of children in a regular snarl is arbitrary but it doesn't matter anyway - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode.get_rank_in_snarl(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(interval.depth+1)); } min_sort_value = std::min(min_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); max_sort_value = std::max(max_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); @@ -2204,7 +2204,7 @@ void ZipCodeForest::get_next_intervals(forest_growing_state_t& forest_state, con if (interval.code_type != ZipCode::EMPTY && - seeds->at(zipcode_sort_order[interval.interval_start]).zipcode_decoder->max_depth() == interval.depth ) { + seeds->at(zipcode_sort_order[interval.interval_start]).zipcode.max_depth() == interval.depth ) { //If this is a trivial chain, then just return the same interval as a node #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthis was a trivial chain so just return the same interval as a node" << endl; @@ -2434,7 
+2434,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth()+1); + seeds.at(forest_state.seed_sort_order[current_interval.interval_start]).zipcode.max_depth()+1); cerr << "Close anything open" << endl; #endif while (!forest_state.open_intervals.empty()) { @@ -2607,7 +2607,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth(); + seeds.at(forest_state.seed_sort_order[current_interval.interval_start]).zipcode.max_depth(); for (size_t seed_i = current_interval.interval_start ; seed_i < current_interval.interval_end ; seed_i++) { @@ -2709,9 +2709,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s const SnarlDistanceIndex* distance_index = forest_state.distance_index; #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_code_type(snarl_interval.depth) + assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_code_type(snarl_interval.depth) == ZipCode::CYCLIC_SNARL); - net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); + net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, distance_index); cerr << "Sorting and finding intervals for cyclic snarl " << distance_index->net_handle_as_string(handle); size_t child_count = 0; for (auto& x : child_intervals) { @@ -2720,7 +2720,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s cerr << " with " << child_count << " children" << endl; #endif - net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); + net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, 
distance_index); /****** For each interval, form runs of reachable seeds @@ -2800,9 +2800,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s //Get up to half of the values from before the snarl while (check_i >= parent_interval.interval_start && parent_offset_values.size() <= check_count/2) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); } check_i--; @@ -2813,9 +2813,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s check_i = snarl_interval.interval_end; while (check_i < parent_interval.interval_end && parent_offset_values.size() < check_count) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); } check_i++; @@ -2857,7 +2857,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s #ifdef DEBUG_ZIP_CODE_TREE //This is how seed_is_reversed_at_depth currently works but double check this in case it changed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = 
seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); assert (distance_index->distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() && distance_index->distance_in_snarl(snarl_handle, 1, false, rank, true) == std::numeric_limits::max()); @@ -2866,7 +2866,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s interval_is_reversable = false; } else { //If the interval is not reversed in the snarl, check if it can be reversed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); size_t distance_start = distance_index->distance_in_snarl(snarl_handle, 0, false, rank, true); size_t distance_end = distance_index->distance_in_snarl(snarl_handle, 1, false, rank, false); interval_is_reversable = distance_start != std::numeric_limits::max() @@ -2899,7 +2899,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s std::get<1>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = sort_values_by_seed[zipcode_sort_order[sort_i]].get_sort_value(); std::get<2>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = - seed.zipcode_decoder->max_depth() <= snarl_interval.depth+2; + seed.zipcode.max_depth() <= snarl_interval.depth+2; //Make a new run for the seed, to be updated with anything combined with it From a18edec77b599494b2db644130693d54c69190a2 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 31 Jul 2024 14:59:15 +0200 Subject: [PATCH 073/124] Fix unit tests --- src/unittest/snarl_seed_clusterer.cpp | 54 +++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 8 deletions(-) diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index 41c6212d9e1..ce7dde12972 100644 
--- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -3344,13 +3344,28 @@ namespace unittest { vector> pos_ts(2); - pos_ts[0].emplace_back(6, false, 12); - pos_ts[0].emplace_back(9, true, 0); - pos_ts[0].emplace_back(11, true, 2); - pos_ts[1].emplace_back(7, false,0); - pos_ts[1].emplace_back(11,false, 5); - pos_ts[1].emplace_back(8,false, 9); - pos_ts[1].emplace_back(9,true, 0); + pos_ts[0].emplace_back(15, false, 9); + pos_ts[0].emplace_back(19, false, 23); + pos_ts[0].emplace_back(12, false, 4); + pos_ts[0].emplace_back(7, true, 2); + pos_ts[0].emplace_back(3, false, 16); + pos_ts[0].emplace_back(1, true, 6); + pos_ts[0].emplace_back(8, false, 10); + pos_ts[0].emplace_back(1, true, 2); + pos_ts[1].emplace_back(18, true, 0); + pos_ts[1].emplace_back(2, false, 0); + pos_ts[1].emplace_back(5, true, 19); + pos_ts[1].emplace_back(7, true, 9); + pos_ts[1].emplace_back(12, false, 9); + pos_ts[1].emplace_back(8, true, 14); + pos_ts[1].emplace_back(7, false, 7); + pos_ts[1].emplace_back(4, false, 2); + pos_ts[1].emplace_back(17, false, 42); + pos_ts[1].emplace_back(18, true, 0); + pos_ts[1].emplace_back(16, false, 3); + pos_ts[1].emplace_back(11, true, 16); + pos_ts[1].emplace_back(2, false, 0); + vector> seeds(2); for (size_t read_num = 0 ; read_num < pos_ts.size() ; read_num++) { for (pos_t pos : pos_ts[read_num]) { @@ -3386,7 +3401,7 @@ namespace unittest { IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; - fill_in_distance_index(&dist_index, &graph, &snarl_finder, 5); + fill_in_distance_index(&dist_index, &graph, &snarl_finder); @@ -3476,6 +3491,12 @@ namespace unittest { if ( dist != -1 && dist <= read_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); + graph.serialize("testGraph.hg"); + for (size_t i = 0 ; i < 2 ; i++) { + for (auto& seed : all_seeds[i]) { + cerr << "pos_ts[" << i << "].emplace_back(" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << ");" << endl; + } + } cerr << "These should have been in the same read cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; @@ -3503,6 +3524,11 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); + for (size_t i = 0 ; i < 2 ; i++) { + for (auto& seed : all_seeds[i]) { + cerr << "pos_ts[" << i << "].emplace_back(" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << ");" << endl; + } + } cerr << "These should be different read clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; @@ -3551,6 +3577,12 @@ namespace unittest { if ( dist != -1 && dist <= fragment_lim) { dist_index.print_self(); graph.serialize("testGraph.hg"); + graph.serialize("testGraph.hg"); + for (size_t i = 0 ; i < 2 ; i++) { + for (auto& seed : all_seeds[i]) { + cerr << "pos_ts[" << i << "].emplace_back(" << id(seed.pos) << ", " << (is_rev(seed.pos) ? "true, " : "false, ") << offset(seed.pos) << ");" << endl; + } + } cerr << "These should have been in the same fragment cluster: " ; cerr << pos1 << " and " << pos2 << endl; cerr << dist1 << " " << dist2 << " " << dist3 << " " << dist4 << endl; @@ -3583,6 +3615,12 @@ namespace unittest { if (actual_clusters.size() != 1) { dist_index.print_self(); graph.serialize("testGraph.hg"); + graph.serialize("testGraph.hg"); + for (size_t i = 0 ; i < 2 ; i++) { + for (auto& seed : all_seeds[i]) { + cerr << "pos_ts[" << i << "].emplace_back(" << id(seed.pos) << ", " << (is_rev(seed.pos) ? 
"true, " : "false, ") << offset(seed.pos) << ");" << endl; + } + } cerr << "These should be different fragment clusters: " << endl; for (auto c : actual_clusters) { cerr << "cluster: " ; From 831f23155aed5d41b03b9a93f90608db5f291118 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 31 Jul 2024 17:02:34 +0200 Subject: [PATCH 074/124] Fix reserving decoder length --- src/zip_code.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 9e5debeb7c9..99004b283a4 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -143,6 +143,7 @@ void ZipCode::fill_in_full_decoder() { //If the zipcode is empty return; } + decoder.reserve(byte_count() / 4); bool done=false; while (!done) { done = fill_in_next_decoder(); From 2923cde6b1c9fe0307d700a5d40963a3b65e97ae Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 31 Jul 2024 17:31:56 +0200 Subject: [PATCH 075/124] Add an int vector that uses a minimal bit width for storing stuff --- src/min_width_int_vector.cpp | 53 +++++++++++++++ src/min_width_int_vector.hpp | 57 +++++++++++++++++ src/unittest/min_width_int_vector.cpp | 92 +++++++++++++++++++++++++++ 3 files changed, 202 insertions(+) create mode 100644 src/min_width_int_vector.cpp create mode 100644 src/min_width_int_vector.hpp create mode 100644 src/unittest/min_width_int_vector.cpp diff --git a/src/min_width_int_vector.cpp b/src/min_width_int_vector.cpp new file mode 100644 index 00000000000..4d4e3215dba --- /dev/null +++ b/src/min_width_int_vector.cpp @@ -0,0 +1,53 @@ +#include "min_width_int_vector.hpp" +#include +#include +#include + +//#define DEBUG_MININT + +namespace vg { +using namespace std; + +void min_width_int_vector_t::from_vector(const vector& input_data, size_t max_val) { + if (max_val != 0) { + width = std::max(width, 1 + (size_t)std::floor(std::log2(max_val))); + } else if (width == 0) { + //If we haven't already set the width, find it from the max value of the input data + for (const size_t& x : input_data) { + max_val = 
std::max(x, max_val); + } + width = 1 + (size_t)std::floor(std::log2(max_val)); + } + data.reserve(input_data.size()*width); + + for (const size_t& x : input_data) { + push_back(x); + } +} + +void min_width_int_vector_t::push_back(size_t val) { +#ifdef DEBUG_MININT + assert(width >= 1 + (size_t)std::floor(std::log2(val))); +#endif + for (size_t i = 0 ; i < width ; i++) { + data.emplace_back(val & (1 << (width - i - 1))); + } + +} + +size_t min_width_int_vector_t::size() const { + return data.size() / width; +} +size_t min_width_int_vector_t::at(size_t index) const { + size_t result = 0; + size_t start_index = index * width; + for (size_t i = 0 ; i < width ; i++) { + if (data[i + start_index]) { + result |= (1 << (width - i - 1)); + } + } + return result; +} + + +} diff --git a/src/min_width_int_vector.hpp b/src/min_width_int_vector.hpp new file mode 100644 index 00000000000..e4f76a762c3 --- /dev/null +++ b/src/min_width_int_vector.hpp @@ -0,0 +1,57 @@ +#ifndef VG_MINWIDTH_INT_HPP_INCLUDED +#define VG_MINWIDTH_INT_HPP_INCLUDED + +#include +#include + +/** \file min_width_int_vector.hpp + * Methods for storing a vector of integers with minimal bit width + */ + +namespace vg{ +using namespace std; + +/* A struct to store a vector of integers with minimal bit width + */ +struct min_width_int_vector_t { + + public: + + min_width_int_vector_t () : + width(0) {} + + min_width_int_vector_t (size_t width) : + width(width) {} + + + ///Make this a copy of input_data + ///If maxval is set, then this is the maximum value in the input data, + /// or the maximum value to be stored with the bitwidth + ///If there is no max_val and the width has not already been set, get the + /// width from the maximum value in input_data + void from_vector(const vector& input_data, size_t max_val = 0); + + ///Add a value to the end of the vector + void push_back(size_t val); + + ///How long is the vector + size_t size() const; + + ///Get the value at the given index + size_t at(size_t index) 
const; + + //Check what the bit width is + size_t get_bitwidth() const { return width;} + + + private: + + /// The bit width that is being used to store the integers + /// This can be up to 64 + size_t width : 7; + + ///The actual data stored in the vector + std::vector data; +}; +} +#endif diff --git a/src/unittest/min_width_int_vector.cpp b/src/unittest/min_width_int_vector.cpp new file mode 100644 index 00000000000..f61ec4b6ff3 --- /dev/null +++ b/src/unittest/min_width_int_vector.cpp @@ -0,0 +1,92 @@ +#include "catch.hpp" +#include +#include +#include "../min_width_int_vector.hpp" + +namespace vg{ +namespace unittest{ +using namespace std; + + TEST_CASE("Array of ints added one at a time", "[minint]") { + SECTION ("[0]") { + min_width_int_vector_t minint_vector (1); + minint_vector.push_back(0); + REQUIRE(minint_vector.size() == 1); + REQUIRE(minint_vector.at(0) == 0); + } + SECTION ("[1]") { + min_width_int_vector_t minint_vector (1); + minint_vector.push_back(1); + REQUIRE(minint_vector.size() == 1); + REQUIRE(minint_vector.at(0) == 1); + } + SECTION ("[1, 2]") { + min_width_int_vector_t minint_vector(2); + minint_vector.push_back(1); + minint_vector.push_back(2); + REQUIRE(minint_vector.size() == 2); + REQUIRE(minint_vector.at(0) == 1); + REQUIRE(minint_vector.at(1) == 2); + } + SECTION ("more values") { + vector values {1, 3243, 123634, 53454, 0}; + min_width_int_vector_t minint_vector(1+(size_t)std::floor(std::log2(123634))); + for (auto& x : values) { + minint_vector.push_back(x); + } + assert(minint_vector.size() == values.size()); + for (size_t i = 0 ; i < values.size() ; i++) { + assert(minint_vector.at(i) == values[i]); + } + } + } + TEST_CASE("Array of ints from vector", "[minint]") { + SECTION ("[0]") { + vector original {0}; + min_width_int_vector_t minint_vector; + minint_vector.from_vector(original); + REQUIRE(minint_vector.size() == 1); + REQUIRE(minint_vector.at(0) == 0); + REQUIRE(minint_vector.get_bitwidth() == 1); + } + SECTION ("[1]") { + 
vector original {1}; + min_width_int_vector_t minint_vector; + minint_vector.from_vector(original); + REQUIRE(minint_vector.size() == 1); + REQUIRE(minint_vector.at(0) == 1); + REQUIRE(minint_vector.get_bitwidth() == 1); + } + SECTION ("[1, 2]") { + vector original {1, 2}; + min_width_int_vector_t minint_vector; + minint_vector.from_vector(original); + + REQUIRE(minint_vector.size() == 2); + REQUIRE(minint_vector.at(0) == 1); + REQUIRE(minint_vector.at(1) == 2); + REQUIRE(minint_vector.get_bitwidth() == 2); + } + SECTION ("more values") { + vector values {1, 3243, 123634, 53454, 0}; + min_width_int_vector_t minint_vector (3); + minint_vector.from_vector(values, 123634); + REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); + assert(minint_vector.size() == values.size()); + for (size_t i = 0 ; i < values.size() ; i++) { + assert(minint_vector.at(i) == values[i]); + } + } + SECTION ("more values without bitwidth") { + vector values {1, 3243, 123634, 53454, 0}; + min_width_int_vector_t minint_vector; + minint_vector.from_vector(values); + assert(minint_vector.size() == values.size()); + for (size_t i = 0 ; i < values.size() ; i++) { + assert(minint_vector.at(i) == values[i]); + } + REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); + } + } +} +} From 595cafbfcb0b896fe4e4fd78ad15a100b57f98fb Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 1 Aug 2024 23:16:32 +0200 Subject: [PATCH 076/124] Use new int vectors for zipcodes but it doesn't work yet --- src/min_width_int_vector.cpp | 30 +- src/min_width_int_vector.hpp | 54 +- src/snarl_seed_clusterer.cpp | 8 +- src/subcommand/minimizer_main.cpp | 6 +- src/unittest/min_width_int_vector.cpp | 10 +- src/unittest/snarl_seed_clusterer.cpp | 2 +- src/unittest/zip_code.cpp | 481 ++++------- src/zip_code.cpp | 1147 ++++++++++--------------- src/zip_code.hpp | 53 +- 9 files changed, 740 insertions(+), 1051 deletions(-) diff --git a/src/min_width_int_vector.cpp 
b/src/min_width_int_vector.cpp index 4d4e3215dba..3ca1cc4d802 100644 --- a/src/min_width_int_vector.cpp +++ b/src/min_width_int_vector.cpp @@ -1,23 +1,37 @@ #include "min_width_int_vector.hpp" -#include -#include -#include -//#define DEBUG_MININT +#define DEBUG_MININT namespace vg { using namespace std; void min_width_int_vector_t::from_vector(const vector& input_data, size_t max_val) { +#ifdef DEBUG_MININT + cerr << "get minint vector from int vector " << endl; +#endif if (max_val != 0) { - width = std::max(width, 1 + (size_t)std::floor(std::log2(max_val))); +#ifdef DEBUG_MININT + cerr << "Get width from max value " << max_val << " bigger of " << ((size_t) width) << " and " << (std::floor(std::log2(max_val)) + 1) << endl; +#endif + width = (uint8_t) std::max((size_t) width, (size_t)(std::floor(std::log2((float) max_val)) + 1)); } else if (width == 0) { //If we haven't already set the width, find it from the max value of the input data for (const size_t& x : input_data) { max_val = std::max(x, max_val); } - width = 1 + (size_t)std::floor(std::log2(max_val)); +#ifdef DEBUG_MININT + cerr << "Found max value " << max_val << " and got width " << width << endl; +#endif + width = 1 + (size_t)std::floor(std::log2((float) max_val)); } +#ifdef DEBUG_MININT + for (size_t x : input_data) { + cerr << x << " "; + } + for (size_t x : input_data) { + assert( width >= (uint8_t)(std::floor(std::log2(x)) + 1)); + } +#endif data.reserve(input_data.size()*width); for (const size_t& x : input_data) { @@ -25,9 +39,11 @@ void min_width_int_vector_t::from_vector(const vector& input_data, size_ } } + + void min_width_int_vector_t::push_back(size_t val) { #ifdef DEBUG_MININT - assert(width >= 1 + (size_t)std::floor(std::log2(val))); + assert(width >= (uint8_t) (1 + (size_t)std::floor(std::log2(val)))); #endif for (size_t i = 0 ; i < width ; i++) { data.emplace_back(val & (1 << (width - i - 1))); diff --git a/src/min_width_int_vector.hpp b/src/min_width_int_vector.hpp index 
e4f76a762c3..b428b9b393b 100644 --- a/src/min_width_int_vector.hpp +++ b/src/min_width_int_vector.hpp @@ -2,7 +2,14 @@ #define VG_MINWIDTH_INT_HPP_INCLUDED #include +#include #include +#include +#include +#include +#include + + /** \file min_width_int_vector.hpp * Methods for storing a vector of integers with minimal bit width @@ -15,13 +22,27 @@ using namespace std; */ struct min_width_int_vector_t { + private: + + /// How many bits are used to store the bit width used + /// This is needed for serializing + const static size_t BIT_WIDTH_WIDTH = 8; + + /// The bit width that is being used to store the integers + uint8_t width; + + ///The actual data stored in the vector + std::vector data; + public: - min_width_int_vector_t () : - width(0) {} + min_width_int_vector_t () { + width = 0; + } - min_width_int_vector_t (size_t width) : - width(width) {} + min_width_int_vector_t (size_t w) { + width = w; + } ///Make this a copy of input_data @@ -40,18 +61,27 @@ struct min_width_int_vector_t { ///Get the value at the given index size_t at(size_t index) const; - //Check what the bit width is - size_t get_bitwidth() const { return width;} + ///Check what the bit width is + // This is a size_t because it's blank when I try to write it to stderr + size_t get_bit_width() const { return (size_t) width;} + ///How many bits are we using total + size_t get_bit_count() const { return data.size(); } - private: + ///////////Access the bit vector itself for serializing + bool bit_at(size_t i) const {return data[i];} + void set_bitvector_length(size_t l) {data.resize(l);} + void set_bit_at(size_t i) {data[i] = true;} + void set_bit_width(size_t w) {width = w;} - /// The bit width that is being used to store the integers - /// This can be up to 64 - size_t width : 7; - ///The actual data stored in the vector - std::vector data; + ///Equality operator + //TODO: This isn't actually checking the values- the widths could be different but still represent the same vectors. 
+ // but that would be pretty slow to check so leave it + inline bool operator==(const min_width_int_vector_t& other) const { + return width == other.width && data == other.data; + } + }; } #endif diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 31579b53103..220c36082f0 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -31,10 +31,10 @@ vector SnarlDistanceIndexClusterer::cluste vector seed_caches(seeds.size()); for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER - assert (seeds[i].zipcode.byte_count() != 0) ; + assert (seeds[i].zipcode.bit_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); - if (seeds[i].zipcode.byte_count() != 0) { + if (seeds[i].zipcode.bit_count() != 0) { seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } @@ -75,10 +75,10 @@ vector> SnarlDistanceIndexClusterer for (size_t i = 0 ; i < all_seeds[read_num].size() ; i++) { #ifdef DEBUG_CLUSTER //The zipcode should be filled in - assert(all_seeds[read_num][i].zipcode.byte_count() != 0); + assert(all_seeds[read_num][i].zipcode.bit_count() != 0); #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); - if (all_seeds[read_num][i].zipcode.byte_count() != 0) { + if (all_seeds[read_num][i].zipcode.bit_count() != 0) { all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 73c30133801..d75cf6bcd3e 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -387,8 +387,8 @@ int main_minimizer(int argc, char** argv) { //For each minimizer, writes the size of the zip code and then the zip code as a tsv pair value (0, 0); - //How many bytes get used - cout << zipcode.zipcode.byte_count(); + //How many bits get used + cout << 
zipcode.zipcode.get_bit_count(); //Each integer saved while (value.second != std::numeric_limits::max()) { value = zipcode.zipcode.get_value_and_next_index(value.second); @@ -396,7 +396,7 @@ int main_minimizer(int argc, char** argv) { } cout << endl; #endif - if (zipcode.zipcode.byte_count() < 15) { + if (zipcode.zipcode.get_bit_count() <= 112) { //If the zipcode is small enough to store in the payload return zipcode.get_payload_from_zip(); } else if (!zipcode_name.empty()) { diff --git a/src/unittest/min_width_int_vector.cpp b/src/unittest/min_width_int_vector.cpp index f61ec4b6ff3..e4739646716 100644 --- a/src/unittest/min_width_int_vector.cpp +++ b/src/unittest/min_width_int_vector.cpp @@ -47,7 +47,7 @@ using namespace std; minint_vector.from_vector(original); REQUIRE(minint_vector.size() == 1); REQUIRE(minint_vector.at(0) == 0); - REQUIRE(minint_vector.get_bitwidth() == 1); + REQUIRE(minint_vector.get_bit_width() == 1); } SECTION ("[1]") { vector original {1}; @@ -55,7 +55,7 @@ using namespace std; minint_vector.from_vector(original); REQUIRE(minint_vector.size() == 1); REQUIRE(minint_vector.at(0) == 1); - REQUIRE(minint_vector.get_bitwidth() == 1); + REQUIRE(minint_vector.get_bit_width() == 1); } SECTION ("[1, 2]") { vector original {1, 2}; @@ -65,13 +65,13 @@ using namespace std; REQUIRE(minint_vector.size() == 2); REQUIRE(minint_vector.at(0) == 1); REQUIRE(minint_vector.at(1) == 2); - REQUIRE(minint_vector.get_bitwidth() == 2); + REQUIRE(minint_vector.get_bit_width() == 2); } SECTION ("more values") { vector values {1, 3243, 123634, 53454, 0}; min_width_int_vector_t minint_vector (3); minint_vector.from_vector(values, 123634); - REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); + REQUIRE(minint_vector.get_bit_width() == 1+(size_t)std::floor(std::log2(123634))); assert(minint_vector.size() == values.size()); for (size_t i = 0 ; i < values.size() ; i++) { assert(minint_vector.at(i) == values[i]); @@ -85,7 +85,7 @@ using 
namespace std; for (size_t i = 0 ; i < values.size() ; i++) { assert(minint_vector.at(i) == values[i]); } - REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); + REQUIRE(minint_vector.get_bit_width() == 1+(size_t)std::floor(std::log2(123634))); } } } diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index ce7dde12972..d0569d4063e 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -833,7 +833,7 @@ namespace unittest { } } TEST_CASE( "Top-level looping chain", - "[cluster][bug]" ) { + "[cluster]" ) { VG graph; Node* n1 = graph.create_node("AGCGTGTAGAGAA"); diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 22bd68ac308..489f141d484 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -8,7 +8,7 @@ namespace vg{ namespace unittest{ using namespace std; - TEST_CASE("One node zipcode", "[zipcode]") { + TEST_CASE("One node zipcode", "[zipcode][bug]") { VG graph; Node* n1 = graph.create_node("GCAAACAGATT"); @@ -22,23 +22,19 @@ using namespace std; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); //Second value is the rank of the node (chain) in the root-snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Third value is the length of the node - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 11+1); + REQUIRE(zipcode.zipcode.at(2) == 11+1); //Connectivity - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); 
//That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 4); } @@ -66,7 +62,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -116,44 +112,36 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Component count of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + REQUIRE(zipcode.decoder[1] == std::make_pair(true,(size_t)4)); + REQUIRE(zipcode.zipcode.at(4) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(5) == 3+1); //Fifth is if the node is reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( + REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //The component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(7) == 0); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 7); } SECTION ("decoded zip code for node on top-level chain") { @@ -184,70 +172,57 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 3); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.zipcode.at(0) == 1); + REQUIRE(zipcode.decoder.at(0) == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the snarl code //1 for a regular snarl - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); - 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.decoder.at(1) == std::make_pair(false, (size_t)4)); + REQUIRE(zipcode.zipcode.at(4) == 1); //prefix sum of the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == (chain_is_reversed ? 5 : 6)+1); + REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 5 : 6)+1); //length of the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(6) == 1+1); //Child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 2); + REQUIRE(zipcode.zipcode.at(7) == 2); //Chain component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(8) == 0); //node is reversed in the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl = distance_index.get_parent(chain4); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(chain4)) != 0; - REQUIRE(value_and_index.first == is_rev); + REQUIRE(zipcode.zipcode.at(9) == is_rev); //Next is the chain code //rank of the chain in the snarl - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent( + REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t)10)); + REQUIRE(zipcode.zipcode.at(10) == 
distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); //node length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 2+1); + REQUIRE(zipcode.zipcode.at(11) == 2+1); //chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(12) == 0); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 12); } @@ -333,7 +308,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -343,7 +318,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -353,7 +328,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -363,7 +338,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -373,7 +348,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -383,7 +358,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -432,45 +407,37 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Third value is the chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 4)); - value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + REQUIRE(zipcode.zipcode.at(4) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(5) == 3+1); //Fifth is if the node is reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( + REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component( + REQUIRE(zipcode.zipcode.at(7) == distance_index.get_chain_component( distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 7); } @@ -503,88 +470,71 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Third value is the chain component count of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the 
chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the regular snarl code - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); //1 for regular snarl tag - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(4) == 1); //Prefix sum of the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == (chain_is_reversed ? 4 : 3)+1); + REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 4 : 3)+1); //snarl length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0+1); + REQUIRE(zipcode.zipcode.at(6) == 0+1); //Snarl child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(7) == 1); //chain component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + REQUIRE(zipcode.zipcode.at(8) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //Is the chain is reversed in the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; - REQUIRE(value_and_index.first == is_rev); + 
REQUIRE(zipcode.zipcode.at(9) == is_rev); //Next is the chain code - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 10)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( + REQUIRE(zipcode.zipcode.at(10) == distance_index.get_rank_in_parent( distance_index.get_parent(distance_index.get_node_net_handle(n2->id())))); //chain length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(11) == 3+1); //chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(12) == 0); //Next is the node code - REQUIRE(zipcode.decoder[3] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == std::make_pair(true, (size_t) 13)); //Offset of the node in the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); + REQUIRE(zipcode.zipcode.at(13) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); //length of the node - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(14) == 1+1); //is the node reversed in the parent - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()))); + REQUIRE(zipcode.zipcode.at(15) == distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()))); 
//chain component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + REQUIRE(zipcode.zipcode.at(16) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 16); } @@ -632,154 +582,123 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Second value is the chain component count of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the regular snarl code for snarl 1-8 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 3)); //1 for regular snarl tag - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(4) == 1); //Prefix sum of the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == (chain_is_reversed ? 
4 : 3)+1); + REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 4 : 3)+1); //snarl length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0+1); + REQUIRE(zipcode.zipcode.at(6) == 0+1); //snarl child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(7) == 1); //Chain component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + REQUIRE(zipcode.zipcode.at(8) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //Is the chain is reversed in the snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; - REQUIRE(value_and_index.first == is_rev); + REQUIRE(zipcode.zipcode.at(9) == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 10)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( + REQUIRE(zipcode.zipcode.at(10) == distance_index.get_rank_in_parent( distance_index.get_parent(distance_index.get_node_net_handle(n2->id())))); //chain length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(11) == 3+1); 
//chain component_count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(12) == 0); //Next is the regular snarl code for snarl 2-7 - REQUIRE(zipcode.decoder[3] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == std::make_pair(false, (size_t) 13)); //1 as tag for regular snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(13) == 1); //offset in chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(14) == 1+1); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(15) == 1+1); //child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 2); + REQUIRE(zipcode.zipcode.at(16) == 2); //is_reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); snarl = distance_index.get_parent(chain3); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain3))) != 0; - REQUIRE(value_and_index.first == is_rev); + REQUIRE(zipcode.zipcode.at(17) == is_rev); - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + REQUIRE(zipcode.zipcode.at(18) == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, 
true)))); //Chain code for chain 3-5 - REQUIRE(zipcode.decoder[4] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[4] == std::make_pair(true, (size_t) 19)); //Rank in parent - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); + REQUIRE(zipcode.zipcode.at(19) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.minimum_length(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) +1); + REQUIRE(zipcode.zipcode.at(20) == distance_index.minimum_length(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) +1); //component_count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(21) == 0); //REgular snarl code for snarl 3-5 - REQUIRE(zipcode.decoder[5] == std::make_pair(false, value_and_index.second)); - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.decoder[5] == std::make_pair(false, (size_t) 22)); + REQUIRE(zipcode.zipcode.at(22) == 1); //offset in chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)+1); + REQUIRE(zipcode.zipcode.at(23) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
3 : 1)+1); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0+1); + REQUIRE(zipcode.zipcode.at(24) == 0+1); //child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(25) == 1); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); snarl = distance_index.get_parent(chain4); - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + REQUIRE(zipcode.zipcode.at(26) == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //is_reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; - REQUIRE(value_and_index.first == is_rev); + REQUIRE(zipcode.zipcode.at(27) == is_rev); //Chain code for node 4 - REQUIRE(zipcode.decoder[6] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[6] == std::make_pair(true, (size_t) 28)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; + REQUIRE(zipcode.zipcode.at(28) == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 4+1) ; + REQUIRE(zipcode.zipcode.at(29) == 4+1) ; //Chain component - value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0) ; + REQUIRE(zipcode.zipcode.at(30) == 0) ; //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 30); } @@ -938,7 +857,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -948,7 +867,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -958,7 +877,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -968,7 +887,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -978,7 +897,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -988,7 +907,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -998,7 +917,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1008,7 +927,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n8->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1055,85 +974,68 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Third is the chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - 
REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); //0 as tag for irregular snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 2); + REQUIRE(zipcode.zipcode.at(4) == 2); net_handle_t irregular_snarl = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n2->id()))); //Snarl prefix sum - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t bound = distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, true)); - REQUIRE(value_and_index.first == SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(bound), + REQUIRE(zipcode.zipcode.at(5) == SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(bound), distance_index.minimum_length(bound))+1); //Snarl length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.minimum_length(irregular_snarl)+1); + REQUIRE(zipcode.zipcode.at(6) == distance_index.minimum_length(irregular_snarl)+1); size_t child_count = 0 ; distance_index.for_each_child(irregular_snarl, [&] (const net_handle_t& child) { child_count++; }); //Snarl child count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == child_count); + REQUIRE(zipcode.zipcode.at(7) == child_count); //component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, false)))); + REQUIRE(zipcode.zipcode.at(8) == 
distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, false)))); //Snarl record offset - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_record_offset(irregular_snarl)); + REQUIRE(zipcode.zipcode.at(9) == distance_index.get_record_offset(irregular_snarl)); //Distance from left side of child to snarl start - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); + //REQUIRE(zipcode.zipcode.at(10) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); //Distance from right side of child to snarl start - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); + //REQUIRE(zipcode.zipcode.at(11) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); //Distance from left side of child to snarl end - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); + //REQUIRE(zipcode.zipcode.at(12) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Distance from right side of child to snarl end - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
1 : 0)); + //REQUIRE(zipcode.zipcode.at(13) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 14)); //Rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); + REQUIRE(zipcode.zipcode.at(14) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); //Length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(15) == 1+1); //Component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(16) == 0); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 16); } SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; @@ -1247,7 +1149,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1257,7 +1159,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1267,7 +1169,7 @@ 
using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1277,7 +1179,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1287,7 +1189,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1297,7 +1199,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1307,7 +1209,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1351,21 +1253,17 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate that it's a top-level snarl - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - 
REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(0) == 0); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); + REQUIRE(zipcode.zipcode.at(1) == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 2)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); + REQUIRE(zipcode.zipcode.at(2) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(3) == 3+1); } SECTION ("decoded zip code for node in top-level snarl") { ZipCode zipcode; @@ -1398,33 +1296,26 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate that it's a top-level snarl - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(0) == 0); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); + REQUIRE(zipcode.zipcode.at(1) == 
distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 2)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); + REQUIRE(zipcode.zipcode.at(2) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 2+1); + REQUIRE(zipcode.zipcode.at(3) == 2+1); //component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(4) == 0); //Node 3 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 5)); //rank in snarl - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)+1); + REQUIRE(zipcode.zipcode.at(5) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); //length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 1+1); + REQUIRE(zipcode.zipcode.at(6) == 1+1); } SECTION ("decode zip code for node in chain in top-level snarl") { net_handle_t node3 = distance_index.get_node_net_handle(n3->id()); @@ -1503,7 +1394,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1513,7 +1404,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1523,7 +1414,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1533,7 +1424,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1543,7 +1434,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if 
(zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1553,7 +1444,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1563,7 +1454,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1607,45 +1498,37 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain - pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); - REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.zipcode.at(0) == 1); REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(1) == 0); //Third value is the chain component count - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(2) == 0); //Connectivity of the chain - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 0); + REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); - value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 4)); + REQUIRE(zipcode.zipcode.at(4) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == 3+1); + REQUIRE(zipcode.zipcode.at(5) == 3+1); //Fifth is if the node is reversed - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( + REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //Chain component - value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); - REQUIRE(value_and_index.first == distance_index.get_chain_component( + REQUIRE(zipcode.zipcode.at(7) == distance_index.get_chain_component( distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(value_and_index.second == std::numeric_limits::max()); + REQUIRE(zipcode.zipcode.size() == 7); } SECTION("Distances") { @@ -1682,7 +1565,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1692,7 +1575,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1702,7 +1585,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1712,7 +1595,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1722,7 +1605,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1732,7 +1615,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1742,7 +1625,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (zipcode.bit_count() <= 112) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 99004b283a4..2e681638f70 100644 --- a/src/zip_code.cpp +++ 
b/src/zip_code.cpp @@ -1,6 +1,6 @@ #include "zip_code.hpp" -//#define DEBUG_ZIPCODE +#define DEBUG_ZIPCODE namespace vg{ using namespace std; @@ -16,29 +16,42 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p current_handle = distance_index.get_parent(current_handle); } + //Make a temporary zipcode that will turn into the real one + vector temp_zipcode; + temp_zipcode.reserve(ancestors.size() * 4); + //Remember the maximum value we see to set the bitwidth when we make the real zipcode + size_t max_value = 0; + //Now add the root-level snarl or chain if (distance_index.is_root_snarl(current_handle)) { //FIrst thing is a snarl, so add the snarl's connected component number - zipcode.add_value(0); + temp_zipcode.emplace_back(0); #ifdef DEBUG_ZIPCODE cerr << "Adding code for top-level snarl " << distance_index.net_handle_as_string(current_handle) << endl; #endif - zipcode.add_value(distance_index.get_connected_component_number(current_handle)); + temp_zipcode.emplace_back(distance_index.get_connected_component_number(current_handle)); + max_value = std::max(max_value, temp_zipcode.back()); } else { +#ifdef DEBUG_ZIPCODE + cerr << "Adding code for top-level chain " << distance_index.net_handle_as_string(current_handle) << endl; +#endif //FIrst thing is a chain so add its connected component number and remove the chain from the stack - zipcode.add_value(1); + temp_zipcode.emplace_back(1); + max_value = std::max(max_value, temp_zipcode.back()); //If the root-level structure is actually a chain, then save the connected component number and take out //the chain from the stack //If the root-level structure is a trivial chain, then just store the node (as a chain, which will have the //connected-component number as the rank in the snarl anyways) - zipcode.add_value(distance_index.get_connected_component_number(ancestors.back())); + temp_zipcode.emplace_back(distance_index.get_connected_component_number(ancestors.back())); + max_value = 
std::max(max_value, temp_zipcode.back()); if (ancestors.size() == 2 && distance_index.is_trivial_chain(ancestors.back())) { #ifdef DEBUG_ZIPCODE cerr << "Adding code for top-level trivial chain" << endl; #endif - zipcode.add_value(distance_index.minimum_length(ancestors.back())+1); + temp_zipcode.emplace_back(distance_index.minimum_length(ancestors.back())+1); + max_value = std::max(max_value, temp_zipcode.back()); size_t connectivity = 0; if ( distance_index.is_externally_start_end_connected(ancestors.back())) { connectivity = connectivity | 1; @@ -50,7 +63,9 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p connectivity = connectivity | 4; } - zipcode.add_value(connectivity); + temp_zipcode.emplace_back(connectivity); + max_value = std::max(max_value, temp_zipcode.back()); + zipcode.from_vector(temp_zipcode, max_value); return; } else { #ifdef DEBUG_ZIPCODE @@ -62,7 +77,8 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p if (distance_index.is_looping_chain(ancestors.back())) { component += 1; } - zipcode.add_value(component); + temp_zipcode.emplace_back(component); + max_value = std::max(max_value, temp_zipcode.back()); } size_t connectivity = 0; @@ -76,7 +92,8 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p connectivity = connectivity | 4; } - zipcode.add_value(connectivity); + temp_zipcode.emplace_back(connectivity); + max_value = std::max(max_value, temp_zipcode.back()); ancestors.pop_back(); } @@ -88,62 +105,38 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p cerr << "Adding code for " << distance_index.net_handle_as_string(current_ancestor) << endl; #endif if (distance_index.is_node(current_ancestor)) { - vector to_add = get_node_code(current_ancestor, distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::NODE_SIZE); -#endif + 
get_node_code(current_ancestor, distance_index, temp_zipcode, max_value); } else if (distance_index.is_chain(current_ancestor)) { - vector to_add = get_chain_code(current_ancestor, distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::CHAIN_SIZE); -#endif + get_chain_code(current_ancestor, distance_index, temp_zipcode, max_value); + if (distance_index.is_trivial_chain(current_ancestor)) { + zipcode.from_vector(temp_zipcode, max_value); return; } } else if (distance_index.is_regular_snarl(current_ancestor)) { - vector to_add = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::REGULAR_SNARL_SIZE); -#endif + get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index, temp_zipcode, max_value); } else { #ifdef DEBUG_ZIPCODE assert(distance_index.is_snarl(current_ancestor)); #endif - vector to_add = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::IRREGULAR_SNARL_SIZE); -#endif - for (auto& x : to_add) { - zipcode.add_value(x); - } + get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index, temp_zipcode, max_value); } } + cerr << "Make real zipcode from temp with length " << temp_zipcode.size() << endl; + zipcode.from_vector(temp_zipcode, max_value); } -std::vector ZipCode::to_vector() const { - return zipcode.to_vector(); -} - -void ZipCode::from_vector(const std::vector& values) { - zipcode.from_vector(values); +void ZipCode::from_vector(const std::vector& values, size_t max_value) { + zipcode.from_vector(values, max_value); } void ZipCode::fill_in_full_decoder() { - if (byte_count() == 0 || finished_decoding) { + if (zipcode.size() == 0 || finished_decoding) { //If the zipcode is empty return; } - decoder.reserve(byte_count() / 4); + 
decoder.reserve(zipcode.size() / 4); bool done=false; while (!done) { done = fill_in_next_decoder(); @@ -163,193 +156,79 @@ bool ZipCode::fill_in_next_decoder() { //check to see how much has been filled in size_t zip_length = decoder_length(); - //Does the most recent thing in the zip_index point to a chain/node? - bool previous_is_chain; - - size_t zip_index=0; - size_t zip_value; if (zip_length == 0) { //If there is nothing in the decoder yet, then the first thing will start at 0 - for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } //Is the root a chain/node? - previous_is_chain = zip_value; - decoder.emplace_back(previous_is_chain, 0); - -#ifdef DEBUG_ZIPCODE -cerr << "\tadding the root, which is a " << (previous_is_chain ? "chain or node" : "snarl") << endl; -#endif - //There might be something else but we're done for now - return false; - } else if (zip_length == 1) { - //If there is one thing in the zipcode - previous_is_chain = decoder.back().first; - - //If the top-level structure is a chain, it might actually be a node, in which case - //the only other thing that got stored is the length - if (previous_is_chain) { - //Get to the end of the root chain - assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't - - for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_index == std::numeric_limits::max()) { - //If the zip code ends here (after the length), then this was a node and we're done -#ifdef DEBUG_ZIPCODE -cerr << "\tThe last thing was a root-level node, so nothing else" << endl; -#endif - finished_decoding = true; - return true; - } else { - //Otherwise, check if this is a node or a snarl. 
If it is a node, then there are three things remaining - size_t start_index = zip_index; + decoder.emplace_back(zipcode.at(ROOT_IS_CHAIN_OFFSET), 0); - //If it's a node, then there are three remaining things in the index - //If it were a snarl, then there are more than three things - for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - - - //Return the start of this thing, and true if it was a node - decoder.emplace_back(zip_index == std::numeric_limits::max(), start_index); #ifdef DEBUG_ZIPCODE - cerr << "\tAdding a " << (zip_index == std::numeric_limits::max() ? "node" : "snarl") << endl; +cerr << "\tadding the root, which is a " << (decoder.back().first ? "chain or node" : "snarl") << endl; #endif - //If this was a node, then we're done so return true. Otherwise, it was a snarl to return false - return zip_index == std::numeric_limits::max(); - } + if (zipcode.size() == ROOT_NODE_SIZE) { + //If this was a root node, then we're done + finished_decoding = true; + return true; } else { - //Otherwise, the top-level thing is a snarl and the next thing is a chain - for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - decoder.emplace_back(!previous_is_chain, zip_index); + //There might be something else but we're done for now return false; } } else { - //If there was already stuff in the decoder, then figure out where the last thing - //is and set values - previous_is_chain = decoder.back().first; - zip_index = decoder.back().second; -#ifdef DEBUG_ZIPCODE - cerr << "Last thing was a " << (previous_is_chain ? 
"chain or node" : "snarl") << " starting at " << zip_index << endl; -#endif - - //get to the end of the current thing, add the next thing to the decoder and return + //This is not a root + bool previous_is_chain = decoder.back().first; + size_t previous_start = decoder.back().second; if (previous_is_chain) { - //If the current zip_index points to a chain, then either it points to a node, or to - //a chain that is followed by a node or snarl - //The node is the shorter of the two, so if the zipcode ends after the node, then it was - //a node and otherwise, it was an actual chain + //If the last thing was chain, then either the chain was the last thing in the zipcode + // (if it was the child of a snarl) or the next thing is either a node or snarl - //This must be true in order for this to work assert(std::min(ZipCode::CHAIN_SIZE + ZipCode::REGULAR_SNARL_SIZE, ZipCode::CHAIN_SIZE + ZipCode::IRREGULAR_SNARL_SIZE) > ZipCode::NODE_SIZE); - //Get to the end of the "node". If it is the end of the zipcode, then it was a node - //Otherwise, it was a snarl - //The node could actually be a chain in a snarl, in which case the zipcode ends after the - //chain - size_t check_zip_index = zip_index; - for (size_t i = 0 ; i < std::min(ZipCode::CHAIN_SIZE, ZipCode::NODE_SIZE) ; i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; - } - //If the zipcode ends after a chain - if (check_zip_index == std::numeric_limits::max()) { + size_t this_size = zip_length == 1 ? 
ROOT_CHAIN_SIZE : CHAIN_SIZE; + if (zipcode.size() == previous_start + this_size) { + //If the zipcode ends here #ifdef DEBUG_ZIPCODE - cerr << "\tThe last thing was a chain pretending to be a node so we're done" << endl; + cerr << "The last thing was a trivial chain so we're done" << endl; #endif finished_decoding = true; return true; - } - //Now check if it was actually a real node - for (size_t i = 0 ; i < std::max(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE) - - std::min(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE); i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; - } - - //This might be a node that is a child of the chain, in which case there is one - //more thing in the zip code - - if (check_zip_index == std::numeric_limits::max()) { - //If the zip code ends here, then this was a node and we're done - //This should never really happen since it would have returned true when - //adding the node, but I'll leave in just in case someone calls this when they - //shouldn't have + } else if (zipcode.size() == previous_start + this_size + NODE_SIZE) { + //If the zipcode ends after the node, add the node and we're done #ifdef DEBUG_ZIPCODE - cerr << "\tThe last thing was a node so we're done" << endl; + cerr << "Adding a node and we're done" << endl; #endif + decoder.emplace_back(true, previous_start + this_size); finished_decoding = true; return true; } else { - //Otherwise, the last thing was a chain - //Get to the end of the chain - for (size_t i = 0 ; i < ZipCode::CHAIN_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - - //zip_index is now the start of the current thing that we want to add - the thing after the chain - - //The current thing can be either a snarl or a node. If it is a node, then the zipcode - //ends after the node. 
If it is a snarl, then the shortest the remaining zipcocde can be - //is the size of a snarl and a chain - //This must be true in order for this to work - assert(std::min(ZipCode::CHAIN_SIZE + ZipCode::REGULAR_SNARL_SIZE, - ZipCode::CHAIN_SIZE + ZipCode::IRREGULAR_SNARL_SIZE) > ZipCode::NODE_SIZE); - - //Check if the current thing is a node - check_zip_index = zip_index; - for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; - } - - //Return the start of this thing, and true if it was a node - decoder.emplace_back(check_zip_index == std::numeric_limits::max(), zip_index); + //Otherwise, this is a snarl and we're not done #ifdef DEBUG_ZIPCODE - cerr << "\tAdd a " << (check_zip_index == std::numeric_limits::max() ? "node" : "snarl") << endl; + cerr << "Adding a snarl starting at " << (previous_start + this_size) << endl; #endif - //If this was a node, then we're done so return true. Otherwise, it was a snarl to return false - return check_zip_index == std::numeric_limits::max(); + decoder.emplace_back(false, previous_start + this_size); + return false; } } else { - //If !previous_is_chain, then the current zip_index points to a snarl + //Otherwise, the last thing was a snarl + size_t next_start = previous_start; //The regular/irregular snarl tag - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - - if (zip_value == 1) { -#ifdef DEBUG_ZIPCODE - cerr << "\tAdd a node child of a regular snarl" << endl; -#endif - //Regular snarl, so 2 remaining things in the code - for (size_t i = 0 ; i < ZipCode::REGULAR_SNARL_SIZE - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - decoder.emplace_back(!previous_is_chain, zip_index); - return false; + if (zip_length == 1) { + //IF this was a root snarl + next_start += 
ROOT_SNARL_SIZE; + } else if (zipcode.at(previous_start + SNARL_IS_REGULAR_OFFSET) == 1) { + //If this was a regular snarl + next_start += REGULAR_SNARL_SIZE; } else { -#ifdef DEBUG_ZIPCODE - cerr << "\tAdd the child of " << (decoder.size() == 2 ? "a top-level " : "an" ) << " irregular snarl" << endl; -#endif - //If the decoder has two things in it (top-level chain and the current snarl), then this - //is a top-level irregular snarl. Otherwise a normal irregular snarl - size_t code_size = ZipCode::IRREGULAR_SNARL_SIZE; - for (size_t i = 0 ; i < code_size - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - decoder.emplace_back(!previous_is_chain, zip_index); - return false; + //Technically it could be irregular or cyclic but it doesn't matter because the codes are the same + next_start += IRREGULAR_SNARL_SIZE; } + decoder.emplace_back(true, next_start); + return false; } - } + } } size_t ZipCode::max_depth() const { @@ -387,17 +266,13 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } else { //Definitely a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 0) { - return ZipCode::IRREGULAR_SNARL; - } else if (zip_value == 1) { - return ZipCode::REGULAR_SNARL; + size_t code_type_int = zipcode.at(decoder[depth].second + ZipCode::SNARL_IS_REGULAR_OFFSET); + if (code_type_int == 0) { + return IRREGULAR_SNARL; + } else if (code_type_int == 1) { + return REGULAR_SNARL; } else { - return ZipCode::CYCLIC_SNARL; + return CYCLIC_SNARL; } } } @@ -410,11 +285,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan if (decoder_length() == 1) { //If the length is 1, then it's a node - size_t zip_value; - size_t zip_index = decoder[depth].second; - for 
(size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_LENGTH_OFFSET); return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } else { @@ -425,23 +296,13 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan //If this is a chain/node //If this is a chain or a node, then the length will be the second thing - size_t zip_value; - size_t zip_index = decoder[depth].second; - - for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + assert(CHAIN_LENGTH_OFFSET == NODE_LENGTH_OFFSET); + size_t zip_value = zipcode.at(decoder[depth].second + CHAIN_LENGTH_OFFSET); return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - - for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - + size_t zip_value = zipcode.at(decoder[depth].second + SNARL_LENGTH_OFFSET); return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } @@ -460,12 +321,7 @@ size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { throw std::runtime_error("zipcodes trying to find the rank in snarl of a node in a chain"); } - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(decoder[depth].second + CHAIN_RANK_IN_SNARL_OFFSET); } else { //If this is a snarl throw std::runtime_error("zipcodes don't store snarl ranks for snarls"); @@ -487,12 +343,7 @@ size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIn } else if (!decoder[depth].first) { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(decoder[depth].second + SNARL_CHILD_COUNT_OFFSET); } else { //If this is not a snarl throw std::runtime_error("trying to get the snarl child count of a non-snarl zipcode"); @@ -512,21 +363,13 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde if (!decoder[depth-1].first) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + NODE_OFFSET_OFFSET); return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + SNARL_OFFSET_IN_CHAIN_OFFSET); return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } @@ -544,23 +387,11 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { if (!decoder[depth-1].first) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - - return zip_value; + return zipcode.at(decoder[depth].second + NODE_CHAIN_COMPONENT_OFFSET); } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - - return zip_value; + return zipcode.at(decoder[depth].second + SNARL_CHAIN_COMPONENT_OFFSET); } } @@ -569,11 +400,7 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons if (!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + CHAIN_COMPONENT_COUNT_OFFSET); if (zip_value % 2) { if (!get_end) { return 0; @@ -590,12 +417,7 @@ bool ZipCode::get_is_looping_chain(const size_t& depth) const { if 
(!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value % 2; + return zipcode.at(decoder[depth].second + CHAIN_COMPONENT_COUNT_OFFSET) % 2; } bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { @@ -610,28 +432,15 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { if (decoder[depth-1].first) { //If the parent is a chain, then this is a node and we need to check its orientation - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(decoder[depth].second + NODE_IS_REVERSED_OFFSET); } else { //If the parent is a snarl, then this might be a chain in a regular snarl - size_t zip_value; - size_t zip_index = decoder[depth-1].second; - //zip_value is true if the parent is a regular snarl - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 1) { + + size_t snarl_type = zipcode.at(decoder[depth-1].second + SNARL_IS_REGULAR_OFFSET); + if (snarl_type == 1) { //The parent is a regular snarl, which stores is_reversed for the child - for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(decoder[depth-1].second + REGULAR_SNARL_IS_REVERSED_OFFSET); } else { //The parent is an irregular snarl, so it isn't reversed return false; @@ -650,11 +459,7 @@ 
net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd if (depth == 0) { //If this is the root chain/snarl/node - size_t zip_value, zip_index = 0; - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return distance_index->get_handle_from_connected_component(zip_value); + return distance_index->get_handle_from_connected_component(zipcode.at(ROOT_IDENTIFIER_OFFSET)); } else if (decoder[depth].first) { //If this is a chain/node @@ -663,25 +468,18 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - //zip_value is is_regular_snarl - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 1) { + size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); + if (snarl_type == 1) { //If this is a regular snarl throw std::runtime_error("zipcodes trying to get a handle of a regular snarl"); } else { //Irregular snarl - //zip_value is distance index offset - for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + size_t zip_value = zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); + net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; } } @@ -693,11 +491,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S if (depth == 0) { //If this is the 
root chain/snarl/node - size_t zip_value, zip_index = 0; - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return distance_index->get_handle_from_connected_component(zip_value); + return distance_index->get_handle_from_connected_component(zipcode.at(ROOT_IDENTIFIER_OFFSET)); } else if (decoder[depth].first) { //If this is a chain/node @@ -713,13 +507,8 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - //zip_value is is_regular_snarl - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 1) { + size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); + if (snarl_type == 1) { //If this is a regular snarl net_handle_t n = distance_index->get_node_net_handle(id); @@ -733,12 +522,10 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } else { //Irregular snarl - //zip_value is distance index offset - for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + size_t zip_value = zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); + net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; } } @@ -751,11 +538,7 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { if (depth == 0) { //If this is the root chain/snarl/node - size_t zip_value, 
zip_index = 0; - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(ROOT_IDENTIFIER_OFFSET); } else if (decoder[depth].first) { //If this is a chain/node @@ -764,25 +547,15 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { } else { //If this is a snarl - size_t zip_value; - size_t zip_index = decoder[depth].second; - //zip_value is is_regular_snarl - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 1) { + size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); + if (snarl_type == 1) { //If this is a regular snarl throw std::runtime_error("zipcodes trying to get a handle of a regular ansl"); } else { //Irregular snarl - //zip_value is distance index offset - for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - return zip_value; + return zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); } } } @@ -792,18 +565,11 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star assert(depth > 0); assert((get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || get_code_type(depth-1) == ZipCode::REGULAR_SNARL || get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)); #endif - size_t zip_value; - size_t zip_index = decoder[depth-1].second; - //zip_value is 1 if the parent is a regular snarl - for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } - if (zip_value == 1) { + size_t snarl_type = zipcode.at(decoder[depth-1].second + SNARL_IS_REGULAR_OFFSET); + if (snarl_type == 1) { //The parent is a regular 
snarl, which stores is_reversed for the child - for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + + size_t zip_value = zipcode.at(decoder[depth-1].second + REGULAR_SNARL_IS_REVERSED_OFFSET); //Zip value is true if the child is reversed if ((snarl_start && left_side) || (!snarl_start && !left_side)) { @@ -824,9 +590,7 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star } else { distance_offset = ZipCode::IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET; } - for (size_t i = 0 ; i <= distance_offset - ZipCode::SNARL_IS_REGULAR_OFFSET -1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth-1].second + distance_offset); return zip_value == 0 ? std::numeric_limits::max() : zip_value - 1; } } @@ -834,31 +598,19 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); return (zip_value & 1) != 0; } bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); 
return (zip_value & 2) != 0; } bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value; - size_t zip_index = decoder[depth].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } + size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); return (zip_value & 4) != 0; } @@ -898,12 +650,11 @@ const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, } void ZipCode::dump(std::ostream& out) const { - std::vector numbers = to_vector(); // Print out the numbers in a way that is easy to copy-paste as a vector literal. out << " ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { +void ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value) { #ifdef DEBUG_ZIPCODE assert(!distance_index.is_trivial_chain(node)); assert((distance_index.is_chain(distance_index.get_parent(node)) || distance_index.is_root(distance_index.get_parent(node)))); #endif - //Node code is: offset in chain, length, is reversed - vector node_code(NODE_SIZE); + size_t start_i = temp_zipcode.size(); + temp_zipcode.resize(start_i + NODE_SIZE); + //Node code is: offset in chain, length, is reversed, chain component + //Assume this node is in a regular chain size_t prefix_sum = distance_index.get_prefix_sum_value(node); - node_code[NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1; - node_code[NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; - node_code[NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); + temp_zipcode[start_i + NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1; + max_value = std::max(max_value, temp_zipcode[start_i + NODE_OFFSET_OFFSET]); + + temp_zipcode[start_i + NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; + max_value = std::max(max_value, temp_zipcode[start_i + NODE_LENGTH_OFFSET]); + + temp_zipcode[start_i + NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); + max_value = std::max(max_value, temp_zipcode[start_i + NODE_IS_REVERSED_OFFSET]); + size_t component = distance_index.get_chain_component(node); - node_code[NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; - return node_code; + temp_zipcode[start_i + NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + max_value = std::max(max_value, temp_zipcode[start_i + NODE_CHAIN_COMPONENT_OFFSET]); + + return; } -vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { +void ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value) { //Chain code is: rank in snarl, length - vector chain_code (CHAIN_SIZE); - chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); + + size_t start_i = temp_zipcode.size(); + temp_zipcode.resize(start_i + CHAIN_SIZE); + + //Rank in snarl + temp_zipcode[start_i + CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); + max_value = std::max(max_value, temp_zipcode[start_i + CHAIN_RANK_IN_SNARL_OFFSET]); + + //Length size_t len = distance_index.minimum_length(chain); - chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; + temp_zipcode[start_i + CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; + max_value = std::max(max_value, temp_zipcode[start_i + CHAIN_LENGTH_OFFSET]); + + //Component count and if it loops bool is_trivial = distance_index.is_trivial_chain(chain) ; size_t component = is_trivial ? 
0 @@ -946,102 +719,125 @@ vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDis if (!is_trivial && distance_index.is_looping_chain(chain)) { component += 1; } - chain_code[CHAIN_COMPONENT_COUNT_OFFSET] = component; - return chain_code; + temp_zipcode[start_i + CHAIN_COMPONENT_COUNT_OFFSET] = component; + max_value = std::max(max_value, component); + + return; } -vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { - //Regular snarl code is 1, offset in chain, length, is reversed - vector snarl_code (REGULAR_SNARL_SIZE); +void ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value) { + + size_t start_i = temp_zipcode.size(); + temp_zipcode.resize(start_i + REGULAR_SNARL_SIZE); + //Tag to say that it's a regular snarl - snarl_code[SNARL_IS_REGULAR_OFFSET] = 1; + temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET] = 1; //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; + temp_zipcode[start_i + SNARL_CHILD_COUNT_OFFSET] = child_count; + max_value = std::max(max_value, child_count); //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); + temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET]); size_t component = distance_index.get_chain_component(start_node); - snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET]); //Length of the snarl size_t len = distance_index.minimum_length(snarl); - snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); + temp_zipcode[start_i + SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_LENGTH_OFFSET]); //Is the child of the snarl reversed in the snarl #ifdef DEBUG_ZIPCODE assert(distance_index.is_chain(snarl_child)); #endif - snarl_code[REGULAR_SNARL_IS_REVERSED_OFFSET] = (distance_index.distance_in_parent(snarl, + temp_zipcode[start_i + REGULAR_SNARL_IS_REVERSED_OFFSET] = (distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(snarl_child))) != 0); + max_value = std::max(max_value, temp_zipcode[start_i + REGULAR_SNARL_IS_REVERSED_OFFSET]); - return snarl_code; + return; } -vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index) { - vector snarl_code (IRREGULAR_SNARL_SIZE); +void ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value) { + + size_t start_i = temp_zipcode.size(); + temp_zipcode.resize(start_i + IRREGULAR_SNARL_SIZE); //Tag to say that it's an irregular snarl - snarl_code[SNARL_IS_REGULAR_OFFSET] = distance_index.is_dag(snarl) ? 
0 : 2; + temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET] = distance_index.is_dag(snarl) ? 0 : 2; + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET]); //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; + temp_zipcode[start_i + SNARL_CHILD_COUNT_OFFSET] = child_count; + max_value = std::max(max_value, child_count); //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); + temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET]); size_t component = distance_index.get_chain_component(start_node); - snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET]); //Length of the snarl size_t len = distance_index.minimum_length(snarl); - snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); + temp_zipcode[start_i + SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 
0 : len+1); + max_value = std::max(max_value, temp_zipcode[start_i + SNARL_LENGTH_OFFSET]); //Record offset to look up distances in the index later - snarl_code[IRREGULAR_SNARL_RECORD_OFFSET] = (distance_index.get_record_offset(snarl)); + temp_zipcode[start_i + IRREGULAR_SNARL_RECORD_OFFSET] = (distance_index.get_record_offset(snarl)); + max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_RECORD_OFFSET]); - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); //Add 1 to values to store inf properly - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() ? 
0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] == std::numeric_limits::max() + : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] + 1; + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] == std::numeric_limits::max() ? 0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] == std::numeric_limits::max() + : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] + 1; + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] == std::numeric_limits::max() ? 0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] == std::numeric_limits::max() + : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] + 1; + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = + temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] == std::numeric_limits::max() ? 
0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] + 1; + : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] + 1; - return snarl_code; + max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET]); + max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET]); + max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET]); + max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET]); } @@ -1508,149 +1304,53 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si cerr << "Checking if two zip codes are farther than " << limit << endl; #endif - size_t zip_index1 = 0; size_t zip_index2 = 0; - size_t zip_value1 = std::numeric_limits::max(); - size_t zip_value2 = std::numeric_limits::max(); - - //If the two positions aren't on the same connected component, then we're done - for (size_t i = 0 ; i <= ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - } - if (zip_value1 != zip_value2) { + if (zip1.decoder[0].first != zip2.decoder[0].first) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif return true; } - bool is_top_level_chain = zip_value1; - for (size_t i = 0 ; i <= ROOT_IDENTIFIER_OFFSET - ROOT_IS_CHAIN_OFFSET - 1; i++) { - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - } - if (zip_value1 != zip_value2) { + if (zip1.get_distance_index_address(0) != zip2.get_distance_index_address(0)) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif return true; } - if (!is_top_level_chain) { + //The depth 
of a chain that both zips are on + size_t shared_depth = 0; + + if (!zip1.decoder[0].first) { //If the top-level thing is a snarl, then check if the zips are in the same chain. //If they are, then proceed from the shared chain - //The next thing will be the identifier for the chain - for (size_t i = 0 ; i <= CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - } - if (zip_value1 != zip_value2) { + if (zip1.get_rank_in_snarl(1) != zip2.get_rank_in_snarl(1)) { //We can't tell return false; } - //Next is the length of the chain - for (size_t i = 0 ; i <= CHAIN_LENGTH_OFFSET - CHAIN_RANK_IN_SNARL_OFFSET - 1; i++) { - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - } - if (zip_value1 < limit) { + //Next check the length of the chain + if (zip1.get_length(1) < limit) { return true; } + //The two zipcodes are on the same chain at depth 1 + shared_depth = 1; //The zips now point to the children of the shared chain, so we can proceed as if the top-level //structure was a chain - } else { - //If it is a chain, get two more things to get to the end of the chain - for (size_t i = 0 ; i < 2 ; ++i) { - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - } } //Both zips now point to a thing in a shared chain //Get the minimum possible distance between the structures on the chain //For a lower bound, this assumes that the positions are as close as they can be on the structure in the chain - size_t prefix_sum1, prefix_sum2, length1, length2, component1, component2; + size_t prefix_sum1 = zip1.get_offset_in_chain(shared_depth+1); + size_t prefix_sum2 = 
zip2.get_offset_in_chain(shared_depth+1); + size_t length1 = zip1.get_length(shared_depth+1); + size_t length2 = zip2.get_length(shared_depth+1); + size_t component1 = zip1.get_chain_component(shared_depth+1); + size_t component2 = zip2.get_chain_component(shared_depth+1); - //The next thing could either be a snarl or a node. If it is a node, - vector next_values; - for (size_t i = 0 ; i < NODE_SIZE ; i++ ) { -#ifdef DEBUG_ZIPCODE - assert(zip_index1 != std::numeric_limits::max()); -#endif - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - next_values.emplace_back(zip_value1); - } - if (zip_index1 == std::numeric_limits::max()) { -#ifdef DEBUG_ZIPCODE - cerr << "zip1 is a node in a chain" << endl; -#endif - //If the last thing was a node - prefix_sum1 = next_values[0]; - length1 = next_values[1]; - component1 = next_values[2]; - prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; - length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; - } else { -#ifdef DEBUG_ZIPCODE - cerr << "zip1 is in a snarl in a chain" << endl; -#endif - //If the last thing was a snarl - if (next_values[0]) { - //If the next thing was a regular snarl - prefix_sum1 = next_values[1]; - length1 = next_values[2]; - std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); - component1 = zip_value1; - prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; - length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; - } else { - //If the next thing was an irregular snarl - //TODO: If it's an irregular snarl, then we don't actually store the relevant values so we can't tell. 
Could look it up in the distance index or store it - return false; - } - } - - //Do the same for the other zip - next_values.clear(); - for (size_t i = 0 ; i < NODE_SIZE ; i++ ) { -#ifdef DEBUG_ZIPCODE - assert(zip_index2 != std::numeric_limits::max()); -#endif - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - next_values.emplace_back(zip_value2); - } - if (zip_index2 == std::numeric_limits::max()) { -#ifdef DEBUG_ZIPCODE - cerr << "zip2 is a node in a chain" << endl; -#endif - //If the last thing was a node - prefix_sum2 = next_values[0]; - length2 = next_values[1]; - component2 = next_values[2]; - prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; - length2 = length2 == 0 ? std::numeric_limits::max() : length2-1; - } else { -#ifdef DEBUG_ZIPCODE - cerr << "zip2 is in a snarl in a chain" << endl; -#endif - //If the last thing was a snarl - if (next_values[0]) { - //If the next thing was a regular snarl - prefix_sum2 = next_values[1]; - length2 = next_values[2]; - std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); - component2 = zip_value2; - prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; - length2 = length2 == 0 ? std::numeric_limits::max() : length2-1; - } else { - //If the next thing was an irregular snarl - //TODO: If it's an irregular snarl, then we don't actually store the relevant values so we can't tell. 
Could look it up in the distance index or store it - return false; - } - } #ifdef DEBUG_ZIPCODE cerr << "Finding distance in chain between " << prefix_sum1 << " " << length1 << " and " << prefix_sum2 << " and " << length2 << endl; #endif @@ -1689,52 +1389,162 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si } gbwtgraph::Payload ZipCode::get_payload_from_zip() const { - if (byte_count() > 15) { +#ifdef DEBUG_ZIPCODE + cerr << "Encode integers: "; + for (size_t i = 0 ; i < zipcode.size() ; i++) { + cerr << zipcode.at(i) << " "; + } + cerr << endl; +#endif + if (bit_count() > 112) { //If there aren't enough bits to represent the zip code return MIPayload::NO_CODE; } - - //Index and value as we walk through the zip code - size_t index = 0; - size_t value; - //The values that get returned code_type encoded1 = 0; code_type encoded2 = 0; - encoded1 |= byte_count(); + //The first (leftmost of first int) 8 bits is the width + encoded1 |= zipcode.get_bit_width(); - for (size_t i = 0 ; i < zipcode.data.size() ; i++ ) { - size_t byte = static_cast (zipcode.data[i]); - if ( i < 7 ) { - //Add to first code - encoded1 |= (byte << ((i+1)*8)); + //Left shift by 8 to make space for the next thing we're adding + encoded1 <<= 8; + //The second 8 bits is the number of items in the vector (not the number of bits) + encoded1 |= zipcode.size(); + encoded1 <<= 1; +#ifdef DEBUG_ZIPCODE +cerr << "Encode the bit width "<< ((size_t) zipcode.get_bit_width()) << " and size " << zipcode.size() << endl; +cerr << "\t"; +#endif + + + //16 bits are set, so 112 left + //Now add each bit one by one and left shift to make space for the next one + for (size_t i = 0 ; i < 112 ; i++ ) { + if ( i < 48 ) { + //Add to first code, just one bit to the end + if (i < zipcode.get_bit_count() && zipcode.bit_at(i)) { + encoded1 |= 1; +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif + } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; + } +#endif + //Left shift by one after everything 
except the last bit + if (i != 47) { + encoded1 <<= 1; + } } else { //Add to second code - encoded2 |= (byte << ((i-7)*8)); + if (i < zipcode.get_bit_count() && zipcode.bit_at(i)) { + encoded2 |= 1; +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif + } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; + } +#endif + if ( i != 111) { + encoded2 <<= 1; + } } } +#ifdef DEBUG_ZIPCODE + cerr << endl; + cerr << "Actual ints being stored: " << encoded1 << " and " << encoded2 << ": "; + for (int i = 63 ; i >= 0 ; --i) { + if (((size_t) 1 << i) & encoded1) { + cerr << "1"; + } else { + cerr << "0"; + } + } + for (int i = 63 ; i >= 0 ; --i) { + if (((size_t) 1 << i) & encoded2) { + cerr << "1"; + } else { + cerr << "0"; + } + } + cerr << endl; +#endif return {encoded1, encoded2}; } void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { assert(payload != MIPayload::NO_CODE); - zipcode.data.reserve(16); - - //get one byte at a time from the payload and add it to the zip code - size_t bit_mask = (1 << 8) - 1; - size_t byte_count = payload.first & bit_mask; - for (size_t i = 1 ; i <= byte_count ; i++) { - if (i < 8) { - zipcode.add_one_byte((payload.first >> (i*8)) & bit_mask); + + //First 8 bits of first int is the width + size_t width = payload.first >> 56; + zipcode.set_bit_width((uint8_t)width); + + //Second 8 bits is the item count + size_t item_count = (payload.first >> 48) & ((1 << 8)-1); + + //bit count is the product of the two + size_t bit_count = (size_t)width * (size_t)item_count; + zipcode.set_bitvector_length(bit_count); + +#ifdef DEBUG_ZIPCODE + cerr << "Get zipcode from payload " << payload.first << " and " << payload.second<< " with width: " << width << " item count " << item_count << " meaning " << bit_count << " bits" << endl; + cerr << "\t"; +#endif + + + //Mask for checking the relevant bit + //Start by checking the 17th bit from the left + //Right shift by one for each bit we look at + uint64_t mask1 = (uint64_t)1 << 47; + uint64_t mask2 = 
(uint64_t)1 << 63; + //get one bit at a time from the payload and add it to the zip code + for (size_t i = 0 ; i < bit_count ; i++) { + if (i < 48) { + if ((payload.first & mask1) != 0) { + zipcode.set_bit_at(i); +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif + } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; + } +#endif + mask1 >>= 1; } else { - zipcode.add_one_byte((payload.second >> ((i-8)*8)) & bit_mask); + if ((payload.first & mask2) != 0) { + zipcode.set_bit_at(i); +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif + } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; + } +#endif + mask2 >>= 1; } - } +#ifdef DEBUG_ZIPCODE + cerr << endl; + cerr << "Found encoded integers: "; + for (size_t i = 0 ; i < zipcode.size() ; i++) { + cerr << zipcode.at(i) << " "; + } + cerr << endl; +#endif + return; } std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { @@ -1763,8 +1573,8 @@ std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { void ZipCodeCollection::serialize(std::ostream& out) const { - //The zipcode vector will be serialized as a bunch of varint_vector_ts - //The first varint_vector_t will have one value, which will be the length of the + //The zipcode vector will be serialized as a bunch of min_width_int_vector_ts + //The first min_width_int_vector_t will have one value, which will be the length of the //zipcode that follows it //First serialize the header, which is the magic number and version @@ -1775,29 +1585,37 @@ void ZipCodeCollection::serialize(std::ostream& out) const { for (const ZipCode& zip : zipcodes) { + + //Write the width + size_t width = zip.zipcode.get_bit_width(); + out.write(reinterpret_cast(&width), sizeof(width)); - //How many bytes are going to be saved for the zipcode? - size_t byte_count = zip.byte_count(); + //How many values are in the vector. 
Used with width to get the bit count + size_t item_count = zip.zipcode.size(); + + out.write(reinterpret_cast(&item_count), sizeof(item_count)); - varint_vector_t size_vector; - size_vector.add_value(byte_count); - //Write the number of bytes about to be saved - for (const uint8_t& byte : size_vector.data) { - out << char(byte); - } //Write the zipcode #ifdef DEBUG_ZIPCODE size_t zip_byte_count = 0; #endif - for (const uint8_t& byte : zip.zipcode.data ) { + size_t bit_count = zip.zipcode.get_bit_count(); + for (size_t i = 0 ; i < bit_count ; i += 8) { #ifdef DEBUG_ZIPCODE zip_byte_count++; #endif - out << char(byte); + uint8_t result = 0; + for (size_t j = 0 ; j < 8 ; j++) { + result << 1; + if (i+j < bit_count && zip.zipcode.bit_at(i+j)) { + result |= 1; + } + } + out << char(result); } #ifdef DEBUG_ZIPCODE - assert(byte_count == zip_byte_count); + assert(zip_byte_count == bit_count / 8); #endif } @@ -1818,40 +1636,44 @@ void ZipCodeCollection::deserialize(std::istream& in) { while (in.peek() != EOF) { - //First, get the number of bytes used by the zipcode - //This will be a varint_vector_t with one value, which is the number of bytes in the zipcode - //Each byte in the varint_vector_t starts with 0 if it is the last bit in the - //number, and 1 if the next byte is included - varint_vector_t byte_count_vector; - while (in.peek() & (1<<7)) { - //If the first bit in the byte is 1, then add it, stop once the first bit is 0 - char c; - in.get(c); - byte_count_vector.add_one_byte((uint8_t)c); - } - assert(! 
(in.peek() & (1<<7))); - //The next byte has a 0 as its first bit, so add it - char c; - in.get(c); - byte_count_vector.add_one_byte((uint8_t)c); + //First, get the bitwidth of the vector + uint8_t width; + in.read(reinterpret_cast(&width), sizeof(width)); + + //Next, get the number of items in the zipcode + size_t item_count; + in.read(reinterpret_cast(&item_count), sizeof(item_count)); + + size_t bit_count = (size_t)width * item_count; + + //How many bytes were used to store all the bits in the zipcode bit vector + size_t byte_count = (size_t) std::floor((float)bit_count / 8); + - //The first (and only) value in the vector is the length of the zipcode - size_t zipcode_byte_count = byte_count_vector.get_value_and_next_index(0).first; #ifdef DEBUG_ZIPCODE - cerr << "Get zipcode of " << zipcode_byte_count << " bytes" << endl; - //assert(zipcode_byte_count >= 15); - assert(byte_count_vector.get_value_and_next_index(0).second == std::numeric_limits::max()); + cerr << "Get zipcode of " << bit_count << " bits" << endl; #endif - char line [zipcode_byte_count]; + char line [byte_count]; - in.read(line, zipcode_byte_count); + in.read(line, byte_count); ZipCode zip; + zip.zipcode.set_bit_width(width); + zip.zipcode.set_bitvector_length(bit_count); + size_t added_bits = 0; for (const char& character : line) { - zip.zipcode.add_one_byte(uint8_t(character)); + for (int i = 7 ; i >= 0 ; i--) { + if (added_bits < bit_count) { + if ((uint8_t)character & (1 << i) != 0) { + zip.zipcode.set_bit_at(added_bits); + } + added_bits++; + } + } } + zipcodes.emplace_back(std::move(zip)); } @@ -1864,21 +1686,12 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.parent_is_root = true; payload.parent_is_chain = true; - //Walk through the zipcode to get values - size_t zip_value; - size_t zip_index = decoder[0].second; - //Root is chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //root_identifier - 
std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); - - //Root node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - - payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + payload.node_handle = distance_index.get_net_handle_from_values( + distance_index.get_record_offset(distance_index.get_handle_from_connected_component(get_distance_index_address(0))), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); + + payload.node_length = get_length(0); payload.is_trivial_chain = true; payload.is_reversed = false; payload.parent_handle = distance_index.get_root(); @@ -1891,43 +1704,29 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.parent_is_chain = true; payload.parent_is_root = false; - //Walk through the zipcode to get values - size_t zip_value; - size_t zip_index = decoder[max_depth()-1].second; - //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - - //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + size_t parent_depth = max_depth() - 1; if (decoder_length() == 2) { //If the node is a child of the root chain - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_handle = distance_index.start_end_traversal_of( + distance_index.get_handle_from_connected_component(get_distance_index_address(0))); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } else { 
payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; } payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); - //chain component count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = get_offset_in_chain(parent_depth+1); - //Node prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //Node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + payload.node_length = get_length(parent_depth+1); //is_reversed - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: For top-level chains we got this from the distance index - payload.is_reversed = zip_value; + payload.is_reversed = get_is_reversed_in_parent(parent_depth+1); - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.chain_component = zip_value; + payload.chain_component = get_chain_component(parent_depth+1); @@ -1944,56 +1743,30 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.is_trivial_chain = true; - size_t zip_value; - size_t zip_index; if (payload.parent_is_root) { //is_chain - zip_index = decoder[0].second; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_record_offset = distance_index.get_record_offset( + distance_index.get_handle_from_connected_component( + 
get_distance_index_address(0))); payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { - zip_index = decoder[max_depth()-1].second; - //is_regular - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //If this is a non-root snarl, get as much as we can from it - payload.parent_type = ZipCode::EMPTY; - if (zip_value == 0) { - payload.parent_type = ZipCode::IRREGULAR_SNARL; - } else if (zip_value == 1) { - payload.parent_type = ZipCode::REGULAR_SNARL; - } else { - payload.parent_type = ZipCode::CYCLIC_SNARL; - } + size_t parent_depth = max_depth() - 1; + payload.parent_type = get_code_type(parent_depth); - //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = 0; - payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - - //Snarl length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Snarl child_count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //TODO: SHould use this somehow payload.chain_component = 0; - //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); //Simple and regular snarls are different for clustering if (distance_index.is_simple_snarl(grandparent_handle)) { - payload.is_reversed = zip_value; + payload.is_reversed = get_is_reversed_in_parent(parent_depth+1); payload.parent_is_chain=true; payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { @@ -2003,17 +1776,11 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else { payload.is_reversed = false; - payload.parent_record_offset = zip_value; + payload.parent_record_offset = get_distance_index_address(parent_depth); } } - //We should be at the node/trivial chain now - zip_index = decoder[max_depth()].second; - //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Chain length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + payload.node_length = get_length(max_depth()); //Get the rest as default values @@ -2041,39 +1808,19 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { result += (decoder[d].first ? 
"1" : "0"); if (d == 0) { //Root structure - size_t zip_value; - size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); - } + result += std::to_string(get_distance_index_address(0)); } else if (decoder[d].first) { //is_chain so could be a chain or a node if (decoder[d-1].first) { //If the thing before this was also a chain, then it is a node - size_t zip_value; - size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); - } + result += std::to_string(get_offset_in_chain(d)); } else { //Otherwise it's a chain - size_t zip_value; - size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); - } + result += std::to_string(get_rank_in_snarl(d)); } } else { //Definitely a snarl - size_t zip_value; - size_t zip_index = decoder[d].second; - for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - result += std::to_string(zip_value); - } + result += std::to_string(get_offset_in_chain(d)); } if (d < std::min(depth, max_depth())) { result += "."; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 992a8e27dc3..bf64055074d 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -2,7 +2,7 @@ #define VG_ZIP_CODE_HPP_INCLUDED -#include "varint.hpp" +#include "min_width_int_vector.hpp" #include "snarl_distance_index.hpp" #include @@ -106,14 +106,19 @@ class ZipCode { typedef std::uint64_t code_type; // We assume that this fits into gbwtgraph::Payload. 
- ///How many bytes were used to store this zipcode? - size_t byte_count() const { - return zipcode.byte_count(); - } //TODO: Make this private: //The actual data for a zipcode is a vector of ints - varint_vector_t zipcode; + min_width_int_vector_t zipcode; + + ///How many bytes were used to store this zipcode? + size_t bit_count() const { + return zipcode.get_bit_count(); + } + ///What is the bit width used to store this zipcode? + size_t bit_width() const { + return zipcode.get_bit_width(); + } /// Equality operator @@ -121,11 +126,8 @@ class ZipCode { return zipcode == other.zipcode; } - /// Dump to a normal vector - std::vector to_vector() const; - /// Load from a normal vector - void from_vector(const std::vector& values); + void from_vector(const std::vector& values, size_t max_value = 0); private: @@ -202,15 +204,26 @@ class ZipCode { /* Functions for getting the code for each snarl/chain/node * Distances will be stored as distance+1, 0 will be reserved for inf */ - //Return a vector of size_ts that will represent the node in the zip code - inline vector get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index); - //Return a vector of size_ts that will represent the chain in the zip code - inline vector get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index); - //Return a vector of size_ts that will represent the snarl in the zip code - inline vector get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index); - //Return a vector of size_ts that will represent the snarl in the zip code - inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); + ///Add the code for the given node to the end of the zipcode. 
+ ///Also update max_value to be the maximum value in the zipcode + inline void get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value); + ///Add the code for the given chain to the end of the zipcode. + ///Also update max_value to be the maximum value in the zipcode + inline void get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value); + + ///Add the code for the given regular snarl to the end of the zipcode. + ///Also update max_value to be the maximum value in the zipcode + inline void get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value); + + ///Add the code for the given irregular or cyclic snarl to the end of the zipcode. + ///Also update max_value to be the maximum value in the zipcode + inline void get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index, + vector& temp_zipcode, size_t& max_value); //////////////////////////////// Stuff for decoding the zipcode @@ -219,7 +232,7 @@ class ZipCode { //TODO: Make the decoder and zipcode private, still need it for unit testing ///The decoder as a vector of pair, one for each snarl tree node in the zip ///where is_chain indicates whether it's a chain/node, and index - ///is the index of the node/snarl/chain code in the varint_vector_t + ///is the index of the node/snarl/chain code in the min_width_int_vector_t std::vector> decoder; ///Did we fill in the entire decoder From de6c76fa6c4846faed8462771987d447b59ce77e Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 2 Aug 2024 17:02:10 +0200 Subject: [PATCH 077/124] Fix zipcodes --- src/min_width_int_vector.cpp | 2 +- src/unittest/zip_code.cpp | 24 +++++++------- src/zip_code.cpp | 62 ++++++++++++++++++++++++++++++------ src/zip_code.hpp | 4 +-- 4 
files changed, 67 insertions(+), 25 deletions(-) diff --git a/src/min_width_int_vector.cpp b/src/min_width_int_vector.cpp index 3ca1cc4d802..80c9baf7976 100644 --- a/src/min_width_int_vector.cpp +++ b/src/min_width_int_vector.cpp @@ -1,6 +1,6 @@ #include "min_width_int_vector.hpp" -#define DEBUG_MININT +//#define DEBUG_MININT namespace vg { using namespace std; diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 489f141d484..f7f03d75129 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -8,7 +8,7 @@ namespace vg{ namespace unittest{ using namespace std; - TEST_CASE("One node zipcode", "[zipcode][bug]") { + TEST_CASE("One node zipcode", "[zipcode]") { VG graph; Node* n1 = graph.create_node("GCAAACAGATT"); @@ -48,7 +48,6 @@ using namespace std; REQUIRE(zipcode.decoder.front().second == 0); } SECTION("decoded code") { - cerr << "New code" << endl; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); zipcode.fill_in_full_decoder(); @@ -141,7 +140,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(7) == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 7); + REQUIRE(zipcode.zipcode.size() == 8); } SECTION ("decoded zip code for node on top-level chain") { @@ -222,7 +221,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(12) == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 12); + REQUIRE(zipcode.zipcode.size() == 13); } @@ -392,6 +391,9 @@ using namespace std; Edge* e10 = graph.create_edge(n7, n8); + ofstream out ("testGraph.hg"); + graph.serialize(out); + IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex distance_index; fill_in_distance_index(&distance_index, &graph, &snarl_finder); @@ -437,7 +439,7 @@ using namespace std; distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 7); + REQUIRE(zipcode.zipcode.size() == 8); } @@ -534,7 +536,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(16) == 
distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 16); + REQUIRE(zipcode.zipcode.size() == 17); } @@ -594,7 +596,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(3) == 0); //Next is the regular snarl code for snarl 1-8 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 3)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); //1 for regular snarl tag REQUIRE(zipcode.zipcode.at(4) == 1); @@ -698,7 +700,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(30) == 0) ; //That's it - REQUIRE(zipcode.zipcode.size() == 30); + REQUIRE(zipcode.zipcode.size() == 31); } @@ -1035,7 +1037,7 @@ using namespace std; REQUIRE(zipcode.zipcode.at(16) == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 16); + REQUIRE(zipcode.zipcode.size() == 17); } SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; @@ -1528,7 +1530,7 @@ using namespace std; distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 7); + REQUIRE(zipcode.zipcode.size() == 8); } SECTION("Distances") { @@ -1725,8 +1727,6 @@ using namespace std; Edge* e7 = graph.create_edge(n5, n6); Edge* e8 = graph.create_edge(n1, n1, true, false); - ofstream out ("testGraph.hg"); - graph.serialize(out); IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 2e681638f70..3c9c5bd9c17 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1,11 +1,11 @@ #include "zip_code.hpp" -#define DEBUG_ZIPCODE +//#define DEBUG_ZIPCODE namespace vg{ using namespace std; -void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos) { +void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos, bool fill_in_decoder) { std::vector ancestors; net_handle_t current_handle = distance_index.get_node_net_handle(id(pos)); @@ -66,6 +66,9 @@ void 
ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p temp_zipcode.emplace_back(connectivity); max_value = std::max(max_value, temp_zipcode.back()); zipcode.from_vector(temp_zipcode, max_value); + if (fill_in_decoder) { + fill_in_full_decoder(); + } return; } else { #ifdef DEBUG_ZIPCODE @@ -111,6 +114,9 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p if (distance_index.is_trivial_chain(current_ancestor)) { zipcode.from_vector(temp_zipcode, max_value); + if (fill_in_decoder) { + fill_in_full_decoder(); + } return; } } else if (distance_index.is_regular_snarl(current_ancestor)) { @@ -122,8 +128,11 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index, temp_zipcode, max_value); } } - cerr << "Make real zipcode from temp with length " << temp_zipcode.size() << endl; zipcode.from_vector(temp_zipcode, max_value); + + if (fill_in_decoder) { + fill_in_full_decoder(); + } } void ZipCode::from_vector(const std::vector& values, size_t max_value) { @@ -1522,7 +1531,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { #endif mask1 >>= 1; } else { - if ((payload.first & mask2) != 0) { + if ((payload.second & mask2) != 0) { zipcode.set_bit_at(i); #ifdef DEBUG_ZIPCODE cerr << "1"; @@ -1576,6 +1585,9 @@ void ZipCodeCollection::serialize(std::ostream& out) const { //The zipcode vector will be serialized as a bunch of min_width_int_vector_ts //The first min_width_int_vector_t will have one value, which will be the length of the //zipcode that follows it +#ifdef DEBUG_ZIPCODE + cerr << "Serialize zipcode collection" << endl; +#endif //First serialize the header, which is the magic number and version uint32_t magic = magic_number; @@ -1587,7 +1599,7 @@ void ZipCodeCollection::serialize(std::ostream& out) const { for (const ZipCode& zip : zipcodes) { //Write the width - size_t width = 
zip.zipcode.get_bit_width(); + uint8_t width = (uint8_t) zip.zipcode.get_bit_width(); out.write(reinterpret_cast(&width), sizeof(width)); //How many values are in the vector. Used with width to get the bit count @@ -1598,6 +1610,12 @@ void ZipCodeCollection::serialize(std::ostream& out) const { //Write the zipcode #ifdef DEBUG_ZIPCODE + cerr << "Write width " << (size_t) width << " and item count " << item_count << " and zipcode: " << endl; + cerr << "\t"; + for (size_t i = 0 ; i < zip.zipcode.size() ; i++) { + cerr << zip.zipcode.at(i) << " "; + } + cerr << endl << "\t"; size_t zip_byte_count = 0; #endif size_t bit_count = zip.zipcode.get_bit_count(); @@ -1607,15 +1625,24 @@ void ZipCodeCollection::serialize(std::ostream& out) const { #endif uint8_t result = 0; for (size_t j = 0 ; j < 8 ; j++) { - result << 1; + result <<= 1; if (i+j < bit_count && zip.zipcode.bit_at(i+j)) { +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif result |= 1; } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; + } +#endif } out << char(result); } #ifdef DEBUG_ZIPCODE - assert(zip_byte_count == bit_count / 8); + cerr << endl; + assert(zip_byte_count == ceil((float)bit_count / 8)); #endif } @@ -1647,12 +1674,12 @@ void ZipCodeCollection::deserialize(std::istream& in) { size_t bit_count = (size_t)width * item_count; //How many bytes were used to store all the bits in the zipcode bit vector - size_t byte_count = (size_t) std::floor((float)bit_count / 8); + size_t byte_count = (size_t) std::ceil((float)bit_count / 8); #ifdef DEBUG_ZIPCODE - cerr << "Get zipcode of " << bit_count << " bits" << endl; + cerr << "Get zipcode with width " << (size_t) width << " and item count " << item_count << endl << "\t"; #endif char line [byte_count]; @@ -1666,13 +1693,28 @@ void ZipCodeCollection::deserialize(std::istream& in) { for (const char& character : line) { for (int i = 7 ; i >= 0 ; i--) { if (added_bits < bit_count) { - if ((uint8_t)character & (1 << i) != 0) { + if (((uint8_t)character & ((uint8_t)1 << i)) 
!= 0) { zip.zipcode.set_bit_at(added_bits); +#ifdef DEBUG_ZIPCODE + cerr << "1"; +#endif + } +#ifdef DEBUG_ZIPCODE + else { + cerr << "0"; } +#endif added_bits++; } } } +#ifdef DEBUG_ZIPCODE + cerr << endl <<"\t"; + for (size_t i = 0 ; i < zip.zipcode.size() ; i++) { + cerr << zip.zipcode.at(i) << " "; + } + cerr << endl; +#endif zipcodes.emplace_back(std::move(zip)); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index bf64055074d..40c7df5bc38 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -60,7 +60,7 @@ class ZipCode { public: //Fill in an empty zipcode given a position - void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos); + void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos, bool fill_in_decoder=true); //Fill in an empty zipcode using the information that was stored in a payload void fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload); @@ -361,7 +361,7 @@ class ZipCodeCollection { //magic number to identify the file const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t version = 2; + const static uint32_t version = 3; public: const static std::uint32_t get_magic_number() {return magic_number;} From 7aa1fe7c8bef0bf52800a580389ffef77b717974 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 11:26:28 +0200 Subject: [PATCH 078/124] Revert using minint vectors --- src/min_width_int_vector.cpp | 28 +- src/min_width_int_vector.hpp | 54 +- src/snarl_seed_clusterer.cpp | 8 +- src/subcommand/minimizer_main.cpp | 6 +- src/unittest/min_width_int_vector.cpp | 10 +- src/unittest/snarl_seed_clusterer.cpp | 2 +- src/unittest/zip_code.cpp | 485 ++++++---- src/zip_code.cpp | 1187 +++++++++++++++---------- src/zip_code.hpp | 57 +- 9 files changed, 1053 insertions(+), 784 deletions(-) diff --git a/src/min_width_int_vector.cpp b/src/min_width_int_vector.cpp index 80c9baf7976..4d4e3215dba 100644 --- a/src/min_width_int_vector.cpp +++ 
b/src/min_width_int_vector.cpp @@ -1,4 +1,7 @@ #include "min_width_int_vector.hpp" +#include +#include +#include //#define DEBUG_MININT @@ -6,32 +9,15 @@ namespace vg { using namespace std; void min_width_int_vector_t::from_vector(const vector& input_data, size_t max_val) { -#ifdef DEBUG_MININT - cerr << "get minint vector from int vector " << endl; -#endif if (max_val != 0) { -#ifdef DEBUG_MININT - cerr << "Get width from max value " << max_val << " bigger of " << ((size_t) width) << " and " << (std::floor(std::log2(max_val)) + 1) << endl; -#endif - width = (uint8_t) std::max((size_t) width, (size_t)(std::floor(std::log2((float) max_val)) + 1)); + width = std::max(width, 1 + (size_t)std::floor(std::log2(max_val))); } else if (width == 0) { //If we haven't already set the width, find it from the max value of the input data for (const size_t& x : input_data) { max_val = std::max(x, max_val); } -#ifdef DEBUG_MININT - cerr << "Found max value " << max_val << " and got width " << width << endl; -#endif - width = 1 + (size_t)std::floor(std::log2((float) max_val)); + width = 1 + (size_t)std::floor(std::log2(max_val)); } -#ifdef DEBUG_MININT - for (size_t x : input_data) { - cerr << x << " "; - } - for (size_t x : input_data) { - assert( width >= (uint8_t)(std::floor(std::log2(x)) + 1)); - } -#endif data.reserve(input_data.size()*width); for (const size_t& x : input_data) { @@ -39,11 +25,9 @@ void min_width_int_vector_t::from_vector(const vector& input_data, size_ } } - - void min_width_int_vector_t::push_back(size_t val) { #ifdef DEBUG_MININT - assert(width >= (uint8_t) (1 + (size_t)std::floor(std::log2(val)))); + assert(width >= 1 + (size_t)std::floor(std::log2(val))); #endif for (size_t i = 0 ; i < width ; i++) { data.emplace_back(val & (1 << (width - i - 1))); diff --git a/src/min_width_int_vector.hpp b/src/min_width_int_vector.hpp index b428b9b393b..e4f76a762c3 100644 --- a/src/min_width_int_vector.hpp +++ b/src/min_width_int_vector.hpp @@ -2,14 +2,7 @@ #define 
VG_MINWIDTH_INT_HPP_INCLUDED #include -#include #include -#include -#include -#include -#include - - /** \file min_width_int_vector.hpp * Methods for storing a vector of integers with minimal bit width @@ -22,27 +15,13 @@ using namespace std; */ struct min_width_int_vector_t { - private: - - /// How many bits are used to store the bit width used - /// This is needed for serializing - const static size_t BIT_WIDTH_WIDTH = 8; - - /// The bit width that is being used to store the integers - uint8_t width; - - ///The actual data stored in the vector - std::vector data; - public: - min_width_int_vector_t () { - width = 0; - } + min_width_int_vector_t () : + width(0) {} - min_width_int_vector_t (size_t w) { - width = w; - } + min_width_int_vector_t (size_t width) : + width(width) {} ///Make this a copy of input_data @@ -61,27 +40,18 @@ struct min_width_int_vector_t { ///Get the value at the given index size_t at(size_t index) const; - ///Check what the bit width is - // This is a size_t because it's blank when I try to write it to stderr - size_t get_bit_width() const { return (size_t) width;} + //Check what the bit width is + size_t get_bitwidth() const { return width;} - ///How many bits are we using total - size_t get_bit_count() const { return data.size(); } - - ///////////Access the bit vector itself for serializing - bool bit_at(size_t i) const {return data[i];} - void set_bitvector_length(size_t l) {data.resize(l);} - void set_bit_at(size_t i) {data[i] = true;} - void set_bit_width(size_t w) {width = w;} + private: - ///Equality operator - //TODO: This isn't actually checking the values- the widths could be different but still represent the same vectors. 
- // but that would be pretty slow to check so leave it - inline bool operator==(const min_width_int_vector_t& other) const { - return width == other.width && data == other.data; - } + /// The bit width that is being used to store the integers + /// This can be up to 64 + size_t width : 7; + ///The actual data stored in the vector + std::vector data; }; } #endif diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 220c36082f0..31579b53103 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -31,10 +31,10 @@ vector SnarlDistanceIndexClusterer::cluste vector seed_caches(seeds.size()); for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER - assert (seeds[i].zipcode.bit_count() != 0) ; + assert (seeds[i].zipcode.byte_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); - if (seeds[i].zipcode.bit_count() != 0) { + if (seeds[i].zipcode.byte_count() != 0) { seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } @@ -75,10 +75,10 @@ vector> SnarlDistanceIndexClusterer for (size_t i = 0 ; i < all_seeds[read_num].size() ; i++) { #ifdef DEBUG_CLUSTER //The zipcode should be filled in - assert(all_seeds[read_num][i].zipcode.bit_count() != 0); + assert(all_seeds[read_num][i].zipcode.byte_count() != 0); #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); - if (all_seeds[read_num][i].zipcode.bit_count() != 0) { + if (all_seeds[read_num][i].zipcode.byte_count() != 0) { all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index d75cf6bcd3e..73c30133801 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -387,8 +387,8 @@ int main_minimizer(int argc, char** argv) { //For each minimizer, writes the size of the zip code and then the zip 
code as a tsv pair value (0, 0); - //How many bits get used - cout << zipcode.zipcode.get_bit_count(); + //How many bytes get used + cout << zipcode.zipcode.byte_count(); //Each integer saved while (value.second != std::numeric_limits::max()) { value = zipcode.zipcode.get_value_and_next_index(value.second); @@ -396,7 +396,7 @@ int main_minimizer(int argc, char** argv) { } cout << endl; #endif - if (zipcode.zipcode.get_bit_count() <= 112) { + if (zipcode.zipcode.byte_count() < 15) { //If the zipcode is small enough to store in the payload return zipcode.get_payload_from_zip(); } else if (!zipcode_name.empty()) { diff --git a/src/unittest/min_width_int_vector.cpp b/src/unittest/min_width_int_vector.cpp index e4739646716..f61ec4b6ff3 100644 --- a/src/unittest/min_width_int_vector.cpp +++ b/src/unittest/min_width_int_vector.cpp @@ -47,7 +47,7 @@ using namespace std; minint_vector.from_vector(original); REQUIRE(minint_vector.size() == 1); REQUIRE(minint_vector.at(0) == 0); - REQUIRE(minint_vector.get_bit_width() == 1); + REQUIRE(minint_vector.get_bitwidth() == 1); } SECTION ("[1]") { vector original {1}; @@ -55,7 +55,7 @@ using namespace std; minint_vector.from_vector(original); REQUIRE(minint_vector.size() == 1); REQUIRE(minint_vector.at(0) == 1); - REQUIRE(minint_vector.get_bit_width() == 1); + REQUIRE(minint_vector.get_bitwidth() == 1); } SECTION ("[1, 2]") { vector original {1, 2}; @@ -65,13 +65,13 @@ using namespace std; REQUIRE(minint_vector.size() == 2); REQUIRE(minint_vector.at(0) == 1); REQUIRE(minint_vector.at(1) == 2); - REQUIRE(minint_vector.get_bit_width() == 2); + REQUIRE(minint_vector.get_bitwidth() == 2); } SECTION ("more values") { vector values {1, 3243, 123634, 53454, 0}; min_width_int_vector_t minint_vector (3); minint_vector.from_vector(values, 123634); - REQUIRE(minint_vector.get_bit_width() == 1+(size_t)std::floor(std::log2(123634))); + REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); assert(minint_vector.size() == 
values.size()); for (size_t i = 0 ; i < values.size() ; i++) { assert(minint_vector.at(i) == values[i]); @@ -85,7 +85,7 @@ using namespace std; for (size_t i = 0 ; i < values.size() ; i++) { assert(minint_vector.at(i) == values[i]); } - REQUIRE(minint_vector.get_bit_width() == 1+(size_t)std::floor(std::log2(123634))); + REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); } } } diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index d0569d4063e..ce7dde12972 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -833,7 +833,7 @@ namespace unittest { } } TEST_CASE( "Top-level looping chain", - "[cluster]" ) { + "[cluster][bug]" ) { VG graph; Node* n1 = graph.create_node("AGCGTGTAGAGAA"); diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index f7f03d75129..22bd68ac308 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -22,19 +22,23 @@ using namespace std; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); //Second value is the rank of the node (chain) in the root-snarl - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Third value is the length of the node - REQUIRE(zipcode.zipcode.at(2) == 11+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 11+1); //Connectivity - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 4); + REQUIRE(value_and_index.second == 
std::numeric_limits::max()); } @@ -48,6 +52,7 @@ using namespace std; REQUIRE(zipcode.decoder.front().second == 0); } SECTION("decoded code") { + cerr << "New code" << endl; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); zipcode.fill_in_full_decoder(); @@ -61,7 +66,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -111,36 +116,44 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Component count of the chain - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true,(size_t)4)); - REQUIRE(zipcode.zipcode.at(4) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - REQUIRE(zipcode.zipcode.at(5) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); //Fifth is if the node is reversed - REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //The component - REQUIRE(zipcode.zipcode.at(7) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 8); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } SECTION ("decoded zip code for node on top-level chain") { @@ -171,57 +184,70 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 3); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); - REQUIRE(zipcode.decoder.at(0) == std::make_pair(true, (size_t)0)); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); + REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Chain component count - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the snarl code //1 for a regular snarl - REQUIRE(zipcode.decoder.at(1) == std::make_pair(false, (size_t)4)); - REQUIRE(zipcode.zipcode.at(4) == 1); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //prefix sum of the snarl - REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 5 : 6)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == (chain_is_reversed ? 5 : 6)+1); //length of the snarl - REQUIRE(zipcode.zipcode.at(6) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); //Child count - REQUIRE(zipcode.zipcode.at(7) == 2); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 2); //Chain component - REQUIRE(zipcode.zipcode.at(8) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //node is reversed in the snarl + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl = distance_index.get_parent(chain4); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(chain4)) != 0; - REQUIRE(zipcode.zipcode.at(9) == is_rev); + REQUIRE(value_and_index.first == is_rev); //Next is the chain code //rank of the chain in the snarl - REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t)10)); - REQUIRE(zipcode.zipcode.at(10) == distance_index.get_rank_in_parent(distance_index.get_parent( + 
REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); //node length - REQUIRE(zipcode.zipcode.at(11) == 2+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 2+1); //chain component count - REQUIRE(zipcode.zipcode.at(12) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 13); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } @@ -307,7 +333,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -317,7 +343,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -327,7 +353,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -337,7 +363,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, 
false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -347,7 +373,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -357,7 +383,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -391,9 +417,6 @@ using namespace std; Edge* e10 = graph.create_edge(n7, n8); - ofstream out ("testGraph.hg"); - graph.serialize(out); - IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex distance_index; fill_in_distance_index(&distance_index, &graph, &snarl_finder); @@ -409,37 +432,45 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Third value is the chain component count - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - 
REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 4)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); - REQUIRE(zipcode.zipcode.at(4) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - REQUIRE(zipcode.zipcode.at(5) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); //Fifth is if the node is reversed - REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //component - REQUIRE(zipcode.zipcode.at(7) == distance_index.get_chain_component( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component( distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 8); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } @@ -472,71 +503,88 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); //Second value is the connected component number of the chain - 
REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Third value is the chain component count of the chain - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the regular snarl code - REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //1 for regular snarl tag - REQUIRE(zipcode.zipcode.at(4) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //Prefix sum of the snarl - REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 4 : 3)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == (chain_is_reversed ? 
4 : 3)+1); //snarl length - REQUIRE(zipcode.zipcode.at(6) == 0+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0+1); //Snarl child count - REQUIRE(zipcode.zipcode.at(7) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //chain component - REQUIRE(zipcode.zipcode.at(8) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //Is the chain is reversed in the snarl + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; - REQUIRE(zipcode.zipcode.at(9) == is_rev); + REQUIRE(value_and_index.first == is_rev); //Next is the chain code - REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 10)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(10) == distance_index.get_rank_in_parent( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( distance_index.get_parent(distance_index.get_node_net_handle(n2->id())))); //chain length - REQUIRE(zipcode.zipcode.at(11) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); //chain component count - REQUIRE(zipcode.zipcode.at(12) == 0); + 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the node code - REQUIRE(zipcode.decoder[3] == std::make_pair(true, (size_t) 13)); + REQUIRE(zipcode.decoder[3] == std::make_pair(true, value_and_index.second)); //Offset of the node in the chain - REQUIRE(zipcode.zipcode.at(13) == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); //length of the node - REQUIRE(zipcode.zipcode.at(14) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); //is the node reversed in the parent - REQUIRE(zipcode.zipcode.at(15) == distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n2->id()))); //chain component - REQUIRE(zipcode.zipcode.at(16) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 17); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } @@ -584,123 +632,154 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); //Second value is the 
connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Second value is the chain component count of the chain - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 1-8 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //1 for regular snarl tag - REQUIRE(zipcode.zipcode.at(4) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //Prefix sum of the snarl - REQUIRE(zipcode.zipcode.at(5) == (chain_is_reversed ? 4 : 3)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == (chain_is_reversed ? 
4 : 3)+1); //snarl length - REQUIRE(zipcode.zipcode.at(6) == 0+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0+1); //snarl child count - REQUIRE(zipcode.zipcode.at(7) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //Chain component - REQUIRE(zipcode.zipcode.at(8) == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_net_handle(n2->id()))); //Is the chain is reversed in the snarl + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain2 = distance_index.get_parent(distance_index.get_node_net_handle(n2->id())); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; - REQUIRE(zipcode.zipcode.at(9) == is_rev); + REQUIRE(value_and_index.first == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 10)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(10) == distance_index.get_rank_in_parent( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( distance_index.get_parent(distance_index.get_node_net_handle(n2->id())))); //chain length - REQUIRE(zipcode.zipcode.at(11) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); //chain component_count - REQUIRE(zipcode.zipcode.at(12) == 0); + 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 2-7 - REQUIRE(zipcode.decoder[3] == std::make_pair(false, (size_t) 13)); + REQUIRE(zipcode.decoder[3] == std::make_pair(false, value_and_index.second)); //1 as tag for regular snarl - REQUIRE(zipcode.zipcode.at(13) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //offset in chain - REQUIRE(zipcode.zipcode.at(14) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); //length - REQUIRE(zipcode.zipcode.at(15) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); //child count - REQUIRE(zipcode.zipcode.at(16) == 2); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 2); //is_reversed + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); snarl = distance_index.get_parent(chain3); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain3))) != 0; - REQUIRE(zipcode.zipcode.at(17) == is_rev); + REQUIRE(value_and_index.first == is_rev); - REQUIRE(zipcode.zipcode.at(18) == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //Chain code for chain 3-5 - REQUIRE(zipcode.decoder[4] 
== std::make_pair(true, (size_t) 19)); + REQUIRE(zipcode.decoder[4] == std::make_pair(true, value_and_index.second)); //Rank in parent - REQUIRE(zipcode.zipcode.at(19) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); //length - REQUIRE(zipcode.zipcode.at(20) == distance_index.minimum_length(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) +1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.minimum_length(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) +1); //component_count - REQUIRE(zipcode.zipcode.at(21) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //REgular snarl code for snarl 3-5 - REQUIRE(zipcode.decoder[5] == std::make_pair(false, (size_t) 22)); - REQUIRE(zipcode.zipcode.at(22) == 1); + REQUIRE(zipcode.decoder[5] == std::make_pair(false, value_and_index.second)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); //offset in chain - REQUIRE(zipcode.zipcode.at(23) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
3 : 1)+1); //length - REQUIRE(zipcode.zipcode.at(24) == 0+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0+1); //child count - REQUIRE(zipcode.zipcode.at(25) == 1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); snarl = distance_index.get_parent(chain4); - REQUIRE(zipcode.zipcode.at(26) == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //is_reversed + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; - REQUIRE(zipcode.zipcode.at(27) == is_rev); + REQUIRE(value_and_index.first == is_rev); //Chain code for node 4 - REQUIRE(zipcode.decoder[6] == std::make_pair(true, (size_t) 28)); + REQUIRE(zipcode.decoder[6] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(28) == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; //length - REQUIRE(zipcode.zipcode.at(29) == 4+1) ; + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 4+1) ; //Chain component - REQUIRE(zipcode.zipcode.at(30) == 0) 
; + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0) ; //That's it - REQUIRE(zipcode.zipcode.size() == 31); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } @@ -859,7 +938,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -869,7 +948,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -879,7 +958,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -889,7 +968,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -899,7 +978,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -909,7 +988,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -919,7 +998,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -929,7 +1008,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n8->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -976,68 +1055,85 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Third is the chain component count - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, (size_t) 4)); + REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); //0 as tag for irregular snarl - REQUIRE(zipcode.zipcode.at(4) == 2); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 2); net_handle_t irregular_snarl = distance_index.get_parent(distance_index.get_parent(distance_index.get_node_net_handle(n2->id()))); //Snarl prefix sum + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); net_handle_t bound = distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, true)); - REQUIRE(zipcode.zipcode.at(5) == SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(bound), + REQUIRE(value_and_index.first == SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(bound), distance_index.minimum_length(bound))+1); //Snarl length - REQUIRE(zipcode.zipcode.at(6) == distance_index.minimum_length(irregular_snarl)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.minimum_length(irregular_snarl)+1); size_t child_count = 0 ; distance_index.for_each_child(irregular_snarl, [&] (const net_handle_t& child) { child_count++; }); //Snarl child count - REQUIRE(zipcode.zipcode.at(7) == child_count); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == child_count); //component - REQUIRE(zipcode.zipcode.at(8) == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, false)))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 
distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(irregular_snarl, false, false)))); //Snarl record offset - REQUIRE(zipcode.zipcode.at(9) == distance_index.get_record_offset(irregular_snarl)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_record_offset(irregular_snarl)); //Distance from left side of child to snarl start - //REQUIRE(zipcode.zipcode.at(10) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); //Distance from right side of child to snarl start - //REQUIRE(zipcode.zipcode.at(11) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 0 : 1)); //Distance from left side of child to snarl end - //REQUIRE(zipcode.zipcode.at(12) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Distance from right side of child to snarl end - //REQUIRE(zipcode.zipcode.at(13) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
1 : 0)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 14)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //Rank in snarl - REQUIRE(zipcode.zipcode.at(14) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); //Length - REQUIRE(zipcode.zipcode.at(15) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); //Component count - REQUIRE(zipcode.zipcode.at(16) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //That's it - REQUIRE(zipcode.zipcode.size() == 17); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; @@ -1151,7 +1247,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1161,7 +1257,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1171,7 +1267,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1181,7 +1277,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1191,7 +1287,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1201,7 +1297,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1211,7 +1307,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1255,17 +1351,21 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate 
that it's a top-level snarl - REQUIRE(zipcode.zipcode.at(0) == 0); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 0); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 2)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(2) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); //length - REQUIRE(zipcode.zipcode.at(3) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); } SECTION ("decoded zip code for node in top-level snarl") { ZipCode zipcode; @@ -1298,26 +1398,33 @@ using namespace std; REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); //0 to indicate that it's a top-level snarl - REQUIRE(zipcode.zipcode.at(0) == 0); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 0); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + 
REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 2)); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(2) == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); //length - REQUIRE(zipcode.zipcode.at(3) == 2+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 2+1); //component count - REQUIRE(zipcode.zipcode.at(4) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Node 3 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, (size_t) 5)); + REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); //rank in snarl - REQUIRE(zipcode.zipcode.at(5) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); //length - REQUIRE(zipcode.zipcode.at(6) == 1+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 1+1); } SECTION ("decode zip code for node in chain in top-level snarl") { net_handle_t node3 = distance_index.get_node_net_handle(n3->id()); @@ -1396,7 +1503,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1406,7 +1513,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1416,7 +1523,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1426,7 +1533,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1436,7 +1543,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if 
(zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1446,7 +1553,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1456,7 +1563,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1500,37 +1607,45 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain - REQUIRE(zipcode.zipcode.at(0) == 1); + pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); + REQUIRE(value_and_index.first == 1); REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); //Second value is the connected component number of the chain - REQUIRE(zipcode.zipcode.at(1) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Third value is the chain component count - REQUIRE(zipcode.zipcode.at(2) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Connectivity of the chain - REQUIRE(zipcode.zipcode.at(3) == 0); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 0); //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, (size_t) 4)); - REQUIRE(zipcode.zipcode.at(4) == 
distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); + REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); //Fourth is the node length - REQUIRE(zipcode.zipcode.at(5) == 3+1); + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == 3+1); //Fifth is if the node is reversed - REQUIRE(zipcode.zipcode.at(6) == distance_index.is_reversed_in_parent( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.is_reversed_in_parent( distance_index.get_node_net_handle(n1->id()))); //Chain component - REQUIRE(zipcode.zipcode.at(7) == distance_index.get_chain_component( + value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); + REQUIRE(value_and_index.first == distance_index.get_chain_component( distance_index.get_node_net_handle(n1->id()))); //That's it - REQUIRE(zipcode.zipcode.size() == 8); + REQUIRE(value_and_index.second == std::numeric_limits::max()); } SECTION("Distances") { @@ -1567,7 +1682,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1577,7 +1692,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1587,7 +1702,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1597,7 +1712,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1607,7 +1722,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1617,7 +1732,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1627,7 +1742,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.bit_count() <= 112) { + if (zipcode.byte_count() <= 15) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1727,6 +1842,8 @@ using namespace std; Edge* e7 = graph.create_edge(n5, n6); Edge* e8 = graph.create_edge(n1, n1, true, 
false); + ofstream out ("testGraph.hg"); + graph.serialize(out); IntegratedSnarlFinder snarl_finder(graph); SnarlDistanceIndex dist_index; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 3c9c5bd9c17..99004b283a4 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -5,7 +5,7 @@ namespace vg{ using namespace std; -void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos, bool fill_in_decoder) { +void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos) { std::vector ancestors; net_handle_t current_handle = distance_index.get_node_net_handle(id(pos)); @@ -16,42 +16,29 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p current_handle = distance_index.get_parent(current_handle); } - //Make a temporary zipcode that will turn into the real one - vector temp_zipcode; - temp_zipcode.reserve(ancestors.size() * 4); - //Remember the maximum value we see to set the bitwidth when we make the real zipcode - size_t max_value = 0; - //Now add the root-level snarl or chain if (distance_index.is_root_snarl(current_handle)) { //FIrst thing is a snarl, so add the snarl's connected component number - temp_zipcode.emplace_back(0); + zipcode.add_value(0); #ifdef DEBUG_ZIPCODE cerr << "Adding code for top-level snarl " << distance_index.net_handle_as_string(current_handle) << endl; #endif - temp_zipcode.emplace_back(distance_index.get_connected_component_number(current_handle)); - max_value = std::max(max_value, temp_zipcode.back()); + zipcode.add_value(distance_index.get_connected_component_number(current_handle)); } else { -#ifdef DEBUG_ZIPCODE - cerr << "Adding code for top-level chain " << distance_index.net_handle_as_string(current_handle) << endl; -#endif //FIrst thing is a chain so add its connected component number and remove the chain from the stack - temp_zipcode.emplace_back(1); - max_value = std::max(max_value, temp_zipcode.back()); + zipcode.add_value(1); //If the 
root-level structure is actually a chain, then save the connected component number and take out //the chain from the stack //If the root-level structure is a trivial chain, then just store the node (as a chain, which will have the //connected-component number as the rank in the snarl anyways) - temp_zipcode.emplace_back(distance_index.get_connected_component_number(ancestors.back())); - max_value = std::max(max_value, temp_zipcode.back()); + zipcode.add_value(distance_index.get_connected_component_number(ancestors.back())); if (ancestors.size() == 2 && distance_index.is_trivial_chain(ancestors.back())) { #ifdef DEBUG_ZIPCODE cerr << "Adding code for top-level trivial chain" << endl; #endif - temp_zipcode.emplace_back(distance_index.minimum_length(ancestors.back())+1); - max_value = std::max(max_value, temp_zipcode.back()); + zipcode.add_value(distance_index.minimum_length(ancestors.back())+1); size_t connectivity = 0; if ( distance_index.is_externally_start_end_connected(ancestors.back())) { connectivity = connectivity | 1; @@ -63,12 +50,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p connectivity = connectivity | 4; } - temp_zipcode.emplace_back(connectivity); - max_value = std::max(max_value, temp_zipcode.back()); - zipcode.from_vector(temp_zipcode, max_value); - if (fill_in_decoder) { - fill_in_full_decoder(); - } + zipcode.add_value(connectivity); return; } else { #ifdef DEBUG_ZIPCODE @@ -80,8 +62,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p if (distance_index.is_looping_chain(ancestors.back())) { component += 1; } - temp_zipcode.emplace_back(component); - max_value = std::max(max_value, temp_zipcode.back()); + zipcode.add_value(component); } size_t connectivity = 0; @@ -95,8 +76,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p connectivity = connectivity | 4; } - temp_zipcode.emplace_back(connectivity); - max_value = std::max(max_value, 
temp_zipcode.back()); + zipcode.add_value(connectivity); ancestors.pop_back(); } @@ -108,44 +88,62 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p cerr << "Adding code for " << distance_index.net_handle_as_string(current_ancestor) << endl; #endif if (distance_index.is_node(current_ancestor)) { - get_node_code(current_ancestor, distance_index, temp_zipcode, max_value); + vector to_add = get_node_code(current_ancestor, distance_index); + for (auto& x : to_add) { + zipcode.add_value(x); + } +#ifdef DEBUG_ZIPCODE + assert(to_add.size() == ZipCode::NODE_SIZE); +#endif } else if (distance_index.is_chain(current_ancestor)) { - get_chain_code(current_ancestor, distance_index, temp_zipcode, max_value); - + vector to_add = get_chain_code(current_ancestor, distance_index); + for (auto& x : to_add) { + zipcode.add_value(x); + } +#ifdef DEBUG_ZIPCODE + assert(to_add.size() == ZipCode::CHAIN_SIZE); +#endif if (distance_index.is_trivial_chain(current_ancestor)) { - zipcode.from_vector(temp_zipcode, max_value); - if (fill_in_decoder) { - fill_in_full_decoder(); - } return; } } else if (distance_index.is_regular_snarl(current_ancestor)) { - get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index, temp_zipcode, max_value); + vector to_add = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); + for (auto& x : to_add) { + zipcode.add_value(x); + } +#ifdef DEBUG_ZIPCODE + assert(to_add.size() == ZipCode::REGULAR_SNARL_SIZE); +#endif } else { #ifdef DEBUG_ZIPCODE assert(distance_index.is_snarl(current_ancestor)); #endif - get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index, temp_zipcode, max_value); + vector to_add = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); +#ifdef DEBUG_ZIPCODE + assert(to_add.size() == ZipCode::IRREGULAR_SNARL_SIZE); +#endif + for (auto& x : to_add) { + zipcode.add_value(x); + } } } - zipcode.from_vector(temp_zipcode, max_value); +} - if 
(fill_in_decoder) { - fill_in_full_decoder(); - } +std::vector ZipCode::to_vector() const { + return zipcode.to_vector(); } -void ZipCode::from_vector(const std::vector& values, size_t max_value) { - zipcode.from_vector(values, max_value); +void ZipCode::from_vector(const std::vector& values) { + zipcode.from_vector(values); } void ZipCode::fill_in_full_decoder() { - if (zipcode.size() == 0 || finished_decoding) { + if (byte_count() == 0 || finished_decoding) { //If the zipcode is empty return; } - decoder.reserve(zipcode.size() / 4); + decoder.reserve(byte_count() / 4); bool done=false; while (!done) { done = fill_in_next_decoder(); @@ -165,79 +163,193 @@ bool ZipCode::fill_in_next_decoder() { //check to see how much has been filled in size_t zip_length = decoder_length(); + //Does the most recent thing in the zip_index point to a chain/node? + bool previous_is_chain; + + size_t zip_index=0; + size_t zip_value; if (zip_length == 0) { //If there is nothing in the decoder yet, then the first thing will start at 0 + for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } //Is the root a chain/node? - decoder.emplace_back(zipcode.at(ROOT_IS_CHAIN_OFFSET), 0); + previous_is_chain = zip_value; + decoder.emplace_back(previous_is_chain, 0); #ifdef DEBUG_ZIPCODE -cerr << "\tadding the root, which is a " << (decoder.back().first ? "chain or node" : "snarl") << endl; +cerr << "\tadding the root, which is a " << (previous_is_chain ? 
"chain or node" : "snarl") << endl; #endif - if (zipcode.size() == ROOT_NODE_SIZE) { - //If this was a root node, then we're done - finished_decoding = true; - return true; + //There might be something else but we're done for now + return false; + } else if (zip_length == 1) { + //If there is one thing in the zipcode + previous_is_chain = decoder.back().first; + + //If the top-level structure is a chain, it might actually be a node, in which case + //the only other thing that got stored is the length + if (previous_is_chain) { + //Get to the end of the root chain + assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't + + for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_index == std::numeric_limits::max()) { + //If the zip code ends here (after the length), then this was a node and we're done +#ifdef DEBUG_ZIPCODE +cerr << "\tThe last thing was a root-level node, so nothing else" << endl; +#endif + finished_decoding = true; + return true; + } else { + //Otherwise, check if this is a node or a snarl. If it is a node, then there are three things remaining + size_t start_index = zip_index; + + //If it's a node, then there are three remaining things in the index + //If it were a snarl, then there are more than three things + for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + + + //Return the start of this thing, and true if it was a node + decoder.emplace_back(zip_index == std::numeric_limits::max(), start_index); +#ifdef DEBUG_ZIPCODE + cerr << "\tAdding a " << (zip_index == std::numeric_limits::max() ? "node" : "snarl") << endl; +#endif + //If this was a node, then we're done so return true. 
Otherwise, it was a snarl to return false + return zip_index == std::numeric_limits::max(); + } } else { - //There might be something else but we're done for now + //Otherwise, the top-level thing is a snarl and the next thing is a chain + for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + decoder.emplace_back(!previous_is_chain, zip_index); return false; } } else { - //This is not a root - bool previous_is_chain = decoder.back().first; - size_t previous_start = decoder.back().second; + //If there was already stuff in the decoder, then figure out where the last thing + //is and set values + previous_is_chain = decoder.back().first; + zip_index = decoder.back().second; +#ifdef DEBUG_ZIPCODE + cerr << "Last thing was a " << (previous_is_chain ? "chain or node" : "snarl") << " starting at " << zip_index << endl; +#endif + + //get to the end of the current thing, add the next thing to the decoder and return if (previous_is_chain) { - //If the last thing was chain, then either the chain was the last thing in the zipcode - // (if it was the child of a snarl) or the next thing is either a node or snarl + //If the current zip_index points to a chain, then either it points to a node, or to + //a chain that is followed by a node or snarl + //The node is the shorter of the two, so if the zipcode ends after the node, then it was + //a node and otherwise, it was an actual chain + //This must be true in order for this to work assert(std::min(ZipCode::CHAIN_SIZE + ZipCode::REGULAR_SNARL_SIZE, ZipCode::CHAIN_SIZE + ZipCode::IRREGULAR_SNARL_SIZE) > ZipCode::NODE_SIZE); - size_t this_size = zip_length == 1 ? ROOT_CHAIN_SIZE : CHAIN_SIZE; - if (zipcode.size() == previous_start + this_size) { - //If the zipcode ends here + //Get to the end of the "node". 
If it is the end of the zipcode, then it was a node + //Otherwise, it was a snarl + //The node could actually be a chain in a snarl, in which case the zipcode ends after the + //chain + size_t check_zip_index = zip_index; + for (size_t i = 0 ; i < std::min(ZipCode::CHAIN_SIZE, ZipCode::NODE_SIZE) ; i++) { + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + } + //If the zipcode ends after a chain + if (check_zip_index == std::numeric_limits::max()) { #ifdef DEBUG_ZIPCODE - cerr << "The last thing was a trivial chain so we're done" << endl; + cerr << "\tThe last thing was a chain pretending to be a node so we're done" << endl; #endif finished_decoding = true; return true; - } else if (zipcode.size() == previous_start + this_size + NODE_SIZE) { - //If the zipcode ends after the node, add the node and we're done + } + //Now check if it was actually a real node + for (size_t i = 0 ; i < std::max(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE) + - std::min(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE); i++) { + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + } + + //This might be a node that is a child of the chain, in which case there is one + //more thing in the zip code + + if (check_zip_index == std::numeric_limits::max()) { + //If the zip code ends here, then this was a node and we're done + //This should never really happen since it would have returned true when + //adding the node, but I'll leave in just in case someone calls this when they + //shouldn't have #ifdef DEBUG_ZIPCODE - cerr << "Adding a node and we're done" << endl; + cerr << "\tThe last thing was a node so we're done" << endl; #endif - decoder.emplace_back(true, previous_start + this_size); finished_decoding = true; return true; } else { - //Otherwise, this is a snarl and we're not done + //Otherwise, the last thing was a chain + //Get to the end of the chain + for (size_t i = 0 ; i < ZipCode::CHAIN_SIZE ; i++) { + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); + } + + //zip_index is now the start of the current thing that we want to add - the thing after the chain + + //The current thing can be either a snarl or a node. If it is a node, then the zipcode + //ends after the node. If it is a snarl, then the shortest the remaining zipcocde can be + //is the size of a snarl and a chain + //This must be true in order for this to work + assert(std::min(ZipCode::CHAIN_SIZE + ZipCode::REGULAR_SNARL_SIZE, + ZipCode::CHAIN_SIZE + ZipCode::IRREGULAR_SNARL_SIZE) > ZipCode::NODE_SIZE); + + //Check if the current thing is a node + check_zip_index = zip_index; + for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + } + + //Return the start of this thing, and true if it was a node + decoder.emplace_back(check_zip_index == std::numeric_limits::max(), zip_index); #ifdef DEBUG_ZIPCODE - cerr << "Adding a snarl starting at " << (previous_start + this_size) << endl; + cerr << "\tAdd a " << (check_zip_index == std::numeric_limits::max() ? "node" : "snarl") << endl; #endif - decoder.emplace_back(false, previous_start + this_size); - return false; + //If this was a node, then we're done so return true. 
Otherwise, it was a snarl to return false + return check_zip_index == std::numeric_limits::max(); } } else { - //Otherwise, the last thing was a snarl - size_t next_start = previous_start; + //If !previous_is_chain, then the current zip_index points to a snarl //The regular/irregular snarl tag - if (zip_length == 1) { - //IF this was a root snarl - next_start += ROOT_SNARL_SIZE; - } else if (zipcode.at(previous_start + SNARL_IS_REGULAR_OFFSET) == 1) { - //If this was a regular snarl - next_start += REGULAR_SNARL_SIZE; + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + + if (zip_value == 1) { +#ifdef DEBUG_ZIPCODE + cerr << "\tAdd a node child of a regular snarl" << endl; +#endif + //Regular snarl, so 2 remaining things in the code + for (size_t i = 0 ; i < ZipCode::REGULAR_SNARL_SIZE - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + decoder.emplace_back(!previous_is_chain, zip_index); + return false; } else { - //Technically it could be irregular or cyclic but it doesn't matter because the codes are the same - next_start += IRREGULAR_SNARL_SIZE; +#ifdef DEBUG_ZIPCODE + cerr << "\tAdd the child of " << (decoder.size() == 2 ? "a top-level " : "an" ) << " irregular snarl" << endl; +#endif + //If the decoder has two things in it (top-level chain and the current snarl), then this + //is a top-level irregular snarl. 
Otherwise a normal irregular snarl + size_t code_size = ZipCode::IRREGULAR_SNARL_SIZE; + for (size_t i = 0 ; i < code_size - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + decoder.emplace_back(!previous_is_chain, zip_index); + return false; } - decoder.emplace_back(true, next_start); - return false; } - } + } } size_t ZipCode::max_depth() const { @@ -275,13 +387,17 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } else { //Definitely a snarl - size_t code_type_int = zipcode.at(decoder[depth].second + ZipCode::SNARL_IS_REGULAR_OFFSET); - if (code_type_int == 0) { - return IRREGULAR_SNARL; - } else if (code_type_int == 1) { - return REGULAR_SNARL; + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 0) { + return ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + return ZipCode::REGULAR_SNARL; } else { - return CYCLIC_SNARL; + return ZipCode::CYCLIC_SNARL; } } } @@ -294,7 +410,11 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan if (decoder_length() == 1) { //If the length is 1, then it's a node - size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_LENGTH_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { @@ -305,13 +425,23 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan //If this is a chain/node //If this is a chain or a node, then the length will be the second thing - assert(CHAIN_LENGTH_OFFSET == NODE_LENGTH_OFFSET); - size_t zip_value = zipcode.at(decoder[depth].second + CHAIN_LENGTH_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + + for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl - size_t zip_value = zipcode.at(decoder[depth].second + SNARL_LENGTH_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + + for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } @@ -330,7 +460,12 @@ size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { throw std::runtime_error("zipcodes trying to find the rank in snarl of a node in a chain"); } - return zipcode.at(decoder[depth].second + CHAIN_RANK_IN_SNARL_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } else { //If this is a snarl throw std::runtime_error("zipcodes don't store snarl ranks for snarls"); @@ -352,7 +487,12 @@ size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIn } else if (!decoder[depth].first) { //If this is a snarl - return zipcode.at(decoder[depth].second + SNARL_CHILD_COUNT_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } else { //If this is not a snarl throw std::runtime_error("trying to get the snarl child count of a non-snarl zipcode"); @@ -372,13 +512,21 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde if (!decoder[depth-1].first) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } - size_t zip_value = zipcode.at(decoder[depth].second + NODE_OFFSET_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl - size_t zip_value = zipcode.at(decoder[depth].second + SNARL_OFFSET_IN_CHAIN_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } @@ -396,11 +544,23 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { if (!decoder[depth-1].first) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } - return zipcode.at(decoder[depth].second + NODE_CHAIN_COMPONENT_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + + return zip_value; } else { //If this is a snarl - return zipcode.at(decoder[depth].second + SNARL_CHAIN_COMPONENT_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + + return zip_value; } } @@ -409,7 +569,11 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons if (!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - size_t zip_value = zipcode.at(decoder[depth].second + CHAIN_COMPONENT_COUNT_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } if (zip_value % 2) { if (!get_end) { return 0; @@ -426,7 +590,12 @@ bool ZipCode::get_is_looping_chain(const size_t& depth) const { if 
(!decoder[depth].first) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - return zipcode.at(decoder[depth].second + CHAIN_COMPONENT_COUNT_OFFSET) % 2; + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value % 2; } bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { @@ -441,15 +610,28 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { if (decoder[depth-1].first) { //If the parent is a chain, then this is a node and we need to check its orientation - return zipcode.at(decoder[depth].second + NODE_IS_REVERSED_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } else { //If the parent is a snarl, then this might be a chain in a regular snarl - - size_t snarl_type = zipcode.at(decoder[depth-1].second + SNARL_IS_REGULAR_OFFSET); - if (snarl_type == 1) { + size_t zip_value; + size_t zip_index = decoder[depth-1].second; + //zip_value is true if the parent is a regular snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child - return zipcode.at(decoder[depth-1].second + REGULAR_SNARL_IS_REVERSED_OFFSET); + for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - + ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } else { //The parent is an irregular snarl, so it isn't reversed return false; @@ -468,7 +650,11 @@ 
net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd if (depth == 0) { //If this is the root chain/snarl/node - return distance_index->get_handle_from_connected_component(zipcode.at(ROOT_IDENTIFIER_OFFSET)); + size_t zip_value, zip_index = 0; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return distance_index->get_handle_from_connected_component(zip_value); } else if (decoder[depth].first) { //If this is a chain/node @@ -477,18 +663,25 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd } else { //If this is a snarl - size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); - if (snarl_type == 1) { + size_t zip_value; + size_t zip_index = decoder[depth].second; + //zip_value is is_regular_snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { //If this is a regular snarl throw std::runtime_error("zipcodes trying to get a handle of a regular snarl"); } else { //Irregular snarl - size_t zip_value = zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); - net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE); + //zip_value is distance index offset + for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - + ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; } } @@ -500,7 +693,11 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S if (depth == 0) { //If this is the 
root chain/snarl/node - return distance_index->get_handle_from_connected_component(zipcode.at(ROOT_IDENTIFIER_OFFSET)); + size_t zip_value, zip_index = 0; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return distance_index->get_handle_from_connected_component(zip_value); } else if (decoder[depth].first) { //If this is a chain/node @@ -516,8 +713,13 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } else { //If this is a snarl - size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); - if (snarl_type == 1) { + size_t zip_value; + size_t zip_index = decoder[depth].second; + //zip_value is is_regular_snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { //If this is a regular snarl net_handle_t n = distance_index->get_node_net_handle(id); @@ -531,10 +733,12 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } else { //Irregular snarl - size_t zip_value = zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); - net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE); + //zip_value is distance index offset + for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - + ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; } } @@ -547,7 +751,11 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { if (depth == 0) { //If this is the root chain/snarl/node - return 
zipcode.at(ROOT_IDENTIFIER_OFFSET); + size_t zip_value, zip_index = 0; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } else if (decoder[depth].first) { //If this is a chain/node @@ -556,15 +764,25 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { } else { //If this is a snarl - size_t snarl_type = zipcode.at(decoder[depth].second + SNARL_IS_REGULAR_OFFSET); - if (snarl_type == 1) { + size_t zip_value; + size_t zip_index = decoder[depth].second; + //zip_value is is_regular_snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { //If this is a regular snarl throw std::runtime_error("zipcodes trying to get a handle of a regular ansl"); } else { //Irregular snarl - return zipcode.at(decoder[depth].second + IRREGULAR_SNARL_RECORD_OFFSET); + //zip_value is distance index offset + for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - + ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return zip_value; } } } @@ -574,11 +792,18 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star assert(depth > 0); assert((get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || get_code_type(depth-1) == ZipCode::REGULAR_SNARL || get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)); #endif - size_t snarl_type = zipcode.at(decoder[depth-1].second + SNARL_IS_REGULAR_OFFSET); - if (snarl_type == 1) { + size_t zip_value; + size_t zip_index = decoder[depth-1].second; + //zip_value is 1 if the parent is a regular snarl + for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + if (zip_value == 1) { //The parent is 
a regular snarl, which stores is_reversed for the child - - size_t zip_value = zipcode.at(decoder[depth-1].second + REGULAR_SNARL_IS_REVERSED_OFFSET); + for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - + ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } //Zip value is true if the child is reversed if ((snarl_start && left_side) || (!snarl_start && !left_side)) { @@ -599,7 +824,9 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star } else { distance_offset = ZipCode::IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET; } - size_t zip_value = zipcode.at(decoder[depth-1].second + distance_offset); + for (size_t i = 0 ; i <= distance_offset - ZipCode::SNARL_IS_REGULAR_OFFSET -1 ; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return zip_value == 0 ? std::numeric_limits::max() : zip_value - 1; } } @@ -607,19 +834,31 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return (zip_value & 1) != 0; } bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); + } return (zip_value & 2) != 0; } bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].first); - size_t zip_value = zipcode.at(decoder[depth].second + ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET); + size_t zip_value; + size_t zip_index = decoder[depth].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } return (zip_value & 4) != 0; } @@ -659,11 +898,12 @@ const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, } void ZipCode::dump(std::ostream& out) const { + std::vector numbers = to_vector(); // Print out the numbers in a way that is easy to copy-paste as a vector literal. out << "& temp_zipcode, size_t& max_value) { +vector ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { #ifdef DEBUG_ZIPCODE assert(!distance_index.is_trivial_chain(node)); assert((distance_index.is_chain(distance_index.get_parent(node)) || distance_index.is_root(distance_index.get_parent(node)))); #endif - size_t start_i = temp_zipcode.size(); - temp_zipcode.resize(start_i + NODE_SIZE); - //Node code is: offset in chain, length, is reversed, chain component - + //Node code is: offset in chain, length, is reversed + vector node_code(NODE_SIZE); //Assume this node is in a regular chain size_t prefix_sum = distance_index.get_prefix_sum_value(node); - temp_zipcode[start_i + NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1; - max_value = std::max(max_value, temp_zipcode[start_i + NODE_OFFSET_OFFSET]); - - temp_zipcode[start_i + NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; - max_value = std::max(max_value, temp_zipcode[start_i + NODE_LENGTH_OFFSET]); - - temp_zipcode[start_i + NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); - max_value = std::max(max_value, temp_zipcode[start_i + NODE_IS_REVERSED_OFFSET]); - + node_code[NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1; + node_code[NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; + node_code[NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); size_t component = distance_index.get_chain_component(node); - temp_zipcode[start_i + NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; - max_value = std::max(max_value, temp_zipcode[start_i + NODE_CHAIN_COMPONENT_OFFSET]); - - return; + node_code[NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + return node_code; } -void ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value) { +vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { //Chain code is: rank in snarl, length - - size_t start_i = temp_zipcode.size(); - temp_zipcode.resize(start_i + CHAIN_SIZE); - - //Rank in snarl - temp_zipcode[start_i + CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); - max_value = std::max(max_value, temp_zipcode[start_i + CHAIN_RANK_IN_SNARL_OFFSET]); - - //Length + vector chain_code (CHAIN_SIZE); + chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); size_t len = distance_index.minimum_length(chain); - temp_zipcode[start_i + CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 
0 : len+1; - max_value = std::max(max_value, temp_zipcode[start_i + CHAIN_LENGTH_OFFSET]); - - //Component count and if it loops + chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; bool is_trivial = distance_index.is_trivial_chain(chain) ; size_t component = is_trivial ? 0 @@ -728,125 +946,102 @@ void ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex if (!is_trivial && distance_index.is_looping_chain(chain)) { component += 1; } - temp_zipcode[start_i + CHAIN_COMPONENT_COUNT_OFFSET] = component; - max_value = std::max(max_value, component); - - return; + chain_code[CHAIN_COMPONENT_COUNT_OFFSET] = component; + return chain_code; } -void ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value) { - - size_t start_i = temp_zipcode.size(); - temp_zipcode.resize(start_i + REGULAR_SNARL_SIZE); - +vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { + //Regular snarl code is 1, offset in chain, length, is reversed + vector snarl_code (REGULAR_SNARL_SIZE); //Tag to say that it's a regular snarl - temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET] = 1; + snarl_code[SNARL_IS_REGULAR_OFFSET] = 1; //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - temp_zipcode[start_i + SNARL_CHILD_COUNT_OFFSET] = child_count; - max_value = std::max(max_value, child_count); + snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = 
SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET]); + snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); size_t component = distance_index.get_chain_component(start_node); - temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET]); + snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; //Length of the snarl size_t len = distance_index.minimum_length(snarl); - temp_zipcode[start_i + SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_LENGTH_OFFSET]); + snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 
0 : len+1); //Is the child of the snarl reversed in the snarl #ifdef DEBUG_ZIPCODE assert(distance_index.is_chain(snarl_child)); #endif - temp_zipcode[start_i + REGULAR_SNARL_IS_REVERSED_OFFSET] = (distance_index.distance_in_parent(snarl, + snarl_code[REGULAR_SNARL_IS_REVERSED_OFFSET] = (distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(snarl_child))) != 0); - max_value = std::max(max_value, temp_zipcode[start_i + REGULAR_SNARL_IS_REVERSED_OFFSET]); - return; + return snarl_code; } -void ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value) { - - size_t start_i = temp_zipcode.size(); - temp_zipcode.resize(start_i + IRREGULAR_SNARL_SIZE); +vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index) { + vector snarl_code (IRREGULAR_SNARL_SIZE); //Tag to say that it's an irregular snarl - temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET] = distance_index.is_dag(snarl) ? 0 : 2; - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_IS_REGULAR_OFFSET]); + snarl_code[SNARL_IS_REGULAR_OFFSET] = distance_index.is_dag(snarl) ? 
0 : 2; //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - temp_zipcode[start_i + SNARL_CHILD_COUNT_OFFSET] = child_count; - max_value = std::max(max_value, child_count); + snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_OFFSET_IN_CHAIN_OFFSET]); + snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); size_t component = distance_index.get_chain_component(start_node); - temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_CHAIN_COMPONENT_OFFSET]); + snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; //Length of the snarl size_t len = distance_index.minimum_length(snarl); - temp_zipcode[start_i + SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); - max_value = std::max(max_value, temp_zipcode[start_i + SNARL_LENGTH_OFFSET]); + snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 
0 : len+1); //Record offset to look up distances in the index later - temp_zipcode[start_i + IRREGULAR_SNARL_RECORD_OFFSET] = (distance_index.get_record_offset(snarl)); - max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_RECORD_OFFSET]); + snarl_code[IRREGULAR_SNARL_RECORD_OFFSET] = (distance_index.get_record_offset(snarl)); - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); //Add 1 to values to store inf properly - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() ? 
0 - : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] + 1; - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] == std::numeric_limits::max() + : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] + 1; + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] == std::numeric_limits::max() ? 0 - : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] + 1; - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] == std::numeric_limits::max() + : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] + 1; + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = + snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] == std::numeric_limits::max() ? 0 - : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] + 1; - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = - temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] == std::numeric_limits::max() + : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] + 1; + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = + snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] == std::numeric_limits::max() ? 
0 - : temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] + 1; + : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] + 1; - max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET]); - max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET]); - max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET]); - max_value = std::max(max_value, temp_zipcode[start_i + IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET]); + return snarl_code; } @@ -1313,53 +1508,149 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si cerr << "Checking if two zip codes are farther than " << limit << endl; #endif - if (zip1.decoder[0].first != zip2.decoder[0].first) { + size_t zip_index1 = 0; size_t zip_index2 = 0; + size_t zip_value1 = std::numeric_limits::max(); + size_t zip_value2 = std::numeric_limits::max(); + + //If the two positions aren't on the same connected component, then we're done + for (size_t i = 0 ; i <= ROOT_IS_CHAIN_OFFSET ; i++) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } + if (zip_value1 != zip_value2) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif return true; } - if (zip1.get_distance_index_address(0) != zip2.get_distance_index_address(0)) { + bool is_top_level_chain = zip_value1; + for (size_t i = 0 ; i <= ROOT_IDENTIFIER_OFFSET - ROOT_IS_CHAIN_OFFSET - 1; i++) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } + if (zip_value1 != zip_value2) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif return true; } - //The depth of a chain that both zips 
are on - size_t shared_depth = 0; - - if (!zip1.decoder[0].first) { + if (!is_top_level_chain) { //If the top-level thing is a snarl, then check if the zips are in the same chain. //If they are, then proceed from the shared chain - if (zip1.get_rank_in_snarl(1) != zip2.get_rank_in_snarl(1)) { + //The next thing will be the identifier for the chain + for (size_t i = 0 ; i <= CHAIN_RANK_IN_SNARL_OFFSET; i++) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } + if (zip_value1 != zip_value2) { //We can't tell return false; } - //Next check the length of the chain - if (zip1.get_length(1) < limit) { + //Next is the length of the chain + for (size_t i = 0 ; i <= CHAIN_LENGTH_OFFSET - CHAIN_RANK_IN_SNARL_OFFSET - 1; i++) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } + if (zip_value1 < limit) { return true; } - //The two zipcodes are on the same chain at depth 1 - shared_depth = 1; //The zips now point to the children of the shared chain, so we can proceed as if the top-level //structure was a chain + } else { + //If it is a chain, get two more things to get to the end of the chain + for (size_t i = 0 ; i < 2 ; ++i) { + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + } } //Both zips now point to a thing in a shared chain //Get the minimum possible distance between the structures on the chain //For a lower bound, this assumes that the positions are as close as they can be on the structure in the chain - size_t prefix_sum1 = zip1.get_offset_in_chain(shared_depth+1); - size_t prefix_sum2 = zip2.get_offset_in_chain(shared_depth+1); - size_t length1 = zip1.get_length(shared_depth+1); - 
size_t length2 = zip2.get_length(shared_depth+1); - size_t component1 = zip1.get_chain_component(shared_depth+1); - size_t component2 = zip2.get_chain_component(shared_depth+1); + size_t prefix_sum1, prefix_sum2, length1, length2, component1, component2; + //The next thing could either be a snarl or a node. If it is a node, + vector next_values; + for (size_t i = 0 ; i < NODE_SIZE ; i++ ) { +#ifdef DEBUG_ZIPCODE + assert(zip_index1 != std::numeric_limits::max()); +#endif + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + next_values.emplace_back(zip_value1); + } + if (zip_index1 == std::numeric_limits::max()) { +#ifdef DEBUG_ZIPCODE + cerr << "zip1 is a node in a chain" << endl; +#endif + //If the last thing was a node + prefix_sum1 = next_values[0]; + length1 = next_values[1]; + component1 = next_values[2]; + prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; + length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; + } else { +#ifdef DEBUG_ZIPCODE + cerr << "zip1 is in a snarl in a chain" << endl; +#endif + //If the last thing was a snarl + if (next_values[0]) { + //If the next thing was a regular snarl + prefix_sum1 = next_values[1]; + length1 = next_values[2]; + std::tie(zip_value1, zip_index1) = zip1.zipcode.get_value_and_next_index(zip_index1); + component1 = zip_value1; + prefix_sum1 = prefix_sum1 == 0 ? std::numeric_limits::max() : prefix_sum1-1; + length1 = length1 == 0 ? std::numeric_limits::max() : length1-1; + } else { + //If the next thing was an irregular snarl + //TODO: If it's an irregular snarl, then we don't actually store the relevant values so we can't tell. 
Could look it up in the distance index or store it + return false; + } + } + + //Do the same for the other zip + next_values.clear(); + for (size_t i = 0 ; i < NODE_SIZE ; i++ ) { +#ifdef DEBUG_ZIPCODE + assert(zip_index2 != std::numeric_limits::max()); +#endif + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + next_values.emplace_back(zip_value2); + } + if (zip_index2 == std::numeric_limits::max()) { +#ifdef DEBUG_ZIPCODE + cerr << "zip2 is a node in a chain" << endl; +#endif + //If the last thing was a node + prefix_sum2 = next_values[0]; + length2 = next_values[1]; + component2 = next_values[2]; + prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; + length2 = length2 == 0 ? std::numeric_limits::max() : length2-1; + } else { +#ifdef DEBUG_ZIPCODE + cerr << "zip2 is in a snarl in a chain" << endl; +#endif + //If the last thing was a snarl + if (next_values[0]) { + //If the next thing was a regular snarl + prefix_sum2 = next_values[1]; + length2 = next_values[2]; + std::tie(zip_value2, zip_index2) = zip2.zipcode.get_value_and_next_index(zip_index2); + component2 = zip_value2; + prefix_sum2 = prefix_sum2 == 0 ? std::numeric_limits::max() : prefix_sum2-1; + length2 = length2 == 0 ? std::numeric_limits::max() : length2-1; + } else { + //If the next thing was an irregular snarl + //TODO: If it's an irregular snarl, then we don't actually store the relevant values so we can't tell. 
Could look it up in the distance index or store it + return false; + } + } #ifdef DEBUG_ZIPCODE cerr << "Finding distance in chain between " << prefix_sum1 << " " << length1 << " and " << prefix_sum2 << " and " << length2 << endl; #endif @@ -1398,162 +1689,52 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si } gbwtgraph::Payload ZipCode::get_payload_from_zip() const { -#ifdef DEBUG_ZIPCODE - cerr << "Encode integers: "; - for (size_t i = 0 ; i < zipcode.size() ; i++) { - cerr << zipcode.at(i) << " "; - } - cerr << endl; -#endif - if (bit_count() > 112) { + if (byte_count() > 15) { //If there aren't enough bits to represent the zip code return MIPayload::NO_CODE; } + + //Index and value as we walk through the zip code + size_t index = 0; + size_t value; + //The values that get returned code_type encoded1 = 0; code_type encoded2 = 0; - //The first (leftmost of first int) 8 bits is the width - encoded1 |= zipcode.get_bit_width(); - - //Left shift by 8 to make space for the next thing we're adding - encoded1 <<= 8; - //The second 8 bits is the number of items in the vector (not the number of bits) - encoded1 |= zipcode.size(); - encoded1 <<= 1; + encoded1 |= byte_count(); -#ifdef DEBUG_ZIPCODE -cerr << "Encode the bit width "<< ((size_t) zipcode.get_bit_width()) << " and size " << zipcode.size() << endl; -cerr << "\t"; -#endif - + for (size_t i = 0 ; i < zipcode.data.size() ; i++ ) { + size_t byte = static_cast (zipcode.data[i]); + if ( i < 7 ) { + //Add to first code + encoded1 |= (byte << ((i+1)*8)); - //16 bits are set, so 112 left - //Now add each bit one by one and left shift to make space for the next one - for (size_t i = 0 ; i < 112 ; i++ ) { - if ( i < 48 ) { - //Add to first code, just one bit to the end - if (i < zipcode.get_bit_count() && zipcode.bit_at(i)) { - encoded1 |= 1; -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; - } -#endif - //Left shift by one after everything 
except the last bit - if (i != 47) { - encoded1 <<= 1; - } } else { //Add to second code - if (i < zipcode.get_bit_count() && zipcode.bit_at(i)) { - encoded2 |= 1; -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; - } -#endif - if ( i != 111) { - encoded2 <<= 1; - } + encoded2 |= (byte << ((i-7)*8)); } } -#ifdef DEBUG_ZIPCODE - cerr << endl; - cerr << "Actual ints being stored: " << encoded1 << " and " << encoded2 << ": "; - for (int i = 63 ; i >= 0 ; --i) { - if (((size_t) 1 << i) & encoded1) { - cerr << "1"; - } else { - cerr << "0"; - } - } - for (int i = 63 ; i >= 0 ; --i) { - if (((size_t) 1 << i) & encoded2) { - cerr << "1"; - } else { - cerr << "0"; - } - } - cerr << endl; -#endif return {encoded1, encoded2}; } void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { assert(payload != MIPayload::NO_CODE); - - //First 8 bits of first int is the width - size_t width = payload.first >> 56; - zipcode.set_bit_width((uint8_t)width); - - //Second 8 bits is the item count - size_t item_count = (payload.first >> 48) & ((1 << 8)-1); - - //bit count is the product of the two - size_t bit_count = (size_t)width * (size_t)item_count; - zipcode.set_bitvector_length(bit_count); - -#ifdef DEBUG_ZIPCODE - cerr << "Get zipcode from payload " << payload.first << " and " << payload.second<< " with width: " << width << " item count " << item_count << " meaning " << bit_count << " bits" << endl; - cerr << "\t"; -#endif - - - //Mask for checking the relevant bit - //Start by checking the 17th bit from the left - //Right shift by one for each bit we look at - uint64_t mask1 = (uint64_t)1 << 47; - uint64_t mask2 = (uint64_t)1 << 63; - //get one bit at a time from the payload and add it to the zip code - for (size_t i = 0 ; i < bit_count ; i++) { - if (i < 48) { - if ((payload.first & mask1) != 0) { - zipcode.set_bit_at(i); -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; 
- } -#endif - mask1 >>= 1; + zipcode.data.reserve(16); + + //get one byte at a time from the payload and add it to the zip code + size_t bit_mask = (1 << 8) - 1; + size_t byte_count = payload.first & bit_mask; + for (size_t i = 1 ; i <= byte_count ; i++) { + if (i < 8) { + zipcode.add_one_byte((payload.first >> (i*8)) & bit_mask); } else { - if ((payload.second & mask2) != 0) { - zipcode.set_bit_at(i); -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; - } -#endif - mask2 >>= 1; + zipcode.add_one_byte((payload.second >> ((i-8)*8)) & bit_mask); } + } -#ifdef DEBUG_ZIPCODE - cerr << endl; - cerr << "Found encoded integers: "; - for (size_t i = 0 ; i < zipcode.size() ; i++) { - cerr << zipcode.at(i) << " "; - } - cerr << endl; -#endif - return; } std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { @@ -1582,12 +1763,9 @@ std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { void ZipCodeCollection::serialize(std::ostream& out) const { - //The zipcode vector will be serialized as a bunch of min_width_int_vector_ts - //The first min_width_int_vector_t will have one value, which will be the length of the + //The zipcode vector will be serialized as a bunch of varint_vector_ts + //The first varint_vector_t will have one value, which will be the length of the //zipcode that follows it -#ifdef DEBUG_ZIPCODE - cerr << "Serialize zipcode collection" << endl; -#endif //First serialize the header, which is the magic number and version uint32_t magic = magic_number; @@ -1597,52 +1775,29 @@ void ZipCodeCollection::serialize(std::ostream& out) const { for (const ZipCode& zip : zipcodes) { - - //Write the width - uint8_t width = (uint8_t) zip.zipcode.get_bit_width(); - out.write(reinterpret_cast(&width), sizeof(width)); - //How many values are in the vector. 
Used with width to get the bit count - size_t item_count = zip.zipcode.size(); - - out.write(reinterpret_cast(&item_count), sizeof(item_count)); + //How many bytes are going to be saved for the zipcode? + size_t byte_count = zip.byte_count(); + varint_vector_t size_vector; + size_vector.add_value(byte_count); + //Write the number of bytes about to be saved + for (const uint8_t& byte : size_vector.data) { + out << char(byte); + } //Write the zipcode #ifdef DEBUG_ZIPCODE - cerr << "Write width " << (size_t) width << " and item count " << item_count << " and zipcode: " << endl; - cerr << "\t"; - for (size_t i = 0 ; i < zip.zipcode.size() ; i++) { - cerr << zip.zipcode.at(i) << " "; - } - cerr << endl << "\t"; size_t zip_byte_count = 0; #endif - size_t bit_count = zip.zipcode.get_bit_count(); - for (size_t i = 0 ; i < bit_count ; i += 8) { + for (const uint8_t& byte : zip.zipcode.data ) { #ifdef DEBUG_ZIPCODE zip_byte_count++; #endif - uint8_t result = 0; - for (size_t j = 0 ; j < 8 ; j++) { - result <<= 1; - if (i+j < bit_count && zip.zipcode.bit_at(i+j)) { -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - result |= 1; - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; - } -#endif - } - out << char(result); + out << char(byte); } #ifdef DEBUG_ZIPCODE - cerr << endl; - assert(zip_byte_count == ceil((float)bit_count / 8)); + assert(byte_count == zip_byte_count); #endif } @@ -1663,59 +1818,40 @@ void ZipCodeCollection::deserialize(std::istream& in) { while (in.peek() != EOF) { - //First, get the bitwidth of the vector - uint8_t width; - in.read(reinterpret_cast(&width), sizeof(width)); - - //Next, get the number of items in the zipcode - size_t item_count; - in.read(reinterpret_cast(&item_count), sizeof(item_count)); - - size_t bit_count = (size_t)width * item_count; - - //How many bytes were used to store all the bits in the zipcode bit vector - size_t byte_count = (size_t) std::ceil((float)bit_count / 8); - + //First, get the number of bytes used by the zipcode + //This 
will be a varint_vector_t with one value, which is the number of bytes in the zipcode + //Each byte in the varint_vector_t starts with 0 if it is the last bit in the + //number, and 1 if the next byte is included + varint_vector_t byte_count_vector; + while (in.peek() & (1<<7)) { + //If the first bit in the byte is 1, then add it, stop once the first bit is 0 + char c; + in.get(c); + byte_count_vector.add_one_byte((uint8_t)c); + } + assert(! (in.peek() & (1<<7))); + //The next byte has a 0 as its first bit, so add it + char c; + in.get(c); + byte_count_vector.add_one_byte((uint8_t)c); + //The first (and only) value in the vector is the length of the zipcode + size_t zipcode_byte_count = byte_count_vector.get_value_and_next_index(0).first; #ifdef DEBUG_ZIPCODE - cerr << "Get zipcode with width " << (size_t) width << " and item count " << item_count << endl << "\t"; + cerr << "Get zipcode of " << zipcode_byte_count << " bytes" << endl; + //assert(zipcode_byte_count >= 15); + assert(byte_count_vector.get_value_and_next_index(0).second == std::numeric_limits::max()); #endif - char line [byte_count]; + char line [zipcode_byte_count]; - in.read(line, byte_count); + in.read(line, zipcode_byte_count); ZipCode zip; - zip.zipcode.set_bit_width(width); - zip.zipcode.set_bitvector_length(bit_count); - size_t added_bits = 0; for (const char& character : line) { - for (int i = 7 ; i >= 0 ; i--) { - if (added_bits < bit_count) { - if (((uint8_t)character & ((uint8_t)1 << i)) != 0) { - zip.zipcode.set_bit_at(added_bits); -#ifdef DEBUG_ZIPCODE - cerr << "1"; -#endif - } -#ifdef DEBUG_ZIPCODE - else { - cerr << "0"; - } -#endif - added_bits++; - } - } - } -#ifdef DEBUG_ZIPCODE - cerr << endl <<"\t"; - for (size_t i = 0 ; i < zip.zipcode.size() ; i++) { - cerr << zip.zipcode.at(i) << " "; + zip.zipcode.add_one_byte(uint8_t(character)); } - cerr << endl; -#endif - zipcodes.emplace_back(std::move(zip)); } @@ -1728,12 +1864,21 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, 
const SnarlDistanceIndex& payload.parent_is_root = true; payload.parent_is_chain = true; - payload.node_handle = distance_index.get_net_handle_from_values( - distance_index.get_record_offset(distance_index.get_handle_from_connected_component(get_distance_index_address(0))), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); - - payload.node_length = get_length(0); + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[0].second; + //Root is chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //root_identifier + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); + + //Root node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; payload.is_trivial_chain = true; payload.is_reversed = false; payload.parent_handle = distance_index.get_root(); @@ -1746,29 +1891,43 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.parent_is_chain = true; payload.parent_is_root = false; - size_t parent_depth = max_depth() - 1; + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[max_depth()-1].second; + //is_chain/rank in snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + + //root_identifier for root, chain length for anything else + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (decoder_length() == 2) { //If the node is a child of the root chain - payload.parent_handle = distance_index.start_end_traversal_of( - distance_index.get_handle_from_connected_component(get_distance_index_address(0))); + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } else { payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; } payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); - payload.prefix_sum = get_offset_in_chain(parent_depth+1); + //chain component count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Node prefix sum + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Node length - payload.node_length = get_length(parent_depth+1); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //is_reversed + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: For top-level chains we got this from the distance index - payload.is_reversed = get_is_reversed_in_parent(parent_depth+1); + payload.is_reversed = zip_value; - payload.chain_component = get_chain_component(parent_depth+1); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; @@ -1785,30 +1944,56 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.is_trivial_chain = true; + size_t zip_value; + size_t zip_index; if (payload.parent_is_root) { //is_chain + zip_index = decoder[0].second; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Identifier for root snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; - payload.parent_record_offset = distance_index.get_record_offset( - distance_index.get_handle_from_connected_component( - get_distance_index_address(0))); + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { - size_t parent_depth = max_depth() - 1; - payload.parent_type = get_code_type(parent_depth); + zip_index = decoder[max_depth()-1].second; + //is_regular + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //If this is a non-root snarl, get as much as we can from it + payload.parent_type = 
ZipCode::EMPTY; + if (zip_value == 0) { + payload.parent_type = ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + payload.parent_type = ZipCode::REGULAR_SNARL; + } else { + payload.parent_type = ZipCode::CYCLIC_SNARL; + } - payload.prefix_sum = 0; + //Snarl prefix sum + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + + //Snarl length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Snarl child_count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Chain component of the snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //TODO: SHould use this somehow payload.chain_component = 0; + //is_reversed for regular snarl and record offset for irregular/cyclic snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); //Simple and regular snarls are different for clustering if (distance_index.is_simple_snarl(grandparent_handle)) { - payload.is_reversed = get_is_reversed_in_parent(parent_depth+1); + payload.is_reversed = zip_value; payload.parent_is_chain=true; payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { @@ -1818,11 +2003,17 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else { payload.is_reversed = false; - payload.parent_record_offset = get_distance_index_address(parent_depth); + payload.parent_record_offset = zip_value; } } - payload.node_length = get_length(max_depth()); + //We should be at the node/trivial chain now + zip_index = decoder[max_depth()].second; + //Chain rank in snarl + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); + //Chain length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //Get the rest as default values @@ -1850,19 +2041,39 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { result += (decoder[d].first ? "1" : "0"); if (d == 0) { //Root structure - result += std::to_string(get_distance_index_address(0)); + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + result += std::to_string(zip_value); + } } else if (decoder[d].first) { //is_chain so could be a chain or a node if (decoder[d-1].first) { //If the thing before this was also a chain, then it is a node - result += std::to_string(get_offset_in_chain(d)); + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + result += std::to_string(zip_value); + } } else { //Otherwise it's a chain - result += std::to_string(get_rank_in_snarl(d)); + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + result += std::to_string(zip_value); + } } } else { //Definitely a snarl - result += std::to_string(get_offset_in_chain(d)); + size_t zip_value; + size_t zip_index = decoder[d].second; + for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + result += std::to_string(zip_value); + } } if (d < std::min(depth, max_depth())) { result += "."; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 
40c7df5bc38..992a8e27dc3 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -2,7 +2,7 @@ #define VG_ZIP_CODE_HPP_INCLUDED -#include "min_width_int_vector.hpp" +#include "varint.hpp" #include "snarl_distance_index.hpp" #include @@ -60,7 +60,7 @@ class ZipCode { public: //Fill in an empty zipcode given a position - void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos, bool fill_in_decoder=true); + void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos); //Fill in an empty zipcode using the information that was stored in a payload void fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload); @@ -106,19 +106,14 @@ class ZipCode { typedef std::uint64_t code_type; // We assume that this fits into gbwtgraph::Payload. + ///How many bytes were used to store this zipcode? + size_t byte_count() const { + return zipcode.byte_count(); + } //TODO: Make this private: //The actual data for a zipcode is a vector of ints - min_width_int_vector_t zipcode; - - ///How many bytes were used to store this zipcode? - size_t bit_count() const { - return zipcode.get_bit_count(); - } - ///What is the bit width used to store this zipcode? - size_t bit_width() const { - return zipcode.get_bit_width(); - } + varint_vector_t zipcode; /// Equality operator @@ -126,8 +121,11 @@ class ZipCode { return zipcode == other.zipcode; } + /// Dump to a normal vector + std::vector to_vector() const; + /// Load from a normal vector - void from_vector(const std::vector& values, size_t max_value = 0); + void from_vector(const std::vector& values); private: @@ -204,26 +202,15 @@ class ZipCode { /* Functions for getting the code for each snarl/chain/node * Distances will be stored as distance+1, 0 will be reserved for inf */ - ///Add the code for the given node to the end of the zipcode. 
- ///Also update max_value to be the maximum value in the zipcode - inline void get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value); - ///Add the code for the given chain to the end of the zipcode. - ///Also update max_value to be the maximum value in the zipcode - inline void get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value); - - ///Add the code for the given regular snarl to the end of the zipcode. - ///Also update max_value to be the maximum value in the zipcode - inline void get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value); - - ///Add the code for the given irregular or cyclic snarl to the end of the zipcode. - ///Also update max_value to be the maximum value in the zipcode - inline void get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, - const SnarlDistanceIndex& distance_index, - vector& temp_zipcode, size_t& max_value); + //Return a vector of size_ts that will represent the node in the zip code + inline vector get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index); + //Return a vector of size_ts that will represent the chain in the zip code + inline vector get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index); + //Return a vector of size_ts that will represent the snarl in the zip code + inline vector get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + const SnarlDistanceIndex& distance_index); + //Return a vector of size_ts that will represent the snarl in the zip code + inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); //////////////////////////////// Stuff for decoding the 
zipcode @@ -232,7 +219,7 @@ class ZipCode { //TODO: Make the decoder and zipcode private, still need it for unit testing ///The decoder as a vector of pair, one for each snarl tree node in the zip ///where is_chain indicates whether it's a chain/node, and index - ///is the index of the node/snarl/chain code in the min_width_int_vector_t + ///is the index of the node/snarl/chain code in the varint_vector_t std::vector> decoder; ///Did we fill in the entire decoder @@ -361,7 +348,7 @@ class ZipCodeCollection { //magic number to identify the file const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t version = 3; + const static uint32_t version = 2; public: const static std::uint32_t get_magic_number() {return magic_number;} From 39ed6c8d83a7a75ce491d35461f1275ea1a2a8d3 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 11:55:56 +0200 Subject: [PATCH 079/124] Make decoder use fewer bits --- src/unittest/zip_code.cpp | 61 ++++++++++---------- src/zip_code.cpp | 118 +++++++++++++++++++------------------- src/zip_code.hpp | 10 +++- 3 files changed, 98 insertions(+), 91 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 22bd68ac308..d72de04d546 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -48,11 +48,10 @@ using namespace std; zipcode.fill_in_full_decoder(); REQUIRE(zipcode.decoder_length() == 1); - REQUIRE(zipcode.decoder.front().first == 1); - REQUIRE(zipcode.decoder.front().second == 0); + REQUIRE(zipcode.decoder.front().is_chain == 1); + REQUIRE(zipcode.decoder.front().offset == 0); } SECTION("decoded code") { - cerr << "New code" << endl; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); zipcode.fill_in_full_decoder(); @@ -118,7 +117,7 @@ using namespace std; //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - 
REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -135,7 +134,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -186,7 +185,7 @@ using namespace std; //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -203,7 +202,7 @@ using namespace std; //Next is the snarl code //1 for a regular snarl - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -233,7 +232,7 @@ using namespace std; //Next is the chain code //rank of the chain in the snarl - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 
distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); @@ -430,7 +429,7 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -450,7 +449,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -501,7 +500,7 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 4); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -519,7 +518,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -550,7 +549,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Next is the chain code - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -566,7 +565,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the node code - REQUIRE(zipcode.decoder[3] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(true, value_and_index.second)); //Offset of the node in the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); @@ -629,7 +628,7 @@ using namespace std; zipcode.fill_in_full_decoder(); REQUIRE(zipcode.decoder_length() == 7); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -648,7 +647,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 1-8 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -678,7 +677,7 @@ using namespace std; distance_index.flip(distance_index.canonical(chain2))) != 0; REQUIRE(value_and_index.first == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( @@ -693,7 +692,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 2-7 - 
REQUIRE(zipcode.decoder[3] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(false, value_and_index.second)); //1 as tag for regular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -722,7 +721,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //Chain code for chain 3-5 - REQUIRE(zipcode.decoder[4] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[4] == ZipCode::decoder_t(true, value_and_index.second)); //Rank in parent value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); @@ -736,7 +735,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //REgular snarl code for snarl 3-5 - REQUIRE(zipcode.decoder[5] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[5] == ZipCode::decoder_t(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -765,7 +764,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Chain code for node 4 - REQUIRE(zipcode.decoder[6] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[6] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; @@ -1052,7 +1051,7 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + 
REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1071,7 +1070,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(zipcode.decoder[1] == std::make_pair(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //0 as tag for irregular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2); @@ -1119,7 +1118,7 @@ using namespace std; //REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //Rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1348,7 +1347,7 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1359,7 +1358,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = 
zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); @@ -1395,7 +1394,7 @@ using namespace std; REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(zipcode.decoder[0] == std::make_pair(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1406,7 +1405,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1418,7 +1417,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Node 3 - REQUIRE(zipcode.decoder[2] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); @@ -1609,7 +1608,7 @@ using namespace std; //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == std::make_pair(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -1626,7 +1625,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == std::make_pair(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 99004b283a4..a06d61c421f 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -186,7 +186,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? "chain or node" return false; } else if (zip_length == 1) { //If there is one thing in the zipcode - previous_is_chain = decoder.back().first; + previous_is_chain = decoder.back().is_chain; //If the top-level structure is a chain, it might actually be a node, in which case //the only other thing that got stored is the length @@ -234,8 +234,8 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } else { //If there was already stuff in the decoder, then figure out where the last thing //is and set values - previous_is_chain = decoder.back().first; - zip_index = decoder.back().second; + previous_is_chain = decoder.back().is_chain; + zip_index = decoder.back().offset; #ifdef DEBUG_ZIPCODE cerr << "Last thing was a " << (previous_is_chain ? 
"chain or node" : "snarl") << " starting at " << zip_index << endl; #endif @@ -363,7 +363,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { //A snarl is always a snarl. A chain could actually be a node if (depth == 0) { //If it is a root snarl/chain - if (decoder[0].first) { + if (decoder[0].is_chain) { //If it says it's a chain, then it might be a chain or a node //If there is still only one thing in the decoder, then it's a node @@ -376,9 +376,9 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { return ZipCode::ROOT_SNARL; } } else { - if (decoder[depth].first) { + if (decoder[depth].is_chain) { //is_chain so could be a chain or a node - if (decoder[depth-1].first) { + if (decoder[depth-1].is_chain) { //If the thing before this was also a chain, then it is a node return ZipCode::NODE; } else { @@ -388,7 +388,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } else { //Definitely a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -411,7 +411,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan if (decoder_length() == 1) { //If the length is 1, then it's a node size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -421,12 +421,12 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan //Otherwise, we didn't store the length throw std::runtime_error("zipcodes don't store lengths of top-level chains or snarls"); } - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node //If 
this is a chain or a node, then the length will be the second thing size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -436,7 +436,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -453,15 +453,15 @@ size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { //If this is the root chain/snarl/node throw std::runtime_error("zipcodes don't store ranks of top-level chains or snarls"); - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node - if (decoder[depth-1].first) { + if (decoder[depth-1].is_chain) { throw std::runtime_error("zipcodes trying to find the rank in snarl of a node in a chain"); } size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -484,11 +484,11 @@ size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIn }); return child_count; - } else if (!decoder[depth].first) { + } else if (!decoder[depth].is_chain) { //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -506,14 +506,14 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde 
//If this is the root chain/snarl/node throw std::runtime_error("zipcodes don't have chain offsets for roots"); - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node - if (!decoder[depth-1].first) { + if (!decoder[depth-1].is_chain) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -523,7 +523,7 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -538,14 +538,14 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { //If this is the root chain/snarl/node throw std::runtime_error("zipcodes don't have chain offsets for roots"); - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node - if (!decoder[depth-1].first) { + if (!decoder[depth-1].is_chain) { throw std::runtime_error("zipcodes trying to find the offset in child of a snarl"); } size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -555,7 +555,7 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { 
std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -566,11 +566,11 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) const { - if (!decoder[depth].first) { + if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -587,11 +587,11 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons bool ZipCode::get_is_looping_chain(const size_t& depth) const { - if (!decoder[depth].first) { + if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -604,14 +604,14 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { //If this is the root chain/snarl/node return false; - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node - if (decoder[depth-1].first) { + if (decoder[depth-1].is_chain) { //If the parent is a chain, then this is a node and we need to check its orientation size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -619,7 +619,7 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { } else { //If the 
parent is a snarl, then this might be a chain in a regular snarl size_t zip_value; - size_t zip_index = decoder[depth-1].second; + size_t zip_index = decoder[depth-1].offset; //zip_value is true if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -656,7 +656,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd } return distance_index->get_handle_from_connected_component(zip_value); - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node throw std::runtime_error("zipcodes trying to get a handle of a chain or node"); @@ -664,7 +664,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -699,7 +699,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } return distance_index->get_handle_from_connected_component(zip_value); - } else if (decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node net_handle_t n = distance_index->get_node_net_handle(id); @@ -714,7 +714,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -757,7 +757,7 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { } return zip_value; - } else if 
(decoder[depth].first) { + } else if (decoder[depth].is_chain) { //If this is a chain/node throw std::runtime_error("zipcodes trying to get a handle of a chain or node"); @@ -765,7 +765,7 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { //If this is a snarl size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -793,7 +793,7 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star assert((get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || get_code_type(depth-1) == ZipCode::REGULAR_SNARL || get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)); #endif size_t zip_value; - size_t zip_index = decoder[depth-1].second; + size_t zip_index = decoder[depth-1].offset; //zip_value is 1 if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -833,9 +833,9 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); - assert(decoder[0].first); + assert(decoder[0].is_chain); size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -843,9 +843,9 @@ bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { } bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); - assert(decoder[0].first); + assert(decoder[0].is_chain); size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t 
zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -853,9 +853,9 @@ bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { } bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); - assert(decoder[0].first); + assert(decoder[0].is_chain); size_t zip_value; - size_t zip_index = decoder[depth].second; + size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -1328,7 +1328,7 @@ cerr << "Finding distances to ancestors of second position" << endl; distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d2, node_length),1)); } - } else if ( zip1.decoder[depth].first) { + } else if ( zip1.decoder[depth].is_chain) { #ifdef DEBUG_ZIPCODE cerr << "\tancestor should be a chain" << endl; #endif @@ -1866,7 +1866,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& //Walk through the zipcode to get values size_t zip_value; - size_t zip_index = decoder[0].second; + size_t zip_index = decoder[0].offset; //Root is chain std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //root_identifier @@ -1885,7 +1885,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.parent_type = ZipCode::ROOT_NODE; payload.parent_record_offset = 0; - } else if (decoder[max_depth() - 1].first) { + } else if (decoder[max_depth() - 1].is_chain) { //If the parent is a chain payload.node_handle = distance_index.get_node_net_handle(id); payload.parent_is_chain = true; @@ -1893,7 +1893,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& //Walk through the zipcode to get values size_t 
zip_value; - size_t zip_index = decoder[max_depth()-1].second; + size_t zip_index = decoder[max_depth()-1].offset; //is_chain/rank in snarl std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -1948,7 +1948,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& size_t zip_index; if (payload.parent_is_root) { //is_chain - zip_index = decoder[0].second; + zip_index = decoder[0].offset; std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -1959,7 +1959,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& SnarlDistanceIndex::ROOT_HANDLE); payload.parent_type = ZipCode::ROOT_SNARL; } else { - zip_index = decoder[max_depth()-1].second; + zip_index = decoder[max_depth()-1].offset; //is_regular std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //If this is a non-root snarl, get as much as we can from it @@ -2008,7 +2008,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } //We should be at the node/trivial chain now - zip_index = decoder[max_depth()].second; + zip_index = decoder[max_depth()].offset; //Chain rank in snarl std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain length @@ -2038,21 +2038,21 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { } string result = ""; for (size_t d = 0 ; d < depth ; d++) { - result += (decoder[d].first ? "1" : "0"); + result += (decoder[d].is_chain ? 
"1" : "0"); if (d == 0) { //Root structure size_t zip_value; - size_t zip_index = decoder[d].second; + size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } - } else if (decoder[d].first) { + } else if (decoder[d].is_chain) { //is_chain so could be a chain or a node - if (decoder[d-1].first) { + if (decoder[d-1].is_chain) { //If the thing before this was also a chain, then it is a node size_t zip_value; - size_t zip_index = decoder[d].second; + size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); @@ -2060,7 +2060,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { } else { //Otherwise it's a chain size_t zip_value; - size_t zip_index = decoder[d].second; + size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); @@ -2069,7 +2069,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { } else { //Definitely a snarl size_t zip_value; - size_t zip_index = decoder[d].second; + size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 992a8e27dc3..4b5de75b9dc 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -220,7 +220,15 @@ class ZipCode { ///The decoder as a vector of pair, one for each snarl tree node in the zip ///where is_chain indicates whether it's a chain/node, and index ///is the index of the node/snarl/chain code in the 
varint_vector_t - std::vector> decoder; + struct decoder_t { + bool is_chain : 1; + size_t offset : 15; + decoder_t(bool is_chain, size_t offset) : is_chain(is_chain), offset(offset) {} + inline bool operator==(const decoder_t& other) const { + return is_chain == other.is_chain && offset == other.offset; + } + }; + std::vector decoder; ///Did we fill in the entire decoder ///TODO: I'm making it fill in the decoder automatically because it seems to be faster that way, instead of From 116dc01020eb418dfcea9be56945eeb55c1cf7f5 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 12:09:12 +0200 Subject: [PATCH 080/124] Only store zipcodes in a separate file --- src/subcommand/minimizer_main.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 73c30133801..23a46710149 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -396,10 +396,7 @@ int main_minimizer(int argc, char** argv) { } cout << endl; #endif - if (zipcode.zipcode.byte_count() < 15) { - //If the zipcode is small enough to store in the payload - return zipcode.get_payload_from_zip(); - } else if (!zipcode_name.empty()) { + if (!zipcode_name.empty()) { //Otherwise, if they are being saved, add the zipcode to the oversized zipcode list //And remember the zipcode From eace4132904f3367f8e13634cd0966538a1158ba Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 19:17:53 +0200 Subject: [PATCH 081/124] Serialize zipcode and decoder --- src/unittest/zip_code.cpp | 26 ++++++++++++++- src/zip_code.cpp | 67 +++++++++++++++++++++++++++++++++++++++ src/zip_code.hpp | 2 +- 3 files changed, 93 insertions(+), 2 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index d72de04d546..788e61af79c 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -1747,7 +1747,7 @@ using namespace std; REQUIRE(zipcode == decoded); }; } - 
SECTION("serialization") { + SECTION("serialize without decoder") { ZipCodeCollection zipcodes; for (size_t i = 1 ; i <= 7 ; i++) { ZipCode zip; @@ -1769,6 +1769,30 @@ using namespace std; } } + SECTION("serialize with decoder") { + ZipCodeCollection zipcodes; + for (size_t i = 1 ; i <= 7 ; i++) { + ZipCode zip; + zip.fill_in_zipcode(distance_index, make_pos_t(i, 0, false)); + zip.fill_in_full_decoder(); + zipcodes.emplace_back(zip); + } + ofstream out ("zipcodes"); + zipcodes.serialize(out); + out.close(); + + ifstream in("zipcodes"); + ZipCodeCollection new_zipcodes; + new_zipcodes.deserialize(in); + in.close(); + + REQUIRE(zipcodes.size() == new_zipcodes.size()); + for (size_t i = 0 ; i < zipcodes.size() ; i++) { + REQUIRE(zipcodes.at(i).zipcode == new_zipcodes.at(i).zipcode); + REQUIRE(zipcodes.at(i).decoder == new_zipcodes.at(i).decoder); + } + + } } TEST_CASE( "Looping chain zipcode", "[zipcode]" ) { VG graph; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a06d61c421f..8f3dc6b01b9 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1799,6 +1799,26 @@ void ZipCodeCollection::serialize(std::ostream& out) const { #ifdef DEBUG_ZIPCODE assert(byte_count == zip_byte_count); #endif + + //Also save the decoder + varint_vector_t decoder_vector; + for (const ZipCode::decoder_t& d : zip.decoder) { + decoder_vector.add_value(d.is_chain); + decoder_vector.add_value(d.offset); + } + + //Write the number of bytes for the zipcode + varint_vector_t decoder_byte_count; + decoder_byte_count.add_value(decoder_vector.byte_count()); + for (const uint8_t& byte : decoder_byte_count.data) { + out << char(byte); + } + + + //Write the decoder + for (const uint8_t& byte : decoder_vector.data ) { + out << char(byte); + } } } @@ -1852,6 +1872,53 @@ void ZipCodeCollection::deserialize(std::istream& in) { for (const char& character : line) { zip.zipcode.add_one_byte(uint8_t(character)); } + + + //Now get the decoder + + varint_vector_t decoder_byte_count_vector; + while 
(in.peek() & (1<<7)) { + //If the first bit in the byte is 1, then add it, stop once the first bit is 0 + char ch; + in.get(ch); + decoder_byte_count_vector.add_one_byte((uint8_t)ch); + } + assert(! (in.peek() & (1<<7))); + //The next byte has a 0 as its first bit, so add it + char ch; + in.get(ch); + decoder_byte_count_vector.add_one_byte((uint8_t)ch); + + //The first (and only) value in the vector is the length of the zipcode + size_t decoder_byte_count = decoder_byte_count_vector.get_value_and_next_index(0).first; + +#ifdef DEBUG_ZIPCODE + cerr << "Get decoder of " << decoder_byte_count << " bytes" << endl; + //assert(decoder_byte_count >= 15); + assert(decoder_byte_count_vector.get_value_and_next_index(0).second == std::numeric_limits::max()); +#endif + + char line1 [decoder_byte_count]; + + in.read(line1, decoder_byte_count); + + varint_vector_t decoder_vector; + for (const char& character : line1) { + decoder_vector.add_one_byte(uint8_t(character)); + } + + if (decoder_vector.byte_count() != 0) { + size_t index = 0; + while (index != std::numeric_limits::max()) { + size_t is_chain, offset; + std::tie(is_chain, index) = decoder_vector.get_value_and_next_index(index); + std::tie(offset, index) = decoder_vector.get_value_and_next_index(index); + zip.decoder.emplace_back(is_chain != 0, offset); + } + } + + + zipcodes.emplace_back(std::move(zip)); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4b5de75b9dc..350ee85e489 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -356,7 +356,7 @@ class ZipCodeCollection { //magic number to identify the file const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t version = 2; + const static uint32_t version = 3; public: const static std::uint32_t get_magic_number() {return magic_number;} From a9dffbed512af75baaaa1da796f6284cf6f557df Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Sat, 3 Aug 2024 10:51:23 -0700 Subject: [PATCH 082/124] Revert "Serialize zipcode and decoder" This reverts 
commit eace4132904f3367f8e13634cd0966538a1158ba. --- src/unittest/zip_code.cpp | 26 +-------------- src/zip_code.cpp | 67 --------------------------------------- src/zip_code.hpp | 2 +- 3 files changed, 2 insertions(+), 93 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 788e61af79c..d72de04d546 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -1747,7 +1747,7 @@ using namespace std; REQUIRE(zipcode == decoded); }; } - SECTION("serialize without decoder") { + SECTION("serialization") { ZipCodeCollection zipcodes; for (size_t i = 1 ; i <= 7 ; i++) { ZipCode zip; @@ -1769,30 +1769,6 @@ using namespace std; } } - SECTION("serialize with decoder") { - ZipCodeCollection zipcodes; - for (size_t i = 1 ; i <= 7 ; i++) { - ZipCode zip; - zip.fill_in_zipcode(distance_index, make_pos_t(i, 0, false)); - zip.fill_in_full_decoder(); - zipcodes.emplace_back(zip); - } - ofstream out ("zipcodes"); - zipcodes.serialize(out); - out.close(); - - ifstream in("zipcodes"); - ZipCodeCollection new_zipcodes; - new_zipcodes.deserialize(in); - in.close(); - - REQUIRE(zipcodes.size() == new_zipcodes.size()); - for (size_t i = 0 ; i < zipcodes.size() ; i++) { - REQUIRE(zipcodes.at(i).zipcode == new_zipcodes.at(i).zipcode); - REQUIRE(zipcodes.at(i).decoder == new_zipcodes.at(i).decoder); - } - - } } TEST_CASE( "Looping chain zipcode", "[zipcode]" ) { VG graph; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 8f3dc6b01b9..a06d61c421f 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1799,26 +1799,6 @@ void ZipCodeCollection::serialize(std::ostream& out) const { #ifdef DEBUG_ZIPCODE assert(byte_count == zip_byte_count); #endif - - //Also save the decoder - varint_vector_t decoder_vector; - for (const ZipCode::decoder_t& d : zip.decoder) { - decoder_vector.add_value(d.is_chain); - decoder_vector.add_value(d.offset); - } - - //Write the number of bytes for the zipcode - varint_vector_t decoder_byte_count; - 
decoder_byte_count.add_value(decoder_vector.byte_count()); - for (const uint8_t& byte : decoder_byte_count.data) { - out << char(byte); - } - - - //Write the decoder - for (const uint8_t& byte : decoder_vector.data ) { - out << char(byte); - } } } @@ -1872,53 +1852,6 @@ void ZipCodeCollection::deserialize(std::istream& in) { for (const char& character : line) { zip.zipcode.add_one_byte(uint8_t(character)); } - - - //Now get the decoder - - varint_vector_t decoder_byte_count_vector; - while (in.peek() & (1<<7)) { - //If the first bit in the byte is 1, then add it, stop once the first bit is 0 - char ch; - in.get(ch); - decoder_byte_count_vector.add_one_byte((uint8_t)ch); - } - assert(! (in.peek() & (1<<7))); - //The next byte has a 0 as its first bit, so add it - char ch; - in.get(ch); - decoder_byte_count_vector.add_one_byte((uint8_t)ch); - - //The first (and only) value in the vector is the length of the zipcode - size_t decoder_byte_count = decoder_byte_count_vector.get_value_and_next_index(0).first; - -#ifdef DEBUG_ZIPCODE - cerr << "Get decoder of " << decoder_byte_count << " bytes" << endl; - //assert(decoder_byte_count >= 15); - assert(decoder_byte_count_vector.get_value_and_next_index(0).second == std::numeric_limits::max()); -#endif - - char line1 [decoder_byte_count]; - - in.read(line1, decoder_byte_count); - - varint_vector_t decoder_vector; - for (const char& character : line1) { - decoder_vector.add_one_byte(uint8_t(character)); - } - - if (decoder_vector.byte_count() != 0) { - size_t index = 0; - while (index != std::numeric_limits::max()) { - size_t is_chain, offset; - std::tie(is_chain, index) = decoder_vector.get_value_and_next_index(index); - std::tie(offset, index) = decoder_vector.get_value_and_next_index(index); - zip.decoder.emplace_back(is_chain != 0, offset); - } - } - - - zipcodes.emplace_back(std::move(zip)); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 350ee85e489..4b5de75b9dc 100644 --- a/src/zip_code.hpp +++ 
b/src/zip_code.hpp @@ -356,7 +356,7 @@ class ZipCodeCollection { //magic number to identify the file const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t version = 3; + const static uint32_t version = 2; public: const static std::uint32_t get_magic_number() {return magic_number;} From 05480d0d274f7ee44cf1e570620b85b58a363315 Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Sat, 3 Aug 2024 10:51:40 -0700 Subject: [PATCH 083/124] Revert "Only store zipcodes in a separate file" This reverts commit 116dc01020eb418dfcea9be56945eeb55c1cf7f5. --- src/subcommand/minimizer_main.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 23a46710149..73c30133801 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -396,7 +396,10 @@ int main_minimizer(int argc, char** argv) { } cout << endl; #endif - if (!zipcode_name.empty()) { + if (zipcode.zipcode.byte_count() < 15) { + //If the zipcode is small enough to store in the payload + return zipcode.get_payload_from_zip(); + } else if (!zipcode_name.empty()) { //Otherwise, if they are being saved, add the zipcode to the oversized zipcode list //And remember the zipcode From 0207c82542a824cc5118da443988199f033054d9 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 20:21:00 +0200 Subject: [PATCH 084/124] Undo putting zipcode and decoder together --- src/algorithms/chain_items.hpp | 16 +- src/min_width_int_vector.cpp | 53 --- src/min_width_int_vector.hpp | 57 --- src/minimizer_mapper.cpp | 3 +- src/minimizer_mapper.hpp | 8 +- src/minimizer_mapper_from_chains.cpp | 46 +-- src/snarl_seed_clusterer.cpp | 62 ++-- src/snarl_seed_clusterer.hpp | 36 +- src/subcommand/zipcode_main.cpp | 6 +- src/unittest/min_width_int_vector.cpp | 92 ----- src/unittest/snarl_seed_clusterer.cpp | 126 +------ src/unittest/zip_code.cpp | 516 +++++++++++++------------- src/unittest/zip_code_tree.cpp | 
56 --- src/zip_code.cpp | 377 ++++++++++--------- src/zip_code.hpp | 268 ++++++------- src/zip_code_tree.cpp | 160 ++++---- 16 files changed, 775 insertions(+), 1107 deletions(-) delete mode 100644 src/min_width_int_vector.cpp delete mode 100644 src/min_width_int_vector.hpp delete mode 100644 src/unittest/min_width_int_vector.cpp diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index 9511487034d..387be2f7806 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -107,8 +107,8 @@ class Anchor { /// Get the distance-finding hint information (i.e. "zip code") for /// accelerating distance queries to the start of this anchor, or null if /// none is set. - inline ZipCode* start_hint() const { - return start_zip; + inline ZipCodeDecoder* start_hint() const { + return start_decoder; } /// Get the graph distance from wherever the start hint is positioned back @@ -120,8 +120,8 @@ class Anchor { /// Get the distance-finding hint information (i.e. "zip code") for /// accelerating distance queries from the end of this anchor, or null if /// none is set. - inline ZipCode* end_hint() const { - return end_zip; + inline ZipCodeDecoder* end_hint() const { + return end_decoder; } /// Get the graph distance from wherever the end hint is positioned forward @@ -142,14 +142,14 @@ class Anchor { /// Compose a read start position, graph start position, and match length into an Anchor. /// Can also bring along a distance hint and a seed number. 
- inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCode* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_zip(hint), end_zip(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { + inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCodeDecoder* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_decoder(hint), end_decoder(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { // Nothing to do! } /// Compose two Anchors into an Anchor that represents coming in through /// the first one and going out through the second, like a tunnel. Useful /// for representing chains as chainable items. 
- inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_zip(first.start_hint()), end_zip(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { + inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_decoder(first.start_hint()), end_decoder(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { // Nothing to do! 
} @@ -170,8 +170,8 @@ class Anchor { int points; size_t start_seed; size_t end_seed; - ZipCode* start_zip; - ZipCode* end_zip; + ZipCodeDecoder* start_decoder; + ZipCodeDecoder* end_decoder; size_t start_offset; size_t end_offset; size_t seed_length; diff --git a/src/min_width_int_vector.cpp b/src/min_width_int_vector.cpp deleted file mode 100644 index 4d4e3215dba..00000000000 --- a/src/min_width_int_vector.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "min_width_int_vector.hpp" -#include -#include -#include - -//#define DEBUG_MININT - -namespace vg { -using namespace std; - -void min_width_int_vector_t::from_vector(const vector& input_data, size_t max_val) { - if (max_val != 0) { - width = std::max(width, 1 + (size_t)std::floor(std::log2(max_val))); - } else if (width == 0) { - //If we haven't already set the width, find it from the max value of the input data - for (const size_t& x : input_data) { - max_val = std::max(x, max_val); - } - width = 1 + (size_t)std::floor(std::log2(max_val)); - } - data.reserve(input_data.size()*width); - - for (const size_t& x : input_data) { - push_back(x); - } -} - -void min_width_int_vector_t::push_back(size_t val) { -#ifdef DEBUG_MININT - assert(width >= 1 + (size_t)std::floor(std::log2(val))); -#endif - for (size_t i = 0 ; i < width ; i++) { - data.emplace_back(val & (1 << (width - i - 1))); - } - -} - -size_t min_width_int_vector_t::size() const { - return data.size() / width; -} -size_t min_width_int_vector_t::at(size_t index) const { - size_t result = 0; - size_t start_index = index * width; - for (size_t i = 0 ; i < width ; i++) { - if (data[i + start_index]) { - result |= (1 << (width - i - 1)); - } - } - return result; -} - - -} diff --git a/src/min_width_int_vector.hpp b/src/min_width_int_vector.hpp deleted file mode 100644 index e4f76a762c3..00000000000 --- a/src/min_width_int_vector.hpp +++ /dev/null @@ -1,57 +0,0 @@ -#ifndef VG_MINWIDTH_INT_HPP_INCLUDED -#define VG_MINWIDTH_INT_HPP_INCLUDED - -#include -#include - -/** 
\file min_width_int_vector.hpp - * Methods for storing a vector of integers with minimal bit width - */ - -namespace vg{ -using namespace std; - -/* A struct to store a vector of integers with minimal bit width - */ -struct min_width_int_vector_t { - - public: - - min_width_int_vector_t () : - width(0) {} - - min_width_int_vector_t (size_t width) : - width(width) {} - - - ///Make this a copy of input_data - ///If maxval is set, then this is the maximum value in the input data, - /// or the maximum value to be stored with the bitwidth - ///If there is no max_val and the width has not already been set, get the - /// width from the maximum value in input_data - void from_vector(const vector& input_data, size_t max_val = 0); - - ///Add a value to the end of the vector - void push_back(size_t val); - - ///How long is the vector - size_t size() const; - - ///Get the value at the given index - size_t at(size_t index) const; - - //Check what the bit width is - size_t get_bitwidth() const { return width;} - - - private: - - /// The bit width that is being used to store the integers - /// This can be up to 64 - size_t width : 7; - - ///The actual data stored in the vector - std::vector data; -}; -} -#endif diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index c70d26f3cbf..f240b2f6a1b 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3757,7 +3757,8 @@ std::vector MinimizerMapper::find_seeds(const std::vector //If the zipcode was saved in the payload seeds.back().zipcode.fill_in_zipcode_from_payload(minimizer.occs[j].payload); } - seeds.back().zipcode.fill_in_full_decoder(); + ZipCodeDecoder* decoder = new ZipCodeDecoder(&seeds.back().zipcode); + seeds.back().zipcode_decoder.reset(decoder); } diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index 117e9b624bf..502f442543b 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -601,15 +601,15 @@ class MinimizerMapper : public AlignerClient { /// How do we 
convert chain info to an actual seed of the type we are using? /// Also needs to know the hit position, and the minimizer number. - inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip) { - return { hit, minimizer, zip}; + inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip, ZipCodeDecoder* decoder) { + return { hit, minimizer, zip, std::unique_ptr(decoder)}; } /// Convert a collection of seeds to a collection of chaining anchors. - std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const; + std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const; /// Convert a single seed to a single chaining anchor. - static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); + static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); /// Convert a read region, and the seeds that that region covers the /// stapled bases of (sorted by stapled base), into a single chaining diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index 00823cb63a0..4da269028eb 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -91,26 +91,26 @@ static pos_t forward_pos(const MinimizerMapper::Seed& seed, const VectorViewget_distance_index_address(0) == + end_seed1.zipcode_decoder->get_distance_index_address(0)); + assert(start_seed2.zipcode_decoder->get_distance_index_address(0) == + end_seed2.zipcode_decoder->get_distance_index_address(0)); #endif - if (start_seed1.zipcode.get_distance_index_address(0) != - start_seed2.zipcode.get_distance_index_address(0)) { + if 
(start_seed1.zipcode_decoder->get_distance_index_address(0) != + start_seed2.zipcode_decoder->get_distance_index_address(0)) { //If the two ranges are on different connected components return false; } - if (start_seed1.zipcode.get_code_type(0) == ZipCode::ROOT_SNARL) { + if (start_seed1.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_SNARL) { //If this is in a root snarl - if (start_seed1.zipcode.get_rank_in_snarl(1) != - start_seed2.zipcode.get_rank_in_snarl(1) + if (start_seed1.zipcode_decoder->get_rank_in_snarl(1) != + start_seed2.zipcode_decoder->get_rank_in_snarl(1) || - start_seed1.zipcode.get_rank_in_snarl(1) != - end_seed1.zipcode.get_rank_in_snarl(1) + start_seed1.zipcode_decoder->get_rank_in_snarl(1) != + end_seed1.zipcode_decoder->get_rank_in_snarl(1) || - start_seed2.zipcode.get_rank_in_snarl(1) != - end_seed2.zipcode.get_rank_in_snarl(1)) { + start_seed2.zipcode_decoder->get_rank_in_snarl(1) != + end_seed2.zipcode_decoder->get_rank_in_snarl(1)) { //If the two ranges are on different children of the snarl return false; } @@ -119,20 +119,20 @@ static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1 //Get the offset used for determining the range //On the top-level chain, node, or child of the top-level snarl auto get_seed_offset = [&] (const MinimizerMapper::Seed& seed) { - if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN) { - return seed.zipcode.get_offset_in_chain(1); - } else if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_NODE) { - return is_rev(seed.pos) ? seed.zipcode.get_length(0) - offset(seed.pos) + if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_CHAIN) { + return seed.zipcode_decoder->get_offset_in_chain(1); + } else if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_NODE) { + return is_rev(seed.pos) ? 
seed.zipcode_decoder->get_length(0) - offset(seed.pos) : offset(seed.pos); } else { //Otherwise, this is a top-level snarl, and we've already made sure that it's on the //same child chain/node - if (seed.zipcode.get_code_type(1) == ZipCode::CHAIN) { + if (seed.zipcode_decoder->get_code_type(1) == ZipCode::CHAIN) { //On a chain - return seed.zipcode.get_offset_in_chain(2); + return seed.zipcode_decoder->get_offset_in_chain(2); } else { //On a node - return is_rev(seed.pos) ? seed.zipcode.get_length(1) - offset(seed.pos) + return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(1) - offset(seed.pos) : offset(seed.pos); } } @@ -3861,7 +3861,7 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l return to_return; } -std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const { +std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const { std::vector to_return; to_return.reserve(seeds.size()); for (size_t i = 0; i < seeds.size(); i++) { @@ -3870,7 +3870,7 @@ std::vector MinimizerMapper::to_anchors(const Alignment& aln return to_return; } -algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { +algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { // Turn each seed into the part of its match on the node where the // anchoring end (start for forward-strand minimizers, end for // reverse-strand minimizers) falls. @@ -3928,7 +3928,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector // TODO: Always make sequence and quality available for scoring! 
// We're going to score the anchor as the full minimizer, and rely on the margins to stop us from taking overlapping anchors. int score = aligner->score_exact_match(aln, read_start - margin_left, length + margin_right); - return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, &(seed.zipcode), hint_start); + return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, seed.zipcode_decoder.get(), hint_start); } algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, size_t read_start, size_t read_end, const std::vector& sorted_seeds, const std::vector& seed_anchors, const std::vector::const_iterator& mismatch_begin, const std::vector::const_iterator& mismatch_end, const HandleGraph& graph, const Aligner* aligner) { diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 31579b53103..6dbb291b647 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -426,14 +426,14 @@ cerr << "Add all seeds to nodes: " << endl; 
clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode.max_depth()); + &seed, seed.seed->zipcode_decoder->max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, - &seed, seed.seed->zipcode.max_depth() - 1); + &seed, seed.seed->zipcode_decoder->max_depth() - 1); } new_parent = true; @@ -532,7 +532,7 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode.max_depth()); + &seed, seed.seed->zipcode_decoder->max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; @@ -637,7 +637,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? 
snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; @@ -711,7 +711,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -721,17 +721,17 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? 
ZipCode::EMPTY - : chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1); + : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - !chain_problem->seed->seed->zipcode.is_externally_start_start_connected(0) && - !chain_problem->seed->seed->zipcode.is_externally_start_end_connected(0) && - !chain_problem->seed->seed->zipcode.is_externally_end_end_connected(0) && - !chain_problem->seed->seed->zipcode.get_is_looping_chain(0); + !chain_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(0) && + !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(0); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -760,32 +760,32 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode.max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() ? 
false - : chain_problem->seed->seed->zipcode.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); chain_problem->distance_start_right = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); chain_problem->distance_end_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); chain_problem->distance_end_right = snarl_child_is_rev - ? 
chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER - cerr << "For child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; - cerr << "For parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; - cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " - << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + cerr << "For child type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << 
endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " @@ -1443,15 +1443,15 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust //Get the distances between the two sides of the child size_t distance_left_left = - child_problem->seed->seed->zipcode.is_externally_start_start_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_left_right = - child_problem->seed->seed->zipcode.is_externally_start_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_right_right = - child_problem->seed->seed->zipcode.is_externally_end_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); if (distance_left_left == std::numeric_limits::max() && @@ -1597,7 +1597,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. 
Otherwise, compare the children of the snarl - if (snarl_problem->seed->seed->zipcode.get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { + if (snarl_problem->seed->seed->zipcode_decoder->get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 22f8478e6ff..239d1e0d182 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -70,23 +70,42 @@ class SnarlDistanceIndexClusterer { pos_t pos; size_t source; // Source minimizer. ZipCode zipcode; //zipcode for distance information, optionally stored in the minimizer payload + //TODO: unique_ptr? + std::unique_ptr zipcode_decoder; //The decoder for the zipcode Seed() = default; Seed(pos_t pos, size_t source, ZipCode zipcode) : pos(pos), source(source), zipcode(zipcode) { - zipcode.fill_in_full_decoder(); + ZipCodeDecoder* decoder = new ZipCodeDecoder(&this->zipcode); + zipcode_decoder.reset(decoder); + zipcode_decoder->fill_in_full_decoder(); + } + Seed(pos_t pos, size_t source, ZipCode zipcode, std::unique_ptr zipcode_decoder) : + pos(pos), source(source), zipcode(zipcode), zipcode_decoder(std::move(zipcode_decoder)){ + if (zipcode_decoder) { + zipcode_decoder->zipcode = &zipcode; + } } //Move constructor Seed (Seed&& other) : pos(std::move(other.pos)), source(std::move(other.source)), - zipcode(std::move(other.zipcode)){} + zipcode(std::move(other.zipcode)), + zipcode_decoder(std::move(other.zipcode_decoder)) { + if (zipcode_decoder) { + zipcode_decoder->zipcode = &zipcode; + } + } //Move assignment operator Seed& operator=(Seed&& other) { pos = std::move(other.pos); source = std::move(other.source); zipcode = std::move(other.zipcode); + zipcode_decoder = std::move(other.zipcode_decoder); + if (zipcode_decoder) { + zipcode_decoder->zipcode = &zipcode; + } return *this; } }; @@ -102,6 +121,9 @@ class 
SnarlDistanceIndexClusterer { //TODO: I think I can skip the zipcode now since I have the payload MIPayload payload; + //TODO: This doesn't actually get used but I'll use it if I use the zipcodes properly + //std::unique_ptr zipcode_decoder; + //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds //For a seed in a chain, distance_left is the left of the chain, right is the distance @@ -294,18 +316,18 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); + is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); node_length = distance_index.chain_minimum_length(containing_net_handle); - chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); - is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); + chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); + is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); + node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); + chain_component_start = seed->seed->zipcode_decoder->get_chain_component(zipcode_depth); chain_component_end = node_length == std::numeric_limits::max() 
? chain_component_start+1 : chain_component_start; prefix_sum_value = SnarlDistanceIndex::sum( diff --git a/src/subcommand/zipcode_main.cpp b/src/subcommand/zipcode_main.cpp index 4e61724c04a..a4649cb5808 100644 --- a/src/subcommand/zipcode_main.cpp +++ b/src/subcommand/zipcode_main.cpp @@ -260,14 +260,14 @@ int main_zipcode(int argc, char** argv) { //Get zip codes ZipCode zip1; zip1.fill_in_zipcode(*distance_index, pos1); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(*distance_index, pos2); - zip2.fill_in_full_decoder(); + ZipCodeDecoder decoder1(&zip1); + ZipCodeDecoder decoder2(&zip2); //Time finding distance with the zip codes std::chrono::time_point start = std::chrono::system_clock::now(); - size_t zip_distance = ZipCode::minimum_distance_between(zip1, pos1, zip2, pos2, *distance_index); + size_t zip_distance = ZipCode::minimum_distance_between(decoder1, pos1, decoder2, pos2, *distance_index); std::chrono::time_point end = std::chrono::system_clock::now(); std::chrono::duration elapsed_seconds = end-start; elapsed_seconds_zip.emplace_back(elapsed_seconds.count()); diff --git a/src/unittest/min_width_int_vector.cpp b/src/unittest/min_width_int_vector.cpp deleted file mode 100644 index f61ec4b6ff3..00000000000 --- a/src/unittest/min_width_int_vector.cpp +++ /dev/null @@ -1,92 +0,0 @@ -#include "catch.hpp" -#include -#include -#include "../min_width_int_vector.hpp" - -namespace vg{ -namespace unittest{ -using namespace std; - - TEST_CASE("Array of ints added one at a time", "[minint]") { - SECTION ("[0]") { - min_width_int_vector_t minint_vector (1); - minint_vector.push_back(0); - REQUIRE(minint_vector.size() == 1); - REQUIRE(minint_vector.at(0) == 0); - } - SECTION ("[1]") { - min_width_int_vector_t minint_vector (1); - minint_vector.push_back(1); - REQUIRE(minint_vector.size() == 1); - REQUIRE(minint_vector.at(0) == 1); - } - SECTION ("[1, 2]") { - min_width_int_vector_t minint_vector(2); - minint_vector.push_back(1); - 
minint_vector.push_back(2); - REQUIRE(minint_vector.size() == 2); - REQUIRE(minint_vector.at(0) == 1); - REQUIRE(minint_vector.at(1) == 2); - } - SECTION ("more values") { - vector values {1, 3243, 123634, 53454, 0}; - min_width_int_vector_t minint_vector(1+(size_t)std::floor(std::log2(123634))); - for (auto& x : values) { - minint_vector.push_back(x); - } - assert(minint_vector.size() == values.size()); - for (size_t i = 0 ; i < values.size() ; i++) { - assert(minint_vector.at(i) == values[i]); - } - } - } - TEST_CASE("Array of ints from vector", "[minint]") { - SECTION ("[0]") { - vector original {0}; - min_width_int_vector_t minint_vector; - minint_vector.from_vector(original); - REQUIRE(minint_vector.size() == 1); - REQUIRE(minint_vector.at(0) == 0); - REQUIRE(minint_vector.get_bitwidth() == 1); - } - SECTION ("[1]") { - vector original {1}; - min_width_int_vector_t minint_vector; - minint_vector.from_vector(original); - REQUIRE(minint_vector.size() == 1); - REQUIRE(minint_vector.at(0) == 1); - REQUIRE(minint_vector.get_bitwidth() == 1); - } - SECTION ("[1, 2]") { - vector original {1, 2}; - min_width_int_vector_t minint_vector; - minint_vector.from_vector(original); - - REQUIRE(minint_vector.size() == 2); - REQUIRE(minint_vector.at(0) == 1); - REQUIRE(minint_vector.at(1) == 2); - REQUIRE(minint_vector.get_bitwidth() == 2); - } - SECTION ("more values") { - vector values {1, 3243, 123634, 53454, 0}; - min_width_int_vector_t minint_vector (3); - minint_vector.from_vector(values, 123634); - REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); - assert(minint_vector.size() == values.size()); - for (size_t i = 0 ; i < values.size() ; i++) { - assert(minint_vector.at(i) == values[i]); - } - } - SECTION ("more values without bitwidth") { - vector values {1, 3243, 123634, 53454, 0}; - min_width_int_vector_t minint_vector; - minint_vector.from_vector(values); - assert(minint_vector.size() == values.size()); - for (size_t i = 0 ; i < 
values.size() ; i++) { - assert(minint_vector.at(i) == values[i]); - } - REQUIRE(minint_vector.get_bitwidth() == 1+(size_t)std::floor(std::log2(123634))); - } - } -} -} diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index ce7dde12972..cc19c928773 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -44,7 +44,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -88,7 +87,6 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -123,7 +121,6 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -161,7 +158,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -211,7 +207,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -229,7 +224,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -247,7 +241,6 @@ namespace unittest { pos_t pos = 
make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -265,18 +258,15 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); - zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); - zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -293,18 +283,15 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); - zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); - zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -358,7 +345,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -376,7 +362,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -394,7 +379,6 @@ namespace unittest { pos_t pos = 
make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -412,18 +396,15 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); - zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); - zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -440,18 +421,15 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); - zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); - zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -499,7 +477,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -519,7 +496,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -585,7 +561,6 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -601,7 +576,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 1); @@ -617,7 +591,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -633,7 +606,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -649,7 +621,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -665,7 +636,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -683,7 +653,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 8); @@ -699,7 +668,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 4); @@ -774,7 +742,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -801,7 +768,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -824,7 +790,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -877,7 +842,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -985,7 +949,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1004,7 +967,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1024,7 +986,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -1043,7 +1004,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 10); @@ -1062,7 +1022,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 11); @@ -1109,7 +1068,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1127,7 +1085,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1145,13 +1102,11 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(4, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); - zipcode1.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode1}); vector> clusters = clusterer.cluster_seeds(seeds, 3, 3); @@ -1168,7 +1123,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1221,7 +1175,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1239,7 +1192,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1256,7 +1208,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, 
zipcode}); } @@ -1274,7 +1225,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1337,7 +1287,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1355,7 +1304,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1402,7 +1350,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1410,7 +1357,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1437,7 +1383,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1445,7 +1390,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1472,7 +1416,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1480,7 +1423,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ 
pos, 0, zipcode}); } @@ -1508,7 +1450,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1516,7 +1457,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1579,7 +1519,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1597,7 +1536,6 @@ namespace unittest { for (pos_t pos : pos_ts) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1653,7 +1591,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1669,7 +1606,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1684,7 +1620,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1732,7 +1667,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1781,7 +1715,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -1798,7 
+1731,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1843,7 +1775,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 20); @@ -1860,7 +1791,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1939,7 +1869,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1992,7 +1921,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2025,7 +1953,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -2039,7 +1966,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2078,7 +2004,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2121,7 +2046,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2179,7 +2103,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2199,7 +2122,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2216,7 +2138,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2235,7 +2156,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2306,7 +2226,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2324,7 +2243,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2342,7 +2260,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2361,7 +2278,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({8, 12}); @@ -2370,7 +2286,6 @@ 
namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2393,7 +2308,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2411,7 +2325,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2477,7 +2390,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2494,7 +2406,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2512,7 +2423,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2530,8 +2440,7 @@ namespace unittest { for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); + zipcode.fill_in_zipcode(dist_index, pos);; seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2539,8 +2448,7 @@ namespace unittest { for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); + zipcode.fill_in_zipcode(dist_index, pos);; seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2571,7 +2479,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; 
zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2580,7 +2487,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2648,7 +2554,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2667,7 +2572,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2688,7 +2592,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2714,7 +2617,6 @@ namespace unittest { for (pos_t pos : pos_ts[read_num]){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -2743,7 +2645,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2801,7 +2702,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2820,7 +2720,6 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, 
zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 6); @@ -2836,7 +2735,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2891,7 +2789,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2907,7 +2804,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2923,7 +2819,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2940,7 +2835,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2980,7 +2874,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3026,7 +2919,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3043,7 +2935,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3060,7 +2951,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3097,7 +2987,6 @@ 
namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({pos, 0, zipcode}); } @@ -3142,7 +3031,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3159,7 +3047,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3175,7 +3062,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3191,7 +3077,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3233,7 +3118,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3250,7 +3134,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3266,7 +3149,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3282,7 +3164,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3321,7 +3202,6 @@ namespace unittest { // for (pos_t pos : pos_ts) { // ZipCode zipcode; // 
zipcode.fill_in_zipcode(dist_index, pos); - // zipcode.fill_in_full_decoder(); // seeds.push_back({ pos, 0, zipcode}); // } // vector clusters = clusterer.cluster_seeds(seeds, read_lim); @@ -3372,7 +3252,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -3440,7 +3319,6 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); - zipcode.fill_in_full_decoder(); all_seeds[read].push_back({ pos, 0, zipcode}); } diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index d72de04d546..ed8b83e6761 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -45,21 +45,22 @@ using namespace std; SECTION("decoder") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 1); - REQUIRE(zipcode.decoder.front().is_chain == 1); - REQUIRE(zipcode.decoder.front().offset == 0); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 1); + REQUIRE(decoder.decoder.front().is_chain == 1); + REQUIRE(decoder.decoder.front().offset == 0); } SECTION("decoded code") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); - REQUIRE(zipcode.get_length(0) == distance_index.minimum_length(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_NODE); + ZipCodeDecoder decoder(&zipcode); + + REQUIRE(decoder.get_length(0) == distance_index.minimum_length(chain1)); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_NODE); } SECTION("n1 as payload") { ZipCode zipcode; @@ -74,9 +75,9 @@ using namespace std; SECTION("Distances within one node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - 
zipcode.fill_in_full_decoder(); - REQUIRE(ZipCode::minimum_distance_between(zipcode, make_pos_t(n1->id(), false, 0), - zipcode, make_pos_t(n1->id(), false, 3), + ZipCodeDecoder decoder(&zipcode); + REQUIRE(ZipCode::minimum_distance_between(decoder, make_pos_t(n1->id(), false, 0), + decoder, make_pos_t(n1->id(), false, 3), distance_index) == 3); } @@ -110,14 +111,14 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 2); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -134,7 +135,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -158,34 +159,34 @@ using namespace std; SECTION ("decoded zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); + ZipCodeDecoder 
decoder(&zipcode); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); //Next is the node code - REQUIRE(zipcode.get_code_type( 1) == ZipCode::NODE); - REQUIRE(zipcode.get_length( 1) == distance_index.minimum_length(node1)); - REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(decoder.get_code_type( 1) == ZipCode::NODE); + REQUIRE(decoder.get_length( 1) == distance_index.minimum_length(node1)); + REQUIRE(decoder.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); + REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node in simple snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 3); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 3); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -202,7 +203,7 @@ using namespace std; //Next is the snarl code //1 for a regular snarl - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -232,7 +233,7 @@ using namespace std; //Next is the chain code //rank of the chain in the snarl - REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); @@ -253,78 +254,78 @@ using namespace std; SECTION ("decoded zip code for node in simple snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zipcode.fill_in_full_decoder(); + ZipCodeDecoder decoder(&zipcode); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl36 = distance_index.get_parent(chain4); net_handle_t chain1 = distance_index.get_parent(snarl36); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); //values for the snarl - REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(snarl36)); - REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 5 : 6)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(decoder.get_length(1) == distance_index.minimum_length(snarl36)); + REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 
5 : 6)); + REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), distance_index.flip(chain4)) != 0; //values for the chain - REQUIRE(zipcode.get_length(2) == distance_index.minimum_length(chain4)); - REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(decoder.get_length(2) == distance_index.minimum_length(chain4)); + REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); - zip6.fill_in_full_decoder(); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip2, make_pos_t(n2->id(), false, 0), + ZipCodeDecoder decoder1(&zip1); + ZipCodeDecoder decoder2(&zip2); + ZipCodeDecoder decoder3(&zip3); + ZipCodeDecoder decoder4(&zip4); + ZipCodeDecoder decoder5(&zip5); + ZipCodeDecoder decoder6(&zip6); + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - 
REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder3, make_pos_t(n3->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 2), - zip1, make_pos_t(n1->id(), true, 2), + REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 2), + decoder1, make_pos_t(n1->id(), true, 2), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder4, make_pos_t(n4->id(), false, 0), distance_index) == 6); - REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), - zip4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), + decoder4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), - zip4, make_pos_t(n4->id(), false, 1), + REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), + decoder4, make_pos_t(n4->id(), false, 1), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder6, make_pos_t(n6->id(), false, 0), distance_index) == 7); } @@ -425,11 +426,11 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 2); + ZipCodeDecoder decoder(&zipcode); + 
REQUIRE(decoder.decoder_length() == 2); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -449,7 +450,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -476,31 +477,31 @@ using namespace std; SECTION ("decode zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); - REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(node1)); - REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::NODE); - REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(decoder.get_length(1) == distance_index.minimum_length(node1)); + REQUIRE(decoder.get_offset_in_chain(1) == 
distance_index.get_prefix_sum_value(node1)); + REQUIRE(decoder.get_code_type(1) == ZipCode::NODE); + REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 4); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 4); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -518,7 +519,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -549,7 +550,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Next is the chain code - REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -565,7 +566,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the node code - REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[3] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //Offset of the node in the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 
distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); @@ -590,45 +591,45 @@ using namespace std; SECTION ("decode zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t chain2 = distance_index.get_parent(node2); net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(zipcode.get_length(1) == 0); - REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(decoder.get_length(1) == 0); + REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl1, distance_index.get_bound(snarl1, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(zipcode.get_length(2) == 3); - REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(decoder.get_length(2) == 3); + REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); //Node at depth 3 - REQUIRE(zipcode.get_length(3) == 1); - REQUIRE(zipcode.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); - REQUIRE(zipcode.get_code_type(3) == ZipCode::NODE); - REQUIRE(zipcode.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); + REQUIRE(decoder.get_length(3) == 1); + REQUIRE(decoder.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); + REQUIRE(decoder.get_code_type(3) == ZipCode::NODE); + REQUIRE(decoder.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); } SECTION ("zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 7); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 7); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -647,7 +648,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 1-8 - 
REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -677,7 +678,7 @@ using namespace std; distance_index.flip(distance_index.canonical(chain2))) != 0; REQUIRE(value_and_index.first == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( @@ -692,7 +693,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 2-7 - REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[3] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); //1 as tag for regular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -721,7 +722,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //Chain code for chain 3-5 - REQUIRE(zipcode.decoder[4] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[4] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //Rank in parent value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); @@ -735,7 +736,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //REgular snarl code for 
snarl 3-5 - REQUIRE(zipcode.decoder[5] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[5] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -764,7 +765,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Chain code for node 4 - REQUIRE(zipcode.decoder[6] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[6] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; @@ -786,7 +787,6 @@ using namespace std; SECTION ("decoded zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl3 = distance_index.get_parent(chain4); @@ -796,118 +796,119 @@ using namespace std; net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(zipcode.get_length(1) == 0); - REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(decoder.get_length(1) == 0); + REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); - REQUIRE(zipcode.get_length(2) == 3); - REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(decoder.get_length(2) == 3); + REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); //Snarl at depth 3 - REQUIRE(zipcode.get_length(3) == 1); - REQUIRE(zipcode.get_offset_in_chain(3) == 1); - REQUIRE(zipcode.get_code_type(3) == ZipCode::REGULAR_SNARL); + REQUIRE(decoder.get_length(3) == 1); + REQUIRE(decoder.get_offset_in_chain(3) == 1); + REQUIRE(decoder.get_code_type(3) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain3); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain3))) != 0; //Chain at depth 4 - REQUIRE(zipcode.get_is_reversed_in_parent(4) == is_rev); - REQUIRE(zipcode.get_length(4) == distance_index.minimum_length(chain3)); - REQUIRE(zipcode.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(zipcode.get_code_type(4) == ZipCode::CHAIN); + REQUIRE(decoder.get_is_reversed_in_parent(4) == is_rev); + REQUIRE(decoder.get_length(4) == distance_index.minimum_length(chain3)); + REQUIRE(decoder.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(decoder.get_code_type(4) == ZipCode::CHAIN); //Snarl3 at depth 5 - REQUIRE(zipcode.get_length(5) == 0); - REQUIRE(zipcode.get_offset_in_chain(5) == 
(distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); - REQUIRE(zipcode.get_code_type(5) == ZipCode::REGULAR_SNARL); + REQUIRE(decoder.get_length(5) == 0); + REQUIRE(decoder.get_offset_in_chain(5) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); + REQUIRE(decoder.get_code_type(5) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain4); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; //node/chain at depth 6 - REQUIRE(zipcode.get_is_reversed_in_parent(6) == is_rev); - REQUIRE(zipcode.get_length(6) == 4); - REQUIRE(zipcode.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(zipcode.get_code_type(6) == ZipCode::CHAIN); + REQUIRE(decoder.get_is_reversed_in_parent(6) == is_rev); + REQUIRE(decoder.get_length(6) == 4); + REQUIRE(decoder.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(decoder.get_code_type(6) == ZipCode::CHAIN); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); - zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); - zip7.fill_in_full_decoder(); ZipCode zip8; zip8.fill_in_zipcode(distance_index, 
make_pos_t(n8->id(), 0, false)); - zip8.fill_in_full_decoder(); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip2, make_pos_t(n2->id(), false, 0), + ZipCodeDecoder decoder1 (&zip1); + ZipCodeDecoder decoder2 (&zip2); + ZipCodeDecoder decoder3 (&zip3); + ZipCodeDecoder decoder4 (&zip4); + ZipCodeDecoder decoder5 (&zip5); + ZipCodeDecoder decoder6 (&zip6); + ZipCodeDecoder decoder7 (&zip7); + ZipCodeDecoder decoder8 (&zip8); + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder6, make_pos_t(n6->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder7, make_pos_t(n7->id(), false, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), - zip7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), + decoder7, make_pos_t(n7->id(), false, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), - zip8, make_pos_t(n8->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), + decoder8, make_pos_t(n8->id(), false, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), - zip6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), + decoder6, make_pos_t(n6->id(), false, 0), distance_index) == 
std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), - zip8, make_pos_t(n8->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), + decoder8, make_pos_t(n8->id(), true, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), - zip6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), + decoder6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip7, make_pos_t(n7->id(), true, 0), - zip2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder7, make_pos_t(n7->id(), true, 0), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 2); } @@ -1047,11 +1048,11 @@ using namespace std; SECTION ("zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 3); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 3); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1070,7 +1071,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); //0 as tag for irregular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2); @@ -1118,7 +1119,7 @@ using namespace std; 
//REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //Rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1137,108 +1138,105 @@ using namespace std; SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); net_handle_t snarl1 = distance_index.get_parent(chain3); net_handle_t chain1 = distance_index.get_parent(snarl1); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl1 at depth 1 - REQUIRE(zipcode.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 6 : 3)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::CYCLIC_SNARL); + REQUIRE(decoder.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
6 : 3)); + REQUIRE(decoder.get_code_type(1) == ZipCode::CYCLIC_SNARL); //chain3 at depth 3 - REQUIRE(zipcode.get_length(2) == 1); - REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(decoder.get_length(2) == 1); + REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); bool chain_is_rev = distance_index.is_reversed_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))); //node1 to left side of node 3 - REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); + REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); //Node 1 to right side of node 3 - REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); + REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); //node4 to left side of node 3 - REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); + REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); //Node 4 to right side of node 3 - REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); + REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zip4.fill_in_full_decoder(); ZipCode zip5; 
zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); - zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); - zip7.fill_in_full_decoder(); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip2, make_pos_t(n2->id(), false, 0), + ZipCodeDecoder decoder1(&zip1); + ZipCodeDecoder decoder2(&zip2); + ZipCodeDecoder decoder3(&zip3); + ZipCodeDecoder decoder4(&zip4); + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder3, make_pos_t(n3->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), - zip1, make_pos_t(n1->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), + decoder1, make_pos_t(n1->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder4, make_pos_t(n4->id(), false, 0), distance_index) == 3); //Shouldn't take the loop in the chain - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), - zip1, make_pos_t(n1->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), + decoder1, make_pos_t(n1->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), - zip2, 
make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), - zip4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), + decoder4, make_pos_t(n4->id(), false, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), - zip2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), - zip2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), - zip2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 0), - zip2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 0), + decoder2, make_pos_t(n2->id(), true, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 1), - zip4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 1), + decoder4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); } @@ -1343,11 +1341,11 @@ using namespace std; SECTION ("zip code for node in top-level snarl") { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 2); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 2); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1358,7 +1356,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); @@ -1369,32 +1367,32 @@ using namespace std; SECTION ("decoded zip code for node in top-level snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); + ZipCodeDecoder decoder(&zipcode); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); net_handle_t root_snarl = distance_index.get_parent(chain1); //Root snarl - REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == distance_index.canonical(distance_index.get_parent(chain1))); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); //Chain1 at depth 1 - REQUIRE(zipcode.get_length(1) == 3); - REQUIRE(zipcode.get_rank_in_snarl(1) == 
distance_index.get_rank_in_parent(chain1)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(decoder.get_length(1) == 3); + REQUIRE(decoder.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); + REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); } SECTION ("zip code for node in chain in top-level snarl") { net_handle_t node1 = distance_index.get_node_net_handle(n3->id()); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 3); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 3); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1405,7 +1403,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1417,7 +1415,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Node 3 - REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); @@ -1432,69 +1430,67 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zipcode.fill_in_full_decoder(); + ZipCodeDecoder decoder(&zipcode); //Root snarl - REQUIRE(zipcode.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); - REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(decoder.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); + REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); //chain2 at depth 1 - REQUIRE(zipcode.get_length(1) == 2); - REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(decoder.get_length(1) == 2); + REQUIRE(decoder.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); //node3 at depth 2 - REQUIRE(zipcode.get_length(2) == 1); - REQUIRE(zipcode.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)); - REQUIRE(zipcode.get_code_type(2) == ZipCode::NODE); - REQUIRE(zipcode.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); + REQUIRE(decoder.get_length(2) == 1); + REQUIRE(decoder.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)); + REQUIRE(decoder.get_code_type(2) == ZipCode::NODE); + REQUIRE(decoder.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); - zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); - zip7.fill_in_full_decoder(); - - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip2, make_pos_t(n2->id(), false, 0), + ZipCodeDecoder zip_decoder1(&zip1); + ZipCodeDecoder zip_decoder2(&zip2); + ZipCodeDecoder zip_decoder3(&zip3); + ZipCodeDecoder zip_decoder6(&zip6); + ZipCodeDecoder zip_decoder7(&zip7); + + REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), + zip_decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), true, 0), - zip2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), true, 0), + zip_decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), + zip_decoder3, make_pos_t(n3->id(), 
false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip3, make_pos_t(n3->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), + zip_decoder3, make_pos_t(n3->id(), true, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), + zip_decoder6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip6, make_pos_t(n6->id(), false, 0), - zip7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip_decoder6, make_pos_t(n6->id(), false, 0), + zip_decoder7, make_pos_t(n7->id(), false, 0), distance_index) == 1); } @@ -1601,14 +1597,14 @@ using namespace std; net_handle_t grandparent = distance_index.get_parent(parent); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - zipcode.fill_in_full_decoder(); - REQUIRE(zipcode.decoder_length() == 2); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); + REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -1625,7 +1621,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); + REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -1650,10 +1646,8 @@ using namespace std; SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), false, 0)); - zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), false, 0)); - zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), false, 0)); ZipCode zip4; @@ -1665,8 +1659,10 @@ using namespace std; ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), false, 0)); - REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), - zip2, make_pos_t(n2->id(), false, 0), + ZipCodeDecoder decoder1(&zip1); + ZipCodeDecoder decoder2(&zip2); + REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), + decoder2, make_pos_t(n2->id(), false, 0), distance_index) == 3); @@ -1796,30 +1792,30 @@ using namespace std; SECTION( "node2" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t parent = distance_index.get_parent(node2); net_handle_t bound = distance_index.get_bound(parent, true, false); - REQUIRE(zipcode.decoder_length() == 2); + ZipCodeDecoder decoder(&zipcode); + REQUIRE(decoder.decoder_length() == 2); - REQUIRE(distance_index.minimum_length(node2) == zipcode.get_length(1)); - REQUIRE(zipcode.get_chain_component(1) == distance_index.get_chain_component(node2)); - REQUIRE(zipcode.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); - REQUIRE(zipcode.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); - REQUIRE(zipcode.get_is_looping_chain(0)); + 
REQUIRE(distance_index.minimum_length(node2) == decoder.get_length(1)); + REQUIRE(decoder.get_chain_component(1) == distance_index.get_chain_component(node2)); + REQUIRE(decoder.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); + REQUIRE(decoder.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); + REQUIRE(decoder.get_is_looping_chain(0)); } SECTION( "node5" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); - zipcode.fill_in_full_decoder(); net_handle_t node = distance_index.get_node_net_handle(n5->id()); net_handle_t parent = distance_index.get_parent(node); net_handle_t bound = distance_index.get_bound(parent, true, false); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.minimum_length(node) == zipcode.get_length(zipcode.max_depth())); + REQUIRE(distance_index.minimum_length(node) == decoder.get_length(decoder.max_depth())); } } TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { @@ -1852,14 +1848,14 @@ using namespace std; SECTION( "Check connectivity" ) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); - zipcode.fill_in_full_decoder(); + ZipCodeDecoder decoder(&zipcode); - REQUIRE(zipcode.get_length(1) == 1); + REQUIRE(decoder.get_length(1) == 1); if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { - REQUIRE(zipcode.is_externally_end_end_connected(0)); + REQUIRE(decoder.is_externally_end_end_connected(0)); } else { - REQUIRE(zipcode.is_externally_start_start_connected(0)); + REQUIRE(decoder.is_externally_start_start_connected(0)); } } diff --git a/src/unittest/zip_code_tree.cpp b/src/unittest/zip_code_tree.cpp index 3e3765948df..409f386a50d 100644 --- a/src/unittest/zip_code_tree.cpp +++ b/src/unittest/zip_code_tree.cpp @@ -40,7 +40,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -85,7 +84,6 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -156,7 +154,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -267,7 +264,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -390,7 +386,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -437,7 +432,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -500,7 +494,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -585,7 +578,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -635,7 +627,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -769,7 +760,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -844,7 
+834,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -882,7 +871,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -920,7 +908,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -957,7 +944,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -992,7 +978,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1018,7 +1003,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1045,7 +1029,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1072,7 +1055,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1099,7 +1081,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1157,7 +1138,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1215,7 +1195,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1271,7 +1250,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1373,7 +1351,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1438,7 +1415,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1530,7 +1506,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1563,7 +1538,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1594,7 +1568,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1620,7 +1593,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1648,7 +1620,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1676,7 +1647,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1703,7 +1673,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1806,7 +1775,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1838,7 +1806,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1868,7 +1835,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1900,7 +1866,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1958,7 +1923,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2029,7 +1993,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2100,7 +2063,6 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2144,7 +2106,6 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2223,7 +2184,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2278,7 +2238,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2323,7 +2282,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2366,7 +2324,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2416,7 +2373,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2467,7 +2423,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2533,7 +2488,6 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2598,7 +2552,6 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2619,7 +2572,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2662,7 +2614,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2682,7 +2633,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2727,7 +2677,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2747,7 +2696,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2767,7 +2715,6 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2832,7 +2779,6 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2878,7 +2824,6 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); } distance_index.for_each_child(distance_index.get_root(), [&](net_handle_t child) { @@ -2945,7 +2890,6 @@ namespace unittest { 
ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); - zipcode.fill_in_full_decoder(); seeds.push_back({ pos, (size_t)j, zipcode}); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a06d61c421f..7f45122fbff 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -137,13 +137,20 @@ void ZipCode::from_vector(const std::vector& values) { zipcode.from_vector(values); } +ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : + zipcode(zipcode), decoder(0), finished_decoding(false) { + if (zipcode != nullptr) { + decoder.reserve(zipcode->byte_count() / 4); + fill_in_full_decoder(); + } +} -void ZipCode::fill_in_full_decoder() { - if (byte_count() == 0 || finished_decoding) { +void ZipCodeDecoder::fill_in_full_decoder() { + if (zipcode->byte_count() == 0 || finished_decoding) { //If the zipcode is empty return; } - decoder.reserve(byte_count() / 4); + decoder.reserve(zipcode->byte_count() / 4); bool done=false; while (!done) { done = fill_in_next_decoder(); @@ -151,7 +158,7 @@ void ZipCode::fill_in_full_decoder() { finished_decoding = true; } -bool ZipCode::fill_in_next_decoder() { +bool ZipCodeDecoder::fill_in_next_decoder() { #ifdef DEBUG_ZIPCODE cerr << "Decode one more thing in the zipcode. Currently decoded " << decoder_length() << " things" << endl; #endif @@ -172,7 +179,7 @@ bool ZipCode::fill_in_next_decoder() { if (zip_length == 0) { //If there is nothing in the decoder yet, then the first thing will start at 0 for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } //Is the root a chain/node? @@ -195,7 +202,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? 
"chain or node" assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_index == std::numeric_limits::max()) { //If the zip code ends here (after the length), then this was a node and we're done @@ -211,7 +218,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //If it's a node, then there are three remaining things in the index //If it were a snarl, then there are more than three things for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } @@ -226,7 +233,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } else { //Otherwise, the top-level thing is a snarl and the next thing is a chain for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -258,7 +265,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //chain size_t check_zip_index = zip_index; for (size_t i = 0 ; i < std::min(ZipCode::CHAIN_SIZE, ZipCode::NODE_SIZE) ; i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; } //If the zipcode ends after a chain if (check_zip_index == std::numeric_limits::max()) { @@ -271,7 +278,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Now check 
if it was actually a real node for (size_t i = 0 ; i < std::max(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE) - std::min(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE); i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; } //This might be a node that is a child of the chain, in which case there is one @@ -291,7 +298,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Otherwise, the last thing was a chain //Get to the end of the chain for (size_t i = 0 ; i < ZipCode::CHAIN_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } //zip_index is now the start of the current thing that we want to add - the thing after the chain @@ -306,7 +313,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Check if the current thing is a node check_zip_index = zip_index; for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; } //Return the start of this thing, and true if it was a node @@ -322,7 +329,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //The regular/irregular snarl tag for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { @@ -331,7 +338,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #endif //Regular snarl, so 2 remaining things in the code for (size_t i = 0 ; i < ZipCode::REGULAR_SNARL_SIZE - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - 
std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -343,7 +350,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //is a top-level irregular snarl. Otherwise a normal irregular snarl size_t code_size = ZipCode::IRREGULAR_SNARL_SIZE; for (size_t i = 0 ; i < code_size - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -352,12 +359,12 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } } -size_t ZipCode::max_depth() const { +size_t ZipCodeDecoder::max_depth() const { return decoder_length()-1; } -ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { +ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { //Now get the code type //A snarl is always a snarl. 
A chain could actually be a node @@ -390,7 +397,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 0) { return ZipCode::IRREGULAR_SNARL; @@ -403,7 +410,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } -size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -413,7 +420,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -429,7 +436,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { @@ -439,14 +446,14 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } } -size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { +size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { if (depth == 0) { @@ -463,7 +470,7 @@ size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -472,7 +479,7 @@ size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { } } -size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -490,7 +497,7 @@ size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIn size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -499,7 +506,7 @@ size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIn } } -size_t ZipCode::get_offset_in_chain(const size_t& depth, const 
SnarlDistanceIndex* distance_index) const { +size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -515,7 +522,7 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -525,13 +532,13 @@ size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceInde size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } -size_t ZipCode::get_chain_component(const size_t& depth) const { +size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { if (depth == 0) { @@ -547,7 +554,7 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -557,14 +564,14 @@ size_t ZipCode::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } } -size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) const { +size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_end) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -572,7 +579,7 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value % 2) { if (!get_end) { @@ -585,7 +592,7 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons return zip_value / 2; } -bool ZipCode::get_is_looping_chain(const size_t& depth) const { +bool ZipCodeDecoder::get_is_looping_chain(const 
size_t& depth) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -593,11 +600,11 @@ bool ZipCode::get_is_looping_chain(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value % 2; } -bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { +bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { if (depth == 0) { @@ -613,7 +620,7 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -622,14 +629,14 @@ bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_index = decoder[depth-1].offset; //zip_value is true if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -643,7 +650,7 @@ bool 
ZipCode::get_is_reversed_in_parent(const size_t& depth) const { } } -net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { //get_net_handle_slow does the same thing so if this gets changed need to change that too @@ -652,7 +659,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -667,7 +674,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -679,7 +686,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -687,7 +694,7 @@ net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd } } -net_handle_t 
ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -695,7 +702,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -717,7 +724,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -736,7 +743,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -745,7 +752,7 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } -size_t 
ZipCode::get_distance_index_address(const size_t& depth) const { +size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { if (depth == 0) { @@ -753,7 +760,7 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -768,7 +775,7 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -780,13 +787,13 @@ size_t ZipCode::get_distance_index_address(const size_t& depth) const { //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value; } } } -size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { +size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { #ifdef DEBUG_ZIPCODE assert(depth > 0); @@ -796,13 +803,13 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star size_t zip_index = decoder[depth-1].offset; //zip_value is 1 if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } //Zip value is true if the child is reversed @@ -825,53 +832,53 @@ size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_star distance_offset = ZipCode::IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET; } for (size_t i = 0 ; i <= distance_offset - ZipCode::SNARL_IS_REGULAR_OFFSET -1 ; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value - 1; } } -bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { +bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 1) != 0; } -bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { +bool ZipCodeDecoder::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 2) != 0; } -bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { +bool ZipCodeDecoder::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } return (zip_value & 4) != 0; } -const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, +const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, const size_t& depth) { - if (zip1.max_depth() < depth && zip2.max_depth() < depth ) { + if 
(decoder1.max_depth() < depth && decoder2.max_depth() < depth ) { return false; } //First, check if the code types are the same - ZipCode::code_type_t type1 = zip1.get_code_type(depth); - ZipCode::code_type_t type2 = zip2.get_code_type(depth); + ZipCode::code_type_t type1 = decoder1.get_code_type(depth); + ZipCode::code_type_t type2 = decoder2.get_code_type(depth); if (type1 != type2) { return false; } @@ -879,39 +886,44 @@ const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, if (type1 == ZipCode::ROOT_NODE || type1 == ZipCode::ROOT_CHAIN || type1 == ZipCode::ROOT_SNARL || type1 == ZipCode::IRREGULAR_SNARL || type1 == ZipCode::CYCLIC_SNARL ) { //If the codes are for root-structures or irregular/cyclic snarls, just check if the //connected component numbers are the same - return zip1.get_distance_index_address(depth) == zip2.get_distance_index_address(depth); + return decoder1.get_distance_index_address(depth) == decoder2.get_distance_index_address(depth); } else { //Check the parent type. If the parent is a snarl, then check rank. 
If it's a chain, //then check the prefix sum - if (zip1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || - zip1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || - zip1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || - zip1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { + if (decoder1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || + decoder1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || + decoder1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || + decoder1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { //If the parent is a snarl, then check the rank - return zip1.get_rank_in_snarl(depth) == zip2.get_rank_in_snarl(depth); + return decoder1.get_rank_in_snarl(depth) == decoder2.get_rank_in_snarl(depth); } else { //Otherwise, check the offset in the chain //Since the type is the same, this is sufficient - return zip1.get_offset_in_chain(depth) == zip2.get_offset_in_chain(depth); + return decoder1.get_offset_in_chain(depth) == decoder2.get_offset_in_chain(depth); } } } -void ZipCode::dump(std::ostream& out) const { - std::vector numbers = to_vector(); - // Print out the numbers in a way that is easy to copy-paste as a vector literal. - out << " numbers = zipcode->to_vector(); + // Print out the numbers in a way that is easy to copy-paste as a vector literal. 
+ out << ""; } - out << "}>"; } -std::ostream& operator<<(std::ostream& out, const ZipCode& zip) { - return out << ""; +std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder) { + return out << ""; } @@ -1045,8 +1057,8 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons } -size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, - ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, +size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos_t& pos1, + ZipCodeDecoder& zip2_decoder, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit, bool undirected_distance, const HandleGraph* graph){ @@ -1054,11 +1066,11 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, //Make sure that the zip codes actually correspond to the positions ZipCode check_zip1; check_zip1.fill_in_zipcode(distance_index, pos1); - assert(zip1 == check_zip1); + assert(*zip1_decoder.zipcode == check_zip1); ZipCode check_zip2; check_zip2.fill_in_zipcode(distance_index, pos2); - assert(zip2 == check_zip2); + assert(*zip2_decoder.zipcode == check_zip2); cerr << endl << "Minimum distance between " << pos1 << " and " << pos2 << " using zipcodes" << endl; cerr << "Ancestors for " << pos1 << endl; @@ -1079,7 +1091,7 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, //Helper function to update the distances to the ends of the parent //distance_start and distance_end get updated - auto update_distances_to_ends_of_parent = [&] (ZipCode& zip, const size_t& child_depth, + auto update_distances_to_ends_of_parent = [&] (ZipCodeDecoder& decoder, const size_t& child_depth, size_t& distance_to_start, size_t& distance_to_end) { #ifdef DEBUG_ZIPCODE cerr << "Update distance to ends of parent at depth " << child_depth << endl; @@ -1090,12 +1102,12 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, size_t 
distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - code_type_t parent_type = zip.get_code_type(child_depth-1); + code_type_t parent_type = decoder.get_code_type(child_depth-1); if (parent_type == IRREGULAR_SNARL || parent_type == CYCLIC_SNARL) { //If the parent is an irregular snarl - net_handle_t parent_handle = zip.get_net_handle(child_depth-1, &distance_index); - size_t child_rank = zip.get_rank_in_snarl(child_depth); + net_handle_t parent_handle = decoder.get_net_handle(child_depth-1, &distance_index); + size_t child_rank = decoder.get_rank_in_snarl(child_depth); distance_start_left = distance_index.distance_in_snarl(parent_handle, child_rank, false, 0, false, graph); distance_start_right = distance_index.distance_in_snarl(parent_handle, @@ -1110,7 +1122,7 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, } else if (parent_type == REGULAR_SNARL) { //If its a regular snarl, then the distances to the ends are either 0 or inf //For a regular snarl, the snarl stores if the child was reversed, rather than the child - if (zip.get_is_reversed_in_parent(child_depth)) { + if (decoder.get_is_reversed_in_parent(child_depth)) { distance_start_left = std::numeric_limits::max(); distance_start_right = 0; distance_end_right = std::numeric_limits::max(); @@ -1125,30 +1137,30 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, cerr << "Distances to parent regular snarl: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; #endif } else if (parent_type == CHAIN) { - if (zip.get_code_type(child_depth) == NODE && - zip.get_is_reversed_in_parent(child_depth)){ + if (decoder.get_code_type(child_depth) == NODE && + decoder.get_is_reversed_in_parent(child_depth)){ //If this is reversed in the chain distance_start_left = std::numeric_limits::max(); distance_end_right = std::numeric_limits::max(); //Prefix sum 
of the child - distance_end_left = zip.get_offset_in_chain(child_depth, &distance_index); + distance_end_left = decoder.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_start_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - zip.get_length(child_depth-1, &distance_index), - zip.get_offset_in_chain(child_depth, &distance_index)), - zip.get_length(child_depth, &distance_index)); + decoder.get_length(child_depth-1, &distance_index), + decoder.get_offset_in_chain(child_depth, &distance_index)), + decoder.get_length(child_depth, &distance_index)); } else { //If it is a node that isn't reversed in the chain, or it's a snarl which is never reversed distance_end_left = std::numeric_limits::max(); distance_start_right = std::numeric_limits::max(); //Prefix sum of the child - distance_start_left = zip.get_offset_in_chain(child_depth, &distance_index); + distance_start_left = decoder.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_end_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - zip.get_length(child_depth-1, &distance_index), - zip.get_offset_in_chain(child_depth, &distance_index)), - zip.get_length(child_depth, &distance_index)); + decoder.get_length(child_depth-1, &distance_index), + decoder.get_offset_in_chain(child_depth, &distance_index)), + decoder.get_length(child_depth, &distance_index)); } #ifdef DEBUG_ZIPCODE cerr << "Distances to parent chain: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; @@ -1166,7 +1178,7 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, }; - if (zip1.get_distance_index_address(0) != zip2.get_distance_index_address(0)) { + if (zip1_decoder.get_distance_index_address(0) != zip2_decoder.get_distance_index_address(0)) { #ifdef DEBUG_ZIPCODE cerr << "Zip 
codes are on different connected components" << endl; #endif @@ -1175,17 +1187,18 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, //The two positions are in the same connected component so now fill in the rest //of the decoder and try to find the distance - zip1.fill_in_full_decoder(); - zip2.fill_in_full_decoder(); + zip1_decoder.fill_in_full_decoder(); + zip2_decoder.fill_in_full_decoder(); //Now find the lowest common ancestor of the two zipcodes size_t lowest_common_ancestor_depth = 0; bool still_equal = true; while (still_equal) { - if (lowest_common_ancestor_depth == zip1.decoder_length()-1 || - lowest_common_ancestor_depth == zip2.decoder_length()-1 || - !ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth+1)) { + if (lowest_common_ancestor_depth == zip1_decoder.decoder_length()-1 || + lowest_common_ancestor_depth == zip2_decoder.decoder_length()-1 || + !ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, + lowest_common_ancestor_depth+1)) { //If we've hit the end of either decoder or if they are no longer equal, //Then break the loop and keep the current lowest_common_ancestor_depth still_equal = false; @@ -1209,26 +1222,26 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, if (distance_limit != std::numeric_limits::max() && - lowest_common_ancestor_depth < zip1.decoder_length()-1){ + lowest_common_ancestor_depth < zip1_decoder.decoder_length()-1){ //If we're aborting when the distance is definitely too far, - code_type_t ancestor_type = zip1.get_code_type(lowest_common_ancestor_depth); + code_type_t ancestor_type = zip1_decoder.get_code_type(lowest_common_ancestor_depth); if (ancestor_type == CHAIN || ancestor_type == ROOT_CHAIN) { //If the current ancestor is a chain, then check the distance - size_t prefix_sum1 = zip1.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); - size_t prefix_sum2 = zip2.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + 
size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + size_t prefix_sum2 = zip2_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); size_t distance_in_chain; if (prefix_sum1 < prefix_sum2) { //zip1 comes before zip2 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum2, SnarlDistanceIndex::sum(prefix_sum1, - zip1.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip1_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); } else { //zip2 comes before zip1 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum1, SnarlDistanceIndex::sum(prefix_sum2, - zip2.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip2_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); } if (distance_in_chain > distance_limit) { return std::numeric_limits::max(); @@ -1238,15 +1251,15 @@ size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, //Start from the nodes size_t distance_to_start1 = is_rev(pos1) - ? zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1) + ? zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1) : offset(pos1) + 1; size_t distance_to_end1 = is_rev(pos1) ? offset(pos1) + 1 - : zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1); + : zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1); size_t distance_to_start2 = is_rev(pos2) - ? zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2) + ? zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2) : offset(pos2) + 1; size_t distance_to_end2 = is_rev(pos2) ? 
offset(pos2) + 1 - : zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2); + : zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2); if (!undirected_distance) { //These are directed distances so set backwards distances to inf @@ -1269,22 +1282,22 @@ cerr << "Finding distances to ancestors of first position" << endl; //Now walk up the snarl tree from each position to one level below the lowest common ancestor - for (int i = zip1.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip1_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip1, i+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip1_decoder, i+1, distance_to_start1, distance_to_end1); } #ifdef DEBUG_ZIPCODE cerr << "Finding distances to ancestors of second position" << endl; #endif //The same thing for the second position - for (int i = zip2.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip2_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip2, i+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip2_decoder, i+1, distance_to_start2, distance_to_end2); } @@ -1293,7 +1306,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "Distances in children of common ancestor: " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; //Check that the current nodes are actually children of 
the lca - assert(ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth)); + assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, lowest_common_ancestor_depth)); #endif //Find the distance between them in the lowest common ancestor @@ -1308,18 +1321,18 @@ cerr << "Finding distances to ancestors of second position" << endl; cerr << "At " << depth << "st/th ancestor" << endl; cerr << "\tdistances are " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; #endif - if (depth == zip1.decoder_length()-1) { + if (depth == zip1_decoder.decoder_length()-1) { //If the lca is a node that both positions are on #ifdef DEBUG_ZIPCODE //If the lca is a node, then both the zipcode nodes should be the same node - assert(ZipCode::is_equal(zip1, zip2, depth)); - assert(depth == zip2.decoder_length()-1); + assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth)); + assert(depth == zip2_decoder.decoder_length()-1); cerr << "\tAncestor should be a node" << endl; #endif size_t d1 = SnarlDistanceIndex::sum(distance_to_end1, distance_to_start2); size_t d2 = SnarlDistanceIndex::sum(distance_to_end2, distance_to_start1); - size_t node_length = zip1.get_length(depth, &distance_index); + size_t node_length = zip1_decoder.get_length(depth, &distance_index); if (d1 > node_length) { distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d1, node_length),1)); @@ -1328,31 +1341,31 @@ cerr << "Finding distances to ancestors of second position" << endl; distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d2, node_length),1)); } - } else if ( zip1.decoder[depth].is_chain) { + } else if ( zip1_decoder.decoder[depth].is_chain) { #ifdef DEBUG_ZIPCODE cerr << "\tancestor should be a chain" << endl; #endif //If this ancestor is a chain //If the children are reversed in the chain, then flip their distances - bool rev1 = 
(zip1.get_code_type(depth+1) == NODE && - zip1.get_is_reversed_in_parent(depth+1)); + bool rev1 = (zip1_decoder.get_code_type(depth+1) == NODE && + zip1_decoder.get_is_reversed_in_parent(depth+1)); size_t dist_start1 = rev1 ? distance_to_end1 : distance_to_start1; size_t dist_end1 = rev1 ? distance_to_start1 : distance_to_end1; - bool rev2 = zip2.get_code_type(depth+1) == NODE && - zip2.get_is_reversed_in_parent(depth+1); + bool rev2 = zip2_decoder.get_code_type(depth+1) == NODE && + zip2_decoder.get_is_reversed_in_parent(depth+1); size_t dist_start2 = rev2 ? distance_to_end2 : distance_to_start2; size_t dist_end2 = rev2 ? distance_to_start2 : distance_to_end2; //If they are the same child, then there is no path between them in the chain because we don't allow loops //So first check that they aren't the same - if (!(ZipCode::is_equal(zip1, zip2, depth+1) - )){//TODO: I think this is unnecessary || (zip1.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) - size_t prefix_sum1 = zip1.get_offset_in_chain(depth+1, &distance_index); - size_t prefix_sum2 = zip2.get_offset_in_chain(depth+1, &distance_index); - code_type_t code_type1 = zip1.get_code_type(depth+1); - code_type_t code_type2 = zip2.get_code_type(depth+1); + if (!(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth+1) + )){//TODO: I think this is unnecessary || (zip1_decoder.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) + size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(depth+1, &distance_index); + size_t prefix_sum2 = zip2_decoder.get_offset_in_chain(depth+1, &distance_index); + code_type_t code_type1 = zip1_decoder.get_code_type(depth+1); + code_type_t code_type2 = zip2_decoder.get_code_type(depth+1); if (prefix_sum1 < prefix_sum2 || (prefix_sum1 == prefix_sum2 && @@ -1366,7 +1379,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it is a snarl" << endl; - cerr << "Find distances from : 
" << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1376,7 +1389,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1.get_length(depth+1, &distance_index))), + zip1_decoder.get_length(depth+1, &distance_index))), dist_end1),1)); } } else { @@ -1384,7 +1397,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(Prefix sum 2 + distance left 2) - (prefix sum1+ length 1) + distance right 1 #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it isn't a snarl" << endl; - cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1.get_length(depth+1, &distance_index) << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1395,7 +1408,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1.get_length(depth+1, &distance_index))), + zip1_decoder.get_length(depth+1, &distance_index))), dist_end1),1) ); } @@ -1407,7 +1420,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(prefix sum 1 + distance left 1) - (prefix sum 2 + length 2) + distance right 2 #ifdef DEBUG_ZIPCODE cerr << "Second child comes first in the chain 
and it is a snarl" << endl; - cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; + cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2_decoder.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; #endif if (dist_start1 != std::numeric_limits::max() && dist_end2 != std::numeric_limits::max() ){ @@ -1417,7 +1430,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2.get_length(depth+1, &distance_index))), + zip2_decoder.get_length(depth+1, &distance_index))), dist_end2), 1)); } } else { @@ -1436,7 +1449,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2.get_length(depth+1, &distance_index))), + zip2_decoder.get_length(depth+1, &distance_index))), dist_end2),1) ); } @@ -1444,8 +1457,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } } //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); } else { #ifdef DEBUG_ZIPCODE @@ -1455,11 +1468,11 @@ cerr << "Finding distances to ancestors of second position" << endl; //If the parent is a regular snarl, then there is no path between them so //just update the distances to the ends of the parent - if (zip1.get_code_type(depth) != REGULAR_SNARL) { + if (zip1_decoder.get_code_type(depth) != 
REGULAR_SNARL) { //Parent may be an irregular snarl or a root snarl (which is also irregular) - net_handle_t parent_handle = zip1.get_net_handle(depth, &distance_index); - size_t rank1 = zip1.get_rank_in_snarl(depth+1); - size_t rank2 = zip2.get_rank_in_snarl(depth+1); + net_handle_t parent_handle = zip1_decoder.get_net_handle(depth, &distance_index); + size_t rank1 = zip1_decoder.get_rank_in_snarl(depth+1); + size_t rank2 = zip2_decoder.get_rank_in_snarl(depth+1); #ifdef DEBUG_ZIPCODE cerr << "irregular snarl so find distances in the distance index: " << distance_index.net_handle_as_string(parent_handle) << endl; cerr << "\t at offset " << distance_index.get_record_offset(parent_handle) << endl; @@ -1492,8 +1505,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } #endif //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); } #ifdef DEBUG_ZIPCODE cerr << "distance in ancestor: " << distance_between << endl; @@ -1856,7 +1869,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { +MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; if (decoder_length() == 1) { @@ -1868,15 +1881,15 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& size_t zip_value; size_t zip_index = decoder[0].offset; //Root is chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = 
zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); //Root node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; payload.is_trivial_chain = true; @@ -1895,17 +1908,17 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& size_t zip_value; size_t zip_index = decoder[max_depth()-1].offset; //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); if (decoder_length() == 2) { //If the node is a child of the root chain payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); } else { payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; @@ -1913,20 +1926,20 @@ MIPayload 
ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); //chain component count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Node prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //Node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //is_reversed - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //TODO: For top-level chains we got this from the distance index payload.is_reversed = zip_value; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.chain_component = zip_value; @@ -1949,9 +1962,9 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& if (payload.parent_is_root) { //is_chain zip_index = decoder[0].offset; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; payload.parent_record_offset = 
distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, @@ -1961,7 +1974,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else { zip_index = decoder[max_depth()-1].offset; //is_regular - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //If this is a non-root snarl, get as much as we can from it payload.parent_type = ZipCode::EMPTY; if (zip_value == 0) { @@ -1973,20 +1986,20 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Snarl length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Snarl child_count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //TODO: SHould use this somehow payload.chain_component = 0; //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed @@ -2010,9 +2023,9 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& //We should be at the node/trivial chain now zip_index = decoder[max_depth()].offset; //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); //Chain length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Get the rest as default values @@ -2031,7 +2044,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& return payload; } -net_identifier_t ZipCode::get_identifier(size_t depth) const { +net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { if (depth == std::numeric_limits::max()) { //This is equivalent to distance_index.get_root() return "ROOT"; @@ -2044,7 +2057,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else if (decoder[d].is_chain) { @@ -2054,7 +2067,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else { @@ -2062,7 +2075,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2071,7 +2084,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + 
std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2088,7 +2101,7 @@ net_identifier_t ZipCode::get_identifier(size_t depth) const { return result; } -const net_identifier_t ZipCode::get_parent_identifier(const net_identifier_t& child) { +const net_identifier_t ZipCodeDecoder::get_parent_identifier(const net_identifier_t& child) { if (child == "ROOT") { throw std::runtime_error("error: trying to get the parent of the root net_identifier_t"); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4b5de75b9dc..eceed521640 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -19,14 +19,18 @@ using namespace std; * A ZipCode stores the information and can be used to create a zipcode. It can be used * to calculate the distance between zipcodes * - * A decoder is used for interpreting zipcodes to find specific values that were - * stored in the ZipCode. + * A ZipCodeDecoder is used for interpreting zipcodes to find specific values that were + * stored in the ZipCode. A ZipCodeDecoder must be constructed from a specific zipcode. * Construction of a decoder occurs one code at a time, starting from the root snarl or chain, - * so it is possible to have a partially constructed decoder, to avoid having to + * so it is possible to have a partially constructed ZipCodeDecoder, to avoid having to * walk through the entire ZipCode to get the values for things higher in the snarl tree. * The full decoder must be constructed to get values for the node. */ +///A decoder for interpreting a zipcode +///Can interpret the values for a snarl tree node given the depth +///(depth in the snarl tree, also the index into the zipcode vector) +class ZipCodeDecoder; ///A struct to interpret the minimizer payload @@ -55,8 +59,7 @@ class ZipCode { /// Regular snarls are bubbles. Irregular snarls are snarls that aren't bubbles but are dags /// Cyclic snarls are non-dags. They are stored the same as irregular snarls. 
Only the type is different public: - enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; - + enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; public: //Fill in an empty zipcode given a position @@ -80,8 +83,8 @@ class ZipCode { //The decoders may or may not be filled in, and may be filled in when this is run //If distance_limit is set, return std::numeric_limits::max() if the distance //will be greater than the distance limit - static size_t minimum_distance_between(ZipCode& zip1, const pos_t& pos1, - ZipCode& zip2, const pos_t& pos2, + static size_t minimum_distance_between(ZipCodeDecoder& zip_decoder1, const pos_t& pos1, + ZipCodeDecoder& zip_decoder2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit = std::numeric_limits::max(), bool undirected_distance=false, @@ -212,124 +215,7 @@ class ZipCode { //Return a vector of size_ts that will represent the snarl in the zip code inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); - - //////////////////////////////// Stuff for decoding the zipcode - - public: - //TODO: Make the decoder and zipcode private, still need it for unit testing - ///The decoder as a vector of pair, one for each snarl tree node in the zip - ///where is_chain indicates whether it's a chain/node, and index - ///is the index of the node/snarl/chain code in the varint_vector_t - struct decoder_t { - bool is_chain : 1; - size_t offset : 15; - decoder_t(bool is_chain, size_t offset) : is_chain(is_chain), offset(offset) {} - inline bool operator==(const decoder_t& other) const { - return is_chain == other.is_chain && offset == other.offset; - } - }; - std::vector decoder; - - ///Did we fill in the entire decoder - ///TODO: I'm making it fill in the decoder automatically because it 
seems to be faster that way, instead of - /// waiting to see which parts are actually needed - bool finished_decoding = false; - - public: - - ///Go through the entire zipcode and fill in the decoder - void fill_in_full_decoder(); - - ///Fill in one more item in the decoder - ///Returns true if this is the last thing in the zipcode and false if there is more to decode - bool fill_in_next_decoder(); - - ///What is the maximum depth of this zipcode? - size_t max_depth() const; - - ///How many codes in the zipcode have been decoded? - size_t decoder_length() const {return decoder.size();} - - ///What type of snarl tree node is at the given depth (index into the zipcode) - ZipCode::code_type_t get_code_type(const size_t& depth) const ; - - ///Get the length of a snarl tree node given the depth in the snarl tree - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl - size_t get_rank_in_snarl(const size_t& depth) const ; - - ///Get the number of children in a snarl. Throw an exception if it isn't a snarl - size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the prefix sum of a child of a chain - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the chain component of a chain child. 
- ///For snarls, this will be the component of the start node - size_t get_chain_component(const size_t& depth) const ; - - ///Get the chain component of the last node in the chain - /// This behaves like the distance index get_chain_component- - /// for looping chains it returns the last component if get_end is true, - /// and 0 if it is false - size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; - bool get_is_looping_chain(const size_t& depth) const ; - - ///Is the snarl tree node backwards relative to its parent - bool get_is_reversed_in_parent(const size_t& depth) const; - - ///Get the handle of the thing at the given depth. This can only be used for - ///Root-level structures or irregular snarls - net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; - - ///Get the handle of the thing at the given depth. This can be used for anything but is slow, - /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I - /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; - - ///Get the information that was stored to get the address in the distance index - ///This is the connected component number for a root structure, or the address of - ///an irregular snarl. Throws an error for anything else - ///This is used for checking equality without looking at the distance index. 
- ///Use get_net_handle for getting the actual handle - size_t get_distance_index_address(const size_t& depth) const; - - /// The minimum distance from start or end of the snarl to the left or right side of the child - size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; - - bool is_externally_start_end_connected(const size_t& depth) const; - bool is_externally_start_start_connected(const size_t& depth) const; - bool is_externally_end_end_connected(const size_t& depth) const; - - - ///Are the two decoders pointing to the same snarl tree node at the given depth - ///This only checks if the values in the zipcode are the same at the given depth, - ///so if the preceeding snarl tree nodes are different, - ///then this might actually refer to different things - const static bool is_equal(const ZipCode& zip1, const ZipCode& zip2, - const size_t& depth); - - /// Dump a ZipCode to a stream so that it can be reconstructed for a - /// unit test from the resulting information. - void dump(std::ostream& out) const; - - //TODO: I want to make a struct for holding all values of a code as real values - - ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; - - /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth - /// would be the node, also include the node id - net_identifier_t get_identifier(size_t depth) const; - const static net_identifier_t get_parent_identifier(const net_identifier_t& child); - + friend class ZipCodeDecoder; }; /// Print a code type to a stream @@ -369,6 +255,136 @@ class ZipCodeCollection { }; +/* + * Struct for interpreting a ZipCode + */ +class ZipCodeDecoder { + + public: + //TODO: Make the decoder and zipcode private, still need it for unit testing + ///The decoder as a vector of pair, one for each snarl tree node in the zip + ///where is_chain indicates whether it's a chain/node, and index + ///is the index of the node/snarl/chain code in the varint_vector_t + struct decoder_t { + bool is_chain : 1; + size_t offset : 15; + decoder_t(bool is_chain, size_t offset) : is_chain(is_chain), offset(offset) {} + decoder_t() : is_chain(false), offset(0) {} + inline bool operator==(const decoder_t& other) const { + return is_chain == other.is_chain && offset == other.offset; + } + }; + std::vector decoder; + + ///The zipcode that this is decoding + const ZipCode* zipcode; + + ///Did we fill in the entire decoder + bool finished_decoding; + + public: + + ///Constructor that goes through the zipcode and decodes it to fill in decoder + ///If a depth is given, then only fill in up to depth snarl tree nodes + ///Otherwise, fill in the whole zipcode + ZipCodeDecoder(const ZipCode* zipcode = nullptr); + + ///Go through the entire zipcode and fill in the decoder + void fill_in_full_decoder(); + + ///Fill in one more item in the decoder + ///Returns true if this is the last thing in the zipcode and false if there is more to decode + bool fill_in_next_decoder(); + + ///What is the maximum depth of this zipcode? + size_t max_depth() const; + + ///How many codes in the zipcode have been decoded? 
+ size_t decoder_length() const {return decoder.size();} + + ///What type of snarl tree node is at the given depth (index into the zipcode) + ZipCode::code_type_t get_code_type(const size_t& depth) const ; + + ///Get the length of a snarl tree node given the depth in the snarl tree + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl + size_t get_rank_in_snarl(const size_t& depth) const ; + + ///Get the number of children in a snarl. Throw an exception if it isn't a snarl + size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the prefix sum of a child of a chain + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the chain component of a chain child. 
+ ///For snarls, this will be the component of the start node + size_t get_chain_component(const size_t& depth) const ; + + ///Get the chain component of the last node in the chain + /// This behaves like the distance index get_chain_component- + /// for looping chains it returns the last component if get_end is true, + /// and 0 if it is false + size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; + bool get_is_looping_chain(const size_t& depth) const ; + + ///Is the snarl tree node backwards relative to its parent + bool get_is_reversed_in_parent(const size_t& depth) const; + + ///Get the handle of the thing at the given depth. This can only be used for + ///Root-level structures or irregular snarls + net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; + + ///Get the handle of the thing at the given depth. This can be used for anything but is slow, + /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I + /// remember that it's slow + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; + + ///Get the information that was stored to get the address in the distance index + ///This is the connected component number for a root structure, or the address of + ///an irregular snarl. Throws an error for anything else + ///This is used for checking equality without looking at the distance index. 
+ ///Use get_net_handle for getting the actual handle + size_t get_distance_index_address(const size_t& depth) const; + + /// The minimum distance from start or end of the snarl to the left or right side of the child + size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; + + bool is_externally_start_end_connected(const size_t& depth) const; + bool is_externally_start_start_connected(const size_t& depth) const; + bool is_externally_end_end_connected(const size_t& depth) const; + + + ///Are the two decoders pointing to the same snarl tree node at the given depth + ///This only checks if the values in the zipcode are the same at the given depth, + ///so if the preceeding snarl tree nodes are different, + ///then this might actually refer to different things + const static bool is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, + const size_t& depth); + + /// Dump a ZipCodeDecoder to a stream so that it can be reconstructed for a + /// unit test from the resulting information. + void dump(std::ostream& out) const; + + //TODO: I want to make a struct for holding all values of a code as real values + + ///Fill in a payload with values from the zipcode + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + + /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth + /// would be the node, also include the node id + net_identifier_t get_identifier(size_t depth) const; + const static net_identifier_t get_parent_identifier(const net_identifier_t& child); + + +}; + template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { @@ -376,7 +392,7 @@ struct wang_hash { } }; -std::ostream& operator<<(std::ostream& out, const ZipCode& decoder); +std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); /** diff --git a/src/zip_code_tree.cpp b/src/zip_code_tree.cpp index 1ed2bc13afd..1055949af1b 100644 --- a/src/zip_code_tree.cpp +++ b/src/zip_code_tree.cpp @@ -55,7 +55,7 @@ void ZipCodeForest::open_chain(forest_growing_state_t& forest_state, #endif const Seed& current_seed = forest_state.seeds->at(seed_index); - size_t current_max_depth = current_seed.zipcode.max_depth(); + size_t current_max_depth = current_seed.zipcode_decoder->max_depth(); if (depth == 0) { //If this is the start of a new top-level chain, make a new tree, which will be the new active tree @@ -177,7 +177,7 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, //The value that got stored in forest_state.sibling_indices_at_depth was the prefix sum //traversing the chain according to its orientation in the tree, so either way //the distance is the length of the chain - the prefix sum - size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode.get_length(depth), + size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode_decoder->get_length(depth), forest_state.sibling_indices_at_depth[depth].back().value); bool add_distances = true; if (distance_to_chain_end > forest_state.distance_limit && forest_state.open_chains.back().second) { @@ -260,9 +260,9 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, std::numeric_limits::max(), false); //Update the distance to the end of the chain to be the distance from the previous child 
- size_t last_length = depth == last_seed.zipcode.max_depth() + size_t last_length = depth == last_seed.zipcode_decoder->max_depth() ? 0 - : last_seed.zipcode.get_length(depth+1); + : last_seed.zipcode_decoder->get_length(depth+1); distance_to_chain_end = SnarlDistanceIndex::sum(distance_to_chain_end, SnarlDistanceIndex::sum(last_edge, @@ -299,10 +299,10 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, bool chain_is_reversed) { const Seed& current_seed = forest_state.seeds->at(seed_index); - ZipCode::code_type_t current_type = current_seed.zipcode.get_code_type(depth); + ZipCode::code_type_t current_type = current_seed.zipcode_decoder->get_code_type(depth); //Is this chain actually a node pretending to be a chain - bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode.max_depth(); + bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode_decoder->max_depth(); //For a root node or trivial chain, the "chain" is actually just the node, so the depth // of the chain we're working on is the same depth. Otherwise, the depth is depth-1 @@ -320,11 +320,11 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //Otherwise, get the distance to the start or end of the chain current_offset = chain_is_reversed - ? SnarlDistanceIndex::minus(current_seed.zipcode.get_length(chain_depth) , + ? 
SnarlDistanceIndex::minus(current_seed.zipcode_decoder->get_length(chain_depth) , SnarlDistanceIndex::sum( - current_seed.zipcode.get_offset_in_chain(depth), - current_seed.zipcode.get_length(depth))) - : current_seed.zipcode.get_offset_in_chain(depth); + current_seed.zipcode_decoder->get_offset_in_chain(depth), + current_seed.zipcode_decoder->get_length(depth))) + : current_seed.zipcode_decoder->get_offset_in_chain(depth); } @@ -537,7 +537,7 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //stored should be the offset of the end bound of the snarl, so add the //length of the snarl current_offset = SnarlDistanceIndex::sum(current_offset, - current_seed.zipcode.get_length(depth)); + current_seed.zipcode_decoder->get_length(depth)); } @@ -614,7 +614,7 @@ void ZipCodeForest::close_snarl(forest_growing_state_t& forest_state, forest_state.sibling_indices_at_depth[depth-1].pop_back(); //Snarl prefix sum is now the distance from the start of the chain to the start of the snarl - snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode.get_length(depth)); + snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode_decoder->get_length(depth)); //Now update forest_state.sibling_indices_at_depth to be the previous thing in the chain forest_state.sibling_indices_at_depth[depth-1].push_back({ @@ -745,9 +745,9 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //If we're getting the distance to the end of the snarl, then this is the length of the snarl // otherwise, it is the distance from the seed to the start (or end) of the snarl - size_t snarl_distance = to_snarl_end ? seed.zipcode.get_length(depth) + size_t snarl_distance = to_snarl_end ? 
seed.zipcode_decoder->get_length(depth) : SnarlDistanceIndex::sum (distance_to_chain_start, - seed.zipcode.get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); + seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); //Add the edge trees[forest_state.active_tree_index].zip_code_tree.at(last_child_index - 1 - sibling_i) = @@ -757,7 +757,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //Otherwise, the previous thing was another child of the snarl //and we need to record the distance between these two size_t distance; - if (seed.zipcode.get_code_type(depth) == ZipCode::REGULAR_SNARL) { + if (seed.zipcode_decoder->get_code_type(depth) == ZipCode::REGULAR_SNARL) { //If this is the child of a regular snarl, then the distance between //any two chains is inf, and the distance to any bound is 0 distance = to_snarl_end ? sibling.distances.second : std::numeric_limits::max(); @@ -771,19 +771,19 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co if (to_snarl_end && !is_cyclic_snarl) { distance = SnarlDistanceIndex::sum(sibling.distances.second, - sibling_seed.zipcode.get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); + sibling_seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); } else { //If to_snarl_end is true, then we want the distance to the end (or start if snarl_is_reversed) // Rank is 0 and the orientation doesn't matter size_t rank2 = to_snarl_end ? (snarl_is_reversed ? 0 : 1) - : seed.zipcode.get_rank_in_snarl(depth+1); + : seed.zipcode_decoder->get_rank_in_snarl(depth+1); bool right_side2 = child_is_reversed; //If the sibling is the start, then get the distance to the appropriate bound size_t rank1 = sibling.type == ZipCodeTree::SNARL_START ? (snarl_is_reversed ? 
1 : 0) - : sibling_seed.zipcode.get_rank_in_snarl(depth+1); + : sibling_seed.zipcode_decoder->get_rank_in_snarl(depth+1); bool right_side1 = !sibling.is_reversed; size_t distance_to_end_of_last_child = sibling.type == ZipCodeTree::SNARL_START ? 0 @@ -791,7 +791,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //The bools for this are true if the distance is to/from the right side of the child //We want the right side of 1 (which comes first in the dag ordering) to the left side of 2 //relative to the orientation of the snarl - net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth, forest_state.distance_index); + net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth, forest_state.distance_index); distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum( forest_state.distance_index->distance_in_snarl(snarl_handle, rank1, right_side1, rank2, right_side2), distance_to_chain_start), @@ -938,7 +938,7 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< } else if (current_item.get_type() == ZipCodeTree::SEED) { //If this is a seed, check the snarls we've seen previously for (const size_t& snarl_depth : snarl_depths) { - if (seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) + if (seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::REGULAR_SNARL) { //If this is a regular snarl, then it must be a DAG too dag_count++; @@ -946,11 +946,11 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< //If this is an irregular snarl //Check the snarl in the distance index - net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode.get_net_handle(snarl_depth, &distance_index); + net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode_decoder->get_net_handle(snarl_depth, &distance_index); #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::IRREGULAR_SNARL || - 
seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || - seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); + assert(seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::IRREGULAR_SNARL || + seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || + seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); assert(distance_index.is_snarl(snarl_handle)); #endif if (distance_index.is_dag(snarl_handle)) { @@ -976,13 +976,13 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< return std::make_pair(dag_count, non_dag_count); } bool ZipCodeTree::seed_is_reversed_at_depth (const Seed& seed, size_t depth, const SnarlDistanceIndex& distance_index){ - if (seed.zipcode.get_is_reversed_in_parent(depth)) { + if (seed.zipcode_decoder->get_is_reversed_in_parent(depth)) { return true; - } else if (depth > 0 && (seed.zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL - || seed.zipcode.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { + } else if (depth > 0 && (seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL + || seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { //If the parent is an irregular snarl, then check the orientation of the child in the snarl - net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth-1, &distance_index); - size_t rank = seed.zipcode.get_rank_in_snarl(depth); + net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth-1, &distance_index); + size_t rank = seed.zipcode_decoder->get_rank_in_snarl(depth); if (distance_index.distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() && @@ -1109,10 +1109,10 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //so if things are traversed backwards, reverse the 
orientation bool a_is_reversed = false; bool b_is_reversed = false; - while (depth < seeds->at(previous_seed_index).zipcode.max_depth() && - depth < seeds->at(current_item.get_value()).zipcode.max_depth() && - ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, - seeds->at(current_item.get_value()).zipcode, depth)) { + while (depth < seeds->at(previous_seed_index).zipcode_decoder->max_depth() && + depth < seeds->at(current_item.get_value()).zipcode_decoder->max_depth() && + ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, + *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { //Remember the orientation if (ZipCodeTree::seed_is_reversed_at_depth(seeds->at(previous_seed_index), depth, distance_index)) { @@ -1142,19 +1142,19 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //Either depth is the last thing in previous_seed_index or current_item.value, or they are different at this depth - if ( ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, - seeds->at(current_item.get_value()).zipcode, depth)) { + if ( ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, + *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthey are on the same node" << endl; #endif //If they are equal, then they must be on the same node size_t offset1 = is_rev(seeds->at(previous_seed_index).pos) - ? seeds->at(previous_seed_index).zipcode.get_length(depth) + ? seeds->at(previous_seed_index).zipcode_decoder->get_length(depth) - offset(seeds->at(previous_seed_index).pos) : offset(seeds->at(previous_seed_index).pos); size_t offset2 = is_rev(seeds->at(current_item.get_value()).pos) - ? seeds->at(current_item.get_value()).zipcode.get_length(depth) + ? 
seeds->at(current_item.get_value()).zipcode_decoder->get_length(depth) - offset(seeds->at(current_item.get_value()).pos) : offset(seeds->at(current_item.get_value()).pos); if (!current_is_in_cyclic_snarl) { @@ -1172,28 +1172,28 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, cerr << "\tThey are on different connected components" << endl; #endif //If they are on different connected components, sort by connected component - assert( seeds->at(previous_seed_index).zipcode.get_distance_index_address(0) <= - seeds->at(current_item.get_value()).zipcode.get_distance_index_address(0)); + assert( seeds->at(previous_seed_index).zipcode_decoder->get_distance_index_address(0) <= + seeds->at(current_item.get_value()).zipcode_decoder->get_distance_index_address(0)); - } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::CHAIN - || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { + } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::CHAIN + || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common chain" << endl; #endif //If previous_seed_index and current_item.value are both children of a chain - size_t offset_a = seeds->at(previous_seed_index).zipcode.get_offset_in_chain(depth); - size_t offset_b = seeds->at(current_item.get_value()).zipcode.get_offset_in_chain(depth); + size_t offset_a = seeds->at(previous_seed_index).zipcode_decoder->get_offset_in_chain(depth); + size_t offset_b = seeds->at(current_item.get_value()).zipcode_decoder->get_offset_in_chain(depth); if (!current_is_in_cyclic_snarl) { if ( offset_a == offset_b) { //If they have the same prefix sum, then the snarl comes first //They will never be on the same child at this depth if (parent_of_a_is_reversed) { - 
assert(seeds->at(current_item.get_value()).zipcode.get_code_type(depth) != ZipCode::NODE && - seeds->at(previous_seed_index).zipcode.get_code_type(depth) == ZipCode::NODE); + assert(seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && + seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); } else { - assert( seeds->at(previous_seed_index).zipcode.get_code_type(depth) != ZipCode::NODE && - seeds->at(current_item.get_value()).zipcode.get_code_type(depth) == ZipCode::NODE); + assert( seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && + seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); } } else { //Check if the parent chain is reversed and if so, then the order should be reversed @@ -1205,8 +1205,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, } } } - } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::REGULAR_SNARL - || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { + } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::REGULAR_SNARL + || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common dag snarl" << endl; #endif @@ -1215,8 +1215,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, // The ranks of children in snarls are in a topological order, so // sort on the ranks if (!current_is_in_cyclic_snarl) { - assert( seeds->at(previous_seed_index).zipcode.get_rank_in_snarl(depth) <= - seeds->at(current_item.get_value()).zipcode.get_rank_in_snarl(depth)); + assert( seeds->at(previous_seed_index).zipcode_decoder->get_rank_in_snarl(depth) <= + seeds->at(current_item.get_value()).zipcode_decoder->get_rank_in_snarl(depth)); 
} } @@ -2031,20 +2031,20 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\tThis is the root snarl so sort by connected component: " - << seed.zipcode.get_distance_index_address(0) << endl; + << seed.zipcode_decoder->get_distance_index_address(0) << endl; #endif - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode.get_distance_index_address(0)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode_decoder->get_distance_index_address(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(0)); } else if (interval.code_type == ZipCode::NODE || interval.code_type == ZipCode::ROOT_NODE - || seed.zipcode.max_depth() == interval.depth) { + || seed.zipcode_decoder->max_depth() == interval.depth) { #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\t this is a node: offset: " << ( is_rev(seed.pos) - ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) + ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)) << endl;; #endif sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( - is_rev(seed.pos) != order_is_reversed ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) + is_rev(seed.pos) != order_is_reversed ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(ZipCode::NODE); @@ -2058,12 +2058,12 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, // and 2 will be added to the node with an offset in the node of 0 (node 3 if the chain is traversed forward) // See sort_value_t for more details - size_t prefix_sum = order_is_reversed ? 
SnarlDistanceIndex::minus(seed.zipcode.get_length(interval.depth), - SnarlDistanceIndex::sum( seed.zipcode.get_offset_in_chain(interval.depth+1), - seed.zipcode.get_length(interval.depth+1))) - : seed.zipcode.get_offset_in_chain(interval.depth+1); + size_t prefix_sum = order_is_reversed ? SnarlDistanceIndex::minus(seed.zipcode_decoder->get_length(interval.depth), + SnarlDistanceIndex::sum( seed.zipcode_decoder->get_offset_in_chain(interval.depth+1), + seed.zipcode_decoder->get_length(interval.depth+1))) + : seed.zipcode_decoder->get_offset_in_chain(interval.depth+1); - ZipCode::code_type_t child_type = seed.zipcode.get_code_type(interval.depth+1); + ZipCode::code_type_t child_type = seed.zipcode_decoder->get_code_type(interval.depth+1); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(child_type); if (child_type == ZipCode::REGULAR_SNARL @@ -2075,9 +2075,9 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, sort_values_by_seed[zipcode_sort_order[i]].set_chain_order(1); } else { //If this is a node, then the order depends on where the position falls in the node - bool node_is_rev = seed.zipcode.get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); + bool node_is_rev = seed.zipcode_decoder->get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); node_is_rev = order_is_reversed ? !node_is_rev : node_is_rev; - size_t node_offset = node_is_rev ? seed.zipcode.get_length(interval.depth+1) - offset(seed.pos) + size_t node_offset = node_is_rev ? 
seed.zipcode_decoder->get_length(interval.depth+1) - offset(seed.pos) : offset(seed.pos); sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(SnarlDistanceIndex::sum(prefix_sum, node_offset)); @@ -2093,13 +2093,13 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #endif } else { #ifdef DEBUG_ZIP_CODE_SORTING - cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode.get_rank_in_snarl(interval.depth+1) << endl; + cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1) << endl; #endif // The ranks of children in irregular snarls are in a topological order, so // sort on the ranks // The rank of children in a regular snarl is arbitrary but it doesn't matter anyway - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode.get_rank_in_snarl(interval.depth+1)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(interval.depth+1)); } min_sort_value = std::min(min_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); max_sort_value = std::max(max_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); @@ -2204,7 +2204,7 @@ void ZipCodeForest::get_next_intervals(forest_growing_state_t& forest_state, con if (interval.code_type != ZipCode::EMPTY && - seeds->at(zipcode_sort_order[interval.interval_start]).zipcode.max_depth() == interval.depth ) { + seeds->at(zipcode_sort_order[interval.interval_start]).zipcode_decoder->max_depth() == interval.depth ) { //If this is a trivial chain, then just return the same interval as a node #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthis was a trivial chain so just return the same interval as a node" << endl; 
@@ -2434,7 +2434,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth()+1); cerr << "Close anything open" << endl; #endif while (!forest_state.open_intervals.empty()) { @@ -2607,7 +2607,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth(); for (size_t seed_i = current_interval.interval_start ; seed_i < current_interval.interval_end ; seed_i++) { @@ -2709,9 +2709,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s const SnarlDistanceIndex* distance_index = forest_state.distance_index; #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_code_type(snarl_interval.depth) + assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_code_type(snarl_interval.depth) == ZipCode::CYCLIC_SNARL); - net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, distance_index); + net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); cerr << "Sorting and finding intervals for cyclic snarl " << distance_index->net_handle_as_string(handle); size_t child_count = 0; for (auto& x : child_intervals) { @@ -2720,7 +2720,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s cerr << " with " << child_count << " children" << endl; #endif - net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, distance_index); + net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); /****** For each interval, form runs of reachable seeds @@ -2800,9 +2800,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s //Get 
up to half of the values from before the snarl while (check_i >= parent_interval.interval_start && parent_offset_values.size() <= check_count/2) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); } check_i--; @@ -2813,9 +2813,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s check_i = snarl_interval.interval_end; while (check_i < parent_interval.interval_end && parent_offset_values.size() < check_count) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); } check_i++; @@ -2857,7 +2857,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s #ifdef DEBUG_ZIP_CODE_TREE //This is how seed_is_reversed_at_depth currently works but double check this in case it changed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); assert (distance_index->distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() 
&& distance_index->distance_in_snarl(snarl_handle, 1, false, rank, true) == std::numeric_limits::max()); @@ -2866,7 +2866,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s interval_is_reversable = false; } else { //If the interval is not reversed in the snarl, check if it can be reversed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); size_t distance_start = distance_index->distance_in_snarl(snarl_handle, 0, false, rank, true); size_t distance_end = distance_index->distance_in_snarl(snarl_handle, 1, false, rank, false); interval_is_reversable = distance_start != std::numeric_limits::max() @@ -2899,7 +2899,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s std::get<1>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = sort_values_by_seed[zipcode_sort_order[sort_i]].get_sort_value(); std::get<2>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = - seed.zipcode.max_depth() <= snarl_interval.depth+2; + seed.zipcode_decoder->max_depth() <= snarl_interval.depth+2; //Make a new run for the seed, to be updated with anything combined with it From 69d48f011402f5808e57c60a2d6a24e99e788d16 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 23:18:20 +0200 Subject: [PATCH 085/124] Fix typo --- src/snarl_seed_clusterer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 6dbb291b647..44bb04f5e89 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -1811,7 +1811,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; 
child1.prefix_sum = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; - child2.has_chain_values = true; + child1.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet From e12b23a745398c6357969c313ac2bdfdb3a415b1 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 3 Aug 2024 23:56:15 +0200 Subject: [PATCH 086/124] Use distance index less for chain values --- src/snarl_seed_clusterer.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 239d1e0d182..d879381a4e8 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -317,7 +317,11 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); - node_length = distance_index.chain_minimum_length(containing_net_handle); + if (zipcode_depth == 0 || is_looping_chain || seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true) != 0) { + node_length = distance_index.chain_minimum_length(containing_net_handle); + } else { + node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); + } chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); } From f4f10f31c895cff78cc8b4893bb64b91b576a3a7 Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Mon, 5 Aug 2024 02:18:58 -0700 Subject: [PATCH 087/124] Put decoder back with zipcode --- src/algorithms/chain_items.hpp | 16 +- src/minimizer_mapper.cpp | 3 +- src/minimizer_mapper.hpp | 8 +- src/minimizer_mapper_from_chains.cpp | 46 +-- src/snarl_seed_clusterer.cpp | 64 ++-- 
src/snarl_seed_clusterer.hpp | 42 +-- src/subcommand/zipcode_main.cpp | 6 +- src/unittest/snarl_seed_clusterer.cpp | 126 ++++++- src/unittest/zip_code.cpp | 516 +++++++++++++------------- src/unittest/zip_code_tree.cpp | 56 +++ src/zip_code.cpp | 377 +++++++++---------- src/zip_code.hpp | 268 +++++++------ src/zip_code_tree.cpp | 160 ++++---- 13 files changed, 907 insertions(+), 781 deletions(-) diff --git a/src/algorithms/chain_items.hpp b/src/algorithms/chain_items.hpp index 387be2f7806..9511487034d 100644 --- a/src/algorithms/chain_items.hpp +++ b/src/algorithms/chain_items.hpp @@ -107,8 +107,8 @@ class Anchor { /// Get the distance-finding hint information (i.e. "zip code") for /// accelerating distance queries to the start of this anchor, or null if /// none is set. - inline ZipCodeDecoder* start_hint() const { - return start_decoder; + inline ZipCode* start_hint() const { + return start_zip; } /// Get the graph distance from wherever the start hint is positioned back @@ -120,8 +120,8 @@ class Anchor { /// Get the distance-finding hint information (i.e. "zip code") for /// accelerating distance queries from the end of this anchor, or null if /// none is set. - inline ZipCodeDecoder* end_hint() const { - return end_decoder; + inline ZipCode* end_hint() const { + return end_zip; } /// Get the graph distance from wherever the end hint is positioned forward @@ -142,14 +142,14 @@ class Anchor { /// Compose a read start position, graph start position, and match length into an Anchor. /// Can also bring along a distance hint and a seed number. 
- inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCodeDecoder* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_decoder(hint), end_decoder(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { + inline Anchor(size_t read_start, const pos_t& graph_start, size_t length, size_t margin_before, size_t margin_after, int score, size_t seed_number = std::numeric_limits::max(), ZipCode* hint = nullptr, size_t hint_start = 0) : start(read_start), size(length), margin_before(margin_before), margin_after(margin_after), start_pos(graph_start), end_pos(advance(graph_start, length)), points(score), start_seed(seed_number), end_seed(seed_number), start_zip(hint), end_zip(hint), start_offset(hint_start), end_offset(length - hint_start), seed_length(margin_before + length + margin_after) { // Nothing to do! } /// Compose two Anchors into an Anchor that represents coming in through /// the first one and going out through the second, like a tunnel. Useful /// for representing chains as chainable items. 
- inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_decoder(first.start_hint()), end_decoder(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { + inline Anchor(const Anchor& first, const Anchor& last, size_t extra_margin_before, size_t extra_margin_after, int score) : start(first.read_start()), size(last.read_end() - first.read_start()), margin_before(first.margin_before + extra_margin_before), margin_after(last.margin_after + extra_margin_after), start_pos(first.graph_start()), end_pos(last.graph_end()), points(score), start_seed(first.seed_start()), end_seed(last.seed_end()), start_zip(first.start_hint()), end_zip(last.end_hint()), start_offset(first.start_offset), end_offset(last.end_offset), seed_length((first.base_seed_length() + last.base_seed_length()) / 2) { // Nothing to do! 
} @@ -170,8 +170,8 @@ class Anchor { int points; size_t start_seed; size_t end_seed; - ZipCodeDecoder* start_decoder; - ZipCodeDecoder* end_decoder; + ZipCode* start_zip; + ZipCode* end_zip; size_t start_offset; size_t end_offset; size_t seed_length; diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index f240b2f6a1b..c70d26f3cbf 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3757,8 +3757,7 @@ std::vector MinimizerMapper::find_seeds(const std::vector //If the zipcode was saved in the payload seeds.back().zipcode.fill_in_zipcode_from_payload(minimizer.occs[j].payload); } - ZipCodeDecoder* decoder = new ZipCodeDecoder(&seeds.back().zipcode); - seeds.back().zipcode_decoder.reset(decoder); + seeds.back().zipcode.fill_in_full_decoder(); } diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index 502f442543b..117e9b624bf 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -601,15 +601,15 @@ class MinimizerMapper : public AlignerClient { /// How do we convert chain info to an actual seed of the type we are using? /// Also needs to know the hit position, and the minimizer number. - inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip, ZipCodeDecoder* decoder) { - return { hit, minimizer, zip, std::unique_ptr(decoder)}; + inline static Seed chain_info_to_seed(const pos_t& hit, size_t minimizer, const ZipCode& zip) { + return { hit, minimizer, zip}; } /// Convert a collection of seeds to a collection of chaining anchors. - std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const; + std::vector to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const; /// Convert a single seed to a single chaining anchor. 
- static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); + static algorithms::Anchor to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner); /// Convert a read region, and the seeds that that region covers the /// stapled bases of (sorted by stapled base), into a single chaining diff --git a/src/minimizer_mapper_from_chains.cpp b/src/minimizer_mapper_from_chains.cpp index 4da269028eb..00823cb63a0 100644 --- a/src/minimizer_mapper_from_chains.cpp +++ b/src/minimizer_mapper_from_chains.cpp @@ -91,26 +91,26 @@ static pos_t forward_pos(const MinimizerMapper::Seed& seed, const VectorViewget_distance_index_address(0) == - end_seed1.zipcode_decoder->get_distance_index_address(0)); - assert(start_seed2.zipcode_decoder->get_distance_index_address(0) == - end_seed2.zipcode_decoder->get_distance_index_address(0)); + assert(start_seed1.zipcode.get_distance_index_address(0) == + end_seed1.zipcode.get_distance_index_address(0)); + assert(start_seed2.zipcode.get_distance_index_address(0) == + end_seed2.zipcode.get_distance_index_address(0)); #endif - if (start_seed1.zipcode_decoder->get_distance_index_address(0) != - start_seed2.zipcode_decoder->get_distance_index_address(0)) { + if (start_seed1.zipcode.get_distance_index_address(0) != + start_seed2.zipcode.get_distance_index_address(0)) { //If the two ranges are on different connected components return false; } - if (start_seed1.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_SNARL) { + if (start_seed1.zipcode.get_code_type(0) == ZipCode::ROOT_SNARL) { //If this is in a root snarl - if (start_seed1.zipcode_decoder->get_rank_in_snarl(1) != - start_seed2.zipcode_decoder->get_rank_in_snarl(1) + if (start_seed1.zipcode.get_rank_in_snarl(1) != + start_seed2.zipcode.get_rank_in_snarl(1) || - 
start_seed1.zipcode_decoder->get_rank_in_snarl(1) != - end_seed1.zipcode_decoder->get_rank_in_snarl(1) + start_seed1.zipcode.get_rank_in_snarl(1) != + end_seed1.zipcode.get_rank_in_snarl(1) || - start_seed2.zipcode_decoder->get_rank_in_snarl(1) != - end_seed2.zipcode_decoder->get_rank_in_snarl(1)) { + start_seed2.zipcode.get_rank_in_snarl(1) != + end_seed2.zipcode.get_rank_in_snarl(1)) { //If the two ranges are on different children of the snarl return false; } @@ -119,20 +119,20 @@ static bool chain_ranges_are_equivalent(const MinimizerMapper::Seed& start_seed1 //Get the offset used for determining the range //On the top-level chain, node, or child of the top-level snarl auto get_seed_offset = [&] (const MinimizerMapper::Seed& seed) { - if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_CHAIN) { - return seed.zipcode_decoder->get_offset_in_chain(1); - } else if (seed.zipcode_decoder->get_code_type(0) == ZipCode::ROOT_NODE) { - return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(0) - offset(seed.pos) + if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN) { + return seed.zipcode.get_offset_in_chain(1); + } else if (seed.zipcode.get_code_type(0) == ZipCode::ROOT_NODE) { + return is_rev(seed.pos) ? seed.zipcode.get_length(0) - offset(seed.pos) : offset(seed.pos); } else { //Otherwise, this is a top-level snarl, and we've already made sure that it's on the //same child chain/node - if (seed.zipcode_decoder->get_code_type(1) == ZipCode::CHAIN) { + if (seed.zipcode.get_code_type(1) == ZipCode::CHAIN) { //On a chain - return seed.zipcode_decoder->get_offset_in_chain(2); + return seed.zipcode.get_offset_in_chain(2); } else { //On a node - return is_rev(seed.pos) ? seed.zipcode_decoder->get_length(1) - offset(seed.pos) + return is_rev(seed.pos) ? 
seed.zipcode.get_length(1) - offset(seed.pos) : offset(seed.pos); } } @@ -3861,7 +3861,7 @@ std::pair MinimizerMapper::align_sequence_between(const pos_t& l return to_return; } -std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds) const { +std::vector MinimizerMapper::to_anchors(const Alignment& aln, const VectorView& minimizers, std::vector& seeds) const { std::vector to_return; to_return.reserve(seeds.size()); for (size_t i = 0; i < seeds.size(); i++) { @@ -3870,7 +3870,7 @@ std::vector MinimizerMapper::to_anchors(const Alignment& aln return to_return; } -algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, const std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { +algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const VectorView& minimizers, std::vector& seeds, size_t seed_number, const HandleGraph& graph, const Aligner* aligner) { // Turn each seed into the part of its match on the node where the // anchoring end (start for forward-strand minimizers, end for // reverse-strand minimizers) falls. @@ -3928,7 +3928,7 @@ algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, const Vector // TODO: Always make sequence and quality available for scoring! // We're going to score the anchor as the full minimizer, and rely on the margins to stop us from taking overlapping anchors. 
int score = aligner->score_exact_match(aln, read_start - margin_left, length + margin_right); - return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, seed.zipcode_decoder.get(), hint_start); + return algorithms::Anchor(read_start, graph_start, length, margin_left, margin_right, score, seed_number, &(seed.zipcode), hint_start); } algorithms::Anchor MinimizerMapper::to_anchor(const Alignment& aln, size_t read_start, size_t read_end, const std::vector& sorted_seeds, const std::vector& seed_anchors, const std::vector::const_iterator& mismatch_begin, const std::vector::const_iterator& mismatch_end, const HandleGraph& graph, const Aligner* aligner) { diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 44bb04f5e89..31579b53103 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode_decoder->get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode_decoder->get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -426,14 +426,14 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), 
clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode_decoder->max_depth()); + &seed, seed.seed->zipcode.max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, - &seed, seed.seed->zipcode_decoder->max_depth() - 1); + &seed, seed.seed->zipcode.max_depth() - 1); } new_parent = true; @@ -532,7 +532,7 @@ cerr << "Add all seeds to nodes: " << endl; clustering_problem.seed_count_prefix_sum.back(), false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode_decoder->max_depth()); + &seed, seed.seed->zipcode.max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; @@ -637,7 +637,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? 
snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; @@ -711,7 +711,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -721,17 +721,17 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? 
ZipCode::EMPTY - : chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1); + : chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - !chain_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(0) && - !chain_problem->seed->seed->zipcode_decoder->get_is_looping_chain(0); + !chain_problem->seed->seed->zipcode.is_externally_start_start_connected(0) && + !chain_problem->seed->seed->zipcode.is_externally_start_end_connected(0) && + !chain_problem->seed->seed->zipcode.is_externally_end_end_connected(0) && + !chain_problem->seed->seed->zipcode.get_is_looping_chain(0); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -760,32 +760,32 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode_decoder->max_depth() + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode.max_depth() ? 
false - : chain_problem->seed->seed->zipcode_decoder->get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + : chain_problem->seed->seed->zipcode.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); chain_problem->distance_start_right = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); chain_problem->distance_end_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); chain_problem->distance_end_right = snarl_child_is_rev - ? 
chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) - : chain_problem->seed->seed->zipcode_decoder->get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); #ifdef DEBUG_CLUSTER - cerr << "For child type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth) << endl; - cerr << "For parent type " << chain_problem->seed->seed->zipcode_decoder->get_code_type(chain_problem->zipcode_depth-1) << endl; - cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " - << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode_decoder->get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; + cerr << "For child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "For parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << 
endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " @@ -1443,15 +1443,15 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(Clust //Get the distances between the two sides of the child size_t distance_left_left = - child_problem->seed->seed->zipcode_decoder->is_externally_start_start_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_start_start_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_left_right = - child_problem->seed->seed->zipcode_decoder->is_externally_start_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_start_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); size_t distance_right_right = - child_problem->seed->seed->zipcode_decoder->is_externally_end_end_connected(child_problem->zipcode_depth) + child_problem->seed->seed->zipcode.is_externally_end_end_connected(child_problem->zipcode_depth) ? 0 : std::numeric_limits::max(); if (distance_left_left == std::numeric_limits::max() && @@ -1597,7 +1597,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. 
Otherwise, compare the children of the snarl - if (snarl_problem->seed->seed->zipcode_decoder->get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { + if (snarl_problem->seed->seed->zipcode.get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters @@ -1811,7 +1811,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; child1.prefix_sum = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; - child1.has_chain_values = true; + child1.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index d879381a4e8..22f8478e6ff 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -70,42 +70,23 @@ class SnarlDistanceIndexClusterer { pos_t pos; size_t source; // Source minimizer. ZipCode zipcode; //zipcode for distance information, optionally stored in the minimizer payload - //TODO: unique_ptr? 
- std::unique_ptr zipcode_decoder; //The decoder for the zipcode Seed() = default; Seed(pos_t pos, size_t source, ZipCode zipcode) : pos(pos), source(source), zipcode(zipcode) { - ZipCodeDecoder* decoder = new ZipCodeDecoder(&this->zipcode); - zipcode_decoder.reset(decoder); - zipcode_decoder->fill_in_full_decoder(); - } - Seed(pos_t pos, size_t source, ZipCode zipcode, std::unique_ptr zipcode_decoder) : - pos(pos), source(source), zipcode(zipcode), zipcode_decoder(std::move(zipcode_decoder)){ - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } + this->zipcode.fill_in_full_decoder(); } //Move constructor Seed (Seed&& other) : pos(std::move(other.pos)), source(std::move(other.source)), - zipcode(std::move(other.zipcode)), - zipcode_decoder(std::move(other.zipcode_decoder)) { - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } - } + zipcode(std::move(other.zipcode)){} //Move assignment operator Seed& operator=(Seed&& other) { pos = std::move(other.pos); source = std::move(other.source); zipcode = std::move(other.zipcode); - zipcode_decoder = std::move(other.zipcode_decoder); - if (zipcode_decoder) { - zipcode_decoder->zipcode = &zipcode; - } return *this; } }; @@ -121,9 +102,6 @@ class SnarlDistanceIndexClusterer { //TODO: I think I can skip the zipcode now since I have the payload MIPayload payload; - //TODO: This doesn't actually get used but I'll use it if I use the zipcodes properly - //std::unique_ptr zipcode_decoder; - //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds //For a seed in a chain, distance_left is the left of the chain, right is the distance @@ -316,22 +294,18 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = seed->seed->zipcode_decoder->get_is_looping_chain(zipcode_depth); - if (zipcode_depth == 0 || is_looping_chain || 
seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true) != 0) { - node_length = distance_index.chain_minimum_length(containing_net_handle); - } else { - node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); - } - chain_component_end = seed->seed->zipcode_decoder->get_last_chain_component(zipcode_depth, true); - is_reversed_in_parent = seed->seed->zipcode_decoder->get_is_reversed_in_parent(zipcode_depth); + is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); + node_length = distance_index.chain_minimum_length(containing_net_handle); + chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); + is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = seed->seed->zipcode_decoder->get_length(zipcode_depth, &distance_index); + node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = seed->seed->zipcode_decoder->get_chain_component(zipcode_depth); + chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); chain_component_end = node_length == std::numeric_limits::max() ? 
chain_component_start+1 : chain_component_start; prefix_sum_value = SnarlDistanceIndex::sum( diff --git a/src/subcommand/zipcode_main.cpp b/src/subcommand/zipcode_main.cpp index a4649cb5808..4e61724c04a 100644 --- a/src/subcommand/zipcode_main.cpp +++ b/src/subcommand/zipcode_main.cpp @@ -260,14 +260,14 @@ int main_zipcode(int argc, char** argv) { //Get zip codes ZipCode zip1; zip1.fill_in_zipcode(*distance_index, pos1); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(*distance_index, pos2); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); + zip2.fill_in_full_decoder(); //Time finding distance with the zip codes std::chrono::time_point start = std::chrono::system_clock::now(); - size_t zip_distance = ZipCode::minimum_distance_between(decoder1, pos1, decoder2, pos2, *distance_index); + size_t zip_distance = ZipCode::minimum_distance_between(zip1, pos1, zip2, pos2, *distance_index); std::chrono::time_point end = std::chrono::system_clock::now(); std::chrono::duration elapsed_seconds = end-start; elapsed_seconds_zip.emplace_back(elapsed_seconds.count()); diff --git a/src/unittest/snarl_seed_clusterer.cpp b/src/unittest/snarl_seed_clusterer.cpp index cc19c928773..ce7dde12972 100644 --- a/src/unittest/snarl_seed_clusterer.cpp +++ b/src/unittest/snarl_seed_clusterer.cpp @@ -44,6 +44,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -87,6 +88,7 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -121,6 +123,7 @@ namespace unittest { for (auto& pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -158,6 +161,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 15); @@ -207,6 +211,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -224,6 +229,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -241,6 +247,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -258,15 +265,18 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -283,15 +293,18 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = 
make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -345,6 +358,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -362,6 +376,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -379,6 +394,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -396,15 +412,18 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(3, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(5, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 5, 5); @@ -421,15 +440,18 @@ namespace unittest { pos_t pos = make_pos_t(5, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = 
make_pos_t(6, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode1}); pos = make_pos_t(1, false, 0); ZipCode zipcode2; zipcode2.fill_in_zipcode(dist_index, pos); + zipcode2.fill_in_full_decoder(); seeds[1].push_back({ pos, 0, zipcode2}); vector> clusters = clusterer.cluster_seeds(seeds, 10, 10); @@ -477,6 +499,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -496,6 +519,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -561,6 +585,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -576,6 +601,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 1); @@ -591,6 +617,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -606,6 +633,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 2); @@ -621,6 +649,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -636,6 +665,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -653,6 +683,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 8); @@ -668,6 +699,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -742,6 +774,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -768,6 +801,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -790,6 +824,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -842,6 +877,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -949,6 +985,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -967,6 +1004,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -986,6 +1024,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 9); @@ -1004,6 +1043,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -1022,6 +1062,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 11); @@ -1068,6 +1109,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1085,6 +1127,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1102,11 +1145,13 @@ namespace unittest { pos_t pos = make_pos_t(2, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[0].push_back({ pos, 0, zipcode}); pos = make_pos_t(4, false, 0); ZipCode zipcode1; zipcode1.fill_in_zipcode(dist_index, pos); + zipcode1.fill_in_full_decoder(); 
seeds[1].push_back({ pos, 0, zipcode1}); vector> clusters = clusterer.cluster_seeds(seeds, 3, 3); @@ -1123,6 +1168,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1175,6 +1221,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1192,6 +1239,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1208,6 +1256,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1225,6 +1274,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1287,6 +1337,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1304,6 +1355,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1350,6 +1402,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1357,6 +1410,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds1.push_back({ pos, 0, zipcode}); } @@ -1383,6 +1437,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1390,6 +1445,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1416,6 +1472,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1423,6 +1480,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1450,6 +1508,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1457,6 +1516,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -1519,6 +1579,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1536,6 +1597,7 @@ namespace unittest { for (pos_t pos : pos_ts) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1591,6 +1653,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds.push_back({ pos, 0, zipcode}); } @@ -1606,6 +1669,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1620,6 +1684,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1667,6 +1732,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1715,6 +1781,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -1731,6 +1798,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1775,6 +1843,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 20); @@ -1791,6 +1860,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -1869,6 +1939,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0,zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1921,6 +1992,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); 
seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -1953,6 +2025,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector& seeds1 = all_seeds[1]; @@ -1966,6 +2039,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2004,6 +2078,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2046,6 +2121,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2103,6 +2179,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2122,6 +2199,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2138,6 +2216,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 10); @@ -2156,6 +2235,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = 
clusterer.cluster_seeds(seeds, 5); @@ -2226,6 +2306,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2243,6 +2324,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2260,6 +2342,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2278,6 +2361,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({8, 12}); @@ -2286,6 +2370,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } @@ -2308,6 +2393,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2325,6 +2411,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2390,6 +2477,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 5); @@ -2406,6 +2494,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, 
zipcode}); } @@ -2423,6 +2512,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2440,7 +2530,8 @@ namespace unittest { for (id_t n : ids) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos);; + zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2448,7 +2539,8 @@ namespace unittest { for (id_t n : ids1) { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, pos);; + zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2479,6 +2571,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector ids1({5, 13}); @@ -2487,6 +2580,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds1.push_back({ pos, 0, zipcode}); } //Clusters are @@ -2554,6 +2648,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 7); @@ -2572,6 +2667,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2592,6 +2688,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } 
vector clusters = clusterer.cluster_seeds(seeds, 4); @@ -2617,6 +2714,7 @@ namespace unittest { for (pos_t pos : pos_ts[read_num]){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -2645,6 +2743,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2702,6 +2801,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 3); @@ -2720,6 +2820,7 @@ namespace unittest { for (pos_t pos : pos_ts){ ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } vector clusters = clusterer.cluster_seeds(seeds, 6); @@ -2735,6 +2836,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2789,6 +2891,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2804,6 +2907,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2819,6 +2923,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2835,6 +2940,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2874,6 +2980,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2919,6 +3026,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2935,6 +3043,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2951,6 +3060,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2987,6 +3097,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({pos, 0, zipcode}); } @@ -3031,6 +3142,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3047,6 +3159,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3062,6 +3175,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3077,6 +3191,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3118,6 +3233,7 @@ 
namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3134,6 +3250,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3149,6 +3266,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3164,6 +3282,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -3202,6 +3321,7 @@ namespace unittest { // for (pos_t pos : pos_ts) { // ZipCode zipcode; // zipcode.fill_in_zipcode(dist_index, pos); + // zipcode.fill_in_full_decoder(); // seeds.push_back({ pos, 0, zipcode}); // } // vector clusters = clusterer.cluster_seeds(seeds, read_lim); @@ -3252,6 +3372,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); seeds[read_num].push_back({ pos, 0, zipcode}); } } @@ -3319,6 +3440,7 @@ namespace unittest { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, pos); + zipcode.fill_in_full_decoder(); all_seeds[read].push_back({ pos, 0, zipcode}); } diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index ed8b83e6761..d72de04d546 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -45,22 +45,21 @@ using namespace std; SECTION("decoder") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 1); - REQUIRE(decoder.decoder.front().is_chain == 1); - 
REQUIRE(decoder.decoder.front().offset == 0); + REQUIRE(zipcode.decoder_length() == 1); + REQUIRE(zipcode.decoder.front().is_chain == 1); + REQUIRE(zipcode.decoder.front().offset == 0); } SECTION("decoded code") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); - ZipCodeDecoder decoder(&zipcode); - - REQUIRE(decoder.get_length(0) == distance_index.minimum_length(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_NODE); + REQUIRE(zipcode.get_length(0) == distance_index.minimum_length(chain1)); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_NODE); } SECTION("n1 as payload") { ZipCode zipcode; @@ -75,9 +74,9 @@ using namespace std; SECTION("Distances within one node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(ZipCode::minimum_distance_between(decoder, make_pos_t(n1->id(), false, 0), - decoder, make_pos_t(n1->id(), false, 3), + zipcode.fill_in_full_decoder(); + REQUIRE(ZipCode::minimum_distance_between(zipcode, make_pos_t(n1->id(), false, 0), + zipcode, make_pos_t(n1->id(), false, 3), distance_index) == 3); } @@ -111,14 +110,14 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the 
chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -135,7 +134,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -159,34 +158,34 @@ using namespace std; SECTION ("decoded zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Next is the node code - REQUIRE(decoder.get_code_type( 1) == ZipCode::NODE); - REQUIRE(decoder.get_length( 1) == distance_index.minimum_length(node1)); - REQUIRE(decoder.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(zipcode.get_code_type( 1) == ZipCode::NODE); + REQUIRE(zipcode.get_length( 1) == distance_index.minimum_length(node1)); + REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); + REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node in simple snarl") { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -203,7 +202,7 @@ using namespace std; //Next is the snarl code //1 for a regular snarl - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -233,7 +232,7 @@ using namespace std; //Next is the chain code //rank of the chain in the snarl - REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent( distance_index.get_node_net_handle(n4->id())))); @@ -254,78 +253,78 @@ using namespace std; SECTION ("decoded zip code for node in simple snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl36 = distance_index.get_parent(chain4); net_handle_t chain1 = 
distance_index.get_parent(snarl36); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //values for the snarl - REQUIRE(decoder.get_length(1) == distance_index.minimum_length(snarl36)); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 5 : 6)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(snarl36)); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 5 : 6)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), distance_index.flip(chain4)) != 0; //values for the chain - REQUIRE(decoder.get_length(2) == distance_index.minimum_length(chain4)); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == distance_index.minimum_length(chain4)); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + 
zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - ZipCodeDecoder decoder3(&zip3); - ZipCodeDecoder decoder4(&zip4); - ZipCodeDecoder decoder5(&zip5); - ZipCodeDecoder decoder6(&zip6); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 2), - decoder1, make_pos_t(n1->id(), true, 2), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 2), + zip1, make_pos_t(n1->id(), true, 2), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 6); - REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 1), + 
REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 1), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 7); } @@ -426,11 +425,11 @@ using namespace std; SECTION ("zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -450,7 +449,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -477,31 +476,31 @@ using namespace std; SECTION ("decode zip code for node on top-level chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); net_handle_t chain1 = distance_index.get_parent(node1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, 
&distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); - REQUIRE(decoder.get_length(1) == distance_index.minimum_length(node1)); - REQUIRE(decoder.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); - REQUIRE(decoder.get_code_type(1) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + REQUIRE(zipcode.get_length(1) == distance_index.minimum_length(node1)); + REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); } SECTION ("zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 4); + REQUIRE(zipcode.decoder_length() == 4); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); @@ -519,7 +518,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -550,7 +549,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Next is the chain code - 
REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -566,7 +565,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the node code - REQUIRE(decoder.decoder[3] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(true, value_and_index.second)); //Offset of the node in the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n2->id()))+1); @@ -591,45 +590,45 @@ using namespace std; SECTION ("decode zip code for node on in nested chain") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t chain2 = distance_index.get_parent(node2); net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(decoder.get_length(1) == 0); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == 0); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); bool is_rev = distance_index.distance_in_parent(snarl1, distance_index.get_bound(snarl1, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(decoder.get_length(2) == 3); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == 3); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); //Node at depth 3 - REQUIRE(decoder.get_length(3) == 1); - REQUIRE(decoder.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); - REQUIRE(decoder.get_code_type(3) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); + REQUIRE(zipcode.get_length(3) == 1); + REQUIRE(zipcode.get_offset_in_chain(3) == distance_index.get_prefix_sum_value(node2)); + REQUIRE(zipcode.get_code_type(3) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); } SECTION ("zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 7); + zipcode.fill_in_full_decoder(); + REQUIRE(zipcode.decoder_length() == 7); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -648,7 +647,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 1-8 - 
REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //1 for regular snarl tag value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -678,7 +677,7 @@ using namespace std; distance_index.flip(distance_index.canonical(chain2))) != 0; REQUIRE(value_and_index.first == is_rev); //Next is the chain code for chain 2-7 - REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent( @@ -693,7 +692,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Next is the regular snarl code for snarl 2-7 - REQUIRE(decoder.decoder[3] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[3] == ZipCode::decoder_t(false, value_and_index.second)); //1 as tag for regular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -722,7 +721,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_chain_component(distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, true)))); //Chain code for chain 3-5 - REQUIRE(decoder.decoder[4] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[4] == ZipCode::decoder_t(true, value_and_index.second)); //Rank in parent value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))) ); @@ -736,7 +735,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //REgular snarl code for 
snarl 3-5 - REQUIRE(decoder.decoder[5] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[5] == ZipCode::decoder_t(false, value_and_index.second)); value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 1); @@ -765,7 +764,7 @@ using namespace std; REQUIRE(value_and_index.first == is_rev); //Chain code for node 4 - REQUIRE(decoder.decoder[6] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[6] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_node_net_handle(n4->id()))) ; @@ -787,6 +786,7 @@ using namespace std; SECTION ("decoded zip code for more deeply nested node") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); net_handle_t snarl3 = distance_index.get_parent(chain4); @@ -796,119 +796,118 @@ using namespace std; net_handle_t snarl1 = distance_index.get_parent(chain2); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl at depth 1 - REQUIRE(decoder.get_length(1) == 0); - REQUIRE(decoder.get_offset_in_chain(1) == (chain_is_reversed ? 4 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(1) == 0); + REQUIRE(zipcode.get_offset_in_chain(1) == (chain_is_reversed ? 
4 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::REGULAR_SNARL); net_handle_t snarl = distance_index.get_parent(chain2); bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain2))) != 0; //Chain at depth 2 - REQUIRE(decoder.get_is_reversed_in_parent(2) == is_rev); - REQUIRE(decoder.get_length(2) == 3); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); + REQUIRE(zipcode.get_length(2) == 3); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); //Snarl at depth 3 - REQUIRE(decoder.get_length(3) == 1); - REQUIRE(decoder.get_offset_in_chain(3) == 1); - REQUIRE(decoder.get_code_type(3) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(3) == 1); + REQUIRE(zipcode.get_offset_in_chain(3) == 1); + REQUIRE(zipcode.get_code_type(3) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain3); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain3))) != 0; //Chain at depth 4 - REQUIRE(decoder.get_is_reversed_in_parent(4) == is_rev); - REQUIRE(decoder.get_length(4) == distance_index.minimum_length(chain3)); - REQUIRE(decoder.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(decoder.get_code_type(4) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(4) == is_rev); + REQUIRE(zipcode.get_length(4) == distance_index.minimum_length(chain3)); + REQUIRE(zipcode.get_rank_in_snarl(4) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(zipcode.get_code_type(4) == ZipCode::CHAIN); //Snarl3 at depth 5 - REQUIRE(decoder.get_length(5) == 0); - REQUIRE(decoder.get_offset_in_chain(5) == 
(distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); - REQUIRE(decoder.get_code_type(5) == ZipCode::REGULAR_SNARL); + REQUIRE(zipcode.get_length(5) == 0); + REQUIRE(zipcode.get_offset_in_chain(5) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 3 : 1)); + REQUIRE(zipcode.get_code_type(5) == ZipCode::REGULAR_SNARL); snarl = distance_index.get_parent(chain4); is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(chain4))) != 0; //node/chain at depth 6 - REQUIRE(decoder.get_is_reversed_in_parent(6) == is_rev); - REQUIRE(decoder.get_length(6) == 4); - REQUIRE(decoder.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); - REQUIRE(decoder.get_code_type(6) == ZipCode::CHAIN); + REQUIRE(zipcode.get_is_reversed_in_parent(6) == is_rev); + REQUIRE(zipcode.get_length(6) == 4); + REQUIRE(zipcode.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); + REQUIRE(zipcode.get_code_type(6) == ZipCode::CHAIN); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); + zip7.fill_in_full_decoder(); ZipCode zip8; zip8.fill_in_zipcode(distance_index, 
make_pos_t(n8->id(), 0, false)); + zip8.fill_in_full_decoder(); - ZipCodeDecoder decoder1 (&zip1); - ZipCodeDecoder decoder2 (&zip2); - ZipCodeDecoder decoder3 (&zip3); - ZipCodeDecoder decoder4 (&zip4); - ZipCodeDecoder decoder5 (&zip5); - ZipCodeDecoder decoder6 (&zip6); - ZipCodeDecoder decoder7 (&zip7); - ZipCodeDecoder decoder8 (&zip8); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder8, make_pos_t(n8->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip8, make_pos_t(n8->id(), false, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == 
std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 0), - decoder8, make_pos_t(n8->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 0), + zip8, make_pos_t(n8->id(), true, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder5, make_pos_t(n5->id(), false, 0), - decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip5, make_pos_t(n5->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder7, make_pos_t(n7->id(), true, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip7, make_pos_t(n7->id(), true, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 2); } @@ -1048,11 +1047,11 @@ using namespace std; SECTION ("zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1071,7 +1070,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Irregular snarl code for snarl 1-4 - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(false, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(false, value_and_index.second)); //0 as tag for irregular snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == 2); @@ -1119,7 +1118,7 @@ using namespace std; 
//REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 1 : 0)); //Node 3 as a chain - REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //Rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1138,105 +1137,108 @@ using namespace std; SECTION ("decode zip code for node in irregular snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); net_handle_t snarl1 = distance_index.get_parent(chain3); net_handle_t chain1 = distance_index.get_parent(snarl1); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(chain1)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_CHAIN); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_CHAIN); //Snarl1 at depth 1 - REQUIRE(decoder.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 6 : 3)); - REQUIRE(decoder.get_code_type(1) == ZipCode::CYCLIC_SNARL); + REQUIRE(zipcode.get_offset_in_chain(1, &distance_index) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
6 : 3)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CYCLIC_SNARL); //chain3 at depth 3 - REQUIRE(decoder.get_length(2) == 1); - REQUIRE(decoder.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); - REQUIRE(decoder.get_code_type(2) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(2) == 1); + REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); bool chain_is_rev = distance_index.is_reversed_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))); //node1 to left side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); //Node 1 to right side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, false) == 2); //node4 to left side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, true) == std::numeric_limits::max()); //Node 4 to right side of node 3 - REQUIRE(decoder.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); + REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; 
zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); + zip7.fill_in_full_decoder(); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - ZipCodeDecoder decoder3(&zip3); - ZipCodeDecoder decoder4(&zip4); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder1, make_pos_t(n1->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip1, make_pos_t(n1->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 3); //Shouldn't take the loop in the chain - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), - decoder1, make_pos_t(n1->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), + zip1, make_pos_t(n1->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 1), - 
decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 1), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 5); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder2, make_pos_t(n2->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip2, make_pos_t(n2->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), false, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), false, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 2); - REQUIRE(ZipCode::minimum_distance_between(decoder3, make_pos_t(n3->id(), true, 0), - decoder2, make_pos_t(n2->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip3, make_pos_t(n3->id(), true, 0), + zip2, make_pos_t(n2->id(), true, 0), distance_index) == 1); - REQUIRE(ZipCode::minimum_distance_between(decoder4, make_pos_t(n4->id(), false, 1), - decoder4, make_pos_t(n4->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip4, make_pos_t(n4->id(), false, 1), + zip4, make_pos_t(n4->id(), false, 0), distance_index) == std::numeric_limits::max()); } @@ -1341,11 +1343,11 @@ using namespace std; SECTION ("zip code for node in top-level snarl") { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1356,7 +1358,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is node 1 as a chain - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n1->id())))); @@ -1367,32 +1369,32 @@ using namespace std; SECTION ("decoded zip code for node in top-level snarl") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); net_handle_t root_snarl = distance_index.get_parent(chain1); //Root snarl - REQUIRE(distance_index.canonical(decoder.get_net_handle(0, &distance_index)) == + REQUIRE(distance_index.canonical(zipcode.get_net_handle(0, &distance_index)) == distance_index.canonical(distance_index.get_parent(chain1))); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); //Chain1 at depth 1 - REQUIRE(decoder.get_length(1) == 3); - REQUIRE(decoder.get_rank_in_snarl(1) == 
distance_index.get_rank_in_parent(chain1)); - REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(1) == 3); + REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); } SECTION ("zip code for node in chain in top-level snarl") { net_handle_t node1 = distance_index.get_node_net_handle(n3->id()); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 3); + REQUIRE(zipcode.decoder_length() == 3); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(false, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(false, (size_t)0)); //0 to indicate that it's a top-level snarl pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); @@ -1403,7 +1405,7 @@ using namespace std; REQUIRE(value_and_index.first == distance_index.get_connected_component_number(distance_index.get_node_net_handle(n1->id()))); //Next is chain 2-3 - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_rank_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id())))); @@ -1415,7 +1417,7 @@ using namespace std; REQUIRE(value_and_index.first == 0); //Node 3 - REQUIRE(decoder.decoder[2] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[2] == ZipCode::decoder_t(true, value_and_index.second)); //rank in snarl value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)+1); @@ -1430,67 +1432,69 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); //Root snarl - REQUIRE(decoder.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); - REQUIRE(decoder.get_code_type(0) == ZipCode::ROOT_SNARL); + REQUIRE(zipcode.get_distance_index_address(0) == distance_index.get_connected_component_number(node3)); + REQUIRE(zipcode.get_code_type(0) == ZipCode::ROOT_SNARL); //chain2 at depth 1 - REQUIRE(decoder.get_length(1) == 2); - REQUIRE(decoder.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); - REQUIRE(decoder.get_code_type(1) == ZipCode::CHAIN); + REQUIRE(zipcode.get_length(1) == 2); + REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain2)); + REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); //node3 at depth 2 - REQUIRE(decoder.get_length(2) == 1); - REQUIRE(decoder.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)); - REQUIRE(decoder.get_code_type(2) == ZipCode::NODE); - REQUIRE(decoder.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); + REQUIRE(zipcode.get_length(2) == 1); + REQUIRE(zipcode.get_offset_in_chain(2) == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)); + REQUIRE(zipcode.get_code_type(2) == ZipCode::NODE); + REQUIRE(zipcode.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + zip3.fill_in_full_decoder(); ZipCode zip4; zip4.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + zip4.fill_in_full_decoder(); ZipCode zip5; zip5.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zip5.fill_in_full_decoder(); ZipCode zip6; zip6.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); + zip6.fill_in_full_decoder(); ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); - ZipCodeDecoder zip_decoder1(&zip1); - ZipCodeDecoder zip_decoder2(&zip2); - ZipCodeDecoder zip_decoder3(&zip3); - ZipCodeDecoder zip_decoder6(&zip6); - ZipCodeDecoder zip_decoder7(&zip7); - - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder2, make_pos_t(n2->id(), false, 0), + zip7.fill_in_full_decoder(); + + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), true, 0), - zip_decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), true, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder3, make_pos_t(n3->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), 
false, 0), distance_index) == 4); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder3, make_pos_t(n3->id(), true, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip3, make_pos_t(n3->id(), true, 0), distance_index) == 8); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder1, make_pos_t(n1->id(), false, 0), - zip_decoder6, make_pos_t(n6->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip6, make_pos_t(n6->id(), false, 0), distance_index) == std::numeric_limits::max()); - REQUIRE(ZipCode::minimum_distance_between(zip_decoder6, make_pos_t(n6->id(), false, 0), - zip_decoder7, make_pos_t(n7->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip6, make_pos_t(n6->id(), false, 0), + zip7, make_pos_t(n7->id(), false, 0), distance_index) == 1); } @@ -1597,14 +1601,14 @@ using namespace std; net_handle_t grandparent = distance_index.get_parent(parent); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + zipcode.fill_in_full_decoder(); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); //1st value is 1 to indicate that it's a chain pair value_and_index = zipcode.zipcode.get_value_and_next_index(0); REQUIRE(value_and_index.first == 1); - REQUIRE(decoder.decoder[0] == ZipCodeDecoder::decoder_t(true, (size_t)0)); + REQUIRE(zipcode.decoder[0] == ZipCode::decoder_t(true, (size_t)0)); //Second value is the connected component number of the chain value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); @@ -1621,7 +1625,7 @@ using namespace std; //Next is the node code //Third value is the prefix sum of the node - REQUIRE(decoder.decoder[1] == ZipCodeDecoder::decoder_t(true, value_and_index.second)); + REQUIRE(zipcode.decoder[1] == ZipCode::decoder_t(true, value_and_index.second)); 
value_and_index = zipcode.zipcode.get_value_and_next_index(value_and_index.second); REQUIRE(value_and_index.first == distance_index.get_prefix_sum_value(distance_index.get_node_net_handle(n1->id()))+1); @@ -1646,8 +1650,10 @@ using namespace std; SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), false, 0)); + zip1.fill_in_full_decoder(); ZipCode zip2; zip2.fill_in_zipcode(distance_index, make_pos_t(n2->id(), false, 0)); + zip2.fill_in_full_decoder(); ZipCode zip3; zip3.fill_in_zipcode(distance_index, make_pos_t(n3->id(), false, 0)); ZipCode zip4; @@ -1659,10 +1665,8 @@ using namespace std; ZipCode zip7; zip7.fill_in_zipcode(distance_index, make_pos_t(n7->id(), false, 0)); - ZipCodeDecoder decoder1(&zip1); - ZipCodeDecoder decoder2(&zip2); - REQUIRE(ZipCode::minimum_distance_between(decoder1, make_pos_t(n1->id(), false, 0), - decoder2, make_pos_t(n2->id(), false, 0), + REQUIRE(ZipCode::minimum_distance_between(zip1, make_pos_t(n1->id(), false, 0), + zip2, make_pos_t(n2->id(), false, 0), distance_index) == 3); @@ -1792,30 +1796,30 @@ using namespace std; SECTION( "node2" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); net_handle_t parent = distance_index.get_parent(node2); net_handle_t bound = distance_index.get_bound(parent, true, false); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(decoder.decoder_length() == 2); + REQUIRE(zipcode.decoder_length() == 2); - REQUIRE(distance_index.minimum_length(node2) == decoder.get_length(1)); - REQUIRE(decoder.get_chain_component(1) == distance_index.get_chain_component(node2)); - REQUIRE(decoder.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); - REQUIRE(decoder.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); - REQUIRE(decoder.get_is_looping_chain(0)); + 
REQUIRE(distance_index.minimum_length(node2) == zipcode.get_length(1)); + REQUIRE(zipcode.get_chain_component(1) == distance_index.get_chain_component(node2)); + REQUIRE(zipcode.get_last_chain_component(0, true) == distance_index.get_chain_component(bound, true)); + REQUIRE(zipcode.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); + REQUIRE(zipcode.get_is_looping_chain(0)); } SECTION( "node5" ) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); + zipcode.fill_in_full_decoder(); net_handle_t node = distance_index.get_node_net_handle(n5->id()); net_handle_t parent = distance_index.get_parent(node); net_handle_t bound = distance_index.get_bound(parent, true, false); - ZipCodeDecoder decoder(&zipcode); - REQUIRE(distance_index.minimum_length(node) == decoder.get_length(decoder.max_depth())); + REQUIRE(distance_index.minimum_length(node) == zipcode.get_length(zipcode.max_depth())); } } TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { @@ -1848,14 +1852,14 @@ using namespace std; SECTION( "Check connectivity" ) { ZipCode zipcode; zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); - ZipCodeDecoder decoder(&zipcode); + zipcode.fill_in_full_decoder(); - REQUIRE(decoder.get_length(1) == 1); + REQUIRE(zipcode.get_length(1) == 1); if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { - REQUIRE(decoder.is_externally_end_end_connected(0)); + REQUIRE(zipcode.is_externally_end_end_connected(0)); } else { - REQUIRE(decoder.is_externally_start_start_connected(0)); + REQUIRE(zipcode.is_externally_start_start_connected(0)); } } diff --git a/src/unittest/zip_code_tree.cpp b/src/unittest/zip_code_tree.cpp index 409f386a50d..3e3765948df 100644 --- a/src/unittest/zip_code_tree.cpp +++ b/src/unittest/zip_code_tree.cpp @@ -40,6 +40,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -84,6 +85,7 @@ namespace unittest { pos_t pos = make_pos_t(n, false, 0); ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -154,6 +156,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -264,6 +267,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -386,6 +390,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -432,6 +437,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -494,6 +500,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -578,6 +585,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -627,6 +635,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -760,6 +769,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -834,6 
+844,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -871,6 +882,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -908,6 +920,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -944,6 +957,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -978,6 +992,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1003,6 +1018,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1029,6 +1045,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1055,6 +1072,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1081,6 +1099,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1138,6 +1157,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1195,6 +1215,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1250,6 +1271,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1351,6 +1373,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -1415,6 +1438,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1506,6 +1530,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1538,6 +1563,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1568,6 +1594,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1593,6 +1620,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1620,6 +1648,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; 
zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1647,6 +1676,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1673,6 +1703,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1775,6 +1806,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1806,6 +1838,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1835,6 +1868,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1866,6 +1900,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -1923,6 +1958,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -1993,6 +2029,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2063,6 +2100,7 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + 
zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2106,6 +2144,7 @@ namespace unittest { pos_t pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, i, zipcode}); minimizers.emplace_back(); @@ -2184,6 +2223,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2238,6 +2278,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); minimizers.emplace_back(); @@ -2282,6 +2323,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2324,6 +2366,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2373,6 +2416,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2423,6 +2467,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); minimizers.emplace_back(); @@ -2488,6 +2533,7 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2552,6 +2598,7 @@ namespace unittest { for (pos_t pos : 
positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2572,6 +2619,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2614,6 +2662,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2633,6 +2682,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2677,6 +2727,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2696,6 +2747,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2715,6 +2767,7 @@ namespace unittest { for (pos_t pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, 0, zipcode}); } @@ -2779,6 +2832,7 @@ namespace unittest { auto pos = positions[i]; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, i, zipcode}); minimizers.emplace_back(); @@ -2824,6 +2878,7 @@ namespace unittest { for (auto pos : positions) { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos.first); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos.first, pos.second, zipcode}); } distance_index.for_each_child(distance_index.get_root(), [&](net_handle_t child) { @@ -2890,6 +2945,7 @@ namespace unittest { 
ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, pos); + zipcode.fill_in_full_decoder(); seeds.push_back({ pos, (size_t)j, zipcode}); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 7f45122fbff..a06d61c421f 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -137,20 +137,13 @@ void ZipCode::from_vector(const std::vector& values) { zipcode.from_vector(values); } -ZipCodeDecoder::ZipCodeDecoder(const ZipCode* zipcode) : - zipcode(zipcode), decoder(0), finished_decoding(false) { - if (zipcode != nullptr) { - decoder.reserve(zipcode->byte_count() / 4); - fill_in_full_decoder(); - } -} -void ZipCodeDecoder::fill_in_full_decoder() { - if (zipcode->byte_count() == 0 || finished_decoding) { +void ZipCode::fill_in_full_decoder() { + if (byte_count() == 0 || finished_decoding) { //If the zipcode is empty return; } - decoder.reserve(zipcode->byte_count() / 4); + decoder.reserve(byte_count() / 4); bool done=false; while (!done) { done = fill_in_next_decoder(); @@ -158,7 +151,7 @@ void ZipCodeDecoder::fill_in_full_decoder() { finished_decoding = true; } -bool ZipCodeDecoder::fill_in_next_decoder() { +bool ZipCode::fill_in_next_decoder() { #ifdef DEBUG_ZIPCODE cerr << "Decode one more thing in the zipcode. Currently decoded " << decoder_length() << " things" << endl; #endif @@ -179,7 +172,7 @@ bool ZipCodeDecoder::fill_in_next_decoder() { if (zip_length == 0) { //If there is nothing in the decoder yet, then the first thing will start at 0 for (size_t i = 0 ; i <= ZipCode::ROOT_IS_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //Is the root a chain/node? @@ -202,7 +195,7 @@ cerr << "\tadding the root, which is a " << (previous_is_chain ? 
"chain or node" assert(ZipCode::ROOT_CHAIN_SIZE==ZipCode::ROOT_NODE_SIZE);//This is true for now but all this will change if it isn't for (size_t i = 0 ; i < ZipCode::ROOT_NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_index == std::numeric_limits::max()) { //If the zip code ends here (after the length), then this was a node and we're done @@ -218,7 +211,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //If it's a node, then there are three remaining things in the index //If it were a snarl, then there are more than three things for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } @@ -233,7 +226,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } else { //Otherwise, the top-level thing is a snarl and the next thing is a chain for (size_t i = 0 ; i < ZipCode::ROOT_SNARL_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -265,7 +258,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //chain size_t check_zip_index = zip_index; for (size_t i = 0 ; i < std::min(ZipCode::CHAIN_SIZE, ZipCode::NODE_SIZE) ; i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //If the zipcode ends after a chain if (check_zip_index == std::numeric_limits::max()) { @@ -278,7 +271,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Now check 
if it was actually a real node for (size_t i = 0 ; i < std::max(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE) - std::min(ZipCode::NODE_SIZE, ZipCode::CHAIN_SIZE); i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //This might be a node that is a child of the chain, in which case there is one @@ -298,7 +291,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Otherwise, the last thing was a chain //Get to the end of the chain for (size_t i = 0 ; i < ZipCode::CHAIN_SIZE ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //zip_index is now the start of the current thing that we want to add - the thing after the chain @@ -313,7 +306,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //Check if the current thing is a node check_zip_index = zip_index; for (size_t i = 0 ; i < ZipCode::NODE_SIZE ; i++) { - check_zip_index = zipcode->zipcode.get_value_and_next_index(check_zip_index).second; + check_zip_index = zipcode.get_value_and_next_index(check_zip_index).second; } //Return the start of this thing, and true if it was a node @@ -329,7 +322,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //The regular/irregular snarl tag for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { @@ -338,7 +331,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; #endif //Regular snarl, so 2 remaining things in the code for (size_t i = 0 ; i < ZipCode::REGULAR_SNARL_SIZE - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - 
std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -350,7 +343,7 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; //is a top-level irregular snarl. Otherwise a normal irregular snarl size_t code_size = ZipCode::IRREGULAR_SNARL_SIZE; for (size_t i = 0 ; i < code_size - ZipCode::SNARL_IS_REGULAR_OFFSET - 1; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } decoder.emplace_back(!previous_is_chain, zip_index); return false; @@ -359,12 +352,12 @@ cerr << "\tThe last thing was a root-level node, so nothing else" << endl; } } -size_t ZipCodeDecoder::max_depth() const { +size_t ZipCode::max_depth() const { return decoder_length()-1; } -ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { +ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { //Now get the code type //A snarl is always a snarl. 
A chain could actually be a node @@ -397,7 +390,7 @@ ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 0) { return ZipCode::IRREGULAR_SNARL; @@ -410,7 +403,7 @@ ZipCode::code_type_t ZipCodeDecoder::get_code_type(const size_t& depth) const { } } -size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -420,7 +413,7 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -436,7 +429,7 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { @@ -446,14 +439,14 @@ size_t ZipCodeDecoder::get_length(const size_t& depth, const SnarlDistanceIndex* size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_LENGTH_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; } } -size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { +size_t ZipCode::get_rank_in_snarl(const size_t& depth) const { if (depth == 0) { @@ -470,7 +463,7 @@ size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -479,7 +472,7 @@ size_t ZipCodeDecoder::get_rank_in_snarl(const size_t& depth) const { } } -size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -497,7 +490,7 @@ size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDis size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHILD_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -506,7 +499,7 @@ size_t ZipCodeDecoder::get_snarl_child_count(const size_t& depth, const SnarlDis } } -size_t ZipCodeDecoder::get_offset_in_chain(const size_t& 
depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { @@ -522,7 +515,7 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? std::numeric_limits::max() : zip_value-1; @@ -532,13 +525,13 @@ size_t ZipCodeDecoder::get_offset_in_chain(const size_t& depth, const SnarlDista size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } } -size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { +size_t ZipCode::get_chain_component(const size_t& depth) const { if (depth == 0) { @@ -554,7 +547,7 @@ size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -564,14 +557,14 @@ size_t ZipCodeDecoder::get_chain_component(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_CHAIN_COMPONENT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } } -size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_end) const { +size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -579,7 +572,7 @@ size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_en size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value % 2) { if (!get_end) { @@ -592,7 +585,7 @@ size_t ZipCodeDecoder::get_last_chain_component(const size_t& depth, bool get_en return zip_value / 2; } -bool ZipCodeDecoder::get_is_looping_chain(const size_t& depth) const { +bool 
ZipCode::get_is_looping_chain(const size_t& depth) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); @@ -600,11 +593,11 @@ bool ZipCodeDecoder::get_is_looping_chain(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value % 2; } -bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { +bool ZipCode::get_is_reversed_in_parent(const size_t& depth) const { if (depth == 0) { @@ -620,7 +613,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::NODE_IS_REVERSED_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } else { @@ -629,14 +622,14 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { size_t zip_index = decoder[depth-1].offset; //zip_value is true if the parent is a regular snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } 
return zip_value; } else { @@ -650,7 +643,7 @@ bool ZipCodeDecoder::get_is_reversed_in_parent(const size_t& depth) const { } } -net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const { //get_net_handle_slow does the same thing so if this gets changed need to change that too @@ -659,7 +652,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -674,7 +667,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -686,7 +679,7 @@ net_handle_t ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -694,7 +687,7 @@ net_handle_t 
ZipCodeDecoder::get_net_handle(const size_t& depth, const SnarlDist } } -net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -702,7 +695,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return distance_index->get_handle_from_connected_component(zip_value); @@ -724,7 +717,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -743,7 +736,7 @@ net_handle_t ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } net_handle_t snarl_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); return snarl_handle; @@ -752,7 +745,7 @@ net_handle_t 
ZipCodeDecoder::get_net_handle_slow(nid_t id, const size_t& depth, } -size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { +size_t ZipCode::get_distance_index_address(const size_t& depth) const { if (depth == 0) { @@ -760,7 +753,7 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { size_t zip_value, zip_index = 0; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; @@ -775,7 +768,7 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { size_t zip_index = decoder[depth].offset; //zip_value is is_regular_snarl for (size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //If this is a regular snarl @@ -787,13 +780,13 @@ size_t ZipCodeDecoder::get_distance_index_address(const size_t& depth) const { //zip_value is distance index offset for (size_t i = 0 ; i <= ZipCode::IRREGULAR_SNARL_RECORD_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET-1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value; } } } -size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { +size_t ZipCode::get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const { #ifdef DEBUG_ZIPCODE assert(depth > 0); @@ -803,13 +796,13 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna size_t zip_index = decoder[depth-1].offset; //zip_value is 1 if the parent is a regular snarl for 
(size_t i = 0 ; i <= ZipCode::SNARL_IS_REGULAR_OFFSET ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } if (zip_value == 1) { //The parent is a regular snarl, which stores is_reversed for the child for (size_t i = 0 ; i <= ZipCode::REGULAR_SNARL_IS_REVERSED_OFFSET - ZipCode::SNARL_IS_REGULAR_OFFSET - 1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } //Zip value is true if the child is reversed @@ -832,53 +825,53 @@ size_t ZipCodeDecoder::get_distance_to_snarl_bound(const size_t& depth, bool sna distance_offset = ZipCode::IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET; } for (size_t i = 0 ; i <= distance_offset - ZipCode::SNARL_IS_REGULAR_OFFSET -1 ; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return zip_value == 0 ? 
std::numeric_limits::max() : zip_value - 1; } } -bool ZipCodeDecoder::is_externally_start_end_connected (const size_t& depth) const { +bool ZipCode::is_externally_start_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 1) != 0; } -bool ZipCodeDecoder::is_externally_start_start_connected (const size_t& depth) const { +bool ZipCode::is_externally_start_start_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 2) != 0; } -bool ZipCodeDecoder::is_externally_end_end_connected (const size_t& depth) const { +bool ZipCode::is_externally_end_end_connected (const size_t& depth) const { assert(depth == 0); assert(decoder[0].is_chain); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_NODE_OR_CHAIN_CONNECTIVITY_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } return (zip_value & 4) != 0; } -const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, +const bool ZipCode::is_equal(const ZipCode& zip1, const ZipCode& zip2, const size_t& depth) { - if (decoder1.max_depth() < depth && decoder2.max_depth() < depth ) { + if 
(zip1.max_depth() < depth && zip2.max_depth() < depth ) { return false; } //First, check if the code types are the same - ZipCode::code_type_t type1 = decoder1.get_code_type(depth); - ZipCode::code_type_t type2 = decoder2.get_code_type(depth); + ZipCode::code_type_t type1 = zip1.get_code_type(depth); + ZipCode::code_type_t type2 = zip2.get_code_type(depth); if (type1 != type2) { return false; } @@ -886,44 +879,39 @@ const bool ZipCodeDecoder::is_equal(const ZipCodeDecoder& decoder1, const ZipCod if (type1 == ZipCode::ROOT_NODE || type1 == ZipCode::ROOT_CHAIN || type1 == ZipCode::ROOT_SNARL || type1 == ZipCode::IRREGULAR_SNARL || type1 == ZipCode::CYCLIC_SNARL ) { //If the codes are for root-structures or irregular/cyclic snarls, just check if the //connected component numbers are the same - return decoder1.get_distance_index_address(depth) == decoder2.get_distance_index_address(depth); + return zip1.get_distance_index_address(depth) == zip2.get_distance_index_address(depth); } else { //Check the parent type. If the parent is a snarl, then check rank. 
If it's a chain, //then check the prefix sum - if (decoder1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || - decoder1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { + if (zip1.get_code_type(depth-1) == ZipCode::REGULAR_SNARL || + zip1.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL || + zip1.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL || + zip1.get_code_type(depth-1) == ZipCode::ROOT_SNARL) { //If the parent is a snarl, then check the rank - return decoder1.get_rank_in_snarl(depth) == decoder2.get_rank_in_snarl(depth); + return zip1.get_rank_in_snarl(depth) == zip2.get_rank_in_snarl(depth); } else { //Otherwise, check the offset in the chain //Since the type is the same, this is sufficient - return decoder1.get_offset_in_chain(depth) == decoder2.get_offset_in_chain(depth); + return zip1.get_offset_in_chain(depth) == zip2.get_offset_in_chain(depth); } } } -void ZipCodeDecoder::dump(std::ostream& out) const { - if (!zipcode) { - // We're decoding nothing - out << *this; - } else { - std::vector numbers = zipcode->to_vector(); - // Print out the numbers in a way that is easy to copy-paste as a vector literal. - out << " numbers = to_vector(); + // Print out the numbers in a way that is easy to copy-paste as a vector literal. 
+ out << ""; } + out << "}>"; } -std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder) { - return out << ""; +std::ostream& operator<<(std::ostream& out, const ZipCode& zip) { + return out << ""; } @@ -1057,8 +1045,8 @@ vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, cons } -size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos_t& pos1, - ZipCodeDecoder& zip2_decoder, const pos_t& pos2, const SnarlDistanceIndex& distance_index, +size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, + ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit, bool undirected_distance, const HandleGraph* graph){ @@ -1066,11 +1054,11 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Make sure that the zip codes actually correspond to the positions ZipCode check_zip1; check_zip1.fill_in_zipcode(distance_index, pos1); - assert(*zip1_decoder.zipcode == check_zip1); + assert(zip1 == check_zip1); ZipCode check_zip2; check_zip2.fill_in_zipcode(distance_index, pos2); - assert(*zip2_decoder.zipcode == check_zip2); + assert(zip2 == check_zip2); cerr << endl << "Minimum distance between " << pos1 << " and " << pos2 << " using zipcodes" << endl; cerr << "Ancestors for " << pos1 << endl; @@ -1091,7 +1079,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Helper function to update the distances to the ends of the parent //distance_start and distance_end get updated - auto update_distances_to_ends_of_parent = [&] (ZipCodeDecoder& decoder, const size_t& child_depth, + auto update_distances_to_ends_of_parent = [&] (ZipCode& zip, const size_t& child_depth, size_t& distance_to_start, size_t& distance_to_end) { #ifdef DEBUG_ZIPCODE cerr << "Update distance to ends of parent at depth " << child_depth << endl; @@ -1102,12 +1090,12 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, 
const pos size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - code_type_t parent_type = decoder.get_code_type(child_depth-1); + code_type_t parent_type = zip.get_code_type(child_depth-1); if (parent_type == IRREGULAR_SNARL || parent_type == CYCLIC_SNARL) { //If the parent is an irregular snarl - net_handle_t parent_handle = decoder.get_net_handle(child_depth-1, &distance_index); - size_t child_rank = decoder.get_rank_in_snarl(child_depth); + net_handle_t parent_handle = zip.get_net_handle(child_depth-1, &distance_index); + size_t child_rank = zip.get_rank_in_snarl(child_depth); distance_start_left = distance_index.distance_in_snarl(parent_handle, child_rank, false, 0, false, graph); distance_start_right = distance_index.distance_in_snarl(parent_handle, @@ -1122,7 +1110,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos } else if (parent_type == REGULAR_SNARL) { //If its a regular snarl, then the distances to the ends are either 0 or inf //For a regular snarl, the snarl stores if the child was reversed, rather than the child - if (decoder.get_is_reversed_in_parent(child_depth)) { + if (zip.get_is_reversed_in_parent(child_depth)) { distance_start_left = std::numeric_limits::max(); distance_start_right = 0; distance_end_right = std::numeric_limits::max(); @@ -1137,30 +1125,30 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos cerr << "Distances to parent regular snarl: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; #endif } else if (parent_type == CHAIN) { - if (decoder.get_code_type(child_depth) == NODE && - decoder.get_is_reversed_in_parent(child_depth)){ + if (zip.get_code_type(child_depth) == NODE && + zip.get_is_reversed_in_parent(child_depth)){ //If this is reversed in the chain distance_start_left = std::numeric_limits::max(); distance_end_right = 
std::numeric_limits::max(); //Prefix sum of the child - distance_end_left = decoder.get_offset_in_chain(child_depth, &distance_index); + distance_end_left = zip.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_start_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - decoder.get_length(child_depth-1, &distance_index), - decoder.get_offset_in_chain(child_depth, &distance_index)), - decoder.get_length(child_depth, &distance_index)); + zip.get_length(child_depth-1, &distance_index), + zip.get_offset_in_chain(child_depth, &distance_index)), + zip.get_length(child_depth, &distance_index)); } else { //If it is a node that isn't reversed in the chain, or it's a snarl which is never reversed distance_end_left = std::numeric_limits::max(); distance_start_right = std::numeric_limits::max(); //Prefix sum of the child - distance_start_left = decoder.get_offset_in_chain(child_depth, &distance_index); + distance_start_left = zip.get_offset_in_chain(child_depth, &distance_index); //Length of the chain - prefix sum of the child - length of the child distance_end_right = SnarlDistanceIndex::minus(SnarlDistanceIndex::minus( - decoder.get_length(child_depth-1, &distance_index), - decoder.get_offset_in_chain(child_depth, &distance_index)), - decoder.get_length(child_depth, &distance_index)); + zip.get_length(child_depth-1, &distance_index), + zip.get_offset_in_chain(child_depth, &distance_index)), + zip.get_length(child_depth, &distance_index)); } #ifdef DEBUG_ZIPCODE cerr << "Distances to parent chain: " << distance_start_left << " " << distance_start_right << " " << distance_end_left << " " << distance_end_right << endl; @@ -1178,7 +1166,7 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos }; - if (zip1_decoder.get_distance_index_address(0) != zip2_decoder.get_distance_index_address(0)) { + if (zip1.get_distance_index_address(0) != 
zip2.get_distance_index_address(0)) { #ifdef DEBUG_ZIPCODE cerr << "Zip codes are on different connected components" << endl; #endif @@ -1187,18 +1175,17 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //The two positions are in the same connected component so now fill in the rest //of the decoder and try to find the distance - zip1_decoder.fill_in_full_decoder(); - zip2_decoder.fill_in_full_decoder(); + zip1.fill_in_full_decoder(); + zip2.fill_in_full_decoder(); //Now find the lowest common ancestor of the two zipcodes size_t lowest_common_ancestor_depth = 0; bool still_equal = true; while (still_equal) { - if (lowest_common_ancestor_depth == zip1_decoder.decoder_length()-1 || - lowest_common_ancestor_depth == zip2_decoder.decoder_length()-1 || - !ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, - lowest_common_ancestor_depth+1)) { + if (lowest_common_ancestor_depth == zip1.decoder_length()-1 || + lowest_common_ancestor_depth == zip2.decoder_length()-1 || + !ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth+1)) { //If we've hit the end of either decoder or if they are no longer equal, //Then break the loop and keep the current lowest_common_ancestor_depth still_equal = false; @@ -1222,26 +1209,26 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos if (distance_limit != std::numeric_limits::max() && - lowest_common_ancestor_depth < zip1_decoder.decoder_length()-1){ + lowest_common_ancestor_depth < zip1.decoder_length()-1){ //If we're aborting when the distance is definitely too far, - code_type_t ancestor_type = zip1_decoder.get_code_type(lowest_common_ancestor_depth); + code_type_t ancestor_type = zip1.get_code_type(lowest_common_ancestor_depth); if (ancestor_type == CHAIN || ancestor_type == ROOT_CHAIN) { //If the current ancestor is a chain, then check the distance - size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); - size_t 
prefix_sum2 = zip2_decoder.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + size_t prefix_sum1 = zip1.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); + size_t prefix_sum2 = zip2.get_offset_in_chain(lowest_common_ancestor_depth+1, &distance_index); size_t distance_in_chain; if (prefix_sum1 < prefix_sum2) { //zip1 comes before zip2 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum2, SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip1.get_length(lowest_common_ancestor_depth+1, &distance_index))); } else { //zip2 comes before zip1 distance_in_chain = SnarlDistanceIndex::minus( prefix_sum1, SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(lowest_common_ancestor_depth+1, &distance_index))); + zip2.get_length(lowest_common_ancestor_depth+1, &distance_index))); } if (distance_in_chain > distance_limit) { return std::numeric_limits::max(); @@ -1251,15 +1238,15 @@ size_t ZipCode::minimum_distance_between(ZipCodeDecoder& zip1_decoder, const pos //Start from the nodes size_t distance_to_start1 = is_rev(pos1) - ? zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1) + ? zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1) : offset(pos1) + 1; size_t distance_to_end1 = is_rev(pos1) ? offset(pos1) + 1 - : zip1_decoder.get_length(zip1_decoder.decoder_length()-1, &distance_index) - offset(pos1); + : zip1.get_length(zip1.decoder_length()-1, &distance_index) - offset(pos1); size_t distance_to_start2 = is_rev(pos2) - ? zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2) + ? zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2) : offset(pos2) + 1; size_t distance_to_end2 = is_rev(pos2) ? 
offset(pos2) + 1 - : zip2_decoder.get_length(zip2_decoder.decoder_length()-1, &distance_index) - offset(pos2); + : zip2.get_length(zip2.decoder_length()-1, &distance_index) - offset(pos2); if (!undirected_distance) { //These are directed distances so set backwards distances to inf @@ -1282,22 +1269,22 @@ cerr << "Finding distances to ancestors of first position" << endl; //Now walk up the snarl tree from each position to one level below the lowest common ancestor - for (int i = zip1_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip1.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip1_decoder, i+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip1, i+1, distance_to_start1, distance_to_end1); } #ifdef DEBUG_ZIPCODE cerr << "Finding distances to ancestors of second position" << endl; #endif //The same thing for the second position - for (int i = zip2_decoder.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { + for (int i = zip2.decoder_length()-2 ; i > 0 && i > lowest_common_ancestor_depth ; i--) { //the parent snarl tree node is at index i //The distances are currently to the ends of the current node //FInd the distances to the ends of the parent - update_distances_to_ends_of_parent(zip2_decoder, i+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip2, i+1, distance_to_start2, distance_to_end2); } @@ -1306,7 +1293,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "Distances in children of common ancestor: " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; //Check that the current nodes are actually children of 
the lca - assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, lowest_common_ancestor_depth)); + assert(ZipCode::is_equal(zip1, zip2, lowest_common_ancestor_depth)); #endif //Find the distance between them in the lowest common ancestor @@ -1321,18 +1308,18 @@ cerr << "Finding distances to ancestors of second position" << endl; cerr << "At " << depth << "st/th ancestor" << endl; cerr << "\tdistances are " << distance_to_start1 << " " << distance_to_end1 << " " << distance_to_start2 << " " << distance_to_end2 << endl; #endif - if (depth == zip1_decoder.decoder_length()-1) { + if (depth == zip1.decoder_length()-1) { //If the lca is a node that both positions are on #ifdef DEBUG_ZIPCODE //If the lca is a node, then both the zipcode nodes should be the same node - assert(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth)); - assert(depth == zip2_decoder.decoder_length()-1); + assert(ZipCode::is_equal(zip1, zip2, depth)); + assert(depth == zip2.decoder_length()-1); cerr << "\tAncestor should be a node" << endl; #endif size_t d1 = SnarlDistanceIndex::sum(distance_to_end1, distance_to_start2); size_t d2 = SnarlDistanceIndex::sum(distance_to_end2, distance_to_start1); - size_t node_length = zip1_decoder.get_length(depth, &distance_index); + size_t node_length = zip1.get_length(depth, &distance_index); if (d1 > node_length) { distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d1, node_length),1)); @@ -1341,31 +1328,31 @@ cerr << "Finding distances to ancestors of second position" << endl; distance_between = std::min(distance_between, SnarlDistanceIndex::minus(SnarlDistanceIndex::minus(d2, node_length),1)); } - } else if ( zip1_decoder.decoder[depth].is_chain) { + } else if ( zip1.decoder[depth].is_chain) { #ifdef DEBUG_ZIPCODE cerr << "\tancestor should be a chain" << endl; #endif //If this ancestor is a chain //If the children are reversed in the chain, then flip their distances - bool rev1 = 
(zip1_decoder.get_code_type(depth+1) == NODE && - zip1_decoder.get_is_reversed_in_parent(depth+1)); + bool rev1 = (zip1.get_code_type(depth+1) == NODE && + zip1.get_is_reversed_in_parent(depth+1)); size_t dist_start1 = rev1 ? distance_to_end1 : distance_to_start1; size_t dist_end1 = rev1 ? distance_to_start1 : distance_to_end1; - bool rev2 = zip2_decoder.get_code_type(depth+1) == NODE && - zip2_decoder.get_is_reversed_in_parent(depth+1); + bool rev2 = zip2.get_code_type(depth+1) == NODE && + zip2.get_is_reversed_in_parent(depth+1); size_t dist_start2 = rev2 ? distance_to_end2 : distance_to_start2; size_t dist_end2 = rev2 ? distance_to_start2 : distance_to_end2; //If they are the same child, then there is no path between them in the chain because we don't allow loops //So first check that they aren't the same - if (!(ZipCodeDecoder::is_equal(zip1_decoder, zip2_decoder, depth+1) - )){//TODO: I think this is unnecessary || (zip1_decoder.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) - size_t prefix_sum1 = zip1_decoder.get_offset_in_chain(depth+1, &distance_index); - size_t prefix_sum2 = zip2_decoder.get_offset_in_chain(depth+1, &distance_index); - code_type_t code_type1 = zip1_decoder.get_code_type(depth+1); - code_type_t code_type2 = zip2_decoder.get_code_type(depth+1); + if (!(ZipCode::is_equal(zip1, zip2, depth+1) + )){//TODO: I think this is unnecessary || (zip1.get_code_type(depth+1) == NODE && id(pos1) == id(pos2)))) + size_t prefix_sum1 = zip1.get_offset_in_chain(depth+1, &distance_index); + size_t prefix_sum2 = zip2.get_offset_in_chain(depth+1, &distance_index); + code_type_t code_type1 = zip1.get_code_type(depth+1); + code_type_t code_type2 = zip2.get_code_type(depth+1); if (prefix_sum1 < prefix_sum2 || (prefix_sum1 == prefix_sum2 && @@ -1379,7 +1366,7 @@ cerr << "Finding distances to ancestors of second position" << endl; #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it is a snarl" << endl; - cerr << "Find distances from : 
" << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << zip1.get_length(depth+1, &distance_index) << " " << dist_end1 << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1389,7 +1376,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(depth+1, &distance_index))), + zip1.get_length(depth+1, &distance_index))), dist_end1),1)); } } else { @@ -1397,7 +1384,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(Prefix sum 2 + distance left 2) - (prefix sum1+ length 1) + distance right 1 #ifdef DEBUG_ZIPCODE cerr << "First child comes first in the chain and it isn't a snarl" << endl; - cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1_decoder.get_length(depth+1, &distance_index) << endl; + cerr << "Find distances from : " << prefix_sum2 << " " << dist_start2 << " " << prefix_sum1 << " " << dist_end1 << " " << zip1.get_length(depth+1, &distance_index) << endl; #endif if (dist_start2 != std::numeric_limits::max() && dist_end1 != std::numeric_limits::max()) { @@ -1408,7 +1395,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum2, dist_start2), SnarlDistanceIndex::sum(prefix_sum1, - zip1_decoder.get_length(depth+1, &distance_index))), + zip1.get_length(depth+1, &distance_index))), dist_end1),1) ); } @@ -1420,7 +1407,7 @@ cerr << "Finding distances to ancestors of second position" << endl; //(prefix sum 1 + distance left 1) - (prefix sum 2 + length 2) + distance right 2 #ifdef DEBUG_ZIPCODE cerr << "Second child comes first in the chain 
and it is a snarl" << endl; - cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2_decoder.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; + cerr << "Find distances from : " << prefix_sum1 << " " << dist_start1 << " " << prefix_sum2 << " " << zip2.get_length(depth+1, &distance_index) << " " << dist_end2 << endl; #endif if (dist_start1 != std::numeric_limits::max() && dist_end2 != std::numeric_limits::max() ){ @@ -1430,7 +1417,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(depth+1, &distance_index))), + zip2.get_length(depth+1, &distance_index))), dist_end2), 1)); } } else { @@ -1449,7 +1436,7 @@ cerr << "Finding distances to ancestors of second position" << endl; SnarlDistanceIndex::sum(prefix_sum1, dist_start1), SnarlDistanceIndex::sum(prefix_sum2, - zip2_decoder.get_length(depth+1, &distance_index))), + zip2.get_length(depth+1, &distance_index))), dist_end2),1) ); } @@ -1457,8 +1444,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } } //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); } else { #ifdef DEBUG_ZIPCODE @@ -1468,11 +1455,11 @@ cerr << "Finding distances to ancestors of second position" << endl; //If the parent is a regular snarl, then there is no path between them so //just update the distances to the ends of the parent - if (zip1_decoder.get_code_type(depth) != REGULAR_SNARL) { + if (zip1.get_code_type(depth) != 
REGULAR_SNARL) { //Parent may be an irregular snarl or a root snarl (which is also irregular) - net_handle_t parent_handle = zip1_decoder.get_net_handle(depth, &distance_index); - size_t rank1 = zip1_decoder.get_rank_in_snarl(depth+1); - size_t rank2 = zip2_decoder.get_rank_in_snarl(depth+1); + net_handle_t parent_handle = zip1.get_net_handle(depth, &distance_index); + size_t rank1 = zip1.get_rank_in_snarl(depth+1); + size_t rank2 = zip2.get_rank_in_snarl(depth+1); #ifdef DEBUG_ZIPCODE cerr << "irregular snarl so find distances in the distance index: " << distance_index.net_handle_as_string(parent_handle) << endl; cerr << "\t at offset " << distance_index.get_record_offset(parent_handle) << endl; @@ -1505,8 +1492,8 @@ cerr << "Finding distances to ancestors of second position" << endl; } #endif //Update distances from the ends of the children (at depth+1) to parent (depth) - update_distances_to_ends_of_parent(zip1_decoder, depth+1, distance_to_start1, distance_to_end1); - update_distances_to_ends_of_parent(zip2_decoder, depth+1, distance_to_start2, distance_to_end2); + update_distances_to_ends_of_parent(zip1, depth+1, distance_to_start1, distance_to_end1); + update_distances_to_ends_of_parent(zip2, depth+1, distance_to_start2, distance_to_end2); } #ifdef DEBUG_ZIPCODE cerr << "distance in ancestor: " << distance_between << endl; @@ -1869,7 +1856,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { MIPayload payload; if (decoder_length() == 1) { @@ -1881,15 +1868,15 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance size_t zip_value; size_t zip_index = decoder[0].offset; //Root is chain - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); //root_identifier - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); //Root node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; payload.is_trivial_chain = true; @@ -1908,17 +1895,17 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance size_t zip_value; size_t zip_index = decoder[max_depth()-1].offset; //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (decoder_length() == 2) { //If the node is a child of the root chain payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } else { payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); payload.parent_type = ZipCode::CHAIN; @@ -1926,20 +1913,20 @@ MIPayload 
ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); //chain component count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Node prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //Node length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; //is_reversed - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: For top-level chains we got this from the distance index payload.is_reversed = zip_value; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.chain_component = zip_value; @@ -1962,9 +1949,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance if (payload.parent_is_root) { //is_chain zip_index = decoder[0].offset; - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; payload.parent_record_offset = 
distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, @@ -1974,7 +1961,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } else { zip_index = decoder[max_depth()-1].offset; //is_regular - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //If this is a non-root snarl, get as much as we can from it payload.parent_type = ZipCode::EMPTY; if (zip_value == 0) { @@ -1986,20 +1973,20 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance } //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Snarl length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Snarl child_count - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //TODO: SHould use this somehow payload.chain_component = 0; //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (payload.parent_type == ZipCode::REGULAR_SNARL) { //Snarl is reversed @@ -2023,9 +2010,9 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance //We should be at the node/trivial chain now zip_index = decoder[max_depth()].offset; //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain length - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; //Get the rest as default values @@ -2044,7 +2031,7 @@ MIPayload ZipCodeDecoder::get_payload_from_zipcode(nid_t id, const SnarlDistance return payload; } -net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { +net_identifier_t ZipCode::get_identifier(size_t depth) const { if (depth == std::numeric_limits::max()) { //This is equivalent to distance_index.get_root() return "ROOT"; @@ -2057,7 +2044,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::ROOT_IDENTIFIER_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else if (decoder[d].is_chain) { @@ -2067,7 +2054,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::NODE_OFFSET_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } else { @@ -2075,7 +2062,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_RANK_IN_SNARL_OFFSET; i++) { - std::tie(zip_value, zip_index) = zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2084,7 +2071,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { size_t zip_value; size_t zip_index = decoder[d].offset; for (size_t i = 0 ; i <= ZipCode::SNARL_OFFSET_IN_CHAIN_OFFSET; i++) { - std::tie(zip_value, zip_index) = 
zipcode->zipcode.get_value_and_next_index(zip_index); + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); result += std::to_string(zip_value); } } @@ -2101,7 +2088,7 @@ net_identifier_t ZipCodeDecoder::get_identifier(size_t depth) const { return result; } -const net_identifier_t ZipCodeDecoder::get_parent_identifier(const net_identifier_t& child) { +const net_identifier_t ZipCode::get_parent_identifier(const net_identifier_t& child) { if (child == "ROOT") { throw std::runtime_error("error: trying to get the parent of the root net_identifier_t"); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index eceed521640..4b5de75b9dc 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -19,18 +19,14 @@ using namespace std; * A ZipCode stores the information and can be used to create a zipcode. It can be used * to calculate the distance between zipcodes * - * A ZipCodeDecoder is used for interpreting zipcodes to find specific values that were - * stored in the ZipCode. A ZipCodeDecoder must be constructed from a specific zipcode. + * A decoder is used for interpreting zipcodes to find specific values that were + * stored in the ZipCode. * Construction of a decoder occurs one code at a time, starting from the root snarl or chain, - * so it is possible to have a partially constructed ZipCodeDecoder, to avoid having to + * so it is possible to have a partially constructed decoder, to avoid having to * walk through the entire ZipCode to get the values for things higher in the snarl tree. * The full decoder must be constructed to get values for the node. */ -///A decoder for interpreting a zipcode -///Can interpret the values for a snarl tree node given the depth -///(depth in the snarl tree, also the index into the zipcode vector) -class ZipCodeDecoder; ///A struct to interpret the minimizer payload @@ -59,7 +55,8 @@ class ZipCode { /// Regular snarls are bubbles. 
Irregular snarls are snarls that aren't bubbles but are dags /// Cyclic snarls are non-dags. They are stored the same as irregular snarls. Only the type is different public: - enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; + enum code_type_t { NODE = 1, CHAIN, REGULAR_SNARL, IRREGULAR_SNARL, CYCLIC_SNARL, ROOT_SNARL, ROOT_CHAIN, ROOT_NODE, EMPTY }; + public: //Fill in an empty zipcode given a position @@ -83,8 +80,8 @@ class ZipCode { //The decoders may or may not be filled in, and may be filled in when this is run //If distance_limit is set, return std::numeric_limits::max() if the distance //will be greater than the distance limit - static size_t minimum_distance_between(ZipCodeDecoder& zip_decoder1, const pos_t& pos1, - ZipCodeDecoder& zip_decoder2, const pos_t& pos2, + static size_t minimum_distance_between(ZipCode& zip1, const pos_t& pos1, + ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit = std::numeric_limits::max(), bool undirected_distance=false, @@ -215,7 +212,124 @@ class ZipCode { //Return a vector of size_ts that will represent the snarl in the zip code inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); - friend class ZipCodeDecoder; + + //////////////////////////////// Stuff for decoding the zipcode + + public: + //TODO: Make the decoder and zipcode private, still need it for unit testing + ///The decoder as a vector of pair, one for each snarl tree node in the zip + ///where is_chain indicates whether it's a chain/node, and index + ///is the index of the node/snarl/chain code in the varint_vector_t + struct decoder_t { + bool is_chain : 1; + size_t offset : 15; + decoder_t(bool is_chain, size_t offset) : is_chain(is_chain), offset(offset) {} + inline bool operator==(const decoder_t& other) const { + return is_chain == other.is_chain && 
offset == other.offset; + } + }; + std::vector decoder; + + ///Did we fill in the entire decoder + ///TODO: I'm making it fill in the decoder automatically because it seems to be faster that way, instead of + /// waiting to see which parts are actually needed + bool finished_decoding = false; + + public: + + ///Go through the entire zipcode and fill in the decoder + void fill_in_full_decoder(); + + ///Fill in one more item in the decoder + ///Returns true if this is the last thing in the zipcode and false if there is more to decode + bool fill_in_next_decoder(); + + ///What is the maximum depth of this zipcode? + size_t max_depth() const; + + ///How many codes in the zipcode have been decoded? + size_t decoder_length() const {return decoder.size();} + + ///What type of snarl tree node is at the given depth (index into the zipcode) + ZipCode::code_type_t get_code_type(const size_t& depth) const ; + + ///Get the length of a snarl tree node given the depth in the snarl tree + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl + size_t get_rank_in_snarl(const size_t& depth) const ; + + ///Get the number of children in a snarl. 
Throw an exception if it isn't a snarl + size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the prefix sum of a child of a chain + ///This requires the distance index for irregular snarls (except for a top-level snarl) + ///Throws an exception if the distance index is not given when it is needed + ///Doesn't use a given distance index if it isn't needed + size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + + ///Get the chain component of a chain child. + ///For snarls, this will be the component of the start node + size_t get_chain_component(const size_t& depth) const ; + + ///Get the chain component of the last node in the chain + /// This behaves like the distance index get_chain_component- + /// for looping chains it returns the last component if get_end is true, + /// and 0 if it is false + size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; + bool get_is_looping_chain(const size_t& depth) const ; + + ///Is the snarl tree node backwards relative to its parent + bool get_is_reversed_in_parent(const size_t& depth) const; + + ///Get the handle of the thing at the given depth. This can only be used for + ///Root-level structures or irregular snarls + net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; + + ///Get the handle of the thing at the given depth. This can be used for anything but is slow, + /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I + /// remember that it's slow + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; + + ///Get the information that was stored to get the address in the distance index + ///This is the connected component number for a root structure, or the address of + ///an irregular snarl. 
Throws an error for anything else + ///This is used for checking equality without looking at the distance index. + ///Use get_net_handle for getting the actual handle + size_t get_distance_index_address(const size_t& depth) const; + + /// The minimum distance from start or end of the snarl to the left or right side of the child + size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; + + bool is_externally_start_end_connected(const size_t& depth) const; + bool is_externally_start_start_connected(const size_t& depth) const; + bool is_externally_end_end_connected(const size_t& depth) const; + + + ///Are the two decoders pointing to the same snarl tree node at the given depth + ///This only checks if the values in the zipcode are the same at the given depth, + ///so if the preceeding snarl tree nodes are different, + ///then this might actually refer to different things + const static bool is_equal(const ZipCode& zip1, const ZipCode& zip2, + const size_t& depth); + + /// Dump a ZipCode to a stream so that it can be reconstructed for a + /// unit test from the resulting information. + void dump(std::ostream& out) const; + + //TODO: I want to make a struct for holding all values of a code as real values + + ///Fill in a payload with values from the zipcode + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + + /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth + /// would be the node, also include the node id + net_identifier_t get_identifier(size_t depth) const; + const static net_identifier_t get_parent_identifier(const net_identifier_t& child); + }; /// Print a code type to a stream @@ -255,136 +369,6 @@ class ZipCodeCollection { }; -/* - * Struct for interpreting a ZipCode - */ -class ZipCodeDecoder { - - public: - //TODO: Make the decoder and zipcode private, still need it for unit testing - ///The decoder as a vector of pair, one for each snarl tree node in the zip - ///where is_chain indicates whether it's a chain/node, and index - ///is the index of the node/snarl/chain code in the varint_vector_t - struct decoder_t { - bool is_chain : 1; - size_t offset : 15; - decoder_t(bool is_chain, size_t offset) : is_chain(is_chain), offset(offset) {} - decoder_t() : is_chain(false), offset(0) {} - inline bool operator==(const decoder_t& other) const { - return is_chain == other.is_chain && offset == other.offset; - } - }; - std::vector decoder; - - ///The zipcode that this is decoding - const ZipCode* zipcode; - - ///Did we fill in the entire decoder - bool finished_decoding; - - public: - - ///Constructor that goes through the zipcode and decodes it to fill in decoder - ///If a depth is given, then only fill in up to depth snarl tree nodes - ///Otherwise, fill in the whole zipcode - ZipCodeDecoder(const ZipCode* zipcode = nullptr); - - ///Go through the entire zipcode and fill in the decoder - void fill_in_full_decoder(); - - ///Fill in one more item in the decoder - ///Returns true if this is the last thing in the zipcode and false if there is more to decode - bool fill_in_next_decoder(); - - ///What is the maximum depth of this zipcode? - size_t max_depth() const; - - ///How many codes in the zipcode have been decoded? 
- size_t decoder_length() const {return decoder.size();} - - ///What type of snarl tree node is at the given depth (index into the zipcode) - ZipCode::code_type_t get_code_type(const size_t& depth) const ; - - ///Get the length of a snarl tree node given the depth in the snarl tree - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl - size_t get_rank_in_snarl(const size_t& depth) const ; - - ///Get the number of children in a snarl. Throw an exception if it isn't a snarl - size_t get_snarl_child_count(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the prefix sum of a child of a chain - ///This requires the distance index for irregular snarls (except for a top-level snarl) - ///Throws an exception if the distance index is not given when it is needed - ///Doesn't use a given distance index if it isn't needed - size_t get_offset_in_chain(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; - - ///Get the chain component of a chain child. 
- ///For snarls, this will be the component of the start node - size_t get_chain_component(const size_t& depth) const ; - - ///Get the chain component of the last node in the chain - /// This behaves like the distance index get_chain_component- - /// for looping chains it returns the last component if get_end is true, - /// and 0 if it is false - size_t get_last_chain_component(const size_t& depth, bool get_end = false) const ; - bool get_is_looping_chain(const size_t& depth) const ; - - ///Is the snarl tree node backwards relative to its parent - bool get_is_reversed_in_parent(const size_t& depth) const; - - ///Get the handle of the thing at the given depth. This can only be used for - ///Root-level structures or irregular snarls - net_handle_t get_net_handle(const size_t& depth, const SnarlDistanceIndex* distance_index) const; - - ///Get the handle of the thing at the given depth. This can be used for anything but is slow, - /// even for roots and irregular/cyclic snarls. It's a separate function to make sure I - /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; - - ///Get the information that was stored to get the address in the distance index - ///This is the connected component number for a root structure, or the address of - ///an irregular snarl. Throws an error for anything else - ///This is used for checking equality without looking at the distance index. 
- ///Use get_net_handle for getting the actual handle - size_t get_distance_index_address(const size_t& depth) const; - - /// The minimum distance from start or end of the snarl to the left or right side of the child - size_t get_distance_to_snarl_bound(const size_t& depth, bool snarl_start, bool left_side) const; - - bool is_externally_start_end_connected(const size_t& depth) const; - bool is_externally_start_start_connected(const size_t& depth) const; - bool is_externally_end_end_connected(const size_t& depth) const; - - - ///Are the two decoders pointing to the same snarl tree node at the given depth - ///This only checks if the values in the zipcode are the same at the given depth, - ///so if the preceeding snarl tree nodes are different, - ///then this might actually refer to different things - const static bool is_equal(const ZipCodeDecoder& decoder1, const ZipCodeDecoder& decoder2, - const size_t& depth); - - /// Dump a ZipCodeDecoder to a stream so that it can be reconstructed for a - /// unit test from the resulting information. - void dump(std::ostream& out) const; - - //TODO: I want to make a struct for holding all values of a code as real values - - ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; - - /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth - /// would be the node, also include the node id - net_identifier_t get_identifier(size_t depth) const; - const static net_identifier_t get_parent_identifier(const net_identifier_t& child); - - -}; - template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { @@ -392,7 +376,7 @@ struct wang_hash { } }; -std::ostream& operator<<(std::ostream& out, const ZipCodeDecoder& decoder); +std::ostream& operator<<(std::ostream& out, const ZipCode& decoder); /** diff --git a/src/zip_code_tree.cpp b/src/zip_code_tree.cpp index 1055949af1b..1ed2bc13afd 100644 --- a/src/zip_code_tree.cpp +++ b/src/zip_code_tree.cpp @@ -55,7 +55,7 @@ void ZipCodeForest::open_chain(forest_growing_state_t& forest_state, #endif const Seed& current_seed = forest_state.seeds->at(seed_index); - size_t current_max_depth = current_seed.zipcode_decoder->max_depth(); + size_t current_max_depth = current_seed.zipcode.max_depth(); if (depth == 0) { //If this is the start of a new top-level chain, make a new tree, which will be the new active tree @@ -177,7 +177,7 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, //The value that got stored in forest_state.sibling_indices_at_depth was the prefix sum //traversing the chain according to its orientation in the tree, so either way //the distance is the length of the chain - the prefix sum - size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode_decoder->get_length(depth), + size_t distance_to_chain_end = SnarlDistanceIndex::minus(last_seed.zipcode.get_length(depth), forest_state.sibling_indices_at_depth[depth].back().value); bool add_distances = true; if (distance_to_chain_end > forest_state.distance_limit && forest_state.open_chains.back().second) { @@ -260,9 +260,9 @@ void ZipCodeForest::close_chain(forest_growing_state_t& forest_state, std::numeric_limits::max(), false); //Update the distance to the end of the chain to be the distance from the previous child 
- size_t last_length = depth == last_seed.zipcode_decoder->max_depth() + size_t last_length = depth == last_seed.zipcode.max_depth() ? 0 - : last_seed.zipcode_decoder->get_length(depth+1); + : last_seed.zipcode.get_length(depth+1); distance_to_chain_end = SnarlDistanceIndex::sum(distance_to_chain_end, SnarlDistanceIndex::sum(last_edge, @@ -299,10 +299,10 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, bool chain_is_reversed) { const Seed& current_seed = forest_state.seeds->at(seed_index); - ZipCode::code_type_t current_type = current_seed.zipcode_decoder->get_code_type(depth); + ZipCode::code_type_t current_type = current_seed.zipcode.get_code_type(depth); //Is this chain actually a node pretending to be a chain - bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode_decoder->max_depth(); + bool is_trivial_chain = current_type == ZipCode::CHAIN && depth == current_seed.zipcode.max_depth(); //For a root node or trivial chain, the "chain" is actually just the node, so the depth // of the chain we're working on is the same depth. Otherwise, the depth is depth-1 @@ -320,11 +320,11 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //Otherwise, get the distance to the start or end of the chain current_offset = chain_is_reversed - ? SnarlDistanceIndex::minus(current_seed.zipcode_decoder->get_length(chain_depth) , + ? 
SnarlDistanceIndex::minus(current_seed.zipcode.get_length(chain_depth) , SnarlDistanceIndex::sum( - current_seed.zipcode_decoder->get_offset_in_chain(depth), - current_seed.zipcode_decoder->get_length(depth))) - : current_seed.zipcode_decoder->get_offset_in_chain(depth); + current_seed.zipcode.get_offset_in_chain(depth), + current_seed.zipcode.get_length(depth))) + : current_seed.zipcode.get_offset_in_chain(depth); } @@ -537,7 +537,7 @@ void ZipCodeForest::add_child_to_chain(forest_growing_state_t& forest_state, //stored should be the offset of the end bound of the snarl, so add the //length of the snarl current_offset = SnarlDistanceIndex::sum(current_offset, - current_seed.zipcode_decoder->get_length(depth)); + current_seed.zipcode.get_length(depth)); } @@ -614,7 +614,7 @@ void ZipCodeForest::close_snarl(forest_growing_state_t& forest_state, forest_state.sibling_indices_at_depth[depth-1].pop_back(); //Snarl prefix sum is now the distance from the start of the chain to the start of the snarl - snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode_decoder->get_length(depth)); + snarl_prefix_sum = SnarlDistanceIndex::minus(snarl_prefix_sum, last_seed.zipcode.get_length(depth)); //Now update forest_state.sibling_indices_at_depth to be the previous thing in the chain forest_state.sibling_indices_at_depth[depth-1].push_back({ @@ -745,9 +745,9 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //If we're getting the distance to the end of the snarl, then this is the length of the snarl // otherwise, it is the distance from the seed to the start (or end) of the snarl - size_t snarl_distance = to_snarl_end ? seed.zipcode_decoder->get_length(depth) + size_t snarl_distance = to_snarl_end ? 
seed.zipcode.get_length(depth) : SnarlDistanceIndex::sum (distance_to_chain_start, - seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); + seed.zipcode.get_distance_to_snarl_bound(depth+1, !snarl_is_reversed, !child_is_reversed)); //Add the edge trees[forest_state.active_tree_index].zip_code_tree.at(last_child_index - 1 - sibling_i) = @@ -757,7 +757,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //Otherwise, the previous thing was another child of the snarl //and we need to record the distance between these two size_t distance; - if (seed.zipcode_decoder->get_code_type(depth) == ZipCode::REGULAR_SNARL) { + if (seed.zipcode.get_code_type(depth) == ZipCode::REGULAR_SNARL) { //If this is the child of a regular snarl, then the distance between //any two chains is inf, and the distance to any bound is 0 distance = to_snarl_end ? sibling.distances.second : std::numeric_limits::max(); @@ -771,19 +771,19 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co if (to_snarl_end && !is_cyclic_snarl) { distance = SnarlDistanceIndex::sum(sibling.distances.second, - sibling_seed.zipcode_decoder->get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); + sibling_seed.zipcode.get_distance_to_snarl_bound(depth+1, snarl_is_reversed, child_is_reversed)); } else { //If to_snarl_end is true, then we want the distance to the end (or start if snarl_is_reversed) // Rank is 0 and the orientation doesn't matter size_t rank2 = to_snarl_end ? (snarl_is_reversed ? 0 : 1) - : seed.zipcode_decoder->get_rank_in_snarl(depth+1); + : seed.zipcode.get_rank_in_snarl(depth+1); bool right_side2 = child_is_reversed; //If the sibling is the start, then get the distance to the appropriate bound size_t rank1 = sibling.type == ZipCodeTree::SNARL_START ? (snarl_is_reversed ? 
1 : 0) - : sibling_seed.zipcode_decoder->get_rank_in_snarl(depth+1); + : sibling_seed.zipcode.get_rank_in_snarl(depth+1); bool right_side1 = !sibling.is_reversed; size_t distance_to_end_of_last_child = sibling.type == ZipCodeTree::SNARL_START ? 0 @@ -791,7 +791,7 @@ void ZipCodeForest::add_snarl_distances(forest_growing_state_t& forest_state, co //The bools for this are true if the distance is to/from the right side of the child //We want the right side of 1 (which comes first in the dag ordering) to the left side of 2 //relative to the orientation of the snarl - net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth, forest_state.distance_index); + net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth, forest_state.distance_index); distance = SnarlDistanceIndex::sum(SnarlDistanceIndex::sum( forest_state.distance_index->distance_in_snarl(snarl_handle, rank1, right_side1, rank2, right_side2), distance_to_chain_start), @@ -938,7 +938,7 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< } else if (current_item.get_type() == ZipCodeTree::SEED) { //If this is a seed, check the snarls we've seen previously for (const size_t& snarl_depth : snarl_depths) { - if (seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) + if (seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::REGULAR_SNARL) { //If this is a regular snarl, then it must be a DAG too dag_count++; @@ -946,11 +946,11 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< //If this is an irregular snarl //Check the snarl in the distance index - net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode_decoder->get_net_handle(snarl_depth, &distance_index); + net_handle_t snarl_handle = seeds[current_item.get_value()].zipcode.get_net_handle(snarl_depth, &distance_index); #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == 
ZipCode::IRREGULAR_SNARL || - seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || - seeds[current_item.get_value()].zipcode_decoder->get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); + assert(seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::IRREGULAR_SNARL || + seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::CYCLIC_SNARL || + seeds[current_item.get_value()].zipcode.get_code_type(snarl_depth) == ZipCode::ROOT_SNARL); assert(distance_index.is_snarl(snarl_handle)); #endif if (distance_index.is_dag(snarl_handle)) { @@ -976,13 +976,13 @@ std::pair ZipCodeTree::dag_and_non_dag_snarl_count(const vector< return std::make_pair(dag_count, non_dag_count); } bool ZipCodeTree::seed_is_reversed_at_depth (const Seed& seed, size_t depth, const SnarlDistanceIndex& distance_index){ - if (seed.zipcode_decoder->get_is_reversed_in_parent(depth)) { + if (seed.zipcode.get_is_reversed_in_parent(depth)) { return true; - } else if (depth > 0 && (seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL - || seed.zipcode_decoder->get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { + } else if (depth > 0 && (seed.zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL + || seed.zipcode.get_code_type(depth-1) == ZipCode::CYCLIC_SNARL)) { //If the parent is an irregular snarl, then check the orientation of the child in the snarl - net_handle_t snarl_handle = seed.zipcode_decoder->get_net_handle(depth-1, &distance_index); - size_t rank = seed.zipcode_decoder->get_rank_in_snarl(depth); + net_handle_t snarl_handle = seed.zipcode.get_net_handle(depth-1, &distance_index); + size_t rank = seed.zipcode.get_rank_in_snarl(depth); if (distance_index.distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() && @@ -1109,10 +1109,10 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //so if things are traversed 
backwards, reverse the orientation bool a_is_reversed = false; bool b_is_reversed = false; - while (depth < seeds->at(previous_seed_index).zipcode_decoder->max_depth() && - depth < seeds->at(current_item.get_value()).zipcode_decoder->max_depth() && - ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, - *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { + while (depth < seeds->at(previous_seed_index).zipcode.max_depth() && + depth < seeds->at(current_item.get_value()).zipcode.max_depth() && + ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, + seeds->at(current_item.get_value()).zipcode, depth)) { //Remember the orientation if (ZipCodeTree::seed_is_reversed_at_depth(seeds->at(previous_seed_index), depth, distance_index)) { @@ -1142,19 +1142,19 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, //Either depth is the last thing in previous_seed_index or current_item.value, or they are different at this depth - if ( ZipCodeDecoder::is_equal(*seeds->at(previous_seed_index).zipcode_decoder, - *seeds->at(current_item.get_value()).zipcode_decoder, depth)) { + if ( ZipCode::is_equal(seeds->at(previous_seed_index).zipcode, + seeds->at(current_item.get_value()).zipcode, depth)) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthey are on the same node" << endl; #endif //If they are equal, then they must be on the same node size_t offset1 = is_rev(seeds->at(previous_seed_index).pos) - ? seeds->at(previous_seed_index).zipcode_decoder->get_length(depth) + ? seeds->at(previous_seed_index).zipcode.get_length(depth) - offset(seeds->at(previous_seed_index).pos) : offset(seeds->at(previous_seed_index).pos); size_t offset2 = is_rev(seeds->at(current_item.get_value()).pos) - ? seeds->at(current_item.get_value()).zipcode_decoder->get_length(depth) + ? 
seeds->at(current_item.get_value()).zipcode.get_length(depth) - offset(seeds->at(current_item.get_value()).pos) : offset(seeds->at(current_item.get_value()).pos); if (!current_is_in_cyclic_snarl) { @@ -1172,28 +1172,28 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, cerr << "\tThey are on different connected components" << endl; #endif //If they are on different connected components, sort by connected component - assert( seeds->at(previous_seed_index).zipcode_decoder->get_distance_index_address(0) <= - seeds->at(current_item.get_value()).zipcode_decoder->get_distance_index_address(0)); + assert( seeds->at(previous_seed_index).zipcode.get_distance_index_address(0) <= + seeds->at(current_item.get_value()).zipcode.get_distance_index_address(0)); - } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::CHAIN - || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { + } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::CHAIN + || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::ROOT_CHAIN) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common chain" << endl; #endif //If previous_seed_index and current_item.value are both children of a chain - size_t offset_a = seeds->at(previous_seed_index).zipcode_decoder->get_offset_in_chain(depth); - size_t offset_b = seeds->at(current_item.get_value()).zipcode_decoder->get_offset_in_chain(depth); + size_t offset_a = seeds->at(previous_seed_index).zipcode.get_offset_in_chain(depth); + size_t offset_b = seeds->at(current_item.get_value()).zipcode.get_offset_in_chain(depth); if (!current_is_in_cyclic_snarl) { if ( offset_a == offset_b) { //If they have the same prefix sum, then the snarl comes first //They will never be on the same child at this depth if (parent_of_a_is_reversed) { - 
assert(seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && - seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); + assert(seeds->at(current_item.get_value()).zipcode.get_code_type(depth) != ZipCode::NODE && + seeds->at(previous_seed_index).zipcode.get_code_type(depth) == ZipCode::NODE); } else { - assert( seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth) != ZipCode::NODE && - seeds->at(current_item.get_value()).zipcode_decoder->get_code_type(depth) == ZipCode::NODE); + assert( seeds->at(previous_seed_index).zipcode.get_code_type(depth) != ZipCode::NODE && + seeds->at(current_item.get_value()).zipcode.get_code_type(depth) == ZipCode::NODE); } } else { //Check if the parent chain is reversed and if so, then the order should be reversed @@ -1205,8 +1205,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, } } } - } else if (seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::REGULAR_SNARL - || seeds->at(previous_seed_index).zipcode_decoder->get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { + } else if (seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::REGULAR_SNARL + || seeds->at(previous_seed_index).zipcode.get_code_type(depth-1) == ZipCode::IRREGULAR_SNARL) { #ifdef DEBUG_ZIP_CODE_TREE cerr << "\t they are children of a common dag snarl" << endl; #endif @@ -1215,8 +1215,8 @@ void ZipCodeTree::validate_zip_tree(const SnarlDistanceIndex& distance_index, // The ranks of children in snarls are in a topological order, so // sort on the ranks if (!current_is_in_cyclic_snarl) { - assert( seeds->at(previous_seed_index).zipcode_decoder->get_rank_in_snarl(depth) <= - seeds->at(current_item.get_value()).zipcode_decoder->get_rank_in_snarl(depth)); + assert( seeds->at(previous_seed_index).zipcode.get_rank_in_snarl(depth) <= + seeds->at(current_item.get_value()).zipcode.get_rank_in_snarl(depth)); 
} } @@ -2031,20 +2031,20 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\tThis is the root snarl so sort by connected component: " - << seed.zipcode_decoder->get_distance_index_address(0) << endl; + << seed.zipcode.get_distance_index_address(0) << endl; #endif - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode_decoder->get_distance_index_address(0)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( seed.zipcode.get_distance_index_address(0)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(0)); } else if (interval.code_type == ZipCode::NODE || interval.code_type == ZipCode::ROOT_NODE - || seed.zipcode_decoder->max_depth() == interval.depth) { + || seed.zipcode.max_depth() == interval.depth) { #ifdef DEBUG_ZIP_CODE_SORTING cerr << "\t\t this is a node: offset: " << ( is_rev(seed.pos) - ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) + ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)) << endl;; #endif sort_values_by_seed[zipcode_sort_order[i]].set_sort_value( - is_rev(seed.pos) != order_is_reversed ? seed.zipcode_decoder->get_length(interval.depth) - offset(seed.pos) + is_rev(seed.pos) != order_is_reversed ? seed.zipcode.get_length(interval.depth) - offset(seed.pos) : offset(seed.pos)); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(ZipCode::NODE); @@ -2058,12 +2058,12 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, // and 2 will be added to the node with an offset in the node of 0 (node 3 if the chain is traversed forward) // See sort_value_t for more details - size_t prefix_sum = order_is_reversed ? 
SnarlDistanceIndex::minus(seed.zipcode_decoder->get_length(interval.depth), - SnarlDistanceIndex::sum( seed.zipcode_decoder->get_offset_in_chain(interval.depth+1), - seed.zipcode_decoder->get_length(interval.depth+1))) - : seed.zipcode_decoder->get_offset_in_chain(interval.depth+1); + size_t prefix_sum = order_is_reversed ? SnarlDistanceIndex::minus(seed.zipcode.get_length(interval.depth), + SnarlDistanceIndex::sum( seed.zipcode.get_offset_in_chain(interval.depth+1), + seed.zipcode.get_length(interval.depth+1))) + : seed.zipcode.get_offset_in_chain(interval.depth+1); - ZipCode::code_type_t child_type = seed.zipcode_decoder->get_code_type(interval.depth+1); + ZipCode::code_type_t child_type = seed.zipcode.get_code_type(interval.depth+1); sort_values_by_seed[zipcode_sort_order[i]].set_code_type(child_type); if (child_type == ZipCode::REGULAR_SNARL @@ -2075,9 +2075,9 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, sort_values_by_seed[zipcode_sort_order[i]].set_chain_order(1); } else { //If this is a node, then the order depends on where the position falls in the node - bool node_is_rev = seed.zipcode_decoder->get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); + bool node_is_rev = seed.zipcode.get_is_reversed_in_parent(interval.depth+1) != is_rev(seed.pos); node_is_rev = order_is_reversed ? !node_is_rev : node_is_rev; - size_t node_offset = node_is_rev ? seed.zipcode_decoder->get_length(interval.depth+1) - offset(seed.pos) + size_t node_offset = node_is_rev ? 
seed.zipcode.get_length(interval.depth+1) - offset(seed.pos) : offset(seed.pos); sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(SnarlDistanceIndex::sum(prefix_sum, node_offset)); @@ -2093,13 +2093,13 @@ void ZipCodeForest::sort_one_interval(forest_growing_state_t& forest_state, #endif } else { #ifdef DEBUG_ZIP_CODE_SORTING - cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1) << endl; + cerr << "\tThis is snarl, so return the rank in the snarl: " << seed.zipcode.get_rank_in_snarl(interval.depth+1) << endl; #endif // The ranks of children in irregular snarls are in a topological order, so // sort on the ranks // The rank of children in a regular snarl is arbitrary but it doesn't matter anyway - sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode_decoder->get_rank_in_snarl(interval.depth+1)); - sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode_decoder->get_code_type(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_sort_value(seed.zipcode.get_rank_in_snarl(interval.depth+1)); + sort_values_by_seed[zipcode_sort_order[i]].set_code_type(seed.zipcode.get_code_type(interval.depth+1)); } min_sort_value = std::min(min_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); max_sort_value = std::max(max_sort_value, sort_values_by_seed[zipcode_sort_order[i]].get_sort_value()); @@ -2204,7 +2204,7 @@ void ZipCodeForest::get_next_intervals(forest_growing_state_t& forest_state, con if (interval.code_type != ZipCode::EMPTY && - seeds->at(zipcode_sort_order[interval.interval_start]).zipcode_decoder->max_depth() == interval.depth ) { + seeds->at(zipcode_sort_order[interval.interval_start]).zipcode.max_depth() == interval.depth ) { //If this is a trivial chain, then just return the same interval as a node #ifdef DEBUG_ZIP_CODE_TREE cerr << "\tthis was a trivial chain so just return the same interval as a node" << endl; @@ -2434,7 
+2434,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth()+1); + seeds.at(forest_state.seed_sort_order[current_interval.interval_start]).zipcode.max_depth()+1); cerr << "Close anything open" << endl; #endif while (!forest_state.open_intervals.empty()) { @@ -2607,7 +2607,7 @@ void ZipCodeForest::fill_in_forest(const vector& seeds, const VectorViewmax_depth(); + seeds.at(forest_state.seed_sort_order[current_interval.interval_start]).zipcode.max_depth(); for (size_t seed_i = current_interval.interval_start ; seed_i < current_interval.interval_end ; seed_i++) { @@ -2709,9 +2709,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s const SnarlDistanceIndex* distance_index = forest_state.distance_index; #ifdef DEBUG_ZIP_CODE_TREE - assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_code_type(snarl_interval.depth) + assert(seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_code_type(snarl_interval.depth) == ZipCode::CYCLIC_SNARL); - net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); + net_handle_t handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, distance_index); cerr << "Sorting and finding intervals for cyclic snarl " << distance_index->net_handle_as_string(handle); size_t child_count = 0; for (auto& x : child_intervals) { @@ -2720,7 +2720,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s cerr << " with " << child_count << " children" << endl; #endif - net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode_decoder->get_net_handle(snarl_interval.depth, distance_index); + net_handle_t snarl_handle = seeds->at(zipcode_sort_order[snarl_interval.interval_start]).zipcode.get_net_handle(snarl_interval.depth, 
distance_index); /****** For each interval, form runs of reachable seeds @@ -2800,9 +2800,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s //Get up to half of the values from before the snarl while (check_i >= parent_interval.interval_start && parent_offset_values.size() <= check_count/2) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); } check_i--; @@ -2813,9 +2813,9 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s check_i = snarl_interval.interval_end; while (check_i < parent_interval.interval_end && parent_offset_values.size() < check_count) { - if (seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->max_depth() == snarl_interval.depth) { + if (seeds->at(zipcode_sort_order[check_i]).zipcode.max_depth() == snarl_interval.depth) { parent_offset_values.emplace_back(minimizers[seeds->at(zipcode_sort_order[check_i]).source].value.offset, - seeds->at(zipcode_sort_order[check_i]).zipcode_decoder->get_offset_in_chain(snarl_interval.depth)); + seeds->at(zipcode_sort_order[check_i]).zipcode.get_offset_in_chain(snarl_interval.depth)); } check_i++; @@ -2857,7 +2857,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s #ifdef DEBUG_ZIP_CODE_TREE //This is how seed_is_reversed_at_depth currently works but double check this in case it changed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = 
seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); assert (distance_index->distance_in_snarl(snarl_handle, 0, false, rank, false) == std::numeric_limits::max() && distance_index->distance_in_snarl(snarl_handle, 1, false, rank, true) == std::numeric_limits::max()); @@ -2866,7 +2866,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s interval_is_reversable = false; } else { //If the interval is not reversed in the snarl, check if it can be reversed - size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode_decoder->get_rank_in_snarl(snarl_interval.depth+1); + size_t rank = seeds->at(zipcode_sort_order[child_interval.interval_start]).zipcode.get_rank_in_snarl(snarl_interval.depth+1); size_t distance_start = distance_index->distance_in_snarl(snarl_handle, 0, false, rank, true); size_t distance_end = distance_index->distance_in_snarl(snarl_handle, 1, false, rank, false); interval_is_reversable = distance_start != std::numeric_limits::max() @@ -2899,7 +2899,7 @@ void ZipCodeForest::get_cyclic_snarl_intervals( forest_growing_state_t& forest_s std::get<1>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = sort_values_by_seed[zipcode_sort_order[sort_i]].get_sort_value(); std::get<2>(read_and_chain_offsets [sort_i-snarl_interval.interval_start]) = - seed.zipcode_decoder->max_depth() <= snarl_interval.depth+2; + seed.zipcode.max_depth() <= snarl_interval.depth+2; //Make a new run for the seed, to be updated with anything combined with it From d72d232a4c517bbd9896c27c495329a6a3e17710 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 5 Aug 2024 12:00:09 +0200 Subject: [PATCH 088/124] Serialize decoder --- src/minimizer_mapper.cpp | 3 +- src/zip_code.cpp | 68 ++++++++++++++++++++++++++++++++++++++++ src/zip_code.hpp | 2 +- 3 files changed, 71 insertions(+), 2 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index 
c70d26f3cbf..14eccb6acd8 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3744,6 +3744,7 @@ std::vector MinimizerMapper::find_seeds(const std::vector if (minimizer.occs[j].payload == MIPayload::NO_CODE) { //If the zipcocde wasn't saved, then calculate it seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit); + seeds.back().zipcode.fill_in_full_decoder(); } else if (minimizer.occs[j].payload.first == 0) { //If the minimizer stored the index into a list of zipcodes if (!this->zipcodes->empty()) { @@ -3752,12 +3753,12 @@ std::vector MinimizerMapper::find_seeds(const std::vector } else { //If we don't have the oversized payloads, then fill in the zipcode using the pos seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit); + seeds.back().zipcode.fill_in_full_decoder(); } } else { //If the zipcode was saved in the payload seeds.back().zipcode.fill_in_zipcode_from_payload(minimizer.occs[j].payload); } - seeds.back().zipcode.fill_in_full_decoder(); } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index a06d61c421f..5e002bc7049 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1735,6 +1735,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { } } + fill_in_full_decoder(); } std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { @@ -1799,6 +1800,26 @@ void ZipCodeCollection::serialize(std::ostream& out) const { #ifdef DEBUG_ZIPCODE assert(byte_count == zip_byte_count); #endif + + //Also save the decoder + varint_vector_t decoder_vector; + for (const ZipCode::decoder_t& d : zip.decoder) { + decoder_vector.add_value(d.is_chain); + decoder_vector.add_value(d.offset); + } + + //Write the number of bytes for the zipcode + varint_vector_t decoder_byte_count; + decoder_byte_count.add_value(decoder_vector.byte_count()); + for (const uint8_t& byte : decoder_byte_count.data) { + out << char(byte); + } + + + //Write the decoder + for (const uint8_t& byte : 
decoder_vector.data ) { + out << char(byte); + } } } @@ -1852,6 +1873,53 @@ void ZipCodeCollection::deserialize(std::istream& in) { for (const char& character : line) { zip.zipcode.add_one_byte(uint8_t(character)); } + + + //Now get the decoder + + varint_vector_t decoder_byte_count_vector; + while (in.peek() & (1<<7)) { + //If the first bit in the byte is 1, then add it, stop once the first bit is 0 + char ch; + in.get(ch); + decoder_byte_count_vector.add_one_byte((uint8_t)ch); + } + assert(! (in.peek() & (1<<7))); + //The next byte has a 0 as its first bit, so add it + char ch; + in.get(ch); + decoder_byte_count_vector.add_one_byte((uint8_t)ch); + + //The first (and only) value in the vector is the length of the zipcode + size_t decoder_byte_count = decoder_byte_count_vector.get_value_and_next_index(0).first; + +#ifdef DEBUG_ZIPCODE + cerr << "Get decoder of " << decoder_byte_count << " bytes" << endl; + //assert(decoder_byte_count >= 15); + assert(decoder_byte_count_vector.get_value_and_next_index(0).second == std::numeric_limits::max()); +#endif + + char line1 [decoder_byte_count]; + + in.read(line1, decoder_byte_count); + + varint_vector_t decoder_vector; + for (const char& character : line1) { + decoder_vector.add_one_byte(uint8_t(character)); + } + + if (decoder_vector.byte_count() != 0) { + size_t index = 0; + while (index != std::numeric_limits::max()) { + size_t is_chain, offset; + std::tie(is_chain, index) = decoder_vector.get_value_and_next_index(index); + std::tie(offset, index) = decoder_vector.get_value_and_next_index(index); + zip.decoder.emplace_back(is_chain != 0, offset); + } + } + zip.finished_decoding=true; + + zipcodes.emplace_back(std::move(zip)); } diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4b5de75b9dc..350ee85e489 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -356,7 +356,7 @@ class ZipCodeCollection { //magic number to identify the file const static uint32_t magic_number = 0x5a495053; //ZIPS - const static uint32_t 
version = 2; + const static uint32_t version = 3; public: const static std::uint32_t get_magic_number() {return magic_number;} From c4a2e4812992d2bee7abcf841ae7ada00be0b4d7 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 5 Aug 2024 14:21:07 +0200 Subject: [PATCH 089/124] Actually serialize the decoder --- src/subcommand/minimizer_main.cpp | 3 +++ src/unittest/zip_code.cpp | 27 ++++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 73c30133801..935fc9d8274 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -403,6 +403,9 @@ int main_minimizer(int argc, char** argv) { //Otherwise, if they are being saved, add the zipcode to the oversized zipcode list //And remember the zipcode + //Fill in the decoder to be saved too + zipcode.fill_in_full_decoder(); + size_t zip_index; #pragma omp critical diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index d72de04d546..71d61b9b8d8 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -1747,7 +1747,7 @@ using namespace std; REQUIRE(zipcode == decoded); }; } - SECTION("serialization") { + SECTION("serialization without decoder") { ZipCodeCollection zipcodes; for (size_t i = 1 ; i <= 7 ; i++) { ZipCode zip; @@ -1766,6 +1766,31 @@ using namespace std; REQUIRE(zipcodes.size() == new_zipcodes.size()); for (size_t i = 0 ; i < zipcodes.size() ; i++) { REQUIRE(zipcodes.at(i).zipcode == new_zipcodes.at(i).zipcode); + REQUIRE(zipcodes.at(i).decoder == new_zipcodes.at(i).decoder); + } + + } + SECTION("serialization with decoder") { + ZipCodeCollection zipcodes; + for (size_t i = 1 ; i <= 7 ; i++) { + ZipCode zip; + zip.fill_in_zipcode(distance_index, make_pos_t(i, 0, false)); + zip.fill_in_full_decoder(); + zipcodes.emplace_back(zip); + } + ofstream out ("zipcodes"); + zipcodes.serialize(out); + out.close(); + + ifstream in("zipcodes"); + ZipCodeCollection 
new_zipcodes; + new_zipcodes.deserialize(in); + in.close(); + + REQUIRE(zipcodes.size() == new_zipcodes.size()); + for (size_t i = 0 ; i < zipcodes.size() ; i++) { + REQUIRE(zipcodes.at(i).zipcode == new_zipcodes.at(i).zipcode); + REQUIRE(zipcodes.at(i).decoder == new_zipcodes.at(i).decoder); } } From 7fd62d22d9d707f03aeb3496c61007dfe89549d5 Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Mon, 5 Aug 2024 07:27:50 -0700 Subject: [PATCH 090/124] Put decoder into zipcode payload --- src/unittest/zip_code.cpp | 108 ++++++++++++++++++++++------------ src/zip_code.cpp | 120 ++++++++++++++++++++++++++++++++++---- src/zip_code.hpp | 2 +- 3 files changed, 183 insertions(+), 47 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 71d61b9b8d8..c42ea1086a1 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -65,10 +65,11 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); } } SECTION("Distances within one node") { @@ -332,60 +333,66 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n2 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n3 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n4 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); } } SECTION("n5 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n6 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } } @@ -937,80 +944,88 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode 
decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n2 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n3 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n4 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n5 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n6 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n7 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n8 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n8->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } } @@ -1246,70 +1261,77 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n2 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n3 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { 
ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n4 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n5 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n6 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n7 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } } @@ -1502,70 +1524,77 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != 
MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n2 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n3 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n4 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n5 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n6 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != 
MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n7 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } } @@ -1681,70 +1710,77 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n2 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n3 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n4 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if 
(payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n5 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n6 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("n7 as payload") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (zipcode.byte_count() <= 15) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); + REQUIRE(zipcode.decoder == decoded.decoder); }; } SECTION("serialization without decoder") { diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 5e002bc7049..c87751df3cb 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -5,7 +5,7 @@ namespace vg{ using namespace std; -void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos) { +void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const pos_t& pos, bool fill_in_decoder) { std::vector ancestors; net_handle_t current_handle = distance_index.get_node_net_handle(id(pos)); @@ -51,6 +51,9 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, 
const p } zipcode.add_value(connectivity); + if (fill_in_decoder) { + fill_in_full_decoder(); + } return; } else { #ifdef DEBUG_ZIPCODE @@ -104,6 +107,9 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p assert(to_add.size() == ZipCode::CHAIN_SIZE); #endif if (distance_index.is_trivial_chain(current_ancestor)) { + if (fill_in_decoder) { + fill_in_full_decoder(); + } return; } } else if (distance_index.is_regular_snarl(current_ancestor)) { @@ -127,6 +133,9 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p } } } + if (fill_in_decoder) { + fill_in_full_decoder(); + } } std::vector ZipCode::to_vector() const { @@ -1689,10 +1698,37 @@ bool ZipCode::is_farther_than(const ZipCode& zip1, const ZipCode& zip2, const si } gbwtgraph::Payload ZipCode::get_payload_from_zip() const { - if (byte_count() > 15) { + varint_vector_t decoder_vector; + //The zipcode decoder's is_chain will always alternate is_chain between levels, except for the very end, + // which may have two is_chains in a row for a trivial chain. So we can store the whole series in two bits. 
+ //For the decoder, we never need to know the byte count, since the value in the decoder is never 0 + + + //TODO: This is assuming the decoder is filled in already + bool is_root_chain = decoder[0].is_chain; + bool is_trivial_chain = decoder.size() > 1 && decoder[decoder.size()-1].is_chain && decoder[decoder.size()-2].is_chain; + size_t is_chain_value = 0; + if (is_root_chain) { + is_chain_value |= 1; + } + if (is_trivial_chain) { + is_chain_value |= 1<<1; + } + decoder_vector.add_value(is_chain_value); + //The first offset is always 0 so ignore it + for (const ZipCode::decoder_t& d : decoder) { + if (d.offset != 0) { + decoder_vector.add_value(d.offset); + } + } + + //First byte is for the byte_count + if (byte_count() + decoder_vector.byte_count() > 15) { //If there aren't enough bits to represent the zip code return MIPayload::NO_CODE; } + + //Encode it as the byte count of the zipcode, the zipcode, and the decoder //Index and value as we walk through the zip code size_t index = 0; @@ -1704,18 +1740,34 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { encoded1 |= byte_count(); + size_t encoded_bytes = 1; + for (size_t i = 0 ; i < zipcode.data.size() ; i++ ) { size_t byte = static_cast (zipcode.data[i]); - if ( i < 7 ) { + if ( encoded_bytes < 8 ) { //Add to first code - encoded1 |= (byte << ((i+1)*8)); + encoded1 |= (byte << (encoded_bytes*8)); } else { //Add to second code - encoded2 |= (byte << ((i-7)*8)); + encoded2 |= (byte << ((encoded_bytes-8)*8)); } + encoded_bytes++; } + for (size_t i = 0 ; i < decoder_vector.data.size() ; i++) { + size_t byte = static_cast (decoder_vector.data[i]); + if ( encoded_bytes < 8 ) { + //Add to first code + encoded1 |= (byte << (encoded_bytes*8)); + + } else { + //Add to second code + encoded2 |= (byte << ((encoded_bytes-8)*8)); + } + encoded_bytes++; + } + assert(encoded_bytes <= 16); return {encoded1, encoded2}; } @@ -1724,18 +1776,66 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& 
payload) { assert(payload != MIPayload::NO_CODE); zipcode.data.reserve(16); + size_t decoded_bytes = 0; + //get one byte at a time from the payload and add it to the zip code size_t bit_mask = (1 << 8) - 1; size_t byte_count = payload.first & bit_mask; - for (size_t i = 1 ; i <= byte_count ; i++) { - if (i < 8) { - zipcode.add_one_byte((payload.first >> (i*8)) & bit_mask); + decoded_bytes++; + for (size_t i = 0 ; i < byte_count ; i++) { + if (decoded_bytes < 8) { + zipcode.add_one_byte((payload.first >> (decoded_bytes*8)) & bit_mask); } else { - zipcode.add_one_byte((payload.second >> ((i-8)*8)) & bit_mask); + zipcode.add_one_byte((payload.second >> ((decoded_bytes-8)*8)) & bit_mask); } + decoded_bytes++; + } + //Find the booleans specifying the is_chain values + uint8_t is_chain_val = 0; + if (decoded_bytes < 8) { + is_chain_val = (payload.first >> (decoded_bytes*8)) & bit_mask; + } else { + is_chain_val = (payload.second >> ((decoded_bytes-8)*8)) & bit_mask; + } + decoded_bytes++; + bool is_chain = is_chain_val & 1; + bool is_trivial_chain = is_chain_val & (1<<1); + + //Get the decoder offsets + varint_vector_t decoder_vector; + for (size_t i = decoded_bytes ; i <16 ; i++) { + uint8_t saved_byte; + if (decoded_bytes < 8) { + saved_byte = (payload.first >> (decoded_bytes*8)) & bit_mask; + } else { + saved_byte = (payload.second >> ((decoded_bytes-8)*8)) & bit_mask; + } + if (saved_byte != 0) { + decoder_vector.add_one_byte(saved_byte); + } + + decoded_bytes++; + } + //Now go through the varint vector up and add anything that isn't 0 + size_t varint_value= 1; + size_t varint_index = 0; + decoder.emplace_back(is_chain, 0); + is_chain = !is_chain; + if (decoder_vector.byte_count() != 0) { + while (varint_index != std::numeric_limits::max() && varint_value != 0) { + std::tie(varint_value, varint_index) = decoder_vector.get_value_and_next_index(varint_index); + + decoder.emplace_back(is_chain, varint_value); + + is_chain = !is_chain; + } + } + if (is_trivial_chain) { + 
assert(!decoder.back().is_chain); + decoder.back().is_chain = true; } - fill_in_full_decoder(); + } std::ostream& operator<<(std::ostream& out, const ZipCode::code_type_t& type) { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 350ee85e489..451a7875ca3 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -60,7 +60,7 @@ class ZipCode { public: //Fill in an empty zipcode given a position - void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos); + void fill_in_zipcode (const SnarlDistanceIndex& distance_index, const vg::pos_t& pos, bool fill_in_decoder = true); //Fill in an empty zipcode using the information that was stored in a payload void fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload); From 020cbb4b063dde369ec0317b7b87d4e8fbb96fc7 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 5 Aug 2024 16:57:39 +0200 Subject: [PATCH 091/124] Actually serialize the decoder --- src/subcommand/minimizer_main.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 935fc9d8274..3f8ab7522f8 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -396,9 +396,10 @@ int main_minimizer(int argc, char** argv) { } cout << endl; #endif - if (zipcode.zipcode.byte_count() < 15) { + auto payload = zipcode.get_payload_from_zip(); + if (payload != MIPayload::NO_CODE) { //If the zipcode is small enough to store in the payload - return zipcode.get_payload_from_zip(); + return payload; } else if (!zipcode_name.empty()) { //Otherwise, if they are being saved, add the zipcode to the oversized zipcode list //And remember the zipcode From 06a6b046a69d07ec60b56da033546e1f961fb61a Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 6 Aug 2024 19:32:37 +0200 Subject: [PATCH 092/124] Add an unpacked zipcode but it doesn't compile yet --- src/zip_code.cpp | 343 +++++++++++++++++++++++++++-------------------- src/zip_code.hpp | 53 
++++---- 2 files changed, 223 insertions(+), 173 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index c87751df3cb..cf965b795f5 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -578,6 +578,7 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } + assert(ZipCode::CHAIN_COMPONENT_COUNT_OFFSET == ZipCode::ROOT_CHAIN_COMPONENT_COUNT_OFFSET); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { @@ -599,6 +600,7 @@ bool ZipCode::get_is_looping_chain(const size_t& depth) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } + assert(ZipCode::CHAIN_COMPONENT_COUNT_OFFSET == ZipCode::ROOT_CHAIN_COMPONENT_COUNT_OFFSET); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { @@ -1725,7 +1727,7 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { //First byte is for the byte_count if (byte_count() + decoder_vector.byte_count() > 15) { //If there aren't enough bits to represent the zip code - return MIPayload::NO_CODE; + return ZipCode::NO_PAYLOAD; } //Encode it as the byte count of the zipcode, the zipcode, and the decoder @@ -1773,7 +1775,7 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { } void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { - assert(payload != MIPayload::NO_CODE); + assert(payload != ZipCode::NO_PAYLOAD); zipcode.data.reserve(16); size_t decoded_bytes = 0; @@ -2024,179 +2026,234 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { - MIPayload payload; +vector ZipCode::unpack_zip_code(nid_t id, const 
SnarlDistanceIndex& distance_index) const { + vector unpacked_zipcode; - if (decoder_length() == 1) { - //If the root-level structure is a node - payload.parent_is_root = true; - payload.parent_is_chain = true; + //Otherwise, walk through the zipcode start to end (root to leaf) and fill in the unpacked zipcode + //Fill in everything in the zipcode in this pass, and then go back and fill in any net handles that + //weren't stored in the zipcode by getting the parents + for (size_t depth = 0 ; depth < decoder_length() ; depth++) { + unpacked_zipcode.empalce_back(); + zip_code_t& current_code = unpacked_zipcode.back(); - //Walk through the zipcode to get values size_t zip_value; - size_t zip_index = decoder[0].offset; - //Root is chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //root_identifier - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); + size_t zip_index = decoder[depth].offset; + bool is_chain = decoder[depth].is_chain; + if (depth == 0) { + //identifier is first for anything in the root + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Root node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + if (is_chain) { + if (decoder_length() == 1) { + //Root node - payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - payload.is_trivial_chain = true; - payload.is_reversed = false; - payload.parent_handle = distance_index.get_root(); - payload.parent_type = ZipCode::ROOT_NODE; - payload.parent_record_offset = 0; + current_code.code_type = ZipCode::ROOT_NODE; + //Get the root node as a chain + current_code.handle = distance_index.get_net_handle_from_values( + distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); - } else if (decoder[max_depth() - 1].is_chain) { - //If the parent is a chain - payload.node_handle = distance_index.get_node_net_handle(id); - payload.parent_is_chain = true; - payload.parent_is_root = false; + //For a root node, this is the length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - //Walk through the zipcode to get values - size_t zip_value; - size_t zip_index = decoder[max_depth()-1].offset; - //is_chain/rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //root_identifier for root, chain length for anything else - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } else { + //Root chain + current_code.code_type = ZipCode::ROOT_CHAIN; - if (decoder_length() == 2) { - //If the node is a child of the root chain - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); - payload.parent_type = ZipCode::ROOT_CHAIN; - payload.parent_is_root = true; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - } else { - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); - payload.parent_type = ZipCode::CHAIN; - } - payload.parent_record_offset = 
distance_index.get_record_offset(payload.parent_handle); + current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); - //chain component count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //For a root chain, this is the component count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.is_looping_chain = zip_value % 2; + if (zip_value % 2) { + zip_value -= 1; + } + current_code.chain_component = zip_value / 2; + } + //The next thing for both nodes and chains is the connectivity value + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //start-end connected + if ((zip_value & 1) != 0) { + current_code.distance_start_right = 0; + current_code.distance_end_left = 0; + } + //start-start connected + if((zip_value & 2) != 0){ + current_code.distance_start_left = 0; + } + //end-end connected + if ((zip_value & 4) != 0) { + current_code.distance_end_right = 0; + } + } else { + //Root snarl + current_code.code_type = ZipCode::ROOT_SNARL; + current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + } + } else { + if (is_chain) { + if (decoder[depth-1].is_chain) { + //Node in a chain + current_code.code_type = ZipCode::NODE; - //Node prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - //Node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - //is_reversed - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //TODO: For top-level chains we got this from the distance index - payload.is_reversed = zip_value; + //Prefix sum value + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.prefix_sum_or_snarl_rank = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.chain_component = zip_value; + //Node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.length = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + //Node is reversed + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.is_reversed = zip_value; + //Node chain component + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.chain_component = zip_value; + } else { + //Chain + current_code.code_type = ZipCode::CHAIN; - } else { - //If the node is a child of a snarl - - payload.node_handle = distance_index.get_node_net_handle(id); - payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE, - distance_index.get_node_record_offset(payload.node_handle)); - payload.parent_is_chain = false; - payload.parent_is_root = decoder_length() == 2; - payload.is_trivial_chain = true; + //chain rank in snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.prefix_sum_or_snarl_rank = zip_value; + //Chain length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.length = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1;; - size_t zip_value; - size_t zip_index; - if (payload.parent_is_root) { - //is_chain - zip_index = decoder[0].offset; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Identifier for root snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_handle = payload.parent_handle; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); - payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); - payload.parent_type = ZipCode::ROOT_SNARL; - } else { - zip_index = decoder[max_depth()-1].offset; - //is_regular - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //If this is a non-root snarl, get as much as we can from it - payload.parent_type = ZipCode::EMPTY; - if (zip_value == 0) { - payload.parent_type = ZipCode::IRREGULAR_SNARL; - } else if (zip_value == 1) { - payload.parent_type = ZipCode::REGULAR_SNARL; + //chain component count / is looping chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.is_looping_chain = zip_value % 2; + if (zip_value % 2) { + zip_value -= 1; + } + current_code.chain_component = zip_value / 2; + } } else { - payload.parent_type = ZipCode::CYCLIC_SNARL; - } + //Snarl - //Snarl prefix sum - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //snarl type + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + if (zip_value == 1) { + current_code.code_type = ZipCode::REGULAR_SNARL; + } else if (zip_value == 0) { + current_code.code_type = ZipCode::IRREGULAR_SNARL; + } else { + current_code.code_type = ZipCode::CYCLIC_SNARL; + } + //Offset in chain + std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); + current_code.prefix_sum_or_snarl_rank = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //snarl length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.length = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - //Snarl length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Snarl child_count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Chain component of the snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //TODO: SHould use this somehow - payload.chain_component = 0; - //is_reversed for regular snarl and record offset for irregular/cyclic snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //CHild count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index) - if (payload.parent_type == ZipCode::REGULAR_SNARL) { - //Snarl is reversed - net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); - //Simple and regular snarls are different for clustering - if (distance_index.is_simple_snarl(grandparent_handle)) { - payload.is_reversed = zip_value; - payload.parent_is_chain=true; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); + //Chain component + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; + current_code.chain_component = zip_value; + + if (current_code.code_type == ZipCode::REGULAR_SNARL) { + //Regular snarl + + //Is reversed. 
This really means is_reversed for the child, which will be used to get the distance values for the child + //The child's values will be set in the second pass + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; + current_code.is_reversed = zip_value; } else { - payload.is_reversed = false; - payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); - } + //Irregular/cyclic snarl + + //Snarl record for irregular/cyclic snarls + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; + current_code.net_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); - } else { - payload.is_reversed = false; - payload.parent_record_offset = zip_value; - } + //Distance values + //These are actually the distances from the child to the bounds of the snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.distance_start_left = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - } - //We should be at the node/trivial chain now - zip_index = decoder[max_depth()].offset; - //Chain rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Chain length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.distance_end_left = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - //Get the rest as default values + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.distance_start_right = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; - } - payload.parent_depth = 0; - for (size_t d = 0 ; d <= max_depth() ; d++) { - auto type = get_code_type(d); - if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { - payload.parent_depth++; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + current_code.distance_end_right = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + } + } } } + //Now go back walking up the snarl tree and add all the stuff from the distance index: + //net handles if they haven't been set and distances for children of snarls + for (int depth = decoder_length()-1 ; depth >= 0 ; depth--) { + zip_code_t& current_code = unpacked_zipcode[depth]; + //If we need to set the net handle + if (current_codenet_handle == distance_index.get_root()) { + if (depth == decoder_length-1 ) { + current_code.net_handle = distance_index.get_node_net_handle(id); + if (current_code.code_type == ZipCode::CHAIN) { + current_code.net_handle = distance_index.get_net_handle_from_values( + distance_index.get_record_offset(current_code.net_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE, + distance_index.get_node_record_offset(current_code.net_handle)); + } + } else { + current_code.net_handle = distance_index.get_parent(unpacked_zipcode[depth+1].net_handle); + } + } + + //If we need to set distances and sometimes the orientation + if (depth != 0) { + zip_code_t& parent_code = unpacked_zipcode[depth-1]; + if (parent_code.code_type == ZipCode::REGULAR_SNARL) { + //If the parent was a regular snarl, then we stored the orientation to get the distances + current_code.is_reversed = parent_code.is_reversed; + parent_code.is_reversed = false; + if (current_code.is_reversed) { + current_code.distance_start_left = std::numeric_limits::max(); + current_code.distance_start_right = 0; + current_code.distance_end_left = 0; + current_code.distance_end_right = std::numeric_limits::max(); 
+ } else { + current_code.distance_start_left = 0; + current_code.distance_start_right = std::numeric_limits::max(); + current_code.distance_end_left = std::numeric_limits::max(); + current_code.distance_end_right = 0; + } + parent_code.distance_start_left = std::numeric_limits::max(); + parent_code.distance_start_right = std::numeric_limits::max(); + parent_code.distance_end_left = std::numeric_limits::max(); + parent_code.distance_end_right = std::numeric_limits::max(); + } else if (parent_code.code_type == ZipCode::IRREGULAR_SNARL || parent_code.code_type == ZipCode::CYCLIC_SNARL) { + //If the parent was an irregular or cyclic snarl, then we saved the distances + current_code.distance_start_left = parent_code.distance_start_left; + current_code.distance_start_right = parent_code.distance_start_right; + current_code.distance_end_left = parent_code.distance_end_left; + current_code.distance_end_right = parent_code.distance_end_right; + + parent_code.distance_start_left = std::numeric_limits::max(); + parent_code.distance_start_right = std::numeric_limits::max(); + parent_code.distance_end_left = std::numeric_limits::max(); + parent_code.distance_end_right = std::numeric_limits::max(); + + parent_code.is_reversed = false; + } + } - return payload; + } + return unpacked_zipcode; } net_identifier_t ZipCode::get_identifier(size_t depth) const { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 451a7875ca3..4d2b9332773 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -29,10 +29,8 @@ using namespace std; -///A struct to interpret the minimizer payload -///I want to use zipcodes as the payload but at the moment clustering still expects the old payload -///This can interpret zipcodes to format them as the old payload -struct MIPayload; +///A struct to store an unpacked version of one node/snarl/chain code +struct zip_code_t; /// A struct to be used as a unique identifier for a snarl tree node (node/snarl/chain) @@ -320,16 +318,16 @@ class ZipCode { /// unit 
test from the resulting information. void dump(std::ostream& out) const; - //TODO: I want to make a struct for holding all values of a code as real values - - ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + ///Unpack the zip code to get a bigger version with random access + vector unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index) const; /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth /// would be the node, also include the node id net_identifier_t get_identifier(size_t depth) const; const static net_identifier_t get_parent_identifier(const net_identifier_t& child); + public: + constexpr static gbwtgraph::Payload NO_PAYLOAD = {0, 0}; }; /// Print a code type to a stream @@ -380,34 +378,29 @@ std::ostream& operator<<(std::ostream& out, const ZipCode& decoder); /** - The payload for the minimizer index. This stores distance information that gets used in clustering - The payload now uses zip codes, so this gets used to go from a zip code to distance information - usable by the clusterer + An unpacked version of one node/snarl/chain code + Not all values will be set for every type of code */ -struct MIPayload { - typedef std::uint64_t code_type; // We assume that this fits into gbwtgraph::Payload. 
- //typedef std::pair payload_type; - - - constexpr static gbwtgraph::Payload NO_CODE = {0, 0}; - constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); +struct zip_code_t { + ZipCode::code_type_t code_type = ZipCode::EMPTY; + //TODO: I'd like this to be the root or another placeholder + net_handle_t net_handle; - net_handle_t node_handle; - net_handle_t parent_handle; + size_t length = std::numeric_limits::max(); + size_t prefix_sum_or_snarl_rank = std::numeric_limits::max(); + size_t chain_component = std::numeric_limits::max(); - size_t node_length = std::numeric_limits::max(); - size_t prefix_sum = 0; - size_t chain_component = 0; - //Depth according to the distance index - size_t parent_depth = 0; - size_t parent_record_offset = 0; + //distance from the left side of the child to the start of the snarl + //or, for root nodes/chains, start-start connected + //start-right and end-left are the same for root nodes/chains + size_t distance_start_left = std::numeric_limits::max(); + size_t distance_start_right = std::numeric_limits::max(); + size_t distance_end_left = std::numeric_limits::max(); + size_t distance_end_right = std::numeric_limits::max(); - ZipCode::code_type_t parent_type = ZipCode::EMPTY; bool is_reversed = false; - bool is_trivial_chain = false; - bool parent_is_chain = false; - bool parent_is_root = false; + bool is_looping_chain = false; }; } From d5a9e4d74c7e90686ab71e7a098e6b488c845539 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 6 Aug 2024 22:01:04 +0200 Subject: [PATCH 093/124] Get everything to compile --- src/minimizer_mapper.cpp | 2 +- src/minimizer_mapper.hpp | 2 +- src/snarl_seed_clusterer.cpp | 218 +++++++++++------------------- src/snarl_seed_clusterer.hpp | 3 +- src/subcommand/minimizer_main.cpp | 6 +- src/unittest/zip_code.cpp | 72 +++++----- src/zip_code.cpp | 18 +-- 7 files changed, 131 insertions(+), 190 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index 14eccb6acd8..3a87586f0a7 100644 
--- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3741,7 +3741,7 @@ std::vector MinimizerMapper::find_seeds(const std::vector seeds.back().source = i; //Get the zipcode - if (minimizer.occs[j].payload == MIPayload::NO_CODE) { + if (minimizer.occs[j].payload == ZipCode::NO_PAYLOAD) { //If the zipcocde wasn't saved, then calculate it seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit); seeds.back().zipcode.fill_in_full_decoder(); diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index 117e9b624bf..b8cf445753b 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -596,7 +596,7 @@ class MinimizerMapper : public AlignerClient { /// How should we initialize chain info when it's not stored in the minimizer index? inline static gbwtgraph::Payload no_chain_info() { - return MIPayload::NO_CODE; + return ZipCode::NO_PAYLOAD; } /// How do we convert chain info to an actual seed of the type we are using? diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 31579b53103..2ff6b814fa6 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].unpacked_zipcode = seeds[i].zipcode.unpack_zip_code(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -66,7 +66,7 @@ vector> SnarlDistanceIndexClusterer throw std::runtime_error("Clusterer: We can't handle more than paired end mapping"); } - //Make a vector of SeedCache that contains all the payloads + //Make a vector of SeedCache that contains all the unpacked zipcodes vector> all_seed_caches; all_seed_caches.reserve(all_seeds.size()); @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = 
&(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].unpacked_zipcode = all_seeds[read_num][i].zipcode.unpack_zip_code(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -352,65 +352,41 @@ cerr << "Add all seeds to nodes: " << endl; //The zipcodes are already filled in //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now - const MIPayload& payload = seed.payload; + const zip_code_t& node_code = seed.unpacked_zipcode.back(); + const zip_code_t& parent_code = seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2]; #ifdef DEBUG_CLUSTER - //cerr << "Using cached values for node " << id << ": " - // << ", " << seed.payload.record_offset - // << ", " << seed.payload.parent_record_offset - // << ", " << seed.payload.node_length - // << ", " << seed.payload.prefix_sum - // << ", " << seed.payload.chain_component << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; - //assert(seed.payload.parent_record_offset == - // (distance_index.is_trivial_chain(parent_handle) ? distance_index.get_record_offset(distance_index.get_parent(parent_handle)) - // :distance_index.get_record_offset(parent_handle))); - cerr << "Node length " << seed.payload.node_length << " should be " << distance_index.minimum_length(handle) << endl; - assert(seed.payload.node_length == distance_index.minimum_length(handle)); - //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) - // ? 
std::numeric_limits::max() - // : distance_index.get_prefix_sum_value(handle); - //assert(seed.payload.prefix_sum == prefix_sum); + cerr << "Node length " << node_code.length << " should be " << distance_index.minimum_length(handle) << endl; + assert(seed.unpacked_vector.back().length == distance_index.minimum_length(handle)); size_t chain_component = (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) : 0); chain_component = chain_component == std::numeric_limits::max() ? 0 : chain_component; - cerr << "For nod " << distance_index.net_handle_as_string(handle) << endl; - cerr << "Chain compoentn: " << chain_component << " was " << seed.payload.chain_component << endl; - assert(seed.payload.chain_component == chain_component); + cerr << "For node " << distance_index.net_handle_as_string(handle) << endl; + cerr << "Chain compoentn: " << chain_component << " was " << node_code.chain_component << endl; + assert(node_code.chain_component == chain_component); - if (!distance_index.is_root(seed.payload.parent_handle)) { - cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; - cerr <<" Is actually " << distance_index.net_handle_as_string( distance_index.start_end_traversal_of(seed.payload.parent_handle)) << endl; - assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); - } #endif - if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { + if (!((seed.unpacked_zipcode.front().code_type == ZipCode::ROOT_SNARL && seed.unpacked_zipcode.size() == 2) + || seed.unpacked_zipcode.front().code_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) //Add the seed to its parent //Also update the 
zipcode on the seed + #ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; - //assert(prefix_sum == (is_trivial_chain ? std::numeric_limits::max() - // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); - cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; - assert(seed.payload.node_length == distance_index.minimum_length(seed.payload.node_handle)); - cerr << "Reversed in parent? " << distance_index.net_handle_as_string(seed.payload.node_handle) << " " << distance_index.net_handle_as_string(seed.payload.parent_handle) << " " << seed.payload.is_reversed << endl; - cerr << "is trivial? " << seed.payload.is_trivial_chain << endl; - if (!distance_index.is_root(seed.payload.parent_handle)) { - cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(seed.payload.parent_handle)) << endl; - } - cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2].net_handle) << endl; + cerr << "Node length should be " << distance_index.minimum_length(node_code.net_handle) << " actually " << node_code.length << endl; + assert(node_code.length == distance_index.minimum_length(node_code.handle)); + cerr << "Reversed in parent? " << distance_index.net_handle_as_string(node_code.net_handle) << " " << distance_index.net_handle_as_string(parent_code.net_handle) << " " << node_code.is_reversed << endl; - assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? 
distance_index.is_reversed_in_parent(seed.payload.parent_handle) - : distance_index.is_reversed_in_parent(seed.payload.node_handle))); #endif //Add the parent chain or trivial chain @@ -418,34 +394,43 @@ cerr << "Add all seeds to nodes: " << endl; new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.parent_handle) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(parent_code.net_handle) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; - if (seed.payload.is_trivial_chain ) { - clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), + if (distance_index.is_chain(node_code.net_handle) ) { + //Trivial chain + clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(node_code.net_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), + false, node_code.length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode.max_depth()); clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain - clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), + clustering_problem.net_handle_to_node_problem_index.emplace(parent_code.net_handle, clustering_problem.all_node_problems.size()); + 
clustering_problem.all_node_problems.emplace_back(parent_code.net_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), distance_index, &seed, seed.seed->zipcode.max_depth() - 1); } new_parent = true; } + size_t parent_depth = 0; + + for (size_t d = 0 ; d < seed.unpacked_zipcode.size() ; d++) { + const auto& type = seed.unpacked_zipcode[d].code_type; + if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { + parent_depth++; + } + } #ifdef DEBUG_CLUSTER - assert(seed.payload.parent_depth == distance_index.get_depth(seed.payload.parent_handle)); + assert(parent_depth == distance_index.get_depth(parent_code.net_handle)); #endif //If chains_by_level isn't big enough for this depth, resize it and reserve space at each level - if (seed.payload.parent_depth+1 > chains_by_level.size()) { - size_t to_add = (seed.payload.parent_depth+1) - chains_by_level.size(); + if (parent_depth+1 > chains_by_level.size()) { + size_t to_add = (parent_depth+1) - chains_by_level.size(); for (size_t i = 0 ; i < to_add ; i++) { chains_by_level.emplace_back(); chains_by_level.back().reserve(clustering_problem.seed_count_prefix_sum.back()); @@ -453,66 +438,26 @@ cerr << "Add all seeds to nodes: " << endl; } //Make sure the seed's distances are relative to the orientation in the parent - seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) + seed.distance_left = node_code.is_reversed != is_rev(pos) ? node_code.length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? get_offset(pos) + 1 - : seed.payload.node_length- get_offset(pos); + seed.distance_right = node_code.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 + : node_code.length- get_offset(pos); //Add this seed to its parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.parent_handle)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(parent_code.net_handle)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = seed.payload.node_handle; + parent_problem.children.back().net_handle = node_code.net_handle; parent_problem.children.back().seed_indices = {read_num, i}; parent_problem.children.back().is_seed = true; parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = seed.payload.chain_component; + parent_problem.children.back().chain_component = node_code.chain_component; parent_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - seed.payload.prefix_sum); + node_code.prefix_sum_or_snarl_rank); //And the parent to chains_by_level if (new_parent) { - chains_by_level[seed.payload.parent_depth].emplace_back(seed.payload.parent_handle); - } - - - //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too - if ( new_parent) { - if (seed.payload.is_trivial_chain && !seed.payload.parent_is_root) { - bool grandparent_is_simple_snarl = seed.payload.parent_is_chain; - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = grandparent_is_simple_snarl - ? 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(seed.payload.node_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE, - 1) - : distance_index.get_net_handle_from_values(seed.payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::SNARL_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; -#endif - - if (grandparent_is_simple_snarl) { - //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too - parent_problem.has_grandparent_handle = true; - parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( - seed.payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; -#endif - } - } else if (seed.payload.parent_is_root && seed.payload.parent_is_chain && !seed.payload.is_trivial_chain) { - //The parent chain is a child of the root - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( - 0, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); -#ifdef DEBUG_CLUSTER - cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; -#endif - } + chains_by_level[parent_depth].emplace_back(parent_code.net_handle); } @@ -520,38 +465,36 @@ cerr << "Add all seeds to nodes: " << endl; //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node - - //Create a new SnarlTreeNodeProblem for this node bool new_node = false; - if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.node_handle) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(node_code.net_handle) == 0) { new_node = true; - 
clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, + clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(node_code.net_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, seed.payload.node_length, std::numeric_limits::max(), + false, node_code.length, std::numeric_limits::max(), std::numeric_limits::max(), &seed, seed.seed->zipcode.max_depth()); //Remember the parent of this node, since it will be needed to remember the root snarl later - clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; + clustering_problem.all_node_problems.back().parent_net_handle = parent_code.net_handle; } - seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); + seed.distance_left = node_code.is_reversed != is_rev(pos) ? node_code.length- get_offset(pos) : get_offset(pos) + 1; + seed.distance_right = node_code.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 : node_code.length- get_offset(pos); - SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.node_handle)); + SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(node_code.net_handle)); node_problem.children.emplace_back(); - node_problem.children.back().net_handle = seed.payload.node_handle; + node_problem.children.back().net_handle = node_code.net_handle; node_problem.children.back().seed_indices = {read_num, i}; node_problem.children.back().is_seed = true; node_problem.children.back().has_chain_values = true; - node_problem.children.back().chain_component = seed.payload.chain_component; + node_problem.children.back().chain_component = node_code.chain_component; node_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - seed.payload.prefix_sum); + node_code.prefix_sum_or_snarl_rank); @@ -569,7 +512,7 @@ cerr << "Add all seeds to nodes: " << endl; //Go through and cluster nodes that are children of the root or root snarls for(const SeedCache* seed : nodes_to_cluster_now) { - const net_handle_t& node_net_handle = seed->payload.node_handle; + const net_handle_t& node_net_handle = seed->unpacked_zipcode.back().net_handle; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); @@ -580,7 +523,7 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t parent = node_problem.parent_net_handle; - if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { + if (seed->unpacked_zipcode[seed->unpacked_zipcode.size()-2].code_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, @@ 
-1826,10 +1769,10 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } else if (child1.prefix_sum == child2.prefix_sum && !(child1.is_seed && child2.is_seed)) { //Get the prefix sum values not including the offset in the positions size_t prefix_sum1 = child1.is_seed - ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).payload.prefix_sum + ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank : child1.prefix_sum; size_t prefix_sum2 = child2.is_seed - ? clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).payload.prefix_sum + ? clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank : child2.prefix_sum; if (prefix_sum1 == prefix_sum2){ return child2.is_seed; @@ -1951,11 +1894,11 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; size_t last_length = last_child.is_seed - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.node_length + ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().length : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; size_t last_chain_component_end = last_child.is_seed - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.chain_component + ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().chain_component : clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; @@ -2215,17 +2158,17 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == current_child_seed.payload.chain_component) { + } else if ( last_chain_component_end == current_child_seed.unpacked_zipcode.back().chain_component) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = current_child_seed.payload.prefix_sum; + distance_from_last_child_to_current_child = current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank; } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); //Distance is the current node's prefix sum minus the distance from the start of the chain to the last node - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.payload.prefix_sum, + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank, distance_from_chain_start_to_last_node); } } @@ -2242,21 +2185,21 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If this isn't the last child in the chain, then we only want the distance to the end of the current 
child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { + } else if (chain_problem->chain_component_end != current_child_seed.unpacked_zipcode.back().chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { //Length of the chain - (prefix sum + node length of the current node) distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, - SnarlDistanceIndex::sum(current_child_seed.payload.prefix_sum, - current_child_seed.payload.node_length)); + SnarlDistanceIndex::sum(current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank, + current_child_seed.unpacked_zipcode.back().length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.payload.chain_component != 0 ? std::numeric_limits::max() : current_child_seed.payload.prefix_sum) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.unpacked_zipcode.back().chain_component != 0 ? 
std::numeric_limits::max() : current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2291,13 +2234,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (current_child_seed.payload.chain_component != 0) { + if (current_child_seed.unpacked_zipcode.back().chain_component != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { //Prefix sum + offset of the seed in the node current_child_seed.distance_left = SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.payload.prefix_sum); + current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank); } current_child_seed.distance_right = SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain); @@ -2342,16 +2285,16 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : (last_child.net_handle == current_child.net_handle ? 
0 - : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload.node_length)); + : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.unpacked_zipcode.back().length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) //Left distance is the prefix sum (or inf if the node isn't in the first component of the chain) + offset of seed in node //Right distance is the right offst of the seed in the node + the distance from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - current_child_seed.payload.chain_component != 0 ? std::numeric_limits::max() + current_child_seed.unpacked_zipcode.back().chain_component != 0 ? std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.payload.prefix_sum), + current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2386,7 +2329,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node - distance_between -= current_child_seed.payload.node_length; + distance_between -= current_child_seed.unpacked_zipcode.back().length; } #ifdef DEBUG_CLUSTER @@ -2495,9 +2438,9 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = current_child_seed.payload.prefix_sum; - last_length = current_child_seed.payload.node_length; - last_chain_component_end = current_child_seed.payload.chain_component; + last_prefix_sum = 
current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank; + last_length = current_child_seed.unpacked_zipcode.back().length; + last_chain_component_end = current_child_seed.unpacked_zipcode.back().chain_component; } @@ -3178,7 +3121,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t dist_left = clustering_problem.all_seeds->at(read_num)->at(seed_i).distance_left; if (include_prefix_sum) { dist_left = SnarlDistanceIndex::sum(dist_left, - clustering_problem.all_seeds->at(read_num)->at(seed_i).payload.prefix_sum); + clustering_problem.all_seeds->at(read_num)->at(seed_i).unpacked_zipcode.back().prefix_sum_or_snarl_rank); } //Since we only stored the proper distance left for seeds on chains size_t dist_right = structure_length - dist_left + 1; @@ -3213,9 +3156,8 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr if (!skip_distances_to_ends) { const SeedCache& first_seed = clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second); - //TOOD: get_id is weird node_problem->fragment_best_left = SnarlDistanceIndex::sum(first_seed.distance_left, - include_prefix_sum ? first_seed.payload.prefix_sum : 0); + include_prefix_sum ? 
first_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank : 0); //Record the new cluster for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++ ) { @@ -3261,7 +3203,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t offset = clustering_problem.all_seeds->at(read_num)->at(seed_num).distance_left; if (include_prefix_sum) { offset = SnarlDistanceIndex::sum(offset, - clustering_problem.all_seeds->at(read_num)->at(seed_num).payload.prefix_sum); + clustering_problem.all_seeds->at(read_num)->at(seed_num).unpacked_zipcode.back().prefix_sum_or_snarl_rank); } //First and last offset and last cluster head for this read diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 22f8478e6ff..e449e6a46b9 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -99,8 +99,7 @@ class SnarlDistanceIndexClusterer { struct SeedCache{ const Seed* seed; - //TODO: I think I can skip the zipcode now since I have the payload - MIPayload payload; + vector unpacked_zipcode; //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index 3f8ab7522f8..db0aab6c987 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -375,7 +375,7 @@ int main_minimizer(int argc, char** argv) { } if (distance_name.empty()) { gbwtgraph::index_haplotypes(gbz->graph, *index, [](const pos_t&) -> gbwtgraph::Payload { - return MIPayload::NO_CODE; + return ZipCode::NO_PAYLOAD; }); } else { gbwtgraph::index_haplotypes(gbz->graph, *index, [&](const pos_t& pos) -> gbwtgraph::Payload { @@ -397,7 +397,7 @@ int main_minimizer(int argc, char** argv) { cout << endl; #endif auto payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { //If the zipcode is small enough 
to store in the payload return payload; } else if (!zipcode_name.empty()) { @@ -421,7 +421,7 @@ int main_minimizer(int argc, char** argv) { } return {0, zip_index}; } else { - return MIPayload::NO_CODE; + return ZipCode::NO_PAYLOAD; } }); } diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index c42ea1086a1..d05ac0b6173 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -65,7 +65,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -333,7 +333,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -344,7 +344,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -355,7 +355,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -366,7 +366,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); 
gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -377,7 +377,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -388,7 +388,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -944,7 +944,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -955,7 +955,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -966,7 +966,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -977,7 +977,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -988,7 +988,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -999,7 +999,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1010,7 +1010,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1021,7 +1021,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n8->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1261,7 +1261,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, 
make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1272,7 +1272,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1283,7 +1283,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1294,7 +1294,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1305,7 +1305,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1316,7 +1316,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != 
ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1327,7 +1327,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1524,7 +1524,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1535,7 +1535,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1546,7 +1546,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1557,7 +1557,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1568,7 +1568,7 @@ using namespace std; ZipCode 
zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1579,7 +1579,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1590,7 +1590,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1710,7 +1710,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1721,7 +1721,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1732,7 +1732,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != 
MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1743,7 +1743,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1754,7 +1754,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1765,7 +1765,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1776,7 +1776,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != MIPayload::NO_CODE) { + if (payload != ZipCode::NO_PAYLOAD) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index cf965b795f5..dc6af0fb863 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2026,14 +2026,14 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index) const { - vector unpacked_zipcode; +vector ZipCode::unpack_zip_code(nid_t id, 
const SnarlDistanceIndex& distance_index) const { + vector unpacked_zipcode; //Otherwise, walk through the zipcode start to end (root to leaf) and fill in the unpacked zipcode //Fill in everything in the zipcode in this pass, and then go back and fill in any net handles that //weren't stored in the zipcode by getting the parents for (size_t depth = 0 ; depth < decoder_length() ; depth++) { - unpacked_zipcode.empalce_back(); + unpacked_zipcode.emplace_back(); zip_code_t& current_code = unpacked_zipcode.back(); size_t zip_value; @@ -2049,7 +2049,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& current_code.code_type = ZipCode::ROOT_NODE; //Get the root node as a chain - current_code.handle = distance_index.get_net_handle_from_values( + current_code.net_handle = distance_index.get_net_handle_from_values( distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); @@ -2155,10 +2155,10 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& current_code.length = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; //CHild count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index) + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //Chain component - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); current_code.chain_component = zip_value; if (current_code.code_type == ZipCode::REGULAR_SNARL) { @@ -2173,7 +2173,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& //Snarl record for irregular/cyclic snarls std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; - current_code.net_handle = distance_index->get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + current_code.net_handle = distance_index.get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); //Distance values //These are actually the distances from the child to the bounds of the snarl @@ -2199,8 +2199,8 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& zip_code_t& current_code = unpacked_zipcode[depth]; //If we need to set the net handle - if (current_codenet_handle == distance_index.get_root()) { - if (depth == decoder_length-1 ) { + if (current_code.net_handle == distance_index.get_root()) { + if (depth == decoder_length()-1 ) { current_code.net_handle = distance_index.get_node_net_handle(id); if (current_code.code_type == ZipCode::CHAIN) { current_code.net_handle = distance_index.get_net_handle_from_values( From dfde73511a6fd9f780a45d86c77961406ef2020c Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 6 Aug 2024 23:19:50 +0200 Subject: [PATCH 094/124] Add unit tests and fix bug for unpacked zip codes --- src/unittest/zip_code.cpp | 340 +++++++++++++++++++++++++++++++++++++- src/zip_code.cpp | 7 +- 2 files changed, 344 insertions(+), 3 deletions(-) diff 
--git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index d05ac0b6173..520264d001f 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -51,6 +51,16 @@ using namespace std; REQUIRE(zipcode.decoder.front().is_chain == 1); REQUIRE(zipcode.decoder.front().offset == 0); } + SECTION("unpacked zipcode") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + + vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); + REQUIRE(unpacked.size() == 1); + REQUIRE(unpacked[0].net_handle == distance_index.get_parent(distance_index.get_node_net_handle(n1->id()))); + REQUIRE(unpacked[0].length == distance_index.minimum_length(distance_index.get_node_net_handle(n1->id()))); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_NODE); + } SECTION("decoded code") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -175,6 +185,30 @@ using namespace std; REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); + } + SECTION ("unpacked zip code for node on top-level chain") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); + + + net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); + net_handle_t chain1 = distance_index.get_parent(node1); + + REQUIRE(unpacked.size() == 2); + + + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(chain1)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); + + + //Next is the node code + REQUIRE(unpacked[1].code_type == ZipCode::NODE); + REQUIRE(unpacked[1].length == distance_index.minimum_length(node1)); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == distance_index.get_prefix_sum_value(node1)); + REQUIRE(unpacked[1].is_reversed == 
distance_index.is_reversed_in_parent(node1)); + } SECTION ("zip code for node in simple snarl") { ZipCode zipcode; @@ -279,6 +313,46 @@ using namespace std; REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); } + SECTION ("unpacked zip code for node in simple snarl") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n4->id(), distance_index); + REQUIRE(unpacked.size() == 3); + + + net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); + net_handle_t snarl36 = distance_index.get_parent(chain4); + net_handle_t chain1 = distance_index.get_parent(snarl36); + + + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(chain1)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); + + //values for the snarl + REQUIRE(unpacked[1].length == distance_index.minimum_length(snarl36)); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 
5 : 6)); + REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); + bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), + distance_index.flip(chain4)) != 0; + + //values for the chain + REQUIRE(unpacked[2].length == distance_index.minimum_length(chain4)); + REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain4)); + REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); + REQUIRE(unpacked[2].is_reversed == is_rev); + if (is_rev) { + REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_start_right == 0); + REQUIRE(unpacked[2].distance_end_left == 0); + REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); + } else { + REQUIRE(unpacked[2].distance_start_left == 0); + REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_right == 0); + } + } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -628,6 +702,53 @@ using namespace std; REQUIRE(zipcode.get_code_type(3) == ZipCode::NODE); REQUIRE(zipcode.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); + } + SECTION ("unpacked zip code for node on in nested chain") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n2->id(), distance_index); + REQUIRE(unpacked.size() == 4); + + net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); + net_handle_t chain2 = distance_index.get_parent(node2); + net_handle_t snarl1 = distance_index.get_parent(chain2); + net_handle_t chain1 = distance_index.get_parent(snarl1); + + + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(chain1)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); + + 
//Snarl at depth 1 + REQUIRE(unpacked[1].length == 0); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 4 : 3)); + REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); + bool is_rev = distance_index.distance_in_parent(snarl1, distance_index.get_bound(snarl1, false, true), + distance_index.flip(distance_index.canonical(chain2))) != 0; + + //Chain at depth 2 + REQUIRE(unpacked[2].length == 3); + REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain2)); + REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); + REQUIRE(unpacked[2].is_reversed == is_rev); + if (is_rev) { + REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_start_right == 0); + REQUIRE(unpacked[2].distance_end_left == 0); + REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); + } else { + REQUIRE(unpacked[2].distance_start_left == 0); + REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_right == 0); + } + + //Node at depth 3 + REQUIRE(unpacked[3].length == 1); + REQUIRE(unpacked[3].prefix_sum_or_snarl_rank == distance_index.get_prefix_sum_value(node2)); + REQUIRE(unpacked[3].code_type == ZipCode::NODE); + REQUIRE(unpacked[3].is_reversed == distance_index.is_reversed_in_parent(node2)); + } SECTION ("zip code for more deeply nested node") { ZipCode zipcode; @@ -853,6 +974,93 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); REQUIRE(zipcode.get_code_type(6) == ZipCode::CHAIN); + } + SECTION ("unpacked zip code for more deeply nested node") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n4->id(), distance_index); + REQUIRE(unpacked.size() == 7); + + net_handle_t chain4 = 
distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); + net_handle_t snarl3 = distance_index.get_parent(chain4); + net_handle_t chain3 = distance_index.get_parent(snarl3); + net_handle_t snarl2 = distance_index.get_parent(chain3); + net_handle_t chain2 = distance_index.get_parent(snarl2); + net_handle_t snarl1 = distance_index.get_parent(chain2); + net_handle_t chain1 = distance_index.get_parent(snarl1); + + + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(chain1)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); + + //Snarl at depth 1 + REQUIRE(unpacked[1].length == 0); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 4 : 3)); + REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); + net_handle_t snarl = distance_index.get_parent(chain2); + bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), + distance_index.flip(distance_index.canonical(chain2))) != 0; + + + //Chain at depth 2 + REQUIRE(unpacked[2].is_reversed == is_rev); + REQUIRE(unpacked[2].length == 3); + REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain2)); + REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); + if (is_rev) { + REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_start_right == 0); + REQUIRE(unpacked[2].distance_end_left == 0); + REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); + } else { + REQUIRE(unpacked[2].distance_start_left == 0); + REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); + REQUIRE(unpacked[2].distance_end_right == 0); + } + + + //Snarl at depth 3 + REQUIRE(unpacked[3].length == 1); + REQUIRE(unpacked[3].prefix_sum_or_snarl_rank == 1); + REQUIRE(unpacked[3].code_type == ZipCode::REGULAR_SNARL); + snarl = 
distance_index.get_parent(chain3); + is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), + distance_index.flip(distance_index.canonical(chain3))) != 0; + + //Chain at depth 4 + REQUIRE(unpacked[4].is_reversed == is_rev); + REQUIRE(unpacked[4].length == distance_index.minimum_length(chain3)); + REQUIRE(unpacked[4].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain3)); + REQUIRE(unpacked[4].code_type == ZipCode::CHAIN); + if (is_rev) { + REQUIRE(unpacked[4].distance_start_left == std::numeric_limits::max()); + REQUIRE(unpacked[4].distance_start_right == 0); + REQUIRE(unpacked[4].distance_end_left == 0); + REQUIRE(unpacked[4].distance_end_right == std::numeric_limits::max()); + } else { + REQUIRE(unpacked[4].distance_start_left == 0); + REQUIRE(unpacked[4].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[4].distance_end_left == std::numeric_limits::max()); + REQUIRE(unpacked[4].distance_end_right == 0); + } + + + //Snarl3 at depth 5 + REQUIRE(unpacked[5].length == 0); + REQUIRE(unpacked[5].prefix_sum_or_snarl_rank == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
3 : 1)); + REQUIRE(unpacked[5].code_type == ZipCode::REGULAR_SNARL); + snarl = distance_index.get_parent(chain4); + is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), + distance_index.flip(distance_index.canonical(chain4))) != 0; + + //node/chain at depth 6 + REQUIRE(unpacked[6].is_reversed == is_rev); + REQUIRE(unpacked[6].length == 4); + REQUIRE(unpacked[6].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain4)); + REQUIRE(unpacked[6].code_type == ZipCode::CHAIN); + } SECTION("Distances") { ZipCode zip1; @@ -1172,7 +1380,6 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); - bool chain_is_rev = distance_index.is_reversed_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))); //node1 to left side of node 3 REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); //Node 1 to right side of node 3 @@ -1182,6 +1389,51 @@ using namespace std; //Node 4 to right side of node 3 REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); } + SECTION ("unpacked zip code for node in irregular snarl") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n3->id(), distance_index); + REQUIRE(unpacked.size() == 3); + + net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); + net_handle_t snarl1 = distance_index.get_parent(chain3); + net_handle_t chain1 = distance_index.get_parent(snarl1); + + + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(chain1)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); + + //Snarl1 at depth 1 + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == 
(distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 6 : 3)); + REQUIRE(unpacked[1].length == distance_index.minimum_length(snarl1)); + REQUIRE(unpacked[1].code_type == ZipCode::CYCLIC_SNARL); + + //chain3 at depth 3 + REQUIRE(unpacked[2].length == 1); + REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain3)); + REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); + bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); + if (snarl_is_rev) { + //node1 to left side of node 3 + REQUIRE(unpacked[2].distance_end_left == 1); + //Node 1 to right side of node 3 + REQUIRE(unpacked[2].distance_end_right == 2); + //node4 to left side of node 3 + REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); + //Node 4 to right side of node 3 + REQUIRE(unpacked[2].distance_start_right == 0); + + } else { + REQUIRE(unpacked[2].distance_start_left == 1); + //Node 1 to right side of node 3 + REQUIRE(unpacked[2].distance_start_right == 2); + //node4 to left side of node 3 + REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); + //Node 4 to right side of node 3 + REQUIRE(unpacked[2].distance_end_right == 0); + } + } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -1408,6 +1660,27 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); } + SECTION ("unpacked zip code for node in top-level snarl") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); + REQUIRE(unpacked.size() == 2); + + + net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); + net_handle_t root_snarl = distance_index.get_parent(chain1); + + + //Root snarl + 
REQUIRE(distance_index.canonical(unpacked[0].net_handle) == + distance_index.canonical(distance_index.get_parent(chain1))); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_SNARL); + + //Chain1 at depth 1 + REQUIRE(unpacked[1].length == 3); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain1)); + REQUIRE(unpacked[1].code_type == ZipCode::CHAIN); + } SECTION ("zip code for node in chain in top-level snarl") { net_handle_t node1 = distance_index.get_node_net_handle(n3->id()); ZipCode zipcode; @@ -1472,6 +1745,31 @@ using namespace std; REQUIRE(zipcode.get_code_type(2) == ZipCode::NODE); REQUIRE(zipcode.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); } + SECTION ("unpack zip code for node in chain in top-level snarl") { + net_handle_t node3 = distance_index.get_node_net_handle(n3->id()); + net_handle_t chain2 = distance_index.get_parent(node3); + net_handle_t root_snarl = distance_index.get_parent(chain2); + + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n3->id(), distance_index); + REQUIRE(unpacked.size() == 3); + + //Root snarl + REQUIRE(distance_index.canonical(unpacked[0].net_handle) == distance_index.canonical(root_snarl)); + REQUIRE(unpacked[0].code_type == ZipCode::ROOT_SNARL); + + //chain2 at depth 1 + REQUIRE(unpacked[1].length == 2); + REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain2)); + REQUIRE(unpacked[1].code_type == ZipCode::CHAIN); + + //node3 at depth 2 + REQUIRE(unpacked[2].length == 1); + REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
0 : 1)); + REQUIRE(unpacked[2].code_type == ZipCode::NODE); + REQUIRE(unpacked[2].is_reversed == distance_index.is_reversed_in_parent(node3)); + } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -1870,6 +2168,22 @@ using namespace std; REQUIRE(zipcode.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); REQUIRE(zipcode.get_is_looping_chain(0)); } + SECTION( "node2 unpacked" ) { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); + vector unpacked = zipcode.unpack_zip_code(n2->id(), distance_index); + REQUIRE(unpacked.size() == 2); + + net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); + net_handle_t parent = distance_index.get_parent(node2); + net_handle_t bound = distance_index.get_bound(parent, true, false); + + + REQUIRE(distance_index.minimum_length(node2) == unpacked[1].length); + REQUIRE(unpacked[1].chain_component == distance_index.get_chain_component(node2)); + REQUIRE(unpacked[0].chain_component == 1); + REQUIRE(unpacked[0].is_looping_chain); + } SECTION( "node5" ) { ZipCode zipcode; @@ -1881,6 +2195,10 @@ using namespace std; REQUIRE(distance_index.minimum_length(node) == zipcode.get_length(zipcode.max_depth())); + + vector unpacked = zipcode.unpack_zip_code(n5->id(), distance_index); + + REQUIRE(distance_index.minimum_length(node) == unpacked[unpacked.size()-1].length); } } TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { @@ -1924,6 +2242,26 @@ using namespace std; } } + SECTION( "Check connectivity unpacked" ) { + ZipCode zipcode; + zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); + vector unpacked = zipcode.unpack_zip_code(n2->id(), dist_index); + + REQUIRE(unpacked[1].length == 1); + + if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { + REQUIRE(unpacked[0].distance_end_right == 0); + REQUIRE(unpacked[0].distance_end_left == 
std::numeric_limits::max()); + REQUIRE(unpacked[0].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[0].distance_start_left == std::numeric_limits::max()); + } else { + REQUIRE(unpacked[0].distance_end_right == std::numeric_limits::max()); + REQUIRE(unpacked[0].distance_end_left == std::numeric_limits::max()); + REQUIRE(unpacked[0].distance_start_right == std::numeric_limits::max()); + REQUIRE(unpacked[0].distance_start_left == 0); + } + + } } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index dc6af0fb863..121fe322168 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2040,7 +2040,10 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& size_t zip_index = decoder[depth].offset; bool is_chain = decoder[depth].is_chain; if (depth == 0) { - //identifier is first for anything in the root + //is_Chain is first for anything in the root + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + + //identifier std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); if (is_chain) { @@ -2056,7 +2059,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& //For a root node, this is the length std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + current_code.length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; } else { From 7cc9f4cccb11ff17441ef01002626be47b531e06 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 7 Aug 2024 18:06:00 +0200 Subject: [PATCH 095/124] Use the unpacked zipcode for clustering instead of the old payload --- src/snarl_seed_clusterer.cpp | 32 ++++++++++++++++++-------------- src/zip_code.cpp | 4 ++-- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 2ff6b814fa6..989d785e418 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -353,24 +353,28 @@ cerr << "Add all seeds to nodes: " << endl; //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now const zip_code_t& node_code = seed.unpacked_zipcode.back(); - const zip_code_t& parent_code = seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2]; + bool is_trivial_chain = distance_index.is_chain(node_code.net_handle); + const zip_code_t& parent_code = seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2]; #ifdef DEBUG_CLUSTER net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; + cerr << "Got net handle from zipcode " << distance_index.net_handle_as_string(node_code.net_handle) << endl; cerr << "Node length " << node_code.length << " should be " << distance_index.minimum_length(handle) << endl; - assert(seed.unpacked_vector.back().length == distance_index.minimum_length(handle)); + assert(seed.unpacked_zipcode.back().length == distance_index.minimum_length(handle)); size_t chain_component = (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) : 0); - chain_component = chain_component == std::numeric_limits::max() ? 
0 : chain_component; + chain_component = chain_component ; cerr << "For node " << distance_index.net_handle_as_string(handle) << endl; cerr << "Chain compoentn: " << chain_component << " was " << node_code.chain_component << endl; - assert(node_code.chain_component == chain_component); + if (chain_component != 0 && chain_component != std::numeric_limits::max()) { + assert(node_code.chain_component == chain_component); + } #endif if (!((seed.unpacked_zipcode.front().code_type == ZipCode::ROOT_SNARL && seed.unpacked_zipcode.size() == 2) @@ -384,7 +388,7 @@ cerr << "Add all seeds to nodes: " << endl; #ifdef DEBUG_CLUSTER cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2].net_handle) << endl; cerr << "Node length should be " << distance_index.minimum_length(node_code.net_handle) << " actually " << node_code.length << endl; - assert(node_code.length == distance_index.minimum_length(node_code.handle)); + assert(node_code.length == distance_index.minimum_length(node_code.net_handle)); cerr << "Reversed in parent? " << distance_index.net_handle_as_string(node_code.net_handle) << " " << distance_index.net_handle_as_string(parent_code.net_handle) << " " << node_code.is_reversed << endl; #endif @@ -393,11 +397,10 @@ cerr << "Add all seeds to nodes: " << endl; bool new_parent = false; - new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(parent_code.net_handle) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(is_trivial_chain ? 
node_code.net_handle : parent_code.net_handle) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; - if (distance_index.is_chain(node_code.net_handle) ) { + if (is_trivial_chain) { //Trivial chain clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); clustering_problem.all_node_problems.emplace_back(node_code.net_handle, clustering_problem.all_seeds->size(), @@ -417,14 +420,15 @@ cerr << "Add all seeds to nodes: " << endl; } size_t parent_depth = 0; - for (size_t d = 0 ; d < seed.unpacked_zipcode.size() ; d++) { + for (size_t d = 0 ; d <= seed.unpacked_zipcode.size()-(is_trivial_chain ? 1 : 2) ; d++) { const auto& type = seed.unpacked_zipcode[d].code_type; if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { parent_depth++; } } #ifdef DEBUG_CLUSTER - assert(parent_depth == distance_index.get_depth(parent_code.net_handle)); + cerr << "depth of " << distance_index.net_handle_as_string(parent_code.net_handle) << " " << distance_index.get_depth(is_trivial_chain ? node_code.net_handle : parent_code.net_handle) << " guessed " << parent_depth << endl; + assert(parent_depth == distance_index.get_depth(is_trivial_chain ? node_code.net_handle : parent_code.net_handle)); #endif @@ -444,7 +448,7 @@ cerr << "Add all seeds to nodes: " << endl; : node_code.length- get_offset(pos); //Add this seed to its parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(parent_code.net_handle)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(is_trivial_chain ? 
node_code.net_handle : parent_code.net_handle)); parent_problem.children.emplace_back(); parent_problem.children.back().net_handle = node_code.net_handle; parent_problem.children.back().seed_indices = {read_num, i}; @@ -457,7 +461,7 @@ cerr << "Add all seeds to nodes: " << endl; //And the parent to chains_by_level if (new_parent) { - chains_by_level[parent_depth].emplace_back(parent_code.net_handle); + chains_by_level[parent_depth].emplace_back(is_trivial_chain ? node_code.net_handle : parent_code.net_handle); } @@ -523,7 +527,7 @@ cerr << "Add all seeds to nodes: " << endl; net_handle_t parent = node_problem.parent_net_handle; - if (seed->unpacked_zipcode[seed->unpacked_zipcode.size()-2].code_type == ZipCode::ROOT_SNARL) { + if (seed->unpacked_zipcode[0].code_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, @@ -659,7 +663,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); + assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == distance_index.start_end_traversal_of(parent)); } #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 121fe322168..e0f20b1cd28 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2202,7 +2202,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& zip_code_t& 
current_code = unpacked_zipcode[depth]; //If we need to set the net handle - if (current_code.net_handle == distance_index.get_root()) { + if (!(depth == 0 || current_code.code_type == ZipCode::IRREGULAR_SNARL || current_code.code_type == ZipCode::CYCLIC_SNARL)) { if (depth == decoder_length()-1 ) { current_code.net_handle = distance_index.get_node_net_handle(id); if (current_code.code_type == ZipCode::CHAIN) { @@ -2213,7 +2213,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index.get_node_record_offset(current_code.net_handle)); } } else { - current_code.net_handle = distance_index.get_parent(unpacked_zipcode[depth+1].net_handle); + current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(unpacked_zipcode[depth+1].net_handle)); } } From 2c955f90e87ec3a175b8326d4e5169ac04eb7ec8 Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 8 Aug 2024 11:24:13 +0200 Subject: [PATCH 096/124] Use the length of the last chain component as the length of a multicomponent chain --- src/zip_code.cpp | 22 ++++++++++++++++++---- src/zip_code.hpp | 3 ++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index e0f20b1cd28..5c108d8bcbe 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -412,7 +412,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } -size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index, bool get_chain_component_length) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -440,7 +440,20 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } - return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; + size_t len = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + if (get_chain_component_length || (depth != 0 && decoder[depth-1].is_chain)) { + //If this is a node or we want the component length that got saved, return the actual saved value + return len; + } else { + //If we want the length of the last component of the chain, check if it is a multicopmonent chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + if (zip_value != 0) { + //If this is a multicomponent (or looping chain, which also must be a multicomponent chain) + return std::numeric_limits::max(); + } else { + return len; + } + } } else { //If this is a snarl @@ -947,9 +960,10 @@ vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDis //Chain code is: rank in snarl, length vector chain_code (CHAIN_SIZE); chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); - size_t len = distance_index.minimum_length(chain); - chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; bool is_trivial = distance_index.is_trivial_chain(chain) ; + //Length is the length of the last component + size_t len = is_trivial ? distance_index.minimum_length(chain) : distance_index.chain_minimum_length(chain); + chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; size_t component = is_trivial ? 
0 : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 4d2b9332773..6c7569f29fc 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -255,7 +255,8 @@ class ZipCode { ///This requires the distance index for irregular snarls (except for a top-level snarl) ///Throws an exception if the distance index is not given when it is needed ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + ///If chain_component_length is true, then get the length of the last component of the multicomponent chain (instead of inf) + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr, bool get_chain_component_length=false) const ; ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl size_t get_rank_in_snarl(const size_t& depth) const ; From cf6b5beb00114884ac4ed8366022e6baa3f45685 Mon Sep 17 00:00:00 2001 From: Xian Date: Thu, 8 Aug 2024 12:30:41 +0200 Subject: [PATCH 097/124] Use unpacked zipcode in SnarlTreeNodeProblem instead of zipcode --- src/snarl_seed_clusterer.cpp | 419 +++++++++++++++++------------------ src/snarl_seed_clusterer.hpp | 67 ++---- src/zip_code.cpp | 14 ++ src/zip_code.hpp | 1 + 4 files changed, 234 insertions(+), 267 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 989d785e418..f671e3943ce 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -353,7 +353,7 @@ cerr << "Add all seeds to nodes: " << endl; //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now const zip_code_t& node_code = seed.unpacked_zipcode.back(); - bool is_trivial_chain = distance_index.is_chain(node_code.net_handle); + bool is_trivial_chain = node_code.code_type == 
ZipCode::CHAIN; const zip_code_t& parent_code = seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2]; #ifdef DEBUG_CLUSTER @@ -403,17 +403,15 @@ cerr << "Add all seeds to nodes: " << endl; if (is_trivial_chain) { //Trivial chain clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(node_code.net_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, node_code.length, std::numeric_limits::max(), std::numeric_limits::max(), - &seed, seed.seed->zipcode.max_depth()); - clustering_problem.all_node_problems.back().is_trivial_chain = true; + seed.unpacked_zipcode, seed.seed->zipcode.max_depth()); } else { //The parent is an actual chain clustering_problem.net_handle_to_node_problem_index.emplace(parent_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent_code.net_handle, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - &seed, seed.seed->zipcode.max_depth() - 1); + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), + seed.unpacked_zipcode, seed.seed->zipcode.max_depth() - 1); } new_parent = true; @@ -442,9 +440,9 @@ cerr << "Add all seeds to nodes: " << endl; } //Make sure the seed's distances are relative to the orientation in the parent - seed.distance_left = node_code.is_reversed != is_rev(pos) ? node_code.length- get_offset(pos) + seed.distance_left = (!is_trivial_chain && node_code.is_reversed) != is_rev(pos) ? node_code.length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = node_code.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 + seed.distance_right =(!is_trivial_chain && node_code.is_reversed) != is_rev(pos) ? get_offset(pos) + 1 : node_code.length- get_offset(pos); //Add this seed to its parent cluster @@ -475,14 +473,10 @@ cerr << "Add all seeds to nodes: " << endl; new_node = true; clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(node_code.net_handle, clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - false, node_code.length, std::numeric_limits::max(), - std::numeric_limits::max(), - &seed, seed.seed->zipcode.max_depth()); + seed.unpacked_zipcode, seed.seed->zipcode.max_depth()); - //Remember the parent of this node, since it will be needed to remember the root snarl later - clustering_problem.all_node_problems.back().parent_net_handle = parent_code.net_handle; } @@ -525,16 +519,16 @@ cerr << "Add all seeds to nodes: " << endl; //if current_iterator is the last thing in the list and the same node cluster_one_node(clustering_problem, &node_problem); - net_handle_t parent = node_problem.parent_net_handle; + net_handle_t parent = node_problem.unpacked_zipcode[node_problem.zipcode_depth-1].net_handle; if (seed->unpacked_zipcode[0].code_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - seed, 0); + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + 
clustering_problem.seed_count_prefix_sum.back(), + seed->unpacked_zipcode, 0); } clustering_problem.root_children.emplace_back(parent, node_net_handle); } else { @@ -562,7 +556,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); #ifdef DEBUG_CLUSTER - cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; + cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle) << endl; #endif //Cluster the snarlindex]; @@ -582,27 +576,19 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Make a new SnarlTreeNodeProblem for the parent - net_handle_t snarl_parent = snarl_problem->has_parent_handle - ? snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + net_handle_t snarl_parent = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth-1].net_handle; bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(snarl_parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - snarl_problem->seed, snarl_problem->zipcode_depth-1); + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), + snarl_problem->unpacked_zipcode, snarl_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& 
snarl_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); - if (snarl_problem.has_grandparent_handle) { - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = snarl_problem.grandparent_net_handle; - } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); @@ -612,8 +598,8 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster parent_problem.children.back().net_handle = snarl_handle; parent_problem.children.back().is_seed = false; parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = snarl_problem->chain_component_start; - parent_problem.children.back().prefix_sum = snarl_problem->prefix_sum_value; + parent_problem.children.back().chain_component = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].chain_component; + parent_problem.children.back().prefix_sum = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].prefix_sum_or_snarl_rank; if (new_parent) { //And the parent chain to the things to be clustered next @@ -654,11 +640,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif - net_handle_t parent = chain_problem->has_parent_handle - ? chain_problem->parent_net_handle - : (chain_problem->zipcode_depth == 0 - ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + net_handle_t parent = chain_problem->zipcode_depth == 0 + ? 
distance_index.get_root() + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].net_handle; #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { @@ -668,17 +652,17 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? ZipCode::EMPTY - : chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1); + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].code_type; bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - !chain_problem->seed->seed->zipcode.is_externally_start_start_connected(0) && - !chain_problem->seed->seed->zipcode.is_externally_start_end_connected(0) && - !chain_problem->seed->seed->zipcode.is_externally_end_end_connected(0) && - !chain_problem->seed->seed->zipcode.get_is_looping_chain(0); + chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left == std::numeric_limits::max() && + chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right == std::numeric_limits::max() && + chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right == std::numeric_limits::max() && + !chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain; // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -690,9 +674,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the parent is a root snarl, then remember 
it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - chain_problem->seed, chain_problem->zipcode_depth-1); + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), + chain_problem->unpacked_zipcode, chain_problem->zipcode_depth-1); } clustering_problem.root_children.emplace_back(parent, chain_handle); } else if (!is_top_level_chain) { @@ -707,112 +691,123 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode.max_depth() + bool snarl_child_is_rev = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].code_type == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 ? false - : chain_problem->seed->seed->zipcode.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth+1].is_reversed; + //TODO: Double check these distances +// chain_problem->distance_start_left = snarl_child_is_rev +// ? 
chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) +// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); +// +// chain_problem->distance_start_right = snarl_child_is_rev +// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) +// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); +// +// chain_problem->distance_end_left = snarl_child_is_rev +// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) +// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); +// +// chain_problem->distance_end_right = snarl_child_is_rev +// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) +// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); +// chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); + ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left; chain_problem->distance_start_right = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); + ? 
chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right; chain_problem->distance_end_left = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); + ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_left; chain_problem->distance_end_right = snarl_child_is_rev - ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) - : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); + ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_left + : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right; - #ifdef DEBUG_CLUSTER - cerr << "For child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; - cerr << "For parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; - cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + #ifdef debug_cluster + cerr << "for child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "for parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), 
chain_problem->zipcode_depth, &distance_index)) << " and " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; - cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; + cerr << "check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " << distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)) << " " + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle)) << " " << distance_index.distance_to_parent_bound(parent, true, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)) << " " + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? 
snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle)) << " " << distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)) << " " + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle)) << " " << distance_index.distance_to_parent_bound(parent, false, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE)) << endl; + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle)) << endl; assert(chain_problem->distance_start_left == distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE))); + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? 
snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle))); assert(chain_problem->distance_start_right == distance_index.distance_to_parent_bound(parent, true, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE))); + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle))); assert(chain_problem->distance_end_left == distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE))); + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle))); assert(chain_problem->distance_end_right == distance_index.distance_to_parent_bound(parent, false, chain_handle, - std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, - SnarlDistanceIndex::SNARL_HANDLE, - (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE - : SnarlDistanceIndex::CHAIN_HANDLE), - SnarlDistanceIndex::CHAIN_HANDLE))); + std::make_tuple(snarldistanceindex::snarl_handle, + snarldistanceindex::snarl_handle, + (chain_problem->is_trivial_chain ? 
snarldistanceindex::node_handle + : snarldistanceindex::chain_handle), + snarldistanceindex::chain_handle))); #endif - //And add it to its parent snarl + //and add it to its parent snarl bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - chain_problem->seed, chain_problem->zipcode_depth-1); - //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved + clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), + chain_problem->unpacked_zipcode, chain_problem->zipcode_depth-1); + //because a new snarltreenodeproblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); - if (chain_problem.has_grandparent_handle) { - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(parent)); - parent_problem.has_parent_handle = true; - parent_problem.parent_net_handle = chain_problem.grandparent_net_handle; - } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(parent)); @@ -833,14 +828,14 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster void SnarlDistanceIndexClusterer::cluster_one_node( ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* node_problem) const { -#ifdef DEBUG_CLUSTER - cerr << "Finding clusters on node " << 
distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; +#ifdef debug_cluster + cerr << "finding clusters on node " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; #endif - size_t node_length = node_problem->node_length; + size_t node_length = node_problem->unpacked_zipcode[node_problem->zipcode_depth].length; - //Sort the seeds on the node + //sort the seeds on the node std::sort(node_problem->children.begin(), node_problem->children.end(), [&](const SnarlTreeNodeProblem::SnarlTreeChild& a, const SnarlTreeNodeProblem::SnarlTreeChild& b) { return clustering_problem.all_seeds->at(a.seed_indices.first)->at(a.seed_indices.second).distance_left @@ -850,9 +845,9 @@ void SnarlDistanceIndexClusterer::cluster_one_node( cluster_seeds_on_linear_structure(clustering_problem, node_problem, node_length, false, false); -#ifdef DEBUG_CLUSTER +#ifdef debug_cluster - cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; + cerr << "\tfound read clusters on node " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; bool got_left = false; bool got_right = false; @@ -897,26 +892,26 @@ void SnarlDistanceIndexClusterer::cluster_one_node( }; -//Go through pairs of clusters of the two children and see which ones can be combined -//The first child may not have been seen before, so all of it's clusters may be added to the parent, then +//go through pairs of clusters of the two children and see which ones can be combined +//the first child may not have been seen before, so all of it's clusters may be added to the parent, then //anything that was combined gets removed and only the cluster heads get added. 
-//For the second child, everything is already in the parent so remove ones that were combined then +//for the second child, everything is already in the parent so remove ones that were combined then //add the head of the combined clusters // -//If this is the first time we see the first child, then also update the best distances to the ends of the +//if this is the first time we see the first child, then also update the best distances to the ends of the //parent for the parent clusters void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structures(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem1, SnarlTreeNodeProblem* child_problem2, SnarlTreeNodeProblem* parent_problem, const vector> & child_distances, bool is_root, bool first_child) const { -#ifdef DEBUG_CLUSTER - cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem1->containing_net_handle) - << " and " << distance_index.net_handle_as_string(child_problem2->containing_net_handle) - << " which are children of " << distance_index.net_handle_as_string(parent_problem->containing_net_handle) << endl; +#ifdef debug_cluster + cerr << "\tcompare " << distance_index.net_handle_as_string(child_problem1->unpacked_zipcode[child_problem1_problem->zipcode_depth].net_handle) + << " and " << distance_index.net_handle_as_string(child_problem2->unpacked_zipcode[child_problem2_problem->zipcode_depth].net_handle) + << " which are children of " << distance_index.net_handle_as_string(parent_problem->unpacked_zipcode[parent_problem->zipcode_depth].net_handle) << endl; #endif - net_handle_t& parent_handle = parent_problem->containing_net_handle; - net_handle_t& child_handle1 = child_problem1->containing_net_handle; - net_handle_t& child_handle2 = child_problem2->containing_net_handle; + const net_handle_t& parent_handle = parent_problem->unpacked_zipcode[parent_problem->zipcode_depth].net_handle; + const net_handle_t& child_handle1 = 
child_problem1->unpacked_zipcode[child_problem1->zipcode_depth].net_handle; + const net_handle_t& child_handle2 = child_problem2->unpacked_zipcode[child_problem2->zipcode_depth].net_handle; @@ -1381,26 +1376,18 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem) const { #ifdef DEBUG_CLUSTER - cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->containing_net_handle) + cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->unpacked_zipcode[child_problem->zipcode_depth].net_handle) << " to itself in the root" << endl; #endif - net_handle_t& handle = child_problem->containing_net_handle; + const net_handle_t& handle = child_problem->unpacked_zipcode[child_problem->zipcode_depth].net_handle; //Get the distances between the two sides of the child - size_t distance_left_left = - child_problem->seed->seed->zipcode.is_externally_start_start_connected(child_problem->zipcode_depth) - ? 0 - : std::numeric_limits::max(); - size_t distance_left_right = - child_problem->seed->seed->zipcode.is_externally_start_end_connected(child_problem->zipcode_depth) - ? 0 - : std::numeric_limits::max(); - size_t distance_right_right = - child_problem->seed->seed->zipcode.is_externally_end_end_connected(child_problem->zipcode_depth) - ? 
0 - : std::numeric_limits::max(); + size_t distance_left_left = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_start_left; + size_t distance_left_right = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_start_right; + size_t distance_right_right = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_end_right; + if (distance_left_left == std::numeric_limits::max() && distance_left_right == std::numeric_limits::max() && distance_right_right == std::numeric_limits::max()) { @@ -1534,17 +1521,17 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the clusters on this snarl, assumes that all of the snarls children have been clustered already. #ifdef DEBUG_CLUSTER - cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; + cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle) << endl; #endif snarl_problem->set_snarl_values(distance_index); - net_handle_t& snarl_handle = snarl_problem->containing_net_handle; + const net_handle_t& snarl_handle = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle; //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. 
Otherwise, compare the children of the snarl - if (snarl_problem->seed->seed->zipcode.get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { + if (snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].code_type != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters @@ -1595,8 +1582,8 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin #ifdef DEBUG_CLUSTER cerr << "\tComparing two children of " << distance_index.net_handle_as_string(snarl_handle) << ": " - << distance_index.net_handle_as_string(child_problem_i.containing_net_handle) << " and " - << distance_index.net_handle_as_string(child_problem_j.containing_net_handle) << endl; + << distance_index.net_handle_as_string(child_problem_i.unpacked_zipcode[child_problem_i.zipcode_depth].net_handle) << " and " + << distance_index.net_handle_as_string(child_problem_j.unpacked_zipcode[child_problem_j.zipcode_depth].net_handle) << endl; @@ -1619,7 +1606,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //May need to flip the distances for (auto& cluster_head : child_problem.read_cluster_heads) { snarl_problem->read_cluster_heads.emplace(cluster_head); - if (child_problem.is_reversed_in_parent) { + if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { size_t old_left = clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left; clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left = clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_right; @@ -1631,7 +1618,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Update the distances for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { if (read_num == 0) { - if (child_problem.is_reversed_in_parent) { + if 
(child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_left.first, child_problem.read_best_left.first); snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_right.first, @@ -1643,7 +1630,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin child_problem.read_best_right.first); } } else { - if (child_problem.is_reversed_in_parent) { + if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_left.second, child_problem.read_best_left.second); snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_right.second, @@ -1656,7 +1643,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } } - if (child_problem.is_reversed_in_parent) { + if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_left, child_problem.fragment_best_left); snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_right, @@ -1726,7 +1713,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* chain_problem, bool is_top_level_chain) const { #ifdef DEBUG_CLUSTERS - assert(distance_index.is_chain(chain_problem->containing_net_handle)); + assert(distance_index.is_chain(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle)); //if (only_seeds) { // for (auto child : children_in_chain) { // assert(!std::get<3>(child)); @@ -1754,18 +1741,16 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet - 
child1.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; - child1.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; + const SnarlTreeNodeProblem& child1_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)); + child1.chain_component = child1_problem.chain_component_start; + child1.prefix_sum = child1_problem.unpacked_zipcode[child1_problem.zipcode_depth].prefix_sum_or_snarl_rank; child2.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If child2 is a snarl and hasn't had its values set yet - child2.chain_component = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).chain_component_start; - child2.prefix_sum = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).prefix_sum_value; + const SnarlTreeNodeProblem& child2_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)); + child2.chain_component = child2_problem.chain_component_start; + child2.prefix_sum = child2_problem.unpacked_zipcode[child2_problem.zipcode_depth].prefix_sum_or_snarl_rank; child2.has_chain_values = true; } if (child1.chain_component != child2.chain_component) { @@ -1788,10 +1773,13 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } }); - net_handle_t& chain_handle = chain_problem->containing_net_handle; + const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; + + if (!(chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 && 
chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].code_type == ZipCode::CHAIN) + && ! is_top_level_chain) { + //If this isn't a trivial chain and isn't a top-level chain - if (!chain_problem->is_trivial_chain && ! is_top_level_chain) { //If we need it, get the values from the distance index: //is_looping_chain, node_length, the end boundary node, and the end component //THese only get used if we need the distances to the ends of the chain @@ -1799,15 +1787,16 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } - if (only_seeds && !chain_problem->is_looping_chain && + if (only_seeds && !chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain && (chain_problem->chain_component_end == 0 || chain_problem->chain_component_end == std::numeric_limits::max())) { //If there are only seeds in the chain (and the chain doesn't loop and isn't a multicomponent chain), //then cluster by walking through the seeds //This also does the work of clustering a trivial chain (which is just a node), which should be the same amount of work as using cluster_one_node - cluster_seeds_on_linear_structure(clustering_problem, chain_problem, chain_problem->node_length, - !chain_problem->is_trivial_chain, is_top_level_chain); + cluster_seeds_on_linear_structure(clustering_problem, chain_problem, chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, + !(chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 && chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].code_type == ZipCode::CHAIN), + is_top_level_chain); #ifdef DEBUG_CLUSTER cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; @@ -1890,21 +1879,22 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //The last child we saw SnarlTreeNodeProblem::SnarlTreeChild& last_child = chain_problem->children.front(); + const SnarlTreeNodeProblem* last_child_problem = 
last_child.is_seed + ? nullptr + : &clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)); //And values we need to save from the last child //If the last child is a snarl, get it from the SnarlTreeNodeProblem otherwise from the seed's cache size_t last_prefix_sum = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left - : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; + : last_child_problem->chain_component_start; size_t last_length = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().length - : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; + : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].length; size_t last_chain_component_end = last_child.is_seed ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().chain_component - : clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; + : last_child_problem->chain_component_start; //This is initialized to the start of the snarl //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small @@ -2001,7 +1991,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //If the chain loops, then we also have to compare the first thing we saw to the last things - if (chain_problem->is_looping_chain){ + if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain){ #ifdef DEBUG_CLUSTER cerr << "Check connectivity around a looping chain" << endl; cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; @@ -2144,7 +2134,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const size_t& read_num = current_child.seed_indices.first; const size_t& cluster_num = current_child.seed_indices.second; - net_handle_t& chain_handle = chain_problem->containing_net_handle; + const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* Get a bunch of distances from the current child that will be used to calculate distance @@ -2195,7 +2185,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c } else { //Length of the chain - (prefix sum + node length of the current node) - distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, + distance_from_current_end_to_end_of_chain = 
SnarlDistanceIndex::minus(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, SnarlDistanceIndex::sum(current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank, current_child_seed.unpacked_zipcode.back().length)); @@ -2483,8 +2473,10 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t old_left = clustering_problem.all_seeds->at(read_num)->at(cluster_num).distance_left; size_t old_right = clustering_problem.all_seeds->at(read_num)->at(cluster_num).distance_right; //Get the new best distances for the cluster considering chain loops - size_t updated_left = std::min(old_left, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_right, child_problem.loop_right), child_problem.node_length)); - size_t updated_right = std::min(old_right, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_left, child_problem.loop_left), child_problem.node_length)); + size_t updated_left = std::min(old_left, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_right, child_problem.loop_right), + child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); + size_t updated_right = std::min(old_right, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_left, child_problem.loop_left), + child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); @@ -2600,7 +2592,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& }; - net_handle_t& chain_handle = chain_problem->containing_net_handle; + const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); @@ -2636,10 +2628,10 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& if (last_length == std::numeric_limits::max() && last_chain_component_end ) { //If the last length is infinite, then is must be 
a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = child_problem.prefix_sum_value; + distance_from_last_child_to_current_child = child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank; } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(child_problem.prefix_sum_value, + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank, distance_from_chain_start_to_last_node); } } @@ -2656,7 +2648,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& ? std::numeric_limits::max() : (last_child.net_handle == current_child.net_handle ? 0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, - child_problem.node_length)); + child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); //The distance to add to get to the end of the chain. 
Only matters if this is the last thing in the chain //The distances will include the distance to the end of a trivial chain, @@ -2672,24 +2664,25 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); //TODO: Used to do this, I"m pretty sure I don't need to though //distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.net_handle); - } else if (child_problem.node_length == std::numeric_limits::max() ) { + } else if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ) { //If the node length is infinite, then it is a snarl that isn't start-end connected, so the start //and end of the snarl are in different components of the chain. Since it reached here, the end //node of the snarl is in the same component as the end of the chain, so the distance to the //end of the chain is just the length of the last component of the chain, which is //chain_problem.node_length - distance_from_current_end_to_end_of_chain = chain_problem->node_length; + distance_from_current_end_to_end_of_chain = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length; } else { - distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, - SnarlDistanceIndex::sum(child_problem.prefix_sum_value, child_problem.node_length)); + distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, + SnarlDistanceIndex::sum(child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank, + child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; cerr << "\tDistance from start of chain to the left side of this one: " << 
(child_problem.chain_component_start != 0 - ? std::numeric_limits::max() : child_problem.prefix_sum_value) << endl; + ? std::numeric_limits::max() : child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank) << endl; cerr << "\tDistance from the last child to the right side of this one: " << distance_from_last_child_to_current_end << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2707,7 +2700,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //And one new fragment cluster size_t new_cluster_head_fragment = std::numeric_limits::max(); - bool child_is_reversed = child_problem.is_reversed_in_parent; + bool child_is_reversed = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed; //Remember the current best chain distances, and reset them to inf since we need to update them size_t old_best_right = std::move(chain_problem->fragment_best_right); @@ -2747,15 +2740,15 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e size_t read_num = cluster_head.first; pair dists (clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_left, clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_right); - size_t dist_left = child_problem.is_reversed_in_parent ? dists.second : dists.first; - size_t dist_right = child_problem.is_reversed_in_parent ? dists.first : dists.second; + size_t dist_left = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed ? dists.second : dists.first; + size_t dist_right = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed ? 
dists.first : dists.second; //Distances to the start of the chain, and the end of this node //If this is the last thing in the chain, then the distance to the end of the chain //If the snarl is isn't in the first component of the chain, then the left distance is infinite pair new_distances = make_pair( child_problem.chain_component_start != 0 ? std::numeric_limits::max() - : SnarlDistanceIndex::sum(dist_left, child_problem.prefix_sum_value), + : SnarlDistanceIndex::sum(dist_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), SnarlDistanceIndex::sum(dist_right, distance_from_current_end_to_end_of_chain)); //Add this to the chain @@ -2810,7 +2803,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //The new distances from this child to the start of the chain and the end of this child pair new_distances = make_pair( child_problem.chain_component_start != 0 ? std::numeric_limits::max() - : SnarlDistanceIndex::sum(distance_left, child_problem.prefix_sum_value), + : SnarlDistanceIndex::sum(distance_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), SnarlDistanceIndex::sum(distance_right, distance_from_current_end_to_end_of_chain)); if (distance_between <= clustering_problem.read_distance_limit) { @@ -2986,8 +2979,8 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = child_problem.prefix_sum_value; - last_length = child_problem.node_length; //The length of this snarl + last_prefix_sum = child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank; + last_length = child_problem.unpacked_zipcode[child_problem.zipcode_depth].length; //The length of this snarl last_chain_component_end = child_problem.chain_component_end;//The component of the end node of this snarl } @@ -3002,12 +2995,6 @@ void 
SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro return; } - //Keep track of all clusters on the root - SnarlTreeNodeProblem root_problem(distance_index.get_root(), clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), distance_index, - &clustering_problem.all_seeds->at(0)->front(), 0); - //TODO: ikd about the seed here - //Remember old distances vector> child_distances (clustering_problem.seed_count_prefix_sum.back(), make_pair(std::numeric_limits::max(), std::numeric_limits::max())); @@ -3035,8 +3022,14 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro #ifdef DEBUG_CLUSTER cerr << "Clustering root snarl " << distance_index.net_handle_as_string(parent) << " with " << children.size() << " chidlren" << endl; #endif - if (children.size() > 0) { + //Make a new problem just for the root snarl + SnarlTreeNodeProblem root_problem(clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), + clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(children[0])).unpacked_zipcode, 0); + + for (size_t i = 0; i < children.size() ; i++) { //Go through each child node of the netgraph @@ -3063,15 +3056,8 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro } } - } - current_parent = parent; - children.clear(); - children.emplace_back(parent_to_child.second); - } - - } #ifdef DEBUG_CLUSTER - cerr << "\tFound clusters on the root" << endl; + cerr << "\tFound clusters on a root snarl" << endl; for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { cerr << "\t for read num " << read_num << endl; for (pair c : root_problem.read_cluster_heads) { @@ -3091,6 +3077,13 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro assert (group_id.second == clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); } 
#endif + } + current_parent = parent; + children.clear(); + children.emplace_back(parent_to_child.second); + } + + } } @@ -3103,7 +3096,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr return; } #ifdef DEBUG_CLUSTER - cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; + cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; cerr << "\t with node length " << structure_length << endl; #endif @@ -3176,7 +3169,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr } #ifdef DEBUG_CLUSTER - cerr << "\t" << distance_index.net_handle_as_string(node_problem->containing_net_handle) << " is shorter than the distance limit so just one cluster" << endl; + cerr << "\t" << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << " is shorter than the distance limit so just one cluster" << endl; #endif return; diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index e449e6a46b9..e80888efb14 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -190,6 +190,7 @@ class SnarlDistanceIndexClusterer { //Struct to store one child, which may be a seed, node, snarl, or chain struct SnarlTreeChild { + //TODO : Double check if the prefix sum etc can be gotten from the zipcode //If the net_handle is a node, then the child is a seed, otherwise the handle //is used to find the problem net_handle_t net_handle; @@ -226,90 +227,48 @@ class SnarlDistanceIndexClusterer { size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - //The snarl tree node that the clusters are on - net_handle_t containing_net_handle; 
- - - - - //The parent and grandparent of containing_net_handle, which might or might not be set - //This is just to store information from the minimizer cache - net_handle_t parent_net_handle; - net_handle_t grandparent_net_handle; - - //One representative seed so we can get the zipcode and stuff - const SeedCache* seed; + //One representative zipcode and the depth of whatever this is on + const vector& unpacked_zipcode; size_t zipcode_depth; //Minimum length of a node or snarl //If it is a chain, then it is distance_index.chain_minimum_length(), which is //the expected length for a normal chain, and the length of the //last component for a multicomponent chain - size_t node_length = std::numeric_limits::max(); - size_t prefix_sum_value = std::numeric_limits::max(); //of node or first node in snarl size_t chain_component_start = 0; //of node or start of snarl size_t chain_component_end = 0; //of node or end of snarl size_t loop_left = std::numeric_limits::max(); size_t loop_right = std::numeric_limits::max(); - //These are sometimes set if the value was in the cache - bool has_parent_handle = false; - bool has_grandparent_handle = false; - - //Only set this for nodes or snarls in chains - bool is_reversed_in_parent = false; - - bool is_trivial_chain = false; - bool is_looping_chain = false; //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, - const SeedCache* seed, size_t zipcode_depth) : - containing_net_handle(std::move(net)), - fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), - seed(seed), - zipcode_depth(zipcode_depth) { - read_cluster_heads.reserve(seed_count); - } - //Constructor for a node or trivial chain, used to remember information from the cache - SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, - 
size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : - containing_net_handle(net), - is_reversed_in_parent(is_reversed_in_parent), - node_length(node_length), - prefix_sum_value(prefix_sum), - chain_component_start(component), - chain_component_end(component), + SnarlTreeNodeProblem(size_t read_count, size_t seed_count, + const vector& unpacked_zipcode, size_t zipcode_depth) : + chain_component_start(unpacked_zipcode[zipcode_depth].chain_component), + chain_component_end(unpacked_zipcode[zipcode_depth].chain_component), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), - seed(seed), + unpacked_zipcode(unpacked_zipcode), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); } //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); - node_length = distance_index.chain_minimum_length(containing_net_handle); - chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); - is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); + chain_component_end = unpacked_zipcode[zipcode_depth].chain_component; } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); - net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); - net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); - chain_component_end = node_length == std::numeric_limits::max() ? 
chain_component_start+1 + net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, false, true)); + net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, true, true)); + chain_component_start = unpacked_zipcode[zipcode_depth].chain_component; + chain_component_end = unpacked_zipcode[zipcode_depth].length == std::numeric_limits::max() ? chain_component_start+1 : chain_component_start; - prefix_sum_value = SnarlDistanceIndex::sum( - distance_index.get_prefix_sum_value(start_in), - distance_index.minimum_length(start_in)); loop_right = SnarlDistanceIndex::sum(distance_index.get_forward_loop_value(end_in), 2*distance_index.minimum_length(end_in)); //Distance to go backward in the chain and back diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 5c108d8bcbe..5841fad2645 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -447,6 +447,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan } else { //If we want the length of the last component of the chain, check if it is a multicopmonent chain std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + cerr << "Component " << zip_value << endl; if (zip_value != 0) { //If this is a multicomponent (or looping chain, which also must be a multicomponent chain) return std::numeric_limits::max(); @@ -2092,19 +2093,32 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& } //The next thing for both nodes and chains is the connectivity value std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + bool externally_connected = false; //start-end connected if ((zip_value & 1) != 0) { current_code.distance_start_right = 0; current_code.distance_end_left = 0; + externally_connected = true; } //start-start connected if((zip_value & 2) != 0){ current_code.distance_start_left = 0; + 
externally_connected = true; } //end-end connected if ((zip_value & 4) != 0) { current_code.distance_end_right = 0; + externally_connected = true; } + if (current_code.chain_component != 0 || externally_connected) { + //If this is a multicomponent chain or has external connectivity, then we want to know the length + if (decoder_length() == 1) { + current_code.length = distance_index.minimum_length(current_code.net_handle); + } else { + current_code.length = distance_index.chain_minimum_length(current_code.net_handle); + } + } + } else { //Root snarl current_code.code_type = ZipCode::ROOT_SNARL; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 6c7569f29fc..64faf7ce3df 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -152,6 +152,7 @@ class ZipCode { ///Offsets for chain codes const static size_t CHAIN_SIZE = 3; const static size_t CHAIN_RANK_IN_SNARL_OFFSET = 0; + //For a multicomponent chain, this is the length of the last component, because the real length will always be inf const static size_t CHAIN_LENGTH_OFFSET = 1; //This tells us if the chain is a multicomponent chain, how many components it has, and if the chain loops From e5fe8b6cea89abf4ac2121f74e753582fb4281ad Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 9 Aug 2024 10:37:04 +0200 Subject: [PATCH 098/124] Reserve memory for unpacked zipcode --- src/zip_code.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 5841fad2645..062346a4314 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2043,6 +2043,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index) const { vector unpacked_zipcode; + unpacked_zipcode.reserve(decoder_length()); //Otherwise, walk through the zipcode start to end (root to leaf) and fill in the unpacked zipcode //Fill in everything in the zipcode in this pass, and then go back and fill in any net handles that From 
8ccb11008036b190588c4cd06a224384c1b77304 Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 9 Aug 2024 10:48:17 +0200 Subject: [PATCH 099/124] Reserve memory --- src/snarl_seed_clusterer.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index e80888efb14..6f57bd7f259 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -255,6 +255,7 @@ class SnarlDistanceIndexClusterer { unpacked_zipcode(unpacked_zipcode), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); + children.reserve(seed_count); } //Set the values needed to cluster a chain From a73b80ec8d6a73717acca01d492f82fe67003f4a Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 9 Aug 2024 11:56:27 +0200 Subject: [PATCH 100/124] Take out chain component --- src/snarl_seed_clusterer.cpp | 43 ++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 24 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index f671e3943ce..2cb0c13066c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2,7 +2,7 @@ #include -//#define DEBUG_CLUSTER +#define DEBUG_CLUSTER //#define debug_distances //#define EXHAUSTIVE_CLUSTER_CHECK @@ -1742,14 +1742,14 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin if (!child1.is_seed && !child1.has_chain_values) { //If child1 is a snarl and hasn't had its values set yet const SnarlTreeNodeProblem& child1_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)); - child1.chain_component = child1_problem.chain_component_start; + child1.chain_component = child1_problem.unpacked_zipcode[child1_problem.zipcode_depth].chain_component; child1.prefix_sum = child1_problem.unpacked_zipcode[child1_problem.zipcode_depth].prefix_sum_or_snarl_rank; child2.has_chain_values = true; } if (!child2.is_seed && !child2.has_chain_values) { //If 
child2 is a snarl and hasn't had its values set yet const SnarlTreeNodeProblem& child2_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)); - child2.chain_component = child2_problem.chain_component_start; + child2.chain_component = child2_problem.unpacked_zipcode[child2_problem.zipcode_depth].chain_component; child2.prefix_sum = child2_problem.unpacked_zipcode[child2_problem.zipcode_depth].prefix_sum_or_snarl_rank; child2.has_chain_values = true; } @@ -1776,20 +1776,10 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; - if (!(chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 && chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].code_type == ZipCode::CHAIN) - && ! is_top_level_chain) { - //If this isn't a trivial chain and isn't a top-level chain - - //If we need it, get the values from the distance index: - //is_looping_chain, node_length, the end boundary node, and the end component - //THese only get used if we need the distances to the ends of the chain - chain_problem->set_chain_values(distance_index); - } - if (only_seeds && !chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain && - (chain_problem->chain_component_end == 0 - || chain_problem->chain_component_end == std::numeric_limits::max())) { + (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component == 0 + || chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component == std::numeric_limits::max())) { //If there are only seeds in the chain (and the chain doesn't loop and isn't a multicomponent chain), //then cluster by walking through the seeds //This also does the work of clustering a trivial chain (which is just a node), which should be the same amount of work as using cluster_one_node @@ 
-1888,13 +1878,14 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //If the last child is a snarl, get it from the SnarlTreeNodeProblem otherwise from the seed's cache size_t last_prefix_sum = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left - : last_child_problem->chain_component_start; + : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].prefix_sum_or_snarl_rank; +//TODO: Get both from problem? size_t last_length = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().length : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].length; size_t last_chain_component_end = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().chain_component - : last_child_problem->chain_component_start; //This is initialized to the start of the snarl + : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].chain_component; //This is initialized to the start of the snarl //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small @@ -2179,7 +2170,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->chain_component_end != current_child_seed.unpacked_zipcode.back().chain_component) { + } else if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component != current_child_seed.unpacked_zipcode.back().chain_component) { //If they aren't in the same component 
distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { @@ -2623,7 +2614,8 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if (!is_first_child) { //If this isn't the first child we're looking at - if ( last_chain_component_end == child_problem.chain_component_start) { + if ( last_chain_component_end == + child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component ) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max() && last_chain_component_end ) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance @@ -2659,7 +2651,9 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->chain_component_end != child_problem.chain_component_end) { + } else if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component != + child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component + + (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ? 
1 : 0)) { //If it's not in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); //TODO: Used to do this, I"m pretty sure I don't need to though @@ -2681,7 +2675,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; -cerr << "\tDistance from start of chain to the left side of this one: " << (child_problem.chain_component_start != 0 +cerr << "\tDistance from start of chain to the left side of this one: " << (child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 ? std::numeric_limits::max() : child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank) << endl; cerr << "\tDistance from the last child to the right side of this one: " << distance_from_last_child_to_current_end << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; @@ -2747,7 +2741,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //If this is the last thing in the chain, then the distance to the end of the chain //If the snarl is isn't in the first component of the chain, then the left distance is infinite pair new_distances = make_pair( - child_problem.chain_component_start != 0 ? std::numeric_limits::max() + child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 ? 
std::numeric_limits::max() : SnarlDistanceIndex::sum(dist_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), SnarlDistanceIndex::sum(dist_right, distance_from_current_end_to_end_of_chain)); @@ -2802,7 +2796,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //The new distances from this child to the start of the chain and the end of this child pair new_distances = make_pair( - child_problem.chain_component_start != 0 ? std::numeric_limits::max() + child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 ? std::numeric_limits::max() : SnarlDistanceIndex::sum(distance_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), SnarlDistanceIndex::sum(distance_right, distance_from_current_end_to_end_of_chain)); @@ -2981,7 +2975,8 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e last_child = current_child; last_prefix_sum = child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank; last_length = child_problem.unpacked_zipcode[child_problem.zipcode_depth].length; //The length of this snarl - last_chain_component_end = child_problem.chain_component_end;//The component of the end node of this snarl + last_chain_component_end = child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component + + (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ? 
1 : 0);//The component of the end node of this snarl } //Cluster the root From 4286171c750dd43212d2ef016c8240f1e7c9595c Mon Sep 17 00:00:00 2001 From: Xian Date: Fri, 9 Aug 2024 19:03:40 +0200 Subject: [PATCH 101/124] Fix bug getting best distanecs --- src/snarl_seed_clusterer.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 2cb0c13066c..fa9b7fb276e 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -1619,9 +1619,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { if (read_num == 0) { if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { + size_t old_best_right = snarl_problem->read_best_right.first; snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_left.first, child_problem.read_best_left.first); - snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_right.first, + snarl_problem->read_best_left.first = std::min(old_best_right, child_problem.read_best_right.first); } else { snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_left.first, @@ -1631,9 +1632,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } else { if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { + size_t old_best_right = snarl_problem->read_best_right.second; snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_left.second, child_problem.read_best_left.second); - snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_right.second, + snarl_problem->read_best_left.second = std::min(old_best_right, child_problem.read_best_right.second); } else { snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_left.second, @@ -2457,6 +2459,7 @@ void 
SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& vector> to_erase; to_erase.reserve(child_problem.read_cluster_heads.size()); + for (auto& child_cluster_head : child_problem.read_cluster_heads) { //Go through each of the clusters on this child size_t read_num = child_cluster_head.first; @@ -2586,7 +2589,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); - + //Skip this child if its seeds are all too far away bool skip_snarl = false; if (child_problem.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && @@ -2877,6 +2880,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e distance_from_last_child_to_current_child), current_distance_left), 1); + size_t distance_between_fragment = SnarlDistanceIndex::minus( SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(chain_cluster_distances.second, distance_from_last_child_to_current_child), From 5beb1be90d7bfafa91c7c13742fd691906054150 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 10 Aug 2024 18:21:23 +0200 Subject: [PATCH 102/124] Get the length of a cyclic chain --- src/zip_code.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 062346a4314..e1be76a36ac 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2111,7 +2111,7 @@ vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& current_code.distance_end_right = 0; externally_connected = true; } - if (current_code.chain_component != 0 || externally_connected) { + if (current_code.chain_component != 0 || externally_connected || 
current_code.is_looping_chain) { //If this is a multicomponent chain or has external connectivity, then we want to know the length if (decoder_length() == 1) { current_code.length = distance_index.minimum_length(current_code.net_handle); From a0cbb237216da3c5d3b4351004a9a42f47372e4c Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 10 Aug 2024 18:31:21 +0200 Subject: [PATCH 103/124] Turn off debug --- src/snarl_seed_clusterer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index fa9b7fb276e..021e448da30 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -2,7 +2,7 @@ #include -#define DEBUG_CLUSTER +//#define DEBUG_CLUSTER //#define debug_distances //#define EXHAUSTIVE_CLUSTER_CHECK From d7e2553189a0e8fba1d467f56800061148f32616 Mon Sep 17 00:00:00 2001 From: Xian Date: Sat, 10 Aug 2024 23:23:40 +0200 Subject: [PATCH 104/124] Get values from unpacked zipcode for children of problems --- src/snarl_seed_clusterer.cpp | 122 ++++++++++++++++------------------- src/snarl_seed_clusterer.hpp | 17 +---- 2 files changed, 58 insertions(+), 81 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 021e448da30..d4eb0fdb60d 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -448,13 +448,9 @@ cerr << "Add all seeds to nodes: " << endl; //Add this seed to its parent cluster SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(is_trivial_chain ? 
node_code.net_handle : parent_code.net_handle)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = node_code.net_handle; + parent_problem.children.back().unpacked_zipcode = &seed.unpacked_zipcode; + parent_problem.children.back().zipcode_depth = seed.unpacked_zipcode.size()-1; parent_problem.children.back().seed_indices = {read_num, i}; - parent_problem.children.back().is_seed = true; - parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = node_code.chain_component; - parent_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - node_code.prefix_sum_or_snarl_rank); //And the parent to chains_by_level @@ -486,13 +482,9 @@ cerr << "Add all seeds to nodes: " << endl; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(node_code.net_handle)); node_problem.children.emplace_back(); - node_problem.children.back().net_handle = node_code.net_handle; + node_problem.children.back().unpacked_zipcode = &seed.unpacked_zipcode; + node_problem.children.back().zipcode_depth = seed.unpacked_zipcode.size()-1; node_problem.children.back().seed_indices = {read_num, i}; - node_problem.children.back().is_seed = true; - node_problem.children.back().has_chain_values = true; - node_problem.children.back().chain_component = node_code.chain_component; - node_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, - node_code.prefix_sum_or_snarl_rank); @@ -595,11 +587,8 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Add the snarl to its parent chain parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = snarl_handle; - parent_problem.children.back().is_seed = false; - parent_problem.children.back().has_chain_values = true; - parent_problem.children.back().chain_component = 
snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].chain_component; - parent_problem.children.back().prefix_sum = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].prefix_sum_or_snarl_rank; + parent_problem.children.back().unpacked_zipcode = &snarl_problem->unpacked_zipcode; + parent_problem.children.back().zipcode_depth = snarl_problem->zipcode_depth; if (new_parent) { //And the parent chain to the things to be clustered next @@ -635,7 +624,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #ifdef DEBUG_CLUSTER cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { - cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; + cerr << "\t" << distance_index.net_handle_as_string(x.unpacked_zipcode->at(x.zipcode_depth).net_handle) << endl; } #endif @@ -812,9 +801,8 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(parent)); parent_problem.children.emplace_back(); - parent_problem.children.back().net_handle = chain_handle; - parent_problem.children.back().is_seed = false; - parent_problem.children.back().has_chain_values = false; + parent_problem.children.back().unpacked_zipcode = &chain_problem->unpacked_zipcode; + parent_problem.children.back().zipcode_depth = chain_problem->zipcode_depth; if (new_parent) { @@ -1545,7 +1533,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Go through each child node of the netgraph SnarlTreeNodeProblem& child_problem_i = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].net_handle)); + 
clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].unpacked_zipcode->at(snarl_problem->children[i].zipcode_depth).net_handle)); if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit @@ -1573,7 +1561,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the other node and its clusters SnarlTreeNodeProblem& child_problem_j = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].unpacked_zipcode->at(snarl_problem->children[j].zipcode_depth).net_handle)); if (child_problem_j.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && child_problem_j.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { @@ -1600,7 +1588,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (SnarlTreeNodeProblem::SnarlTreeChild& node_problem : snarl_problem->children) { //Go through each child node of the netgraph and add its clusters to the snarl SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(node_problem.net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(node_problem.unpacked_zipcode->at(node_problem.zipcode_depth).net_handle)); //Add the cluster heads //May need to flip the distances @@ -1646,9 +1634,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { + size_t old_best_right = snarl_problem->fragment_best_right; snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_left, child_problem.fragment_best_left); - snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_right, + snarl_problem->fragment_best_left = std::min(old_best_right, child_problem.fragment_best_right); } else { snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_left, @@ -1733,45 +1722,46 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //First, sort the children of the chain //If there is only one child, check if it's a seeed - bool only_seeds=chain_problem->children.size() == 1 ? chain_problem->children.front().is_seed + bool only_seeds=chain_problem->children.size() == 1 ? 
chain_problem->children.front().zipcode_depth == chain_problem->children.front().unpacked_zipcode->size()-1 : true; std::sort(chain_problem->children.begin(), chain_problem->children.end(), [&] (SnarlTreeNodeProblem::SnarlTreeChild& child1, SnarlTreeNodeProblem::SnarlTreeChild& child2) { - if (!child1.is_seed || !child2.is_seed) { + + const zip_code_t& child1_code = child1.unpacked_zipcode->at(child1.zipcode_depth); + const zip_code_t& child2_code = child2.unpacked_zipcode->at(child2.zipcode_depth); + + bool child1_is_seed = child1.zipcode_depth == child1.unpacked_zipcode->size()-1; + bool child2_is_seed = child2.zipcode_depth == child2.unpacked_zipcode->size()-1; + + if (!child1_is_seed || !child2_is_seed) { only_seeds = false; } - if (!child1.is_seed && !child1.has_chain_values) { - //If child1 is a snarl and hasn't had its values set yet - const SnarlTreeNodeProblem& child1_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)); - child1.chain_component = child1_problem.unpacked_zipcode[child1_problem.zipcode_depth].chain_component; - child1.prefix_sum = child1_problem.unpacked_zipcode[child1_problem.zipcode_depth].prefix_sum_or_snarl_rank; - child2.has_chain_values = true; - } - if (!child2.is_seed && !child2.has_chain_values) { - //If child2 is a snarl and hasn't had its values set yet - const SnarlTreeNodeProblem& child2_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)); - child2.chain_component = child2_problem.unpacked_zipcode[child2_problem.zipcode_depth].chain_component; - child2.prefix_sum = child2_problem.unpacked_zipcode[child2_problem.zipcode_depth].prefix_sum_or_snarl_rank; - child2.has_chain_values = true; - } - if (child1.chain_component != child2.chain_component) { - return child1.chain_component < child2.chain_component; - } else if (child1.prefix_sum == child2.prefix_sum && !(child1.is_seed && 
child2.is_seed)) { + + size_t prefix_sum1 = child1_is_seed ? SnarlDistanceIndex::sum(clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).distance_left, + child1_code.prefix_sum_or_snarl_rank) + : child1_code.prefix_sum_or_snarl_rank; + size_t prefix_sum2 = child2_is_seed ? SnarlDistanceIndex::sum(clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).distance_left, + child2_code.prefix_sum_or_snarl_rank) + : child2_code.prefix_sum_or_snarl_rank; + + if (child1_code.chain_component != child2_code.chain_component) { + return child1_code.chain_component < child2_code.chain_component; + } else if (prefix_sum1 == prefix_sum2 && !(child1_is_seed && child2_is_seed)) { //Get the prefix sum values not including the offset in the positions - size_t prefix_sum1 = child1.is_seed + prefix_sum1 = child1_is_seed ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank - : child1.prefix_sum; - size_t prefix_sum2 = child2.is_seed + : child1_code.prefix_sum_or_snarl_rank; + prefix_sum2 = child2_is_seed ? 
clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank - : child2.prefix_sum; + : child2_code.prefix_sum_or_snarl_rank; if (prefix_sum1 == prefix_sum2){ - return child2.is_seed; + return child2_is_seed; } else { return prefix_sum1 < prefix_sum2; } } else { - return child1.prefix_sum < child2.prefix_sum; + return prefix_sum1 < prefix_sum2; } }); @@ -1871,21 +1861,21 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //The last child we saw SnarlTreeNodeProblem::SnarlTreeChild& last_child = chain_problem->children.front(); - const SnarlTreeNodeProblem* last_child_problem = last_child.is_seed + const SnarlTreeNodeProblem* last_child_problem = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE ? nullptr : &clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle)); //And values we need to save from the last child //If the last child is a snarl, get it from the SnarlTreeNodeProblem otherwise from the seed's cache - size_t last_prefix_sum = last_child.is_seed + size_t last_prefix_sum = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].prefix_sum_or_snarl_rank; //TODO: Get both from problem? - size_t last_length = last_child.is_seed + size_t last_length = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().length : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].length; - size_t last_chain_component_end = last_child.is_seed + size_t last_chain_component_end = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().chain_component : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].chain_component; //This is initialized to the start of the snarl @@ -1909,7 +1899,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin SnarlTreeNodeProblem::SnarlTreeChild& child = chain_problem->children[child_i]; - if (!child.is_seed){ + if (child.unpacked_zipcode->at(child.zipcode_depth).code_type != ZipCode::NODE){ //If this is a snarl, then cluster the children here add_snarl_to_chain_problem(clustering_problem, chain_problem, last_child, last_prefix_sum, last_length, @@ -1923,7 +1913,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } #ifdef DEBUG_CLUSTER - cerr << "\tintermediate clusters on " << distance_index.net_handle_as_string(chain_handle) << " after child " << distance_index.net_handle_as_string(child.net_handle) << endl; + cerr << "\tintermediate clusters on " << distance_index.net_handle_as_string(chain_handle) << " after child " << distance_index.net_handle_as_string(child.unpacked_zipcode->at(child.zipcode_depth).net_handle) << endl; cerr << "\t with best left and right values: " << chain_problem->fragment_best_left << " " << chain_problem->fragment_best_right << endl; bool got_left = false; @@ -2142,7 +2132,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if 
(!is_first_child) { //If this isn't the first child we're looking at - if (last_child.net_handle == current_child.net_handle) { + if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; } else if ( last_chain_component_end == current_child_seed.unpacked_zipcode.back().chain_component) { @@ -2191,7 +2181,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c #endif - if (last_child.net_handle != current_child.net_handle && + if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle != current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, chain_problem->fragment_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -2271,7 +2261,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.net_handle == current_child.net_handle ? 0 + (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle ? 
0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.unpacked_zipcode.back().length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) @@ -2312,7 +2302,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child), current_child_seed.distance_left), 1); - if (!is_first_child && last_child.net_handle == current_child.net_handle) { + if (!is_first_child && last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) { //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node @@ -2588,7 +2578,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle)); //Skip this child if its seeds are all too far away bool skip_snarl = false; @@ -2602,7 +2592,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& update_distances_on_same_child(child_problem); } #ifdef DEBUG_CLUSTER - cerr << "At child " << distance_index.net_handle_as_string(current_child.net_handle) << endl; + cerr << "At child " << distance_index.net_handle_as_string(current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) << endl; #endif /* @@ -2641,7 +2631,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t 
distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.net_handle == current_child.net_handle ? 0 + (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle ? 0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); @@ -2660,7 +2650,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& //If it's not in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); //TODO: Used to do this, I"m pretty sure I don't need to though - //distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.net_handle); + //distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.child_code->net_handle); } else if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ) { //If the node length is infinite, then it is a snarl that isn't start-end connected, so the start //and end of the snarl are in different components of the chain. Since it reached here, the end @@ -2706,7 +2696,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e chain_problem->read_best_right = std::make_pair(std::numeric_limits::max(), std::numeric_limits::max()); - if (last_child.net_handle != current_child.net_handle && + if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle != current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, old_best_right) > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 6f57bd7f259..f400947e455 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -190,24 +190,11 @@ class SnarlDistanceIndexClusterer { //Struct to store one child, which may be a seed, node, snarl, or chain struct SnarlTreeChild { - //TODO : Double check if the prefix sum etc can be gotten from the zipcode //If the net_handle is a node, then the child is a seed, otherwise the handle //is used to find the problem - net_handle_t net_handle; + const vector* unpacked_zipcode; + size_t zipcode_depth; pair seed_indices; - - //The values used to sort the children of a chain - //Storing it here is faster than looking it up each time - size_t chain_component; - size_t prefix_sum; - //Is this child a seed - //This is redundant with net_handle because any net_handle_t that is a node will really be a seed, - //but it's faster than looking it up in the distance index - bool is_seed; - //Have chain_component and prefix_sum been set? 
- //For a seed, it gets set when the child is made, otherwise the first time this - //child is seen when sorting - bool has_chain_values; }; //The children of this snarl tree node //Initially unsorted, sort before clustering for chains From 98a5518ff0090c8c7167d658590e2edcd1f23499 Mon Sep 17 00:00:00 2001 From: Xian Date: Sun, 11 Aug 2024 09:07:23 +0200 Subject: [PATCH 105/124] Take out unused chain component ints --- src/snarl_seed_clusterer.hpp | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index f400947e455..0159d7278f7 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -218,13 +218,6 @@ class SnarlDistanceIndexClusterer { const vector& unpacked_zipcode; size_t zipcode_depth; - //Minimum length of a node or snarl - //If it is a chain, then it is distance_index.chain_minimum_length(), which is - //the expected length for a normal chain, and the length of the - //last component for a multicomponent chain - size_t chain_component_start = 0; //of node or start of snarl - size_t chain_component_end = 0; //of node or end of snarl - size_t loop_left = std::numeric_limits::max(); size_t loop_right = std::numeric_limits::max(); @@ -236,8 +229,6 @@ class SnarlDistanceIndexClusterer { //read_count is the number of reads in a fragment (2 for paired end) SnarlTreeNodeProblem(size_t read_count, size_t seed_count, const vector& unpacked_zipcode, size_t zipcode_depth) : - chain_component_start(unpacked_zipcode[zipcode_depth].chain_component), - chain_component_end(unpacked_zipcode[zipcode_depth].chain_component), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), unpacked_zipcode(unpacked_zipcode), zipcode_depth(zipcode_depth) { @@ -245,18 +236,11 @@ class SnarlDistanceIndexClusterer { children.reserve(seed_count); } - //Set the values needed to cluster a chain - void set_chain_values(const SnarlDistanceIndex& distance_index) 
{ - chain_component_end = unpacked_zipcode[zipcode_depth].chain_component; - } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, false, true)); net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, true, true)); - chain_component_start = unpacked_zipcode[zipcode_depth].chain_component; - chain_component_end = unpacked_zipcode[zipcode_depth].length == std::numeric_limits::max() ? chain_component_start+1 - : chain_component_start; loop_right = SnarlDistanceIndex::sum(distance_index.get_forward_loop_value(end_in), 2*distance_index.minimum_length(end_in)); //Distance to go backward in the chain and back From f5ad68775d4303701428885be7b5c5410376fc79 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 12 Aug 2024 11:04:29 +0200 Subject: [PATCH 106/124] Reserve memory when getting zipcodes from payload --- src/zip_code.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index e1be76a36ac..d1fc617a4fa 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1821,6 +1821,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Get the decoder offsets varint_vector_t decoder_vector; + decoder_vector.data.reserve(16); for (size_t i = decoded_bytes ; i <16 ; i++) { uint8_t saved_byte; if (decoded_bytes < 8) { @@ -1837,6 +1838,8 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Now go through the varint vector up and add anything that isn't 0 size_t varint_value= 1; size_t varint_index = 0; + //Somewhat arbitrarily reserve what we expect to be the number of codes in the zipcode + decoder.reserve(decoded_bytes / 4); decoder.emplace_back(is_chain, 0); is_chain = !is_chain; if (decoder_vector.byte_count() != 0) { @@ 
-1852,6 +1855,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { assert(!decoder.back().is_chain); decoder.back().is_chain = true; } + finished_decoding = true; } From d02f4f753be1990ab3bb781f6ad541af5913c895 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 12 Aug 2024 11:21:04 +0200 Subject: [PATCH 107/124] Remove unused ints and reserve memory for unpacked zipcode --- src/snarl_seed_clusterer.hpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 0159d7278f7..ba2709b6415 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -99,17 +99,19 @@ class SnarlDistanceIndexClusterer { struct SeedCache{ const Seed* seed; - vector unpacked_zipcode; - //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds //For a seed in a chain, distance_left is the left of the chain, right is the distance //to the right side of the node, relative to the chain size_t distance_left = std::numeric_limits::max(); size_t distance_right = std::numeric_limits::max(); - //Values from the payload that we're saving - size_t payload_prefix_sum = std::numeric_limits::max(); - size_t payload_node_length = std::numeric_limits::max(); + + vector unpacked_zipcode; + + //Start with enough memory reserved for what is probably at least the max depth of the snarl tree + SeedCache() { + unpacked_zipcode.reserve(10); + } }; From 2762b05cad08668e6d11a54c56f87cf33e6f9506 Mon Sep 17 00:00:00 2001 From: Xian Date: Mon, 12 Aug 2024 14:52:25 +0200 Subject: [PATCH 108/124] Reserve less memory because it made it slower --- src/snarl_seed_clusterer.hpp | 2 +- src/zip_code.cpp | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index ba2709b6415..034f98323c8 100644 --- a/src/snarl_seed_clusterer.hpp +++ 
b/src/snarl_seed_clusterer.hpp @@ -110,7 +110,7 @@ class SnarlDistanceIndexClusterer { //Start with enough memory reserved for what is probably at least the max depth of the snarl tree SeedCache() { - unpacked_zipcode.reserve(10); + unpacked_zipcode.reserve(6); } }; diff --git a/src/zip_code.cpp b/src/zip_code.cpp index d1fc617a4fa..f5fb52d09d0 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1821,7 +1821,6 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Get the decoder offsets varint_vector_t decoder_vector; - decoder_vector.data.reserve(16); for (size_t i = decoded_bytes ; i <16 ; i++) { uint8_t saved_byte; if (decoded_bytes < 8) { @@ -1839,7 +1838,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { size_t varint_value= 1; size_t varint_index = 0; //Somewhat arbitrarily reserve what we expect to be the number of codes in the zipcode - decoder.reserve(decoded_bytes / 4); + decoder.reserve(decoder_vector.byte_count()); decoder.emplace_back(is_chain, 0); is_chain = !is_chain; if (decoder_vector.byte_count() != 0) { From f5e56388e45f694e5d356fd5ce579872d7943730 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 09:39:27 +0200 Subject: [PATCH 109/124] Undo unpacking zipcode to get back to 020cbb --- src/minimizer_mapper.cpp | 2 +- src/minimizer_mapper.hpp | 2 +- src/snarl_seed_clusterer.cpp | 752 ++++++++++++++++-------------- src/snarl_seed_clusterer.hpp | 102 +++- src/subcommand/minimizer_main.cpp | 6 +- src/unittest/zip_code.cpp | 412 ++-------------- src/zip_code.cpp | 386 ++++++--------- src/zip_code.hpp | 57 +-- 8 files changed, 717 insertions(+), 1002 deletions(-) diff --git a/src/minimizer_mapper.cpp b/src/minimizer_mapper.cpp index 3a87586f0a7..14eccb6acd8 100644 --- a/src/minimizer_mapper.cpp +++ b/src/minimizer_mapper.cpp @@ -3741,7 +3741,7 @@ std::vector MinimizerMapper::find_seeds(const std::vector seeds.back().source = i; //Get the zipcode - if 
(minimizer.occs[j].payload == ZipCode::NO_PAYLOAD) { + if (minimizer.occs[j].payload == MIPayload::NO_CODE) { //If the zipcocde wasn't saved, then calculate it seeds.back().zipcode.fill_in_zipcode(*(this->distance_index), hit); seeds.back().zipcode.fill_in_full_decoder(); diff --git a/src/minimizer_mapper.hpp b/src/minimizer_mapper.hpp index b8cf445753b..117e9b624bf 100644 --- a/src/minimizer_mapper.hpp +++ b/src/minimizer_mapper.hpp @@ -596,7 +596,7 @@ class MinimizerMapper : public AlignerClient { /// How should we initialize chain info when it's not stored in the minimizer index? inline static gbwtgraph::Payload no_chain_info() { - return ZipCode::NO_PAYLOAD; + return MIPayload::NO_CODE; } /// How do we convert chain info to an actual seed of the type we are using? diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index d4eb0fdb60d..31579b53103 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -35,7 +35,7 @@ vector SnarlDistanceIndexClusterer::cluste #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].unpacked_zipcode = seeds[i].zipcode.unpack_zip_code(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); } } vector*> all_seed_caches = {&seed_caches}; @@ -66,7 +66,7 @@ vector> SnarlDistanceIndexClusterer throw std::runtime_error("Clusterer: We can't handle more than paired end mapping"); } - //Make a vector of SeedCache that contains all the unpacked zipcodes + //Make a vector of SeedCache that contains all the payloads vector> all_seed_caches; all_seed_caches.reserve(all_seeds.size()); @@ -79,7 +79,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].unpacked_zipcode = 
all_seeds[read_num][i].zipcode.unpack_zip_code(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); } } } @@ -352,87 +352,100 @@ cerr << "Add all seeds to nodes: " << endl; //The zipcodes are already filled in //TODO: The whole thing could now be done with the zipcodes instead of looking at the distance //index but that would be too much work to write for now - const zip_code_t& node_code = seed.unpacked_zipcode.back(); - bool is_trivial_chain = node_code.code_type == ZipCode::CHAIN; - const zip_code_t& parent_code = seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2]; + const MIPayload& payload = seed.payload; #ifdef DEBUG_CLUSTER + //cerr << "Using cached values for node " << id << ": " + // << ", " << seed.payload.record_offset + // << ", " << seed.payload.parent_record_offset + // << ", " << seed.payload.node_length + // << ", " << seed.payload.prefix_sum + // << ", " << seed.payload.chain_component << endl; net_handle_t handle = distance_index.get_node_net_handle(id); net_handle_t parent_handle = distance_index.get_parent(handle); cerr << "Check values for node " << distance_index.net_handle_as_string(handle) << " in parent " << distance_index.net_handle_as_string(parent_handle) << endl; - cerr << "Got net handle from zipcode " << distance_index.net_handle_as_string(node_code.net_handle) << endl; - cerr << "Node length " << node_code.length << " should be " << distance_index.minimum_length(handle) << endl; - assert(seed.unpacked_zipcode.back().length == distance_index.minimum_length(handle)); + //assert(seed.payload.parent_record_offset == + // (distance_index.is_trivial_chain(parent_handle) ? 
distance_index.get_record_offset(distance_index.get_parent(parent_handle)) + // :distance_index.get_record_offset(parent_handle))); + cerr << "Node length " << seed.payload.node_length << " should be " << distance_index.minimum_length(handle) << endl; + assert(seed.payload.node_length == distance_index.minimum_length(handle)); + //size_t prefix_sum = distance_index.is_trivial_chain(parent_handle) + // ? std::numeric_limits::max() + // : distance_index.get_prefix_sum_value(handle); + //assert(seed.payload.prefix_sum == prefix_sum); size_t chain_component = (distance_index.is_multicomponent_chain(parent_handle) ? distance_index.get_chain_component(handle) : 0); - chain_component = chain_component ; - cerr << "For node " << distance_index.net_handle_as_string(handle) << endl; - cerr << "Chain compoentn: " << chain_component << " was " << node_code.chain_component << endl; - if (chain_component != 0 && chain_component != std::numeric_limits::max()) { - assert(node_code.chain_component == chain_component); - } + chain_component = chain_component == std::numeric_limits::max() ? 
0 : chain_component; + cerr << "For nod " << distance_index.net_handle_as_string(handle) << endl; + cerr << "Chain compoentn: " << chain_component << " was " << seed.payload.chain_component << endl; + assert(seed.payload.chain_component == chain_component); + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Parent should be " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))) << endl; + cerr <<" Is actually " << distance_index.net_handle_as_string( distance_index.start_end_traversal_of(seed.payload.parent_handle)) << endl; + assert( distance_index.start_end_traversal_of(seed.payload.parent_handle) == distance_index.start_end_traversal_of(distance_index.get_parent(seed.payload.node_handle))); + } #endif - if (!((seed.unpacked_zipcode.front().code_type == ZipCode::ROOT_SNARL && seed.unpacked_zipcode.size() == 2) - || seed.unpacked_zipcode.front().code_type == ZipCode::ROOT_NODE)) { + if (!(seed.payload.parent_type == ZipCode::ROOT_SNARL || seed.payload.parent_type == ZipCode::ROOT_NODE)) { //If the parent is not the root and not a root snarl (it is a chain or trivial chain) //Add the seed to its parent //Also update the zipcode on the seed - #ifdef DEBUG_CLUSTER - cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.unpacked_zipcode[seed.unpacked_zipcode.size()-2].net_handle) << endl; - cerr << "Node length should be " << distance_index.minimum_length(node_code.net_handle) << " actually " << node_code.length << endl; - assert(node_code.length == distance_index.minimum_length(node_code.net_handle)); - cerr << "Reversed in parent? 
" << distance_index.net_handle_as_string(node_code.net_handle) << " " << distance_index.net_handle_as_string(parent_code.net_handle) << " " << node_code.is_reversed << endl; + cerr << "\tchild of a chain " << distance_index.net_handle_as_string(seed.payload.parent_handle) << endl; + //assert(prefix_sum == (is_trivial_chain ? std::numeric_limits::max() + // : distance_index.get_prefix_sum_value(seed.payload.node_handle))); + cerr << "Node length should be " << distance_index.minimum_length(seed.payload.node_handle) << " actually " << seed.payload.node_length << endl; + assert(seed.payload.node_length == distance_index.minimum_length(seed.payload.node_handle)); + cerr << "Reversed in parent? " << distance_index.net_handle_as_string(seed.payload.node_handle) << " " << distance_index.net_handle_as_string(seed.payload.parent_handle) << " " << seed.payload.is_reversed << endl; + cerr << "is trivial? " << seed.payload.is_trivial_chain << endl; + if (!distance_index.is_root(seed.payload.parent_handle)) { + cerr << "Grandparent: " << distance_index.net_handle_as_string(distance_index.get_parent(seed.payload.parent_handle)) << endl; + } + cerr << seed.payload.is_reversed << " " << distance_index.is_reversed_in_parent(seed.payload.parent_handle) << endl; + assert(seed.payload.is_reversed == (seed.payload.is_trivial_chain ? distance_index.is_reversed_in_parent(seed.payload.parent_handle) + : distance_index.is_reversed_in_parent(seed.payload.node_handle))); #endif //Add the parent chain or trivial chain bool new_parent = false; - if (clustering_problem.net_handle_to_node_problem_index.count(is_trivial_chain ? 
node_code.net_handle : parent_code.net_handle) == 0) { + new_parent = false; + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.parent_handle) == 0) { //If we haven't seen the parent chain before, make a new SnarlTreeNodeProblem for it new_parent = true; - if (is_trivial_chain) { - //Trivial chain - clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + if (seed.payload.is_trivial_chain ) { + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - seed.unpacked_zipcode, seed.seed->zipcode.max_depth()); + false, seed.payload.node_length, std::numeric_limits::max(), std::numeric_limits::max(), + &seed, seed.seed->zipcode.max_depth()); + clustering_problem.all_node_problems.back().is_trivial_chain = true; } else { //The parent is an actual chain - clustering_problem.net_handle_to_node_problem_index.emplace(parent_code.net_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - seed.unpacked_zipcode, seed.seed->zipcode.max_depth() - 1); + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.parent_handle, clustering_problem.all_node_problems.size()); + clustering_problem.all_node_problems.emplace_back(seed.payload.parent_handle, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + &seed, seed.seed->zipcode.max_depth() - 1); } new_parent = true; } - size_t parent_depth = 0; - - for (size_t d = 0 ; d <= 
seed.unpacked_zipcode.size()-(is_trivial_chain ? 1 : 2) ; d++) { - const auto& type = seed.unpacked_zipcode[d].code_type; - if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { - parent_depth++; - } - } #ifdef DEBUG_CLUSTER - cerr << "depth of " << distance_index.net_handle_as_string(parent_code.net_handle) << " " << distance_index.get_depth(is_trivial_chain ? node_code.net_handle : parent_code.net_handle) << " guessed " << parent_depth << endl; - assert(parent_depth == distance_index.get_depth(is_trivial_chain ? node_code.net_handle : parent_code.net_handle)); + assert(seed.payload.parent_depth == distance_index.get_depth(seed.payload.parent_handle)); #endif //If chains_by_level isn't big enough for this depth, resize it and reserve space at each level - if (parent_depth+1 > chains_by_level.size()) { - size_t to_add = (parent_depth+1) - chains_by_level.size(); + if (seed.payload.parent_depth+1 > chains_by_level.size()) { + size_t to_add = (seed.payload.parent_depth+1) - chains_by_level.size(); for (size_t i = 0 ; i < to_add ; i++) { chains_by_level.emplace_back(); chains_by_level.back().reserve(clustering_problem.seed_count_prefix_sum.back()); @@ -440,22 +453,66 @@ cerr << "Add all seeds to nodes: " << endl; } //Make sure the seed's distances are relative to the orientation in the parent - seed.distance_left = (!is_trivial_chain && node_code.is_reversed) != is_rev(pos) ? node_code.length- get_offset(pos) + seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right =(!is_trivial_chain && node_code.is_reversed) != is_rev(pos) ? get_offset(pos) + 1 - : node_code.length- get_offset(pos); + seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 + : seed.payload.node_length- get_offset(pos); //Add this seed to its parent cluster - SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(is_trivial_chain ? node_code.net_handle : parent_code.net_handle)); + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.parent_handle)); parent_problem.children.emplace_back(); - parent_problem.children.back().unpacked_zipcode = &seed.unpacked_zipcode; - parent_problem.children.back().zipcode_depth = seed.unpacked_zipcode.size()-1; + parent_problem.children.back().net_handle = seed.payload.node_handle; parent_problem.children.back().seed_indices = {read_num, i}; + parent_problem.children.back().is_seed = true; + parent_problem.children.back().has_chain_values = true; + parent_problem.children.back().chain_component = seed.payload.chain_component; + parent_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, + seed.payload.prefix_sum); //And the parent to chains_by_level if (new_parent) { - chains_by_level[parent_depth].emplace_back(is_trivial_chain ? node_code.net_handle : parent_code.net_handle); + chains_by_level[seed.payload.parent_depth].emplace_back(seed.payload.parent_handle); + } + + + //If the parent is a trivial chain and not in the root, then we also stored the identity of the snarl, so add it here too + if ( new_parent) { + if (seed.payload.is_trivial_chain && !seed.payload.parent_is_root) { + bool grandparent_is_simple_snarl = seed.payload.parent_is_chain; + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = grandparent_is_simple_snarl + ? 
distance_index.get_net_handle_from_values(distance_index.get_record_offset(seed.payload.node_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE, + 1) + : distance_index.get_net_handle_from_values(seed.payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::SNARL_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; +#endif + + if (grandparent_is_simple_snarl) { + //If the grandparent is a simple snarl, then we also stored the identity of its parent chain, so add it here too + parent_problem.has_grandparent_handle = true; + parent_problem.grandparent_net_handle = distance_index.get_net_handle_from_values( + seed.payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "GRANDPARENT: " << distance_index.net_handle_as_string(parent_problem.grandparent_net_handle) << endl; +#endif + } + } else if (seed.payload.parent_is_root && seed.payload.parent_is_chain && !seed.payload.is_trivial_chain) { + //The parent chain is a child of the root + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = distance_index.get_net_handle_from_values( + 0, SnarlDistanceIndex::START_END, SnarlDistanceIndex::ROOT_HANDLE); +#ifdef DEBUG_CLUSTER + cerr << "PARENT: " << distance_index.net_handle_as_string(parent_problem.parent_net_handle) << endl; +#endif + } } @@ -463,28 +520,38 @@ cerr << "Add all seeds to nodes: " << endl; //Otherwise, the parent is the root or a root snarl, and the node_net_handle is a node + + //Create a new SnarlTreeNodeProblem for this node bool new_node = false; - if (clustering_problem.net_handle_to_node_problem_index.count(node_code.net_handle) == 0) { + if (clustering_problem.net_handle_to_node_problem_index.count(seed.payload.node_handle) == 0) { new_node = true; - 
clustering_problem.net_handle_to_node_problem_index.emplace(node_code.net_handle, + clustering_problem.net_handle_to_node_problem_index.emplace(seed.payload.node_handle, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), + clustering_problem.all_node_problems.emplace_back(seed.payload.node_handle, clustering_problem.all_seeds->size(), clustering_problem.seed_count_prefix_sum.back(), - seed.unpacked_zipcode, seed.seed->zipcode.max_depth()); + false, seed.payload.node_length, std::numeric_limits::max(), + std::numeric_limits::max(), + &seed, seed.seed->zipcode.max_depth()); + //Remember the parent of this node, since it will be needed to remember the root snarl later + clustering_problem.all_node_problems.back().parent_net_handle = seed.payload.parent_handle; } - seed.distance_left = node_code.is_reversed != is_rev(pos) ? node_code.length- get_offset(pos) : get_offset(pos) + 1; - seed.distance_right = node_code.is_reversed != is_rev(pos) ? get_offset(pos) + 1 : node_code.length- get_offset(pos); + seed.distance_left = seed.payload.is_reversed != is_rev(pos) ? seed.payload.node_length- get_offset(pos) : get_offset(pos) + 1; + seed.distance_right = seed.payload.is_reversed != is_rev(pos) ? 
get_offset(pos) + 1 : seed.payload.node_length- get_offset(pos); - SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(node_code.net_handle)); + SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at(clustering_problem.net_handle_to_node_problem_index.at(seed.payload.node_handle)); node_problem.children.emplace_back(); - node_problem.children.back().unpacked_zipcode = &seed.unpacked_zipcode; - node_problem.children.back().zipcode_depth = seed.unpacked_zipcode.size()-1; + node_problem.children.back().net_handle = seed.payload.node_handle; node_problem.children.back().seed_indices = {read_num, i}; + node_problem.children.back().is_seed = true; + node_problem.children.back().has_chain_values = true; + node_problem.children.back().chain_component = seed.payload.chain_component; + node_problem.children.back().prefix_sum = SnarlDistanceIndex::sum(seed.distance_left, + seed.payload.prefix_sum); @@ -502,7 +569,7 @@ cerr << "Add all seeds to nodes: " << endl; //Go through and cluster nodes that are children of the root or root snarls for(const SeedCache* seed : nodes_to_cluster_now) { - const net_handle_t& node_net_handle = seed->unpacked_zipcode.back().net_handle; + const net_handle_t& node_net_handle = seed->payload.node_handle; SnarlTreeNodeProblem& node_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(node_net_handle)); @@ -511,16 +578,16 @@ cerr << "Add all seeds to nodes: " << endl; //if current_iterator is the last thing in the list and the same node cluster_one_node(clustering_problem, &node_problem); - net_handle_t parent = node_problem.unpacked_zipcode[node_problem.zipcode_depth-1].net_handle; + net_handle_t parent = node_problem.parent_net_handle; - if (seed->unpacked_zipcode[0].code_type == ZipCode::ROOT_SNARL) { + if (seed->payload.parent_type == ZipCode::ROOT_SNARL) { //If this is a root snarl, then 
remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - seed->unpacked_zipcode, 0); + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + seed, 0); } clustering_problem.root_children.emplace_back(parent, node_net_handle); } else { @@ -548,7 +615,7 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); #ifdef DEBUG_CLUSTER - cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle) << endl; + cerr << "Cluster one snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; #endif //Cluster the snarlindex]; @@ -568,27 +635,38 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster //Make a new SnarlTreeNodeProblem for the parent - net_handle_t snarl_parent = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth-1].net_handle; + net_handle_t snarl_parent = snarl_problem->has_parent_handle + ? 
snarl_problem->parent_net_handle + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(snarl_parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - snarl_problem->unpacked_zipcode, snarl_problem->zipcode_depth-1); + clustering_problem.all_node_problems.emplace_back(snarl_parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + snarl_problem->seed, snarl_problem->zipcode_depth-1); //Because a new SnarlTreeNodeProblem got added, the snarl_problem pointer might have moved SnarlTreeNodeProblem& snarl_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_handle)); + if (snarl_problem.has_grandparent_handle) { + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = snarl_problem.grandparent_net_handle; + } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(snarl_parent)); //Add the snarl to its parent chain parent_problem.children.emplace_back(); - parent_problem.children.back().unpacked_zipcode = &snarl_problem->unpacked_zipcode; - parent_problem.children.back().zipcode_depth = snarl_problem->zipcode_depth; + parent_problem.children.back().net_handle = snarl_handle; + parent_problem.children.back().is_seed = false; + 
parent_problem.children.back().has_chain_values = true; + parent_problem.children.back().chain_component = snarl_problem->chain_component_start; + parent_problem.children.back().prefix_sum = snarl_problem->prefix_sum_value; if (new_parent) { //And the parent chain to the things to be clustered next @@ -624,34 +702,36 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster #ifdef DEBUG_CLUSTER cerr << "Cluster one chain " << distance_index.net_handle_as_string(chain_handle) << " with " << chain_problem->children.size() << " children" << endl; for (auto& x : chain_problem->children) { - cerr << "\t" << distance_index.net_handle_as_string(x.unpacked_zipcode->at(x.zipcode_depth).net_handle) << endl; + cerr << "\t" << distance_index.net_handle_as_string(x.net_handle) << endl; } #endif - net_handle_t parent = chain_problem->zipcode_depth == 0 - ? distance_index.get_root() - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].net_handle; + net_handle_t parent = chain_problem->has_parent_handle + ? chain_problem->parent_net_handle + : (chain_problem->zipcode_depth == 0 + ? 
distance_index.get_root() + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == distance_index.start_end_traversal_of(parent)); + assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); } #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 ? ZipCode::EMPTY - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].code_type; + : chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1); bool is_root = parent_type == ZipCode::EMPTY || parent_type == ZipCode::ROOT_SNARL; bool is_root_snarl = parent_type == ZipCode::ROOT_SNARL; //This is used to determine if we need to remember the distances to the ends of the chain, since //for a top level chain it doesn't matter bool is_top_level_chain = (depth == 1) && !is_root_snarl && - chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left == std::numeric_limits::max() && - chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right == std::numeric_limits::max() && - chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right == std::numeric_limits::max() && - !chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain; + !chain_problem->seed->seed->zipcode.is_externally_start_start_connected(0) && + !chain_problem->seed->seed->zipcode.is_externally_start_end_connected(0) && + 
!chain_problem->seed->seed->zipcode.is_externally_end_end_connected(0) && + !chain_problem->seed->seed->zipcode.get_is_looping_chain(0); // Compute the clusters for the chain cluster_one_chain(clustering_problem, chain_problem, is_top_level_chain); @@ -663,9 +743,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the parent is a root snarl, then remember it to cluster in the root if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - chain_problem->unpacked_zipcode, chain_problem->zipcode_depth-1); + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); } clustering_problem.root_children.emplace_back(parent, chain_handle); } else if (!is_top_level_chain) { @@ -680,129 +760,119 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster //If the child of the snarl child (a node or snarl in the chain) was reversed, then we got a backwards handle //to the child when getting the distances - bool snarl_child_is_rev = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth-1].code_type == ZipCode::REGULAR_SNARL - || chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 + bool snarl_child_is_rev = chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) == ZipCode::REGULAR_SNARL + || chain_problem->zipcode_depth == chain_problem->seed->seed->zipcode.max_depth() ? 
false - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth+1].is_reversed; + : chain_problem->seed->seed->zipcode.get_is_reversed_in_parent(chain_problem->zipcode_depth+1); - //TODO: Double check these distances -// chain_problem->distance_start_left = snarl_child_is_rev -// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) -// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); -// -// chain_problem->distance_start_right = snarl_child_is_rev -// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) -// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); -// -// chain_problem->distance_end_left = snarl_child_is_rev -// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) -// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); -// -// chain_problem->distance_end_right = snarl_child_is_rev -// ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) -// : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); -// chain_problem->distance_start_left = snarl_child_is_rev - ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left; + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true); chain_problem->distance_start_right = snarl_child_is_rev - ? 
chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_left - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_start_right; + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, true, false); chain_problem->distance_end_left = snarl_child_is_rev - ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_left; + ? chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true); chain_problem->distance_end_right = snarl_child_is_rev - ? chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_left - : chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].distance_end_right; + ? 
chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, true) + : chain_problem->seed->seed->zipcode.get_distance_to_snarl_bound(chain_problem->zipcode_depth, false, false); - #ifdef debug_cluster - cerr << "for child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; - cerr << "for parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; - cerr << "zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " + #ifdef DEBUG_CLUSTER + cerr << "For child type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth) << endl; + cerr << "For parent type " << chain_problem->seed->seed->zipcode.get_code_type(chain_problem->zipcode_depth-1) << endl; + cerr << "Zipcode thinks we're looking at " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth, &distance_index)) << " and " << distance_index.net_handle_as_string(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), chain_problem->zipcode_depth-1, &distance_index))<< endl; - cerr << "check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; + cerr << "Check distances from " << distance_index.net_handle_as_string(chain_handle) << " to parent " << distance_index.net_handle_as_string(parent) << endl; cerr << "\t guessed: " << chain_problem->distance_start_left << " " << chain_problem->distance_start_right << " " << chain_problem->distance_end_left << " " << chain_problem->distance_end_right << endl; cerr << "\t should be " << distance_index.distance_to_parent_bound(parent, 
true, distance_index.flip(chain_handle), - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle)) << " " + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " << distance_index.distance_to_parent_bound(parent, true, chain_handle, - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle)) << " " + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " << distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle)) << " " + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << " " << distance_index.distance_to_parent_bound(parent, false, chain_handle, - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? 
snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle)) << endl; + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE)) << endl; assert(chain_problem->distance_start_left == distance_index.distance_to_parent_bound(parent, true, distance_index.flip(chain_handle), - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle))); + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); assert(chain_problem->distance_start_right == distance_index.distance_to_parent_bound(parent, true, chain_handle, - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle))); + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); assert(chain_problem->distance_end_left == distance_index.distance_to_parent_bound(parent, false, distance_index.flip(chain_handle), - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? 
snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle))); + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); assert(chain_problem->distance_end_right == distance_index.distance_to_parent_bound(parent, false, chain_handle, - std::make_tuple(snarldistanceindex::snarl_handle, - snarldistanceindex::snarl_handle, - (chain_problem->is_trivial_chain ? snarldistanceindex::node_handle - : snarldistanceindex::chain_handle), - snarldistanceindex::chain_handle))); + std::make_tuple(SnarlDistanceIndex::SNARL_HANDLE, + SnarlDistanceIndex::SNARL_HANDLE, + (chain_problem->is_trivial_chain ? SnarlDistanceIndex::NODE_HANDLE + : SnarlDistanceIndex::CHAIN_HANDLE), + SnarlDistanceIndex::CHAIN_HANDLE))); #endif - //and add it to its parent snarl + //And add it to its parent snarl bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(parent) == 0) { new_parent = true; clustering_problem.net_handle_to_node_problem_index.emplace(parent, clustering_problem.all_node_problems.size()); - clustering_problem.all_node_problems.emplace_back(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - chain_problem->unpacked_zipcode, chain_problem->zipcode_depth-1); - //because a new snarltreenodeproblem got added, the old chain_problem pointer might have moved + clustering_problem.all_node_problems.emplace_back(parent, clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + chain_problem->seed, chain_problem->zipcode_depth-1); + //Because a new SnarlTreeNodeProblem got added, the old chain_problem pointer might have moved SnarlTreeNodeProblem& chain_problem = clustering_problem.all_node_problems.at( 
clustering_problem.net_handle_to_node_problem_index.at(chain_handle)); + if (chain_problem.has_grandparent_handle) { + SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(parent)); + parent_problem.has_parent_handle = true; + parent_problem.parent_net_handle = chain_problem.grandparent_net_handle; + } } SnarlTreeNodeProblem& parent_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(parent)); parent_problem.children.emplace_back(); - parent_problem.children.back().unpacked_zipcode = &chain_problem->unpacked_zipcode; - parent_problem.children.back().zipcode_depth = chain_problem->zipcode_depth; + parent_problem.children.back().net_handle = chain_handle; + parent_problem.children.back().is_seed = false; + parent_problem.children.back().has_chain_values = false; if (new_parent) { @@ -816,14 +886,14 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster void SnarlDistanceIndexClusterer::cluster_one_node( ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* node_problem) const { -#ifdef debug_cluster - cerr << "finding clusters on node " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; +#ifdef DEBUG_CLUSTER + cerr << "Finding clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; #endif - size_t node_length = node_problem->unpacked_zipcode[node_problem->zipcode_depth].length; + size_t node_length = node_problem->node_length; - //sort the seeds on the node + //Sort the seeds on the node std::sort(node_problem->children.begin(), node_problem->children.end(), [&](const SnarlTreeNodeProblem::SnarlTreeChild& a, const SnarlTreeNodeProblem::SnarlTreeChild& b) { return clustering_problem.all_seeds->at(a.seed_indices.first)->at(a.seed_indices.second).distance_left @@ -833,9 +903,9 @@ void 
SnarlDistanceIndexClusterer::cluster_one_node( cluster_seeds_on_linear_structure(clustering_problem, node_problem, node_length, false, false); -#ifdef debug_cluster +#ifdef DEBUG_CLUSTER - cerr << "\tfound read clusters on node " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; + cerr << "\tFound read clusters on node " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; bool got_left = false; bool got_right = false; @@ -880,26 +950,26 @@ void SnarlDistanceIndexClusterer::cluster_one_node( }; -//go through pairs of clusters of the two children and see which ones can be combined -//the first child may not have been seen before, so all of it's clusters may be added to the parent, then +//Go through pairs of clusters of the two children and see which ones can be combined +//The first child may not have been seen before, so all of it's clusters may be added to the parent, then //anything that was combined gets removed and only the cluster heads get added. 
-//for the second child, everything is already in the parent so remove ones that were combined then +//For the second child, everything is already in the parent so remove ones that were combined then //add the head of the combined clusters // -//if this is the first time we see the first child, then also update the best distances to the ends of the +//If this is the first time we see the first child, then also update the best distances to the ends of the //parent for the parent clusters void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structures(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem1, SnarlTreeNodeProblem* child_problem2, SnarlTreeNodeProblem* parent_problem, const vector> & child_distances, bool is_root, bool first_child) const { -#ifdef debug_cluster - cerr << "\tcompare " << distance_index.net_handle_as_string(child_problem1->unpacked_zipcode[child_problem1_problem->zipcode_depth].net_handle) - << " and " << distance_index.net_handle_as_string(child_problem2->unpacked_zipcode[child_problem2_problem->zipcode_depth].net_handle) - << " which are children of " << distance_index.net_handle_as_string(parent_problem->unpacked_zipcode[parent_problem->zipcode_depth].net_handle) << endl; +#ifdef DEBUG_CLUSTER + cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem1->containing_net_handle) + << " and " << distance_index.net_handle_as_string(child_problem2->containing_net_handle) + << " which are children of " << distance_index.net_handle_as_string(parent_problem->containing_net_handle) << endl; #endif - const net_handle_t& parent_handle = parent_problem->unpacked_zipcode[parent_problem->zipcode_depth].net_handle; - const net_handle_t& child_handle1 = child_problem1->unpacked_zipcode[child_problem1->zipcode_depth].net_handle; - const net_handle_t& child_handle2 = child_problem2->unpacked_zipcode[child_problem2->zipcode_depth].net_handle; + net_handle_t& parent_handle = 
parent_problem->containing_net_handle; + net_handle_t& child_handle1 = child_problem1->containing_net_handle; + net_handle_t& child_handle2 = child_problem2->containing_net_handle; @@ -1364,18 +1434,26 @@ void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_child_structure void SnarlDistanceIndexClusterer::compare_and_combine_cluster_on_one_child(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* child_problem) const { #ifdef DEBUG_CLUSTER - cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->unpacked_zipcode[child_problem->zipcode_depth].net_handle) + cerr << "\tCompare " << distance_index.net_handle_as_string(child_problem->containing_net_handle) << " to itself in the root" << endl; #endif - const net_handle_t& handle = child_problem->unpacked_zipcode[child_problem->zipcode_depth].net_handle; + net_handle_t& handle = child_problem->containing_net_handle; //Get the distances between the two sides of the child - size_t distance_left_left = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_start_left; - size_t distance_left_right = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_start_right; - size_t distance_right_right = child_problem->unpacked_zipcode[child_problem->zipcode_depth].distance_end_right; - + size_t distance_left_left = + child_problem->seed->seed->zipcode.is_externally_start_start_connected(child_problem->zipcode_depth) + ? 0 + : std::numeric_limits::max(); + size_t distance_left_right = + child_problem->seed->seed->zipcode.is_externally_start_end_connected(child_problem->zipcode_depth) + ? 0 + : std::numeric_limits::max(); + size_t distance_right_right = + child_problem->seed->seed->zipcode.is_externally_end_end_connected(child_problem->zipcode_depth) + ? 
0 + : std::numeric_limits::max(); if (distance_left_left == std::numeric_limits::max() && distance_left_right == std::numeric_limits::max() && distance_right_right == std::numeric_limits::max()) { @@ -1509,17 +1587,17 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the clusters on this snarl, assumes that all of the snarls children have been clustered already. #ifdef DEBUG_CLUSTER - cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle) << endl; + cerr << "Finding clusters on snarl " << distance_index.net_handle_as_string(snarl_problem->containing_net_handle) << endl; #endif snarl_problem->set_snarl_values(distance_index); - const net_handle_t& snarl_handle = snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].net_handle; + net_handle_t& snarl_handle = snarl_problem->containing_net_handle; //If the snarl is a simple snarl, then there is no clustering to do because there is no path between //the nodes. Otherwise, compare the children of the snarl - if (snarl_problem->unpacked_zipcode[snarl_problem->zipcode_depth].code_type != ZipCode::REGULAR_SNARL) { + if (snarl_problem->seed->seed->zipcode.get_code_type(snarl_problem->zipcode_depth) != ZipCode::REGULAR_SNARL) { //If this isn't a simple snarl //Get the children of this snarl and their clusters @@ -1533,7 +1611,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Go through each child node of the netgraph SnarlTreeNodeProblem& child_problem_i = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].unpacked_zipcode->at(snarl_problem->children[i].zipcode_depth).net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[i].net_handle)); if (child_problem_i.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit @@ -1561,7 +1639,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Get the other node and its clusters SnarlTreeNodeProblem& child_problem_j = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].unpacked_zipcode->at(snarl_problem->children[j].zipcode_depth).net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(snarl_problem->children[j].net_handle)); if (child_problem_j.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && child_problem_j.fragment_best_right > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { @@ -1570,8 +1648,8 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin #ifdef DEBUG_CLUSTER cerr << "\tComparing two children of " << distance_index.net_handle_as_string(snarl_handle) << ": " - << distance_index.net_handle_as_string(child_problem_i.unpacked_zipcode[child_problem_i.zipcode_depth].net_handle) << " and " - << distance_index.net_handle_as_string(child_problem_j.unpacked_zipcode[child_problem_j.zipcode_depth].net_handle) << endl; + << distance_index.net_handle_as_string(child_problem_i.containing_net_handle) << " and " + << distance_index.net_handle_as_string(child_problem_j.containing_net_handle) << endl; @@ -1588,13 +1666,13 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (SnarlTreeNodeProblem::SnarlTreeChild& node_problem : snarl_problem->children) { //Go through each child node of the netgraph and add its clusters to the snarl SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - 
clustering_problem.net_handle_to_node_problem_index.at(node_problem.unpacked_zipcode->at(node_problem.zipcode_depth).net_handle)); + clustering_problem.net_handle_to_node_problem_index.at(node_problem.net_handle)); //Add the cluster heads //May need to flip the distances for (auto& cluster_head : child_problem.read_cluster_heads) { snarl_problem->read_cluster_heads.emplace(cluster_head); - if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { + if (child_problem.is_reversed_in_parent) { size_t old_left = clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left; clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_left = clustering_problem.all_seeds->at(cluster_head.first)->at(cluster_head.second).distance_right; @@ -1606,11 +1684,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin //Update the distances for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { if (read_num == 0) { - if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { - size_t old_best_right = snarl_problem->read_best_right.first; + if (child_problem.is_reversed_in_parent) { snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_left.first, child_problem.read_best_left.first); - snarl_problem->read_best_left.first = std::min(old_best_right, + snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_right.first, child_problem.read_best_right.first); } else { snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_left.first, @@ -1619,11 +1696,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin child_problem.read_best_right.first); } } else { - if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { - size_t old_best_right = snarl_problem->read_best_right.second; + if (child_problem.is_reversed_in_parent) { 
snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_left.second, child_problem.read_best_left.second); - snarl_problem->read_best_left.second = std::min(old_best_right, + snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_right.second, child_problem.read_best_right.second); } else { snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_left.second, @@ -1633,11 +1709,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } } - if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed) { - size_t old_best_right = snarl_problem->fragment_best_right; + if (child_problem.is_reversed_in_parent) { snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_left, child_problem.fragment_best_left); - snarl_problem->fragment_best_left = std::min(old_best_right, + snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_right, child_problem.fragment_best_right); } else { snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_left, @@ -1704,7 +1779,7 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clustering_problem, SnarlTreeNodeProblem* chain_problem, bool is_top_level_chain) const { #ifdef DEBUG_CLUSTERS - assert(distance_index.is_chain(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle)); + assert(distance_index.is_chain(chain_problem->containing_net_handle)); //if (only_seeds) { // for (auto child : children_in_chain) { // assert(!std::get<3>(child)); @@ -1722,63 +1797,70 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //First, sort the children of the chain //If there is only one child, check if it's a seeed - bool only_seeds=chain_problem->children.size() == 1 ? 
chain_problem->children.front().zipcode_depth == chain_problem->children.front().unpacked_zipcode->size()-1 + bool only_seeds=chain_problem->children.size() == 1 ? chain_problem->children.front().is_seed : true; std::sort(chain_problem->children.begin(), chain_problem->children.end(), [&] (SnarlTreeNodeProblem::SnarlTreeChild& child1, SnarlTreeNodeProblem::SnarlTreeChild& child2) { - - const zip_code_t& child1_code = child1.unpacked_zipcode->at(child1.zipcode_depth); - const zip_code_t& child2_code = child2.unpacked_zipcode->at(child2.zipcode_depth); - - bool child1_is_seed = child1.zipcode_depth == child1.unpacked_zipcode->size()-1; - bool child2_is_seed = child2.zipcode_depth == child2.unpacked_zipcode->size()-1; - - if (!child1_is_seed || !child2_is_seed) { + if (!child1.is_seed || !child2.is_seed) { only_seeds = false; } - - size_t prefix_sum1 = child1_is_seed ? SnarlDistanceIndex::sum(clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).distance_left, - child1_code.prefix_sum_or_snarl_rank) - : child1_code.prefix_sum_or_snarl_rank; - size_t prefix_sum2 = child2_is_seed ? 
SnarlDistanceIndex::sum(clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).distance_left, - child2_code.prefix_sum_or_snarl_rank) - : child2_code.prefix_sum_or_snarl_rank; - - if (child1_code.chain_component != child2_code.chain_component) { - return child1_code.chain_component < child2_code.chain_component; - } else if (prefix_sum1 == prefix_sum2 && !(child1_is_seed && child2_is_seed)) { + if (!child1.is_seed && !child1.has_chain_values) { + //If child1 is a snarl and hasn't had its values set yet + child1.chain_component = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).chain_component_start; + child1.prefix_sum = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(child1.net_handle)).prefix_sum_value; + child2.has_chain_values = true; + } + if (!child2.is_seed && !child2.has_chain_values) { + //If child2 is a snarl and hasn't had its values set yet + child2.chain_component = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).chain_component_start; + child2.prefix_sum = clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(child2.net_handle)).prefix_sum_value; + child2.has_chain_values = true; + } + if (child1.chain_component != child2.chain_component) { + return child1.chain_component < child2.chain_component; + } else if (child1.prefix_sum == child2.prefix_sum && !(child1.is_seed && child2.is_seed)) { //Get the prefix sum values not including the offset in the positions - prefix_sum1 = child1_is_seed - ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank - : child1_code.prefix_sum_or_snarl_rank; - prefix_sum2 = child2_is_seed - ? 
clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).unpacked_zipcode.back().prefix_sum_or_snarl_rank - : child2_code.prefix_sum_or_snarl_rank; + size_t prefix_sum1 = child1.is_seed + ? clustering_problem.all_seeds->at(child1.seed_indices.first)->at(child1.seed_indices.second).payload.prefix_sum + : child1.prefix_sum; + size_t prefix_sum2 = child2.is_seed + ? clustering_problem.all_seeds->at(child2.seed_indices.first)->at(child2.seed_indices.second).payload.prefix_sum + : child2.prefix_sum; if (prefix_sum1 == prefix_sum2){ - return child2_is_seed; + return child2.is_seed; } else { return prefix_sum1 < prefix_sum2; } } else { - return prefix_sum1 < prefix_sum2; + return child1.prefix_sum < child2.prefix_sum; } }); - const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; + net_handle_t& chain_handle = chain_problem->containing_net_handle; + + if (!chain_problem->is_trivial_chain && ! is_top_level_chain) { + //If we need it, get the values from the distance index: + //is_looping_chain, node_length, the end boundary node, and the end component + //THese only get used if we need the distances to the ends of the chain + chain_problem->set_chain_values(distance_index); + } - if (only_seeds && !chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain && - (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component == 0 - || chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component == std::numeric_limits::max())) { + if (only_seeds && !chain_problem->is_looping_chain && + (chain_problem->chain_component_end == 0 + || chain_problem->chain_component_end == std::numeric_limits::max())) { //If there are only seeds in the chain (and the chain doesn't loop and isn't a multicomponent chain), //then cluster by walking through the seeds //This also does the work of clustering a trivial chain (which is just a node), which should be 
the same amount of work as using cluster_one_node - cluster_seeds_on_linear_structure(clustering_problem, chain_problem, chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, - !(chain_problem->zipcode_depth == chain_problem->unpacked_zipcode.size()-1 && chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].code_type == ZipCode::CHAIN), - is_top_level_chain); + cluster_seeds_on_linear_structure(clustering_problem, chain_problem, chain_problem->node_length, + !chain_problem->is_trivial_chain, is_top_level_chain); #ifdef DEBUG_CLUSTER cerr << "\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; @@ -1861,23 +1943,21 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //The last child we saw SnarlTreeNodeProblem::SnarlTreeChild& last_child = chain_problem->children.front(); - const SnarlTreeNodeProblem* last_child_problem = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE - ? nullptr - : &clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle)); //And values we need to save from the last child //If the last child is a snarl, get it from the SnarlTreeNodeProblem otherwise from the seed's cache - size_t last_prefix_sum = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE + size_t last_prefix_sum = last_child.is_seed ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).distance_left - : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].prefix_sum_or_snarl_rank; -//TODO: Get both from problem? - size_t last_length = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE - ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().length - : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].length; - size_t last_chain_component_end = last_child.unpacked_zipcode->at(last_child.zipcode_depth).code_type == ZipCode::NODE - ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).unpacked_zipcode.back().chain_component - : last_child_problem->unpacked_zipcode[last_child_problem->zipcode_depth].chain_component; //This is initialized to the start of the snarl + : clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; + size_t last_length = last_child.is_seed + ? clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.node_length + : clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).node_length; + size_t last_chain_component_end = last_child.is_seed + ? 
clustering_problem.all_seeds->at(last_child.seed_indices.first)->at(last_child.seed_indices.second).payload.chain_component + : clustering_problem.all_node_problems.at( + clustering_problem.net_handle_to_node_problem_index.at(last_child.net_handle)).chain_component_start; //These are clusters that we don't want to consider as we walk through the chain but that //we want to remember after we're done with the chain because the left distance is small @@ -1899,7 +1979,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin SnarlTreeNodeProblem::SnarlTreeChild& child = chain_problem->children[child_i]; - if (child.unpacked_zipcode->at(child.zipcode_depth).code_type != ZipCode::NODE){ + if (!child.is_seed){ //If this is a snarl, then cluster the children here add_snarl_to_chain_problem(clustering_problem, chain_problem, last_child, last_prefix_sum, last_length, @@ -1913,7 +1993,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin } #ifdef DEBUG_CLUSTER - cerr << "\tintermediate clusters on " << distance_index.net_handle_as_string(chain_handle) << " after child " << distance_index.net_handle_as_string(child.unpacked_zipcode->at(child.zipcode_depth).net_handle) << endl; + cerr << "\tintermediate clusters on " << distance_index.net_handle_as_string(chain_handle) << " after child " << distance_index.net_handle_as_string(child.net_handle) << endl; cerr << "\t with best left and right values: " << chain_problem->fragment_best_left << " " << chain_problem->fragment_best_right << endl; bool got_left = false; @@ -1974,7 +2054,7 @@ void SnarlDistanceIndexClusterer::cluster_one_chain(ClusteringProblem& clusterin //If the chain loops, then we also have to compare the first thing we saw to the last things - if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].is_looping_chain){ + if (chain_problem->is_looping_chain){ #ifdef DEBUG_CLUSTER cerr << "Check connectivity around a looping chain" << endl; cerr << 
"\tFound clusters on " << distance_index.net_handle_as_string(chain_handle) << endl; @@ -2117,7 +2197,7 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c const size_t& read_num = current_child.seed_indices.first; const size_t& cluster_num = current_child.seed_indices.second; - const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; + net_handle_t& chain_handle = chain_problem->containing_net_handle; SeedCache& current_child_seed = clustering_problem.all_seeds->at(read_num)->at(cluster_num); /* Get a bunch of distances from the current child that will be used to calculate distance @@ -2132,20 +2212,20 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if (!is_first_child) { //If this isn't the first child we're looking at - if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) { + if (last_child.net_handle == current_child.net_handle) { //This can happen if the last thing was also a seed on the same node distance_from_last_child_to_current_child = 0; - } else if ( last_chain_component_end == current_child_seed.unpacked_zipcode.back().chain_component) { + } else if ( last_chain_component_end == current_child_seed.payload.chain_component) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max()) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank; + distance_from_last_child_to_current_child = current_child_seed.payload.prefix_sum; } else { size_t 
distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); //Distance is the current node's prefix sum minus the distance from the start of the chain to the last node - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank, + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(current_child_seed.payload.prefix_sum, distance_from_chain_start_to_last_node); } } @@ -2162,26 +2242,26 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component != current_child_seed.unpacked_zipcode.back().chain_component) { + } else if (chain_problem->chain_component_end != current_child_seed.payload.chain_component) { //If they aren't in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); } else { //Length of the chain - (prefix sum + node length of the current node) - distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, - SnarlDistanceIndex::sum(current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank, - current_child_seed.unpacked_zipcode.back().length)); + distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, + SnarlDistanceIndex::sum(current_child_seed.payload.prefix_sum, + current_child_seed.payload.node_length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; - cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.unpacked_zipcode.back().chain_component != 0 ? 
std::numeric_limits::max() : current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank) << endl; + cerr << "\tDistance from start of chain to the left side of this one: " << (current_child_seed.payload.chain_component != 0 ? std::numeric_limits::max() : current_child_seed.payload.prefix_sum) << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif - if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle != current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle && + if (last_child.net_handle != current_child.net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, chain_problem->fragment_best_right) > (clustering_problem.fragment_distance_limit == 0 ? clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -2211,13 +2291,13 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //The distance left and right of the seed are currently oriented relative to the chain //The current left distance is infinite if it is not in the first component of a multicomponent chain - if (current_child_seed.unpacked_zipcode.back().chain_component != 0) { + if (current_child_seed.payload.chain_component != 0) { //If this node isn't in the first component of the chain current_child_seed.distance_left = std::numeric_limits::max(); } else { //Prefix sum + offset of the seed in the node current_child_seed.distance_left = SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank); + current_child_seed.payload.prefix_sum); } current_child_seed.distance_right = SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain); @@ -2261,17 +2341,17 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c size_t 
distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle ? 0 - : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.unpacked_zipcode.back().length)); + (last_child.net_handle == current_child.net_handle ? 0 + : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, current_child_seed.payload.node_length)); //The new distances from this child to the start of the chain and the end of this child (or the end of the chain if it's the last child) //Left distance is the prefix sum (or inf if the node isn't in the first component of the chain) + offset of seed in node //Right distance is the right offst of the seed in the node + the distance from the end of the node to the end of the chain // (or 0 if it isn't the last thing in the chain) pair new_distances = make_pair( - current_child_seed.unpacked_zipcode.back().chain_component != 0 ? std::numeric_limits::max() + current_child_seed.payload.chain_component != 0 ? 
std::numeric_limits::max() : SnarlDistanceIndex::sum(current_child_seed.distance_left, - current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank), + current_child_seed.payload.prefix_sum), SnarlDistanceIndex::sum(current_child_seed.distance_right, distance_from_current_end_to_end_of_chain)); @@ -2302,11 +2382,11 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c distance_from_last_child_to_current_child), current_child_seed.distance_left), 1); - if (!is_first_child && last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) { + if (!is_first_child && last_child.net_handle == current_child.net_handle) { //If the last child was the same as this child (seeds on the same node), //then the distances right are including the current node, so subtract //the length of this node - distance_between -= current_child_seed.unpacked_zipcode.back().length; + distance_between -= current_child_seed.payload.node_length; } #ifdef DEBUG_CLUSTER @@ -2415,9 +2495,9 @@ void SnarlDistanceIndexClusterer::add_seed_to_chain_problem(ClusteringProblem& c //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = current_child_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank; - last_length = current_child_seed.unpacked_zipcode.back().length; - last_chain_component_end = current_child_seed.unpacked_zipcode.back().chain_component; + last_prefix_sum = current_child_seed.payload.prefix_sum; + last_length = current_child_seed.payload.node_length; + last_chain_component_end = current_child_seed.payload.chain_component; } @@ -2449,7 +2529,6 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& vector> to_erase; to_erase.reserve(child_problem.read_cluster_heads.size()); - for (auto& child_cluster_head : child_problem.read_cluster_heads) { //Go through each of the clusters on this child size_t read_num = 
child_cluster_head.first; @@ -2457,10 +2536,8 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t old_left = clustering_problem.all_seeds->at(read_num)->at(cluster_num).distance_left; size_t old_right = clustering_problem.all_seeds->at(read_num)->at(cluster_num).distance_right; //Get the new best distances for the cluster considering chain loops - size_t updated_left = std::min(old_left, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_right, child_problem.loop_right), - child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); - size_t updated_right = std::min(old_right, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_left, child_problem.loop_left), - child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); + size_t updated_left = std::min(old_left, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_right, child_problem.loop_right), child_problem.node_length)); + size_t updated_right = std::min(old_right, SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(old_left, child_problem.loop_left), child_problem.node_length)); @@ -2576,10 +2653,10 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& }; - const net_handle_t& chain_handle = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].net_handle; + net_handle_t& chain_handle = chain_problem->containing_net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle)); - + clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); + //Skip this child if its seeds are all too far away bool skip_snarl = false; if (child_problem.fragment_best_left > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit) && @@ -2592,7 +2669,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& update_distances_on_same_child(child_problem); } #ifdef DEBUG_CLUSTER - cerr << "At child " << distance_index.net_handle_as_string(current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle) << endl; + cerr << "At child " << distance_index.net_handle_as_string(current_child.net_handle) << endl; #endif /* @@ -2607,16 +2684,15 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t distance_from_last_child_to_current_child = std::numeric_limits::max(); if (!is_first_child) { //If this isn't the first child we're looking at - if ( last_chain_component_end == - child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component ) { + if ( last_chain_component_end == child_problem.chain_component_start) { //If this child is in the same component as the last one if (last_length == std::numeric_limits::max() && last_chain_component_end ) { //If the last length is infinite, then is must be a snarl that is not start-end reachable, so the distance //from the last child is the same as the distance from the start of the chain (the start of this compnent) - distance_from_last_child_to_current_child = child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank; + distance_from_last_child_to_current_child = child_problem.prefix_sum_value; } else { size_t distance_from_chain_start_to_last_node = SnarlDistanceIndex::sum(last_prefix_sum,last_length); - distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank, + distance_from_last_child_to_current_child = SnarlDistanceIndex::minus(child_problem.prefix_sum_value, distance_from_chain_start_to_last_node); } } @@ -2631,9 +2707,9 @@ void 
SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& size_t distance_from_last_child_to_current_end = distance_from_last_child_to_current_child == std::numeric_limits::max() ? std::numeric_limits::max() : - (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle == current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle ? 0 + (last_child.net_handle == current_child.net_handle ? 0 : SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, - child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); + child_problem.node_length)); //The distance to add to get to the end of the chain. Only matters if this is the last thing in the chain //The distances will include the distance to the end of a trivial chain, @@ -2644,32 +2720,29 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& //If this isn't the last child in the chain, then we only want the distance to the end of the current child distance_from_current_end_to_end_of_chain = 0; - } else if (chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].chain_component != - child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component - + (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ? 
1 : 0)) { + } else if (chain_problem->chain_component_end != child_problem.chain_component_end) { //If it's not in the same component distance_from_current_end_to_end_of_chain = std::numeric_limits::max(); //TODO: Used to do this, I"m pretty sure I don't need to though - //distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.child_code->net_handle); - } else if (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ) { + //distance_index.distance_in_parent(chain_handle, chain_problem->end_in, current_child.net_handle); + } else if (child_problem.node_length == std::numeric_limits::max() ) { //If the node length is infinite, then it is a snarl that isn't start-end connected, so the start //and end of the snarl are in different components of the chain. Since it reached here, the end //node of the snarl is in the same component as the end of the chain, so the distance to the //end of the chain is just the length of the last component of the chain, which is //chain_problem.node_length - distance_from_current_end_to_end_of_chain = chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length; + distance_from_current_end_to_end_of_chain = chain_problem->node_length; } else { - distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->unpacked_zipcode[chain_problem->zipcode_depth].length, - SnarlDistanceIndex::sum(child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank, - child_problem.unpacked_zipcode[child_problem.zipcode_depth].length)); + distance_from_current_end_to_end_of_chain = SnarlDistanceIndex::minus(chain_problem->node_length, + SnarlDistanceIndex::sum(child_problem.prefix_sum_value, child_problem.node_length)); } #ifdef DEBUG_CLUSTER cerr << "\tDistance from last child to this one: " << distance_from_last_child_to_current_child << endl; -cerr << "\tDistance from start of chain to the left side of this one: " << 
(child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 - ? std::numeric_limits::max() : child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank) << endl; +cerr << "\tDistance from start of chain to the left side of this one: " << (child_problem.chain_component_start != 0 + ? std::numeric_limits::max() : child_problem.prefix_sum_value) << endl; cerr << "\tDistance from the last child to the right side of this one: " << distance_from_last_child_to_current_end << endl; cerr << "\tDistance to get to the end of the chain: " << distance_from_current_end_to_end_of_chain << endl; #endif @@ -2687,7 +2760,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //And one new fragment cluster size_t new_cluster_head_fragment = std::numeric_limits::max(); - bool child_is_reversed = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed; + bool child_is_reversed = child_problem.is_reversed_in_parent; //Remember the current best chain distances, and reset them to inf since we need to update them size_t old_best_right = std::move(chain_problem->fragment_best_right); @@ -2696,7 +2769,7 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e chain_problem->read_best_right = std::make_pair(std::numeric_limits::max(), std::numeric_limits::max()); - if (last_child.unpacked_zipcode->at(last_child.zipcode_depth).net_handle != current_child.unpacked_zipcode->at(current_child.zipcode_depth).net_handle && + if (last_child.net_handle != current_child.net_handle && SnarlDistanceIndex::sum(distance_from_last_child_to_current_child, old_best_right) > (clustering_problem.fragment_distance_limit == 0 ? 
clustering_problem.read_distance_limit : clustering_problem.fragment_distance_limit)) { #ifdef DEBUG_CLUSTER @@ -2727,15 +2800,15 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e size_t read_num = cluster_head.first; pair dists (clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_left, clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_right); - size_t dist_left = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed ? dists.second : dists.first; - size_t dist_right = child_problem.unpacked_zipcode[child_problem.zipcode_depth].is_reversed ? dists.first : dists.second; + size_t dist_left = child_problem.is_reversed_in_parent ? dists.second : dists.first; + size_t dist_right = child_problem.is_reversed_in_parent ? dists.first : dists.second; //Distances to the start of the chain, and the end of this node //If this is the last thing in the chain, then the distance to the end of the chain //If the snarl is isn't in the first component of the chain, then the left distance is infinite pair new_distances = make_pair( - child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 ? std::numeric_limits::max() - : SnarlDistanceIndex::sum(dist_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), + child_problem.chain_component_start != 0 ? std::numeric_limits::max() + : SnarlDistanceIndex::sum(dist_left, child_problem.prefix_sum_value), SnarlDistanceIndex::sum(dist_right, distance_from_current_end_to_end_of_chain)); //Add this to the chain @@ -2789,8 +2862,8 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //The new distances from this child to the start of the chain and the end of this child pair new_distances = make_pair( - child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component != 0 ? 
std::numeric_limits::max() - : SnarlDistanceIndex::sum(distance_left, child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank), + child_problem.chain_component_start != 0 ? std::numeric_limits::max() + : SnarlDistanceIndex::sum(distance_left, child_problem.prefix_sum_value), SnarlDistanceIndex::sum(distance_right, distance_from_current_end_to_end_of_chain)); if (distance_between <= clustering_problem.read_distance_limit) { @@ -2870,7 +2943,6 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e distance_from_last_child_to_current_child), current_distance_left), 1); - size_t distance_between_fragment = SnarlDistanceIndex::minus( SnarlDistanceIndex::sum(SnarlDistanceIndex::sum(chain_cluster_distances.second, distance_from_last_child_to_current_child), @@ -2967,10 +3039,9 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e //Update the last node we saw to this one last_child = current_child; - last_prefix_sum = child_problem.unpacked_zipcode[child_problem.zipcode_depth].prefix_sum_or_snarl_rank; - last_length = child_problem.unpacked_zipcode[child_problem.zipcode_depth].length; //The length of this snarl - last_chain_component_end = child_problem.unpacked_zipcode[child_problem.zipcode_depth].chain_component + - (child_problem.unpacked_zipcode[child_problem.zipcode_depth].length == std::numeric_limits::max() ? 
1 : 0);//The component of the end node of this snarl + last_prefix_sum = child_problem.prefix_sum_value; + last_length = child_problem.node_length; //The length of this snarl + last_chain_component_end = child_problem.chain_component_end;//The component of the end node of this snarl } //Cluster the root @@ -2984,6 +3055,12 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro return; } + //Keep track of all clusters on the root + SnarlTreeNodeProblem root_problem(distance_index.get_root(), clustering_problem.all_seeds->size(), + clustering_problem.seed_count_prefix_sum.back(), distance_index, + &clustering_problem.all_seeds->at(0)->front(), 0); + //TODO: ikd about the seed here + //Remember old distances vector> child_distances (clustering_problem.seed_count_prefix_sum.back(), make_pair(std::numeric_limits::max(), std::numeric_limits::max())); @@ -3011,14 +3088,8 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro #ifdef DEBUG_CLUSTER cerr << "Clustering root snarl " << distance_index.net_handle_as_string(parent) << " with " << children.size() << " chidlren" << endl; #endif - if (children.size() > 0) { - //Make a new problem just for the root snarl - SnarlTreeNodeProblem root_problem(clustering_problem.all_seeds->size(), - clustering_problem.seed_count_prefix_sum.back(), - clustering_problem.all_node_problems.at( - clustering_problem.net_handle_to_node_problem_index.at(children[0])).unpacked_zipcode, 0); - + if (children.size() > 0) { for (size_t i = 0; i < children.size() ; i++) { //Go through each child node of the netgraph @@ -3045,8 +3116,15 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro } } + } + current_parent = parent; + children.clear(); + children.emplace_back(parent_to_child.second); + } + + } #ifdef DEBUG_CLUSTER - cerr << "\tFound clusters on a root snarl" << endl; + cerr << "\tFound clusters on the root" << endl; for (size_t read_num = 0 ; read_num < 
clustering_problem.all_seeds->size() ; read_num++) { cerr << "\t for read num " << read_num << endl; for (pair c : root_problem.read_cluster_heads) { @@ -3066,13 +3144,6 @@ void SnarlDistanceIndexClusterer::cluster_root(ClusteringProblem& clustering_pro assert (group_id.second == clustering_problem.read_union_find[group_id.first].find_group(group_id.second)); } #endif - } - current_parent = parent; - children.clear(); - children.emplace_back(parent_to_child.second); - } - - } } @@ -3085,7 +3156,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr return; } #ifdef DEBUG_CLUSTER - cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << endl; + cerr << "Cluster " << node_problem->children.size() << " seeds on a single structure " << distance_index.net_handle_as_string(node_problem->containing_net_handle) << endl; cerr << "\t with node length " << structure_length << endl; #endif @@ -3107,7 +3178,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t dist_left = clustering_problem.all_seeds->at(read_num)->at(seed_i).distance_left; if (include_prefix_sum) { dist_left = SnarlDistanceIndex::sum(dist_left, - clustering_problem.all_seeds->at(read_num)->at(seed_i).unpacked_zipcode.back().prefix_sum_or_snarl_rank); + clustering_problem.all_seeds->at(read_num)->at(seed_i).payload.prefix_sum); } //Since we only stored the proper distance left for seeds on chains size_t dist_right = structure_length - dist_left + 1; @@ -3142,8 +3213,9 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr if (!skip_distances_to_ends) { const SeedCache& first_seed = clustering_problem.all_seeds->at(node_problem->children.front().seed_indices.first)->at(node_problem->children.front().seed_indices.second); + //TOOD: get_id is weird 
node_problem->fragment_best_left = SnarlDistanceIndex::sum(first_seed.distance_left, - include_prefix_sum ? first_seed.unpacked_zipcode.back().prefix_sum_or_snarl_rank : 0); + include_prefix_sum ? first_seed.payload.prefix_sum : 0); //Record the new cluster for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++ ) { @@ -3158,7 +3230,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr } #ifdef DEBUG_CLUSTER - cerr << "\t" << distance_index.net_handle_as_string(node_problem->unpacked_zipcode[node_problem->zipcode_depth].net_handle) << " is shorter than the distance limit so just one cluster" << endl; + cerr << "\t" << distance_index.net_handle_as_string(node_problem->containing_net_handle) << " is shorter than the distance limit so just one cluster" << endl; #endif return; @@ -3189,7 +3261,7 @@ void SnarlDistanceIndexClusterer::cluster_seeds_on_linear_structure(ClusteringPr size_t offset = clustering_problem.all_seeds->at(read_num)->at(seed_num).distance_left; if (include_prefix_sum) { offset = SnarlDistanceIndex::sum(offset, - clustering_problem.all_seeds->at(read_num)->at(seed_num).unpacked_zipcode.back().prefix_sum_or_snarl_rank); + clustering_problem.all_seeds->at(read_num)->at(seed_num).payload.prefix_sum); } //First and last offset and last cluster head for this read diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 034f98323c8..22f8478e6ff 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -99,19 +99,18 @@ class SnarlDistanceIndexClusterer { struct SeedCache{ const Seed* seed; + //TODO: I think I can skip the zipcode now since I have the payload + MIPayload payload; + //The distances to the left and right of whichever cluster this seed represents //This gets updated as clustering proceeds //For a seed in a chain, distance_left is the left of the chain, right is the distance //to the right side of the node, relative to the chain size_t 
distance_left = std::numeric_limits::max(); size_t distance_right = std::numeric_limits::max(); - - vector unpacked_zipcode; - - //Start with enough memory reserved for what is probably at least the max depth of the snarl tree - SeedCache() { - unpacked_zipcode.reserve(6); - } + //Values from the payload that we're saving + size_t payload_prefix_sum = std::numeric_limits::max(); + size_t payload_node_length = std::numeric_limits::max(); }; @@ -194,9 +193,21 @@ class SnarlDistanceIndexClusterer { struct SnarlTreeChild { //If the net_handle is a node, then the child is a seed, otherwise the handle //is used to find the problem - const vector* unpacked_zipcode; - size_t zipcode_depth; + net_handle_t net_handle; pair seed_indices; + + //The values used to sort the children of a chain + //Storing it here is faster than looking it up each time + size_t chain_component; + size_t prefix_sum; + //Is this child a seed + //This is redundant with net_handle because any net_handle_t that is a node will really be a seed, + //but it's faster than looking it up in the distance index + bool is_seed; + //Have chain_component and prefix_sum been set? 
+ //For a seed, it gets set when the child is made, otherwise the first time this + //child is seen when sorting + bool has_chain_values; }; //The children of this snarl tree node //Initially unsorted, sort before clustering for chains @@ -216,33 +227,90 @@ class SnarlDistanceIndexClusterer { size_t distance_end_left = std::numeric_limits::max(); size_t distance_end_right = std::numeric_limits::max(); - //One representative zipcode and the depth of whatever this is on - const vector& unpacked_zipcode; + //The snarl tree node that the clusters are on + net_handle_t containing_net_handle; + + + + + //The parent and grandparent of containing_net_handle, which might or might not be set + //This is just to store information from the minimizer cache + net_handle_t parent_net_handle; + net_handle_t grandparent_net_handle; + + //One representative seed so we can get the zipcode and stuff + const SeedCache* seed; size_t zipcode_depth; + //Minimum length of a node or snarl + //If it is a chain, then it is distance_index.chain_minimum_length(), which is + //the expected length for a normal chain, and the length of the + //last component for a multicomponent chain + size_t node_length = std::numeric_limits::max(); + size_t prefix_sum_value = std::numeric_limits::max(); //of node or first node in snarl + size_t chain_component_start = 0; //of node or start of snarl + size_t chain_component_end = 0; //of node or end of snarl + size_t loop_left = std::numeric_limits::max(); size_t loop_right = std::numeric_limits::max(); + //These are sometimes set if the value was in the cache + bool has_parent_handle = false; + bool has_grandparent_handle = false; + + //Only set this for nodes or snarls in chains + bool is_reversed_in_parent = false; + + bool is_trivial_chain = false; + bool is_looping_chain = false; //Constructor //read_count is the number of reads in a fragment (2 for paired end) - SnarlTreeNodeProblem(size_t read_count, size_t seed_count, - const vector& unpacked_zipcode, 
size_t zipcode_depth) : + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, const SnarlDistanceIndex& distance_index, + const SeedCache* seed, size_t zipcode_depth) : + containing_net_handle(std::move(net)), fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), - unpacked_zipcode(unpacked_zipcode), + seed(seed), + zipcode_depth(zipcode_depth) { + read_cluster_heads.reserve(seed_count); + } + //Constructor for a node or trivial chain, used to remember information from the cache + SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, + size_t node_length, size_t prefix_sum, size_t component, const SeedCache* seed, size_t zipcode_depth) : + containing_net_handle(net), + is_reversed_in_parent(is_reversed_in_parent), + node_length(node_length), + prefix_sum_value(prefix_sum), + chain_component_start(component), + chain_component_end(component), + fragment_best_left(std::numeric_limits::max()), fragment_best_right(std::numeric_limits::max()), + seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); - children.reserve(seed_count); } + //Set the values needed to cluster a chain + void set_chain_values(const SnarlDistanceIndex& distance_index) { + is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); + node_length = distance_index.chain_minimum_length(containing_net_handle); + chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); + is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); + } //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, false, true)); - net_handle_t end_in = 
distance_index.get_node_from_sentinel(distance_index.get_bound(unpacked_zipcode[zipcode_depth].net_handle, true, true)); + node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); + net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); + net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); + chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); + chain_component_end = node_length == std::numeric_limits::max() ? chain_component_start+1 + : chain_component_start; + prefix_sum_value = SnarlDistanceIndex::sum( + distance_index.get_prefix_sum_value(start_in), + distance_index.minimum_length(start_in)); loop_right = SnarlDistanceIndex::sum(distance_index.get_forward_loop_value(end_in), 2*distance_index.minimum_length(end_in)); //Distance to go backward in the chain and back diff --git a/src/subcommand/minimizer_main.cpp b/src/subcommand/minimizer_main.cpp index db0aab6c987..3f8ab7522f8 100644 --- a/src/subcommand/minimizer_main.cpp +++ b/src/subcommand/minimizer_main.cpp @@ -375,7 +375,7 @@ int main_minimizer(int argc, char** argv) { } if (distance_name.empty()) { gbwtgraph::index_haplotypes(gbz->graph, *index, [](const pos_t&) -> gbwtgraph::Payload { - return ZipCode::NO_PAYLOAD; + return MIPayload::NO_CODE; }); } else { gbwtgraph::index_haplotypes(gbz->graph, *index, [&](const pos_t& pos) -> gbwtgraph::Payload { @@ -397,7 +397,7 @@ int main_minimizer(int argc, char** argv) { cout << endl; #endif auto payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { //If the zipcode is small enough to store in the payload return payload; } else if (!zipcode_name.empty()) { @@ -421,7 +421,7 @@ int main_minimizer(int argc, char** argv) { } return {0, zip_index}; } else { - return ZipCode::NO_PAYLOAD; + return MIPayload::NO_CODE; } }); 
} diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index 520264d001f..c42ea1086a1 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -51,16 +51,6 @@ using namespace std; REQUIRE(zipcode.decoder.front().is_chain == 1); REQUIRE(zipcode.decoder.front().offset == 0); } - SECTION("unpacked zipcode") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - - vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); - REQUIRE(unpacked.size() == 1); - REQUIRE(unpacked[0].net_handle == distance_index.get_parent(distance_index.get_node_net_handle(n1->id()))); - REQUIRE(unpacked[0].length == distance_index.minimum_length(distance_index.get_node_net_handle(n1->id()))); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_NODE); - } SECTION("decoded code") { ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -75,7 +65,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -185,30 +175,6 @@ using namespace std; REQUIRE(zipcode.get_offset_in_chain(1) == distance_index.get_prefix_sum_value(node1)); REQUIRE(zipcode.get_is_reversed_in_parent(1) == distance_index.is_reversed_in_parent(node1)); - } - SECTION ("unpacked zip code for node on top-level chain") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); - - - net_handle_t node1 = distance_index.get_node_net_handle(n1->id()); - net_handle_t chain1 = distance_index.get_parent(node1); - - REQUIRE(unpacked.size() == 2); - - - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - 
distance_index.canonical(chain1)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); - - - //Next is the node code - REQUIRE(unpacked[1].code_type == ZipCode::NODE); - REQUIRE(unpacked[1].length == distance_index.minimum_length(node1)); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == distance_index.get_prefix_sum_value(node1)); - REQUIRE(unpacked[1].is_reversed == distance_index.is_reversed_in_parent(node1)); - } SECTION ("zip code for node in simple snarl") { ZipCode zipcode; @@ -313,46 +279,6 @@ using namespace std; REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); } - SECTION ("unpacked zip code for node in simple snarl") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n4->id(), distance_index); - REQUIRE(unpacked.size() == 3); - - - net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); - net_handle_t snarl36 = distance_index.get_parent(chain4); - net_handle_t chain1 = distance_index.get_parent(snarl36); - - - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - distance_index.canonical(chain1)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); - - //values for the snarl - REQUIRE(unpacked[1].length == distance_index.minimum_length(snarl36)); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 
5 : 6)); - REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); - bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), - distance_index.flip(chain4)) != 0; - - //values for the chain - REQUIRE(unpacked[2].length == distance_index.minimum_length(chain4)); - REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain4)); - REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); - REQUIRE(unpacked[2].is_reversed == is_rev); - if (is_rev) { - REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_start_right == 0); - REQUIRE(unpacked[2].distance_end_left == 0); - REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); - } else { - REQUIRE(unpacked[2].distance_start_left == 0); - REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_right == 0); - } - } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -407,7 +333,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -418,7 +344,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -429,7 +355,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); 
gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -440,7 +366,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -451,7 +377,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -462,7 +388,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -702,53 +628,6 @@ using namespace std; REQUIRE(zipcode.get_code_type(3) == ZipCode::NODE); REQUIRE(zipcode.get_is_reversed_in_parent(3) == distance_index.is_reversed_in_parent(node2)); - } - SECTION ("unpacked zip code for node on in nested chain") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n2->id(), distance_index); - REQUIRE(unpacked.size() == 4); - - net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); - net_handle_t chain2 = distance_index.get_parent(node2); - net_handle_t snarl1 = distance_index.get_parent(chain2); - net_handle_t chain1 = 
distance_index.get_parent(snarl1); - - - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - distance_index.canonical(chain1)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); - - //Snarl at depth 1 - REQUIRE(unpacked[1].length == 0); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 4 : 3)); - REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); - bool is_rev = distance_index.distance_in_parent(snarl1, distance_index.get_bound(snarl1, false, true), - distance_index.flip(distance_index.canonical(chain2))) != 0; - - //Chain at depth 2 - REQUIRE(unpacked[2].length == 3); - REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain2)); - REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); - REQUIRE(unpacked[2].is_reversed == is_rev); - if (is_rev) { - REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_start_right == 0); - REQUIRE(unpacked[2].distance_end_left == 0); - REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); - } else { - REQUIRE(unpacked[2].distance_start_left == 0); - REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_right == 0); - } - - //Node at depth 3 - REQUIRE(unpacked[3].length == 1); - REQUIRE(unpacked[3].prefix_sum_or_snarl_rank == distance_index.get_prefix_sum_value(node2)); - REQUIRE(unpacked[3].code_type == ZipCode::NODE); - REQUIRE(unpacked[3].is_reversed == distance_index.is_reversed_in_parent(node2)); - } SECTION ("zip code for more deeply nested node") { ZipCode zipcode; @@ -974,93 +853,6 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(6) == distance_index.get_rank_in_parent(chain4)); REQUIRE(zipcode.get_code_type(6) == ZipCode::CHAIN); - } - SECTION ("unpacked zip code for more deeply nested node") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, 
make_pos_t(n4->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n4->id(), distance_index); - REQUIRE(unpacked.size() == 7); - - net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); - net_handle_t snarl3 = distance_index.get_parent(chain4); - net_handle_t chain3 = distance_index.get_parent(snarl3); - net_handle_t snarl2 = distance_index.get_parent(chain3); - net_handle_t chain2 = distance_index.get_parent(snarl2); - net_handle_t snarl1 = distance_index.get_parent(chain2); - net_handle_t chain1 = distance_index.get_parent(snarl1); - - - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - distance_index.canonical(chain1)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); - - //Snarl at depth 1 - REQUIRE(unpacked[1].length == 0); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (chain_is_reversed ? 4 : 3)); - REQUIRE(unpacked[1].code_type == ZipCode::REGULAR_SNARL); - net_handle_t snarl = distance_index.get_parent(chain2); - bool is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), - distance_index.flip(distance_index.canonical(chain2))) != 0; - - - //Chain at depth 2 - REQUIRE(unpacked[2].is_reversed == is_rev); - REQUIRE(unpacked[2].length == 3); - REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain2)); - REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); - if (is_rev) { - REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_start_right == 0); - REQUIRE(unpacked[2].distance_end_left == 0); - REQUIRE(unpacked[2].distance_end_right == std::numeric_limits::max()); - } else { - REQUIRE(unpacked[2].distance_start_left == 0); - REQUIRE(unpacked[2].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[2].distance_end_right == 0); - } - - - //Snarl at depth 3 - REQUIRE(unpacked[3].length 
== 1); - REQUIRE(unpacked[3].prefix_sum_or_snarl_rank == 1); - REQUIRE(unpacked[3].code_type == ZipCode::REGULAR_SNARL); - snarl = distance_index.get_parent(chain3); - is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), - distance_index.flip(distance_index.canonical(chain3))) != 0; - - //Chain at depth 4 - REQUIRE(unpacked[4].is_reversed == is_rev); - REQUIRE(unpacked[4].length == distance_index.minimum_length(chain3)); - REQUIRE(unpacked[4].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain3)); - REQUIRE(unpacked[4].code_type == ZipCode::CHAIN); - if (is_rev) { - REQUIRE(unpacked[4].distance_start_left == std::numeric_limits::max()); - REQUIRE(unpacked[4].distance_start_right == 0); - REQUIRE(unpacked[4].distance_end_left == 0); - REQUIRE(unpacked[4].distance_end_right == std::numeric_limits::max()); - } else { - REQUIRE(unpacked[4].distance_start_left == 0); - REQUIRE(unpacked[4].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[4].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[4].distance_end_right == 0); - } - - - //Snarl3 at depth 5 - REQUIRE(unpacked[5].length == 0); - REQUIRE(unpacked[5].prefix_sum_or_snarl_rank == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 
3 : 1)); - REQUIRE(unpacked[5].code_type == ZipCode::REGULAR_SNARL); - snarl = distance_index.get_parent(chain4); - is_rev = distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), - distance_index.flip(distance_index.canonical(chain4))) != 0; - - //node/chain at depth 6 - REQUIRE(unpacked[6].is_reversed == is_rev); - REQUIRE(unpacked[6].length == 4); - REQUIRE(unpacked[6].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain4)); - REQUIRE(unpacked[6].code_type == ZipCode::CHAIN); - } SECTION("Distances") { ZipCode zip1; @@ -1152,7 +944,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1163,7 +955,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1174,7 +966,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1185,7 +977,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; 
decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1196,7 +988,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1207,7 +999,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1218,7 +1010,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1229,7 +1021,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n8->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1380,6 +1172,7 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(2) == distance_index.get_rank_in_parent(chain3)); REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); + bool chain_is_rev = distance_index.is_reversed_in_parent(distance_index.get_parent(distance_index.get_node_net_handle(n3->id()))); //node1 to left side of node 3 
REQUIRE(zipcode.get_distance_to_snarl_bound(2, !snarl_is_rev, true) == 1); //Node 1 to right side of node 3 @@ -1389,51 +1182,6 @@ using namespace std; //Node 4 to right side of node 3 REQUIRE(zipcode.get_distance_to_snarl_bound(2, snarl_is_rev, false) == 0); } - SECTION ("unpacked zip code for node in irregular snarl") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n3->id(), distance_index); - REQUIRE(unpacked.size() == 3); - - net_handle_t chain3 = distance_index.get_parent(distance_index.get_node_net_handle(n3->id())); - net_handle_t snarl1 = distance_index.get_parent(chain3); - net_handle_t chain1 = distance_index.get_parent(snarl1); - - - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - distance_index.canonical(chain1)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_CHAIN); - - //Snarl1 at depth 1 - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())) ? 
6 : 3)); - REQUIRE(unpacked[1].length == distance_index.minimum_length(snarl1)); - REQUIRE(unpacked[1].code_type == ZipCode::CYCLIC_SNARL); - - //chain3 at depth 3 - REQUIRE(unpacked[2].length == 1); - REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain3)); - REQUIRE(unpacked[2].code_type == ZipCode::CHAIN); - bool snarl_is_rev = distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n1->id())); - if (snarl_is_rev) { - //node1 to left side of node 3 - REQUIRE(unpacked[2].distance_end_left == 1); - //Node 1 to right side of node 3 - REQUIRE(unpacked[2].distance_end_right == 2); - //node4 to left side of node 3 - REQUIRE(unpacked[2].distance_start_left == std::numeric_limits::max()); - //Node 4 to right side of node 3 - REQUIRE(unpacked[2].distance_start_right == 0); - - } else { - REQUIRE(unpacked[2].distance_start_left == 1); - //Node 1 to right side of node 3 - REQUIRE(unpacked[2].distance_start_right == 2); - //node4 to left side of node 3 - REQUIRE(unpacked[2].distance_end_left == std::numeric_limits::max()); - //Node 4 to right side of node 3 - REQUIRE(unpacked[2].distance_end_right == 0); - } - } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -1513,7 +1261,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1524,7 +1272,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode 
== decoded); @@ -1535,7 +1283,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1546,7 +1294,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1557,7 +1305,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1568,7 +1316,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1579,7 +1327,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1660,27 +1408,6 @@ using namespace std; REQUIRE(zipcode.get_rank_in_snarl(1) == distance_index.get_rank_in_parent(chain1)); 
REQUIRE(zipcode.get_code_type(1) == ZipCode::CHAIN); } - SECTION ("unpacked zip code for node in top-level snarl") { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n1->id(), distance_index); - REQUIRE(unpacked.size() == 2); - - - net_handle_t chain1 = distance_index.get_parent(distance_index.get_node_net_handle(n1->id())); - net_handle_t root_snarl = distance_index.get_parent(chain1); - - - //Root snarl - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == - distance_index.canonical(distance_index.get_parent(chain1))); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_SNARL); - - //Chain1 at depth 1 - REQUIRE(unpacked[1].length == 3); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == distance_index.get_rank_in_parent(chain1)); - REQUIRE(unpacked[1].code_type == ZipCode::CHAIN); - } SECTION ("zip code for node in chain in top-level snarl") { net_handle_t node1 = distance_index.get_node_net_handle(n3->id()); ZipCode zipcode; @@ -1745,31 +1472,6 @@ using namespace std; REQUIRE(zipcode.get_code_type(2) == ZipCode::NODE); REQUIRE(zipcode.get_is_reversed_in_parent(2) == distance_index.is_reversed_in_parent(node3)); } - SECTION ("unpack zip code for node in chain in top-level snarl") { - net_handle_t node3 = distance_index.get_node_net_handle(n3->id()); - net_handle_t chain2 = distance_index.get_parent(node3); - net_handle_t root_snarl = distance_index.get_parent(chain2); - - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); - vector unpacked = zipcode.unpack_zip_code(n3->id(), distance_index); - REQUIRE(unpacked.size() == 3); - - //Root snarl - REQUIRE(distance_index.canonical(unpacked[0].net_handle) == distance_index.canonical(root_snarl)); - REQUIRE(unpacked[0].code_type == ZipCode::ROOT_SNARL); - - //chain2 at depth 1 - REQUIRE(unpacked[1].length == 2); - REQUIRE(unpacked[1].prefix_sum_or_snarl_rank == 
distance_index.get_rank_in_parent(chain2)); - REQUIRE(unpacked[1].code_type == ZipCode::CHAIN); - - //node3 at depth 2 - REQUIRE(unpacked[2].length == 1); - REQUIRE(unpacked[2].prefix_sum_or_snarl_rank == (distance_index.is_reversed_in_parent(distance_index.get_node_net_handle(n3->id())) ? 0 : 1)); - REQUIRE(unpacked[2].code_type == ZipCode::NODE); - REQUIRE(unpacked[2].is_reversed == distance_index.is_reversed_in_parent(node3)); - } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); @@ -1822,7 +1524,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1833,7 +1535,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1844,7 +1546,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1855,7 +1557,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == 
decoded); @@ -1866,7 +1568,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1877,7 +1579,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -1888,7 +1590,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2008,7 +1710,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2019,7 +1721,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2030,7 +1732,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n3->id(), 0, false)); gbwtgraph::Payload payload 
= zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2041,7 +1743,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2052,7 +1754,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n5->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2063,7 +1765,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n6->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2074,7 +1776,7 @@ using namespace std; ZipCode zipcode; zipcode.fill_in_zipcode(distance_index, make_pos_t(n7->id(), 0, false)); gbwtgraph::Payload payload = zipcode.get_payload_from_zip(); - if (payload != ZipCode::NO_PAYLOAD) { + if (payload != MIPayload::NO_CODE) { ZipCode decoded; decoded.fill_in_zipcode_from_payload(payload); REQUIRE(zipcode == decoded); @@ -2168,22 +1870,6 @@ using namespace std; REQUIRE(zipcode.get_last_chain_component(0, false) == distance_index.get_chain_component(bound, false)); REQUIRE(zipcode.get_is_looping_chain(0)); } - SECTION( "node2 unpacked" ) { - ZipCode zipcode; - zipcode.fill_in_zipcode(distance_index, make_pos_t(n2->id(), 0, false)); - vector 
unpacked = zipcode.unpack_zip_code(n2->id(), distance_index); - REQUIRE(unpacked.size() == 2); - - net_handle_t node2 = distance_index.get_node_net_handle(n2->id()); - net_handle_t parent = distance_index.get_parent(node2); - net_handle_t bound = distance_index.get_bound(parent, true, false); - - - REQUIRE(distance_index.minimum_length(node2) == unpacked[1].length); - REQUIRE(unpacked[1].chain_component == distance_index.get_chain_component(node2)); - REQUIRE(unpacked[0].chain_component == 1); - REQUIRE(unpacked[0].is_looping_chain); - } SECTION( "node5" ) { ZipCode zipcode; @@ -2195,10 +1881,6 @@ using namespace std; REQUIRE(distance_index.minimum_length(node) == zipcode.get_length(zipcode.max_depth())); - - vector unpacked = zipcode.unpack_zip_code(n5->id(), distance_index); - - REQUIRE(distance_index.minimum_length(node) == unpacked[unpacked.size()-1].length); } } TEST_CASE( "Chain with external connectivity zipcode","[zipcode]" ) { @@ -2242,26 +1924,6 @@ using namespace std; } } - SECTION( "Check connectivity unpacked" ) { - ZipCode zipcode; - zipcode.fill_in_zipcode(dist_index, make_pos_t(n2->id(), false, 0)); - vector unpacked = zipcode.unpack_zip_code(n2->id(), dist_index); - - REQUIRE(unpacked[1].length == 1); - - if (dist_index.is_reversed_in_parent(dist_index.get_node_net_handle(n1->id()))) { - REQUIRE(unpacked[0].distance_end_right == 0); - REQUIRE(unpacked[0].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[0].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[0].distance_start_left == std::numeric_limits::max()); - } else { - REQUIRE(unpacked[0].distance_end_right == std::numeric_limits::max()); - REQUIRE(unpacked[0].distance_end_left == std::numeric_limits::max()); - REQUIRE(unpacked[0].distance_start_right == std::numeric_limits::max()); - REQUIRE(unpacked[0].distance_start_left == 0); - } - - } } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index f5fb52d09d0..c87751df3cb 100644 --- a/src/zip_code.cpp 
+++ b/src/zip_code.cpp @@ -412,7 +412,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } -size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index, bool get_chain_component_length) const { +size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -440,21 +440,7 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } - size_t len = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - if (get_chain_component_length || (depth != 0 && decoder[depth-1].is_chain)) { - //If this is a node or we want the component length that got saved, return the actual saved value - return len; - } else { - //If we want the length of the last component of the chain, check if it is a multicopmonent chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - cerr << "Component " << zip_value << endl; - if (zip_value != 0) { - //If this is a multicomponent (or looping chain, which also must be a multicomponent chain) - return std::numeric_limits::max(); - } else { - return len; - } - } + return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; } else { //If this is a snarl @@ -592,7 +578,6 @@ size_t ZipCode::get_last_chain_component(const size_t& depth, bool get_end) cons if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - assert(ZipCode::CHAIN_COMPONENT_COUNT_OFFSET == ZipCode::ROOT_CHAIN_COMPONENT_COUNT_OFFSET); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { @@ -614,7 +599,6 @@ bool ZipCode::get_is_looping_chain(const size_t& depth) const { if (!decoder[depth].is_chain) { throw std::runtime_error("zipcodes trying to find the last chain component a snarl"); } - assert(ZipCode::CHAIN_COMPONENT_COUNT_OFFSET == ZipCode::ROOT_CHAIN_COMPONENT_COUNT_OFFSET); size_t zip_value; size_t zip_index = decoder[depth].offset; for (size_t i = 0 ; i <= ZipCode::CHAIN_COMPONENT_COUNT_OFFSET ; i++) { @@ -961,10 +945,9 @@ vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDis //Chain code is: rank in snarl, length vector chain_code (CHAIN_SIZE); chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); - bool is_trivial = distance_index.is_trivial_chain(chain) ; - //Length is the length of the last component - size_t len = is_trivial ? distance_index.minimum_length(chain) : distance_index.chain_minimum_length(chain); + size_t len = distance_index.minimum_length(chain); chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; + bool is_trivial = distance_index.is_trivial_chain(chain) ; size_t component = is_trivial ? 
0 : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); @@ -1742,7 +1725,7 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { //First byte is for the byte_count if (byte_count() + decoder_vector.byte_count() > 15) { //If there aren't enough bits to represent the zip code - return ZipCode::NO_PAYLOAD; + return MIPayload::NO_CODE; } //Encode it as the byte count of the zipcode, the zipcode, and the decoder @@ -1790,7 +1773,7 @@ gbwtgraph::Payload ZipCode::get_payload_from_zip() const { } void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { - assert(payload != ZipCode::NO_PAYLOAD); + assert(payload != MIPayload::NO_CODE); zipcode.data.reserve(16); size_t decoded_bytes = 0; @@ -1837,8 +1820,6 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Now go through the varint vector up and add anything that isn't 0 size_t varint_value= 1; size_t varint_index = 0; - //Somewhat arbitrarily reserve what we expect to be the number of codes in the zipcode - decoder.reserve(decoder_vector.byte_count()); decoder.emplace_back(is_chain, 0); is_chain = !is_chain; if (decoder_vector.byte_count() != 0) { @@ -1854,7 +1835,6 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { assert(!decoder.back().is_chain); decoder.back().is_chain = true; } - finished_decoding = true; } @@ -2044,251 +2024,179 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -vector ZipCode::unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index) const { - vector unpacked_zipcode; - unpacked_zipcode.reserve(decoder_length()); +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { + MIPayload payload; - //Otherwise, walk through the zipcode start to end (root to leaf) and fill in the unpacked zipcode - //Fill in everything in the zipcode in this pass, and then go back and fill in any net handles that - //weren't 
stored in the zipcode by getting the parents - for (size_t depth = 0 ; depth < decoder_length() ; depth++) { - unpacked_zipcode.emplace_back(); - zip_code_t& current_code = unpacked_zipcode.back(); + if (decoder_length() == 1) { + //If the root-level structure is a node + payload.parent_is_root = true; + payload.parent_is_chain = true; + //Walk through the zipcode to get values size_t zip_value; - size_t zip_index = decoder[depth].offset; - bool is_chain = decoder[depth].is_chain; - if (depth == 0) { - //is_Chain is first for anything in the root - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - - //identifier - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - - if (is_chain) { - if (decoder_length() == 1) { - //Root node + size_t zip_index = decoder[0].offset; + //Root is chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //root_identifier + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE); - current_code.code_type = ZipCode::ROOT_NODE; - //Get the root node as a chain - current_code.net_handle = distance_index.get_net_handle_from_values( - distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE); + //Root node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //For a root node, this is the length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + payload.node_length = zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; + payload.is_trivial_chain = true; + payload.is_reversed = false; + payload.parent_handle = distance_index.get_root(); + payload.parent_type = ZipCode::ROOT_NODE; + payload.parent_record_offset = 0; + } else if (decoder[max_depth() - 1].is_chain) { + //If the parent is a chain + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_is_chain = true; + payload.parent_is_root = false; - } else { - //Root chain - current_code.code_type = ZipCode::ROOT_CHAIN; + //Walk through the zipcode to get values + size_t zip_value; + size_t zip_index = decoder[max_depth()-1].offset; + //is_chain/rank in snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + //root_identifier for root, chain length for anything else + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //For a root chain, this is the component count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.is_looping_chain = zip_value % 2; - if (zip_value % 2) { - zip_value -= 1; - } - current_code.chain_component = zip_value / 2; - } - //The next thing for both nodes and chains is the connectivity value - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - bool externally_connected = false; - //start-end connected - if ((zip_value & 1) != 0) { - current_code.distance_start_right = 0; - current_code.distance_end_left = 0; - externally_connected = true; - } - //start-start connected - if((zip_value & 2) != 0){ - current_code.distance_start_left = 0; - externally_connected = true; - } - //end-end connected - if ((zip_value & 4) != 0) { - current_code.distance_end_right = 0; - externally_connected = true; - } - if (current_code.chain_component != 0 || externally_connected || current_code.is_looping_chain) { - //If this is a 
multicomponent chain or has external connectivity, then we want to know the length - if (decoder_length() == 1) { - current_code.length = distance_index.minimum_length(current_code.net_handle); - } else { - current_code.length = distance_index.chain_minimum_length(current_code.net_handle); - } - } - - } else { - //Root snarl - current_code.code_type = ZipCode::ROOT_SNARL; - current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); - } + if (decoder_length() == 2) { + //If the node is a child of the root chain + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_type = ZipCode::ROOT_CHAIN; + payload.parent_is_root = true; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } else { - if (is_chain) { - if (decoder[depth-1].is_chain) { - //Node in a chain - current_code.code_type = ZipCode::NODE; - - //Prefix sum value - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.prefix_sum_or_snarl_rank = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_parent(payload.node_handle)); + payload.parent_type = ZipCode::CHAIN; + } + payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); - //Node length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.length = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; + //chain component count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Node is reversed - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.is_reversed = zip_value; + //Node prefix sum + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.prefix_sum = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //Node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; + //is_reversed + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //TODO: For top-level chains we got this from the distance index + payload.is_reversed = zip_value; - //Node chain component - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.chain_component = zip_value; - } else { - //Chain - current_code.code_type = ZipCode::CHAIN; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.chain_component = zip_value; - //chain rank in snarl - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.prefix_sum_or_snarl_rank = zip_value; - //Chain length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.length = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1;; - //chain component count / is looping chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.is_looping_chain = zip_value % 2; - if (zip_value % 2) { - zip_value -= 1; - } - current_code.chain_component = zip_value / 2; - } - } else { - //Snarl + } else { + //If the node is a child of a snarl + + payload.node_handle = distance_index.get_node_net_handle(id); + payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::CHAIN_HANDLE, + distance_index.get_node_record_offset(payload.node_handle)); + payload.parent_is_chain = false; + payload.parent_is_root = decoder_length() == 2; + payload.is_trivial_chain = true; - //snarl type - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - if (zip_value == 1) { - current_code.code_type = ZipCode::REGULAR_SNARL; - } else if (zip_value == 0) { - current_code.code_type = ZipCode::IRREGULAR_SNARL; - } else { - current_code.code_type = ZipCode::CYCLIC_SNARL; - } - //Offset in chain - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.prefix_sum_or_snarl_rank = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; - //snarl length - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.length = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; + size_t zip_value; + size_t zip_index; + if (payload.parent_is_root) { + //is_chain + zip_index = decoder[0].offset; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Identifier for root snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_handle = payload.parent_handle; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::ROOT_HANDLE); + payload.parent_type = ZipCode::ROOT_SNARL; + } else { + zip_index = decoder[max_depth()-1].offset; + //is_regular + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //If this is a non-root snarl, get as much as we can from it + payload.parent_type = ZipCode::EMPTY; + if (zip_value == 0) { + payload.parent_type = ZipCode::IRREGULAR_SNARL; + } else if (zip_value == 1) { + payload.parent_type = ZipCode::REGULAR_SNARL; + } else { + payload.parent_type = ZipCode::CYCLIC_SNARL; + } - //CHild count - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Snarl prefix sum + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Chain component - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.chain_component = zip_value; + payload.prefix_sum = 0; //TODO: SHould use this zip_value == std::numeric_limits::max() ? 
0 : zip_value-1; - if (current_code.code_type == ZipCode::REGULAR_SNARL) { - //Regular snarl + //Snarl length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Snarl child_count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Chain component of the snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //TODO: SHould use this somehow + payload.chain_component = 0; + //is_reversed for regular snarl and record offset for irregular/cyclic snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - //Is reversed. This really means is_reversed for the child, which will be used to get the distance values for the child - //The child's values will be set in the second pass - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; - current_code.is_reversed = zip_value; + if (payload.parent_type == ZipCode::REGULAR_SNARL) { + //Snarl is reversed + net_handle_t grandparent_handle = distance_index.get_parent(payload.parent_handle); + //Simple and regular snarls are different for clustering + if (distance_index.is_simple_snarl(grandparent_handle)) { + payload.is_reversed = zip_value; + payload.parent_is_chain=true; + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_parent(grandparent_handle)); } else { - //Irregular/cyclic snarl - - //Snarl record for irregular/cyclic snarls - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index);; - current_code.net_handle = distance_index.get_net_handle_from_values(zip_value, SnarlDistanceIndex::START_END, SnarlDistanceIndex::SNARL_HANDLE); + payload.is_reversed = false; + payload.parent_record_offset = distance_index.get_record_offset(grandparent_handle); + } - //Distance values - //These are actually the distances from the child to the bounds of the snarl - std::tie(zip_value, zip_index) = 
zipcode.get_value_and_next_index(zip_index); - current_code.distance_start_left = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + } else { + payload.is_reversed = false; + payload.parent_record_offset = zip_value; + } - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.distance_end_left = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + } + //We should be at the node/trivial chain now + zip_index = decoder[max_depth()].offset; + //Chain rank in snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Chain length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + payload.node_length = zip_value == std::numeric_limits::max() ? 0 : zip_value-1; - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.distance_start_right = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + //Get the rest as default values - std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - current_code.distance_end_right = zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; - } - } + } + payload.parent_depth = 0; + for (size_t d = 0 ; d <= max_depth() ; d++) { + auto type = get_code_type(d); + if (type == ZipCode::CHAIN || type == ZipCode::ROOT_CHAIN || type == ZipCode::ROOT_NODE) { + payload.parent_depth++; } } - //Now go back walking up the snarl tree and add all the stuff from the distance index: - //net handles if they haven't been set and distances for children of snarls - for (int depth = decoder_length()-1 ; depth >= 0 ; depth--) { - zip_code_t& current_code = unpacked_zipcode[depth]; - - //If we need to set the net handle - if (!(depth == 0 || current_code.code_type == ZipCode::IRREGULAR_SNARL || current_code.code_type == ZipCode::CYCLIC_SNARL)) { - if (depth == decoder_length()-1 ) { - current_code.net_handle = distance_index.get_node_net_handle(id); - if (current_code.code_type == ZipCode::CHAIN) { - current_code.net_handle = distance_index.get_net_handle_from_values( - distance_index.get_record_offset(current_code.net_handle), - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::CHAIN_HANDLE, - distance_index.get_node_record_offset(current_code.net_handle)); - } - } else { - current_code.net_handle = distance_index.start_end_traversal_of(distance_index.get_parent(unpacked_zipcode[depth+1].net_handle)); - } - } - //If we need to set distances and sometimes the orientation - if (depth != 0) { - zip_code_t& parent_code = unpacked_zipcode[depth-1]; - if (parent_code.code_type == ZipCode::REGULAR_SNARL) { - //If the parent was a regular snarl, then we stored the orientation to get the distances - current_code.is_reversed = parent_code.is_reversed; - parent_code.is_reversed = false; - if (current_code.is_reversed) { - current_code.distance_start_left = std::numeric_limits::max(); - current_code.distance_start_right = 0; - current_code.distance_end_left = 0; - current_code.distance_end_right = std::numeric_limits::max(); - } else { - current_code.distance_start_left = 0; - 
current_code.distance_start_right = std::numeric_limits::max(); - current_code.distance_end_left = std::numeric_limits::max(); - current_code.distance_end_right = 0; - } - parent_code.distance_start_left = std::numeric_limits::max(); - parent_code.distance_start_right = std::numeric_limits::max(); - parent_code.distance_end_left = std::numeric_limits::max(); - parent_code.distance_end_right = std::numeric_limits::max(); - } else if (parent_code.code_type == ZipCode::IRREGULAR_SNARL || parent_code.code_type == ZipCode::CYCLIC_SNARL) { - //If the parent was an irregular or cyclic snarl, then we saved the distances - current_code.distance_start_left = parent_code.distance_start_left; - current_code.distance_start_right = parent_code.distance_start_right; - current_code.distance_end_left = parent_code.distance_end_left; - current_code.distance_end_right = parent_code.distance_end_right; - - parent_code.distance_start_left = std::numeric_limits::max(); - parent_code.distance_start_right = std::numeric_limits::max(); - parent_code.distance_end_left = std::numeric_limits::max(); - parent_code.distance_end_right = std::numeric_limits::max(); - - parent_code.is_reversed = false; - } - } - } - return unpacked_zipcode; + return payload; } net_identifier_t ZipCode::get_identifier(size_t depth) const { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 64faf7ce3df..451a7875ca3 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -29,8 +29,10 @@ using namespace std; -///A struct to store an unpacked version of one node/snarl/chain code -struct zip_code_t; +///A struct to interpret the minimizer payload +///I want to use zipcodes as the payload but at the moment clustering still expects the old payload +///This can interpret zipcodes to format them as the old payload +struct MIPayload; /// A struct to be used as a unique identifier for a snarl tree node (node/snarl/chain) @@ -152,7 +154,6 @@ class ZipCode { ///Offsets for chain codes const static size_t CHAIN_SIZE = 3; 
const static size_t CHAIN_RANK_IN_SNARL_OFFSET = 0; - //For a multicomponent chain, this is the length of the last component, because the real length will always be inf const static size_t CHAIN_LENGTH_OFFSET = 1; //This tells us if the chain is a multicomponent chain, how many components it has, and if the chain loops @@ -256,8 +257,7 @@ class ZipCode { ///This requires the distance index for irregular snarls (except for a top-level snarl) ///Throws an exception if the distance index is not given when it is needed ///Doesn't use a given distance index if it isn't needed - ///If chain_component_length is true, then get the length of the last component of the multicomponent chain (instead of inf) - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr, bool get_chain_component_length=false) const ; + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; ///Get the rank of a node/snarl in a snarl. Throw an exception if it isn't the child of a snarl size_t get_rank_in_snarl(const size_t& depth) const ; @@ -320,16 +320,16 @@ class ZipCode { /// unit test from the resulting information. void dump(std::ostream& out) const; - ///Unpack the zip code to get a bigger version with random access - vector unpack_zip_code(nid_t id, const SnarlDistanceIndex& distance_index) const; + //TODO: I want to make a struct for holding all values of a code as real values + + ///Fill in a payload with values from the zipcode + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth /// would be the node, also include the node id net_identifier_t get_identifier(size_t depth) const; const static net_identifier_t get_parent_identifier(const net_identifier_t& child); - public: - constexpr static gbwtgraph::Payload NO_PAYLOAD = {0, 0}; }; /// Print a code type to a stream @@ -380,29 +380,34 @@ std::ostream& operator<<(std::ostream& out, const ZipCode& decoder); /** - An unpacked version of one node/snarl/chain code - Not all values will be set for every type of code + The payload for the minimizer index. This stores distance information that gets used in clustering + The payload now uses zip codes, so this gets used to go from a zip code to distance information + usable by the clusterer */ -struct zip_code_t { - ZipCode::code_type_t code_type = ZipCode::EMPTY; +struct MIPayload { + typedef std::uint64_t code_type; // We assume that this fits into gbwtgraph::Payload. + //typedef std::pair payload_type; + + + constexpr static gbwtgraph::Payload NO_CODE = {0, 0}; + constexpr static std::size_t NO_VALUE = std::numeric_limits::max(); - //TODO: I'd like this to be the root or another placeholder - net_handle_t net_handle; - size_t length = std::numeric_limits::max(); - size_t prefix_sum_or_snarl_rank = std::numeric_limits::max(); - size_t chain_component = std::numeric_limits::max(); + net_handle_t node_handle; + net_handle_t parent_handle; - //distance from the left side of the child to the start of the snarl - //or, for root nodes/chains, start-start connected - //start-right and end-left are the same for root nodes/chains - size_t distance_start_left = std::numeric_limits::max(); - size_t distance_start_right = std::numeric_limits::max(); - size_t distance_end_left = std::numeric_limits::max(); - size_t distance_end_right = std::numeric_limits::max(); + size_t node_length = std::numeric_limits::max(); + size_t prefix_sum = 0; + size_t chain_component = 0; + //Depth according to the distance index + size_t 
parent_depth = 0; + size_t parent_record_offset = 0; + ZipCode::code_type_t parent_type = ZipCode::EMPTY; bool is_reversed = false; - bool is_looping_chain = false; + bool is_trivial_chain = false; + bool parent_is_chain = false; + bool parent_is_root = false; }; } From c17281692c1c1392f8a23dd4f728dae3654766a6 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 09:59:28 +0200 Subject: [PATCH 110/124] Fix bug getting minimum distances --- src/snarl_seed_clusterer.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 31579b53103..bd7d0bae16d 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -1685,9 +1685,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin for (size_t read_num = 0 ; read_num < clustering_problem.all_seeds->size() ; read_num++) { if (read_num == 0) { if (child_problem.is_reversed_in_parent) { + size_t old_best_right = snarl_problem->read_best_right.first; snarl_problem->read_best_right.first = std::min(snarl_problem->read_best_left.first, child_problem.read_best_left.first); - snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_right.first, + snarl_problem->read_best_left.first = std::min(old_best_right, child_problem.read_best_right.first); } else { snarl_problem->read_best_left.first = std::min(snarl_problem->read_best_left.first, @@ -1710,9 +1711,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } if (child_problem.is_reversed_in_parent) { + size_t old_best_right = snarl_problem->fragment_best_right; snarl_problem->fragment_best_right = std::min(snarl_problem->fragment_best_left, child_problem.fragment_best_left); - snarl_problem->fragment_best_left = std::min(snarl_problem->fragment_best_right, + snarl_problem->fragment_best_left = std::min(old_best_right, child_problem.fragment_best_right); } else { snarl_problem->fragment_best_left 
= std::min(snarl_problem->fragment_best_left, From aff0cc6ac3da7b2ca5dfa3639c83b15afe1c5883 Mon Sep 17 00:00:00 2001 From: Xian Chang Date: Tue, 13 Aug 2024 04:42:59 -0700 Subject: [PATCH 111/124] Get chains last component length and get chain length from zipcode --- src/snarl_seed_clusterer.hpp | 2 +- src/zip_code.cpp | 25 +++++++++++++++++++++---- src/zip_code.hpp | 4 +++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 22f8478e6ff..e1f72f381af 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -295,7 +295,7 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); - node_length = distance_index.chain_minimum_length(containing_net_handle); + node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index, true); chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index c87751df3cb..6541fe04f3a 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -412,7 +412,7 @@ ZipCode::code_type_t ZipCode::get_code_type(const size_t& depth) const { } } -size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index) const { +size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distance_index, bool get_chain_component_length) const { if (depth == 0) { //If this is the root chain/snarl/node @@ -440,7 +440,22 @@ size_t ZipCode::get_length(const size_t& depth, const SnarlDistanceIndex* distan for (size_t i = 0 ; i <= ZipCode::CHAIN_LENGTH_OFFSET ; i++) { std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); } - return zip_value == 0 ? 
std::numeric_limits::max() : zip_value-1; + + size_t len = zip_value == 0 ? std::numeric_limits::max() : zip_value-1; + if (get_chain_component_length || (depth != 0 && decoder[depth-1].is_chain)) { + //If this is a node or we want the component length that got saved, return the actual saved value + return len; + } else { + //If we want the length of the last component of the chain, check if it is a multicopmonent chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + if (zip_value != 0) { + //If this is a multicomponent (or looping chain, which also must be a multicomponent chain) + return std::numeric_limits::max(); + } else { + return len; + } + } + } else { //If this is a snarl @@ -945,9 +960,9 @@ vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDis //Chain code is: rank in snarl, length vector chain_code (CHAIN_SIZE); chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); - size_t len = distance_index.minimum_length(chain); - chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; bool is_trivial = distance_index.is_trivial_chain(chain) ; + size_t len = is_trivial ? distance_index.minimum_length(chain) : distance_index.chain_minimum_length(chain); + chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; size_t component = is_trivial ? 
0 : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); @@ -1804,6 +1819,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Get the decoder offsets varint_vector_t decoder_vector; + decoder_vector.data.reserve(16-decoded_bytes); for (size_t i = decoded_bytes ; i <16 ; i++) { uint8_t saved_byte; if (decoded_bytes < 8) { @@ -1820,6 +1836,7 @@ void ZipCode::fill_in_zipcode_from_payload(const gbwtgraph::Payload& payload) { //Now go through the varint vector up and add anything that isn't 0 size_t varint_value= 1; size_t varint_index = 0; + decoder.reserve(decoder_vector.byte_count()); decoder.emplace_back(is_chain, 0); is_chain = !is_chain; if (decoder_vector.byte_count() != 0) { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 451a7875ca3..972a8b479dd 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -154,6 +154,8 @@ class ZipCode { ///Offsets for chain codes const static size_t CHAIN_SIZE = 3; const static size_t CHAIN_RANK_IN_SNARL_OFFSET = 0; + //This is the distance index's chain_minimum_length, meaning that if it's a multicomponent chain, + //then it is the length of the last component. const static size_t CHAIN_LENGTH_OFFSET = 1; //This tells us if the chain is a multicomponent chain, how many components it has, and if the chain loops @@ -257,7 +259,7 @@ class ZipCode { ///This requires the distance index for irregular snarls (except for a top-level snarl) ///Throws an exception if the distance index is not given when it is needed ///Doesn't use a given distance index if it isn't needed - size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr) const ; + size_t get_length(const size_t& depth, const SnarlDistanceIndex* distance_index=nullptr, bool get_chain_component_length = false) const ; ///Get the rank of a node/snarl in a snarl. 
Throw an exception if it isn't the child of a snarl size_t get_rank_in_snarl(const size_t& depth) const ; From 7ed8f6e6cfbe87cccc881e4a5bcf6dbb43b39568 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 16:12:05 +0200 Subject: [PATCH 112/124] Make a decoded code type (but not for roots) and use it for building the zipcode --- src/zip_code.cpp | 184 ++++++++++++++++++++++++++--------------------- src/zip_code.hpp | 72 +++++++++++++++++-- 2 files changed, 168 insertions(+), 88 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index c87751df3cb..baec9b8846d 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -91,18 +91,17 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p cerr << "Adding code for " << distance_index.net_handle_as_string(current_ancestor) << endl; #endif if (distance_index.is_node(current_ancestor)) { - vector to_add = get_node_code(current_ancestor, distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::NODE_SIZE); -#endif + node_code_t node_code = get_node_code(current_ancestor, distance_index); + zipcode.add_value(node_code.prefix_sum); + zipcode.add_value(node_code.length); + zipcode.add_value(node_code.is_reversed); + zipcode.add_value(node_code.chain_component); + } else if (distance_index.is_chain(current_ancestor)) { - vector to_add = get_chain_code(current_ancestor, distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } + chain_code_t chain_code = get_chain_code(current_ancestor, distance_index); + zipcode.add_value(chain_code.snarl_rank_or_identifier); + zipcode.add_value(chain_code.length); + zipcode.add_value(chain_code.last_component); #ifdef DEBUG_ZIPCODE assert(to_add.size() == ZipCode::CHAIN_SIZE); #endif @@ -113,24 +112,28 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p return; } } else if (distance_index.is_regular_snarl(current_ancestor)) { - vector 
to_add = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); - for (auto& x : to_add) { - zipcode.add_value(x); - } -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::REGULAR_SNARL_SIZE); -#endif + snarl_code_t snarl_code = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); + zipcode.add_value(snarl_code.code_type); + zipcode.add_value(snarl_code.prefix_sum); + zipcode.add_value(snarl_code.length); + zipcode.add_value(snarl_code.child_count); + zipcode.add_value(snarl_code.chain_component); + zipcode.add_value(snarl_code.is_reversed); } else { #ifdef DEBUG_ZIPCODE assert(distance_index.is_snarl(current_ancestor)); #endif - vector to_add = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); -#ifdef DEBUG_ZIPCODE - assert(to_add.size() == ZipCode::IRREGULAR_SNARL_SIZE); -#endif - for (auto& x : to_add) { - zipcode.add_value(x); - } + snarl_code_t snarl_code = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); + zipcode.add_value(snarl_code.code_type); + zipcode.add_value(snarl_code.prefix_sum); + zipcode.add_value(snarl_code.length); + zipcode.add_value(snarl_code.child_count); + zipcode.add_value(snarl_code.chain_component); + zipcode.add_value(snarl_code.record_offset); + zipcode.add_value(snarl_code.distance_start_left); + zipcode.add_value(snarl_code.distance_end_left); + zipcode.add_value(snarl_code.distance_start_right); + zipcode.add_value(snarl_code.distance_end_right); } } if (fill_in_decoder) { @@ -924,134 +927,151 @@ std::ostream& operator<<(std::ostream& out, const ZipCode& zip) { } -vector ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { +node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { #ifdef DEBUG_ZIPCODE assert(!distance_index.is_trivial_chain(node)); assert((distance_index.is_chain(distance_index.get_parent(node)) || 
distance_index.is_root(distance_index.get_parent(node)))); #endif //Node code is: offset in chain, length, is reversed - vector node_code(NODE_SIZE); + node_code_t node_code; //Assume this node is in a regular chain - size_t prefix_sum = distance_index.get_prefix_sum_value(node); - node_code[NODE_OFFSET_OFFSET] = prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1; - node_code[NODE_LENGTH_OFFSET] = distance_index.minimum_length(node)+1; - node_code[NODE_IS_REVERSED_OFFSET] = distance_index.is_reversed_in_parent(node); - size_t component = distance_index.get_chain_component(node); - node_code[NODE_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + node_code.prefix_sum = distance_index.get_prefix_sum_value(node); + node_code.prefix_sum = node_code.prefix_sum == std::numeric_limits::max() ? 0 : node_code.prefix_sum+1; + + node_code.length = distance_index.minimum_length(node)+1; + + node_code.is_reversed = distance_index.is_reversed_in_parent(node); + node_code.chain_component = distance_index.get_chain_component(node); + node_code.chain_component = node_code.chain_component == std::numeric_limits::max() ? 0 : node_code.chain_component; + return node_code; } -vector ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { +chain_code_t ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { //Chain code is: rank in snarl, length - vector chain_code (CHAIN_SIZE); - chain_code[CHAIN_RANK_IN_SNARL_OFFSET] = distance_index.get_rank_in_parent(chain); - size_t len = distance_index.minimum_length(chain); - chain_code[CHAIN_LENGTH_OFFSET] = len == std::numeric_limits::max() ? 0 : len+1; + chain_code_t chain_code; + chain_code.snarl_rank_or_identifier = distance_index.get_rank_in_parent(chain); + + chain_code.length = distance_index.minimum_length(chain); + chain_code.length = chain_code.length == std::numeric_limits::max() ? 
0 : chain_code.length+1; + bool is_trivial = distance_index.is_trivial_chain(chain) ; + + chain_code.is_looping_chain = is_trivial ? false + : distance_index.is_looping_chain(chain); size_t component = is_trivial ? 0 : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); - component = component == std::numeric_limits::max() ? 0 : component*2; - if (!is_trivial && distance_index.is_looping_chain(chain)) { + component = component == std::numeric_limits::max() ? 0 : component * 2; + if (chain_code.is_looping_chain) { component += 1; } - chain_code[CHAIN_COMPONENT_COUNT_OFFSET] = component; + chain_code.last_component = component; + return chain_code; } -vector ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { +snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { //Regular snarl code is 1, offset in chain, length, is reversed - vector snarl_code (REGULAR_SNARL_SIZE); + snarl_code_t snarl_code; //Tag to say that it's a regular snarl - snarl_code[SNARL_IS_REGULAR_OFFSET] = 1; + snarl_code.code_type = 1; //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; + snarl_code.child_count = child_count; //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); + size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 
0 : prefix_sum+1); + snarl_code.prefix_sum = prefix_sum == std::numeric_limits::max() ? 0 + : prefix_sum+1; - size_t component = distance_index.get_chain_component(start_node); - snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + size_t chain_component = distance_index.get_chain_component(start_node); + snarl_code.chain_component = chain_component == std::numeric_limits::max() ? 0 + : chain_component; //Length of the snarl - size_t len = distance_index.minimum_length(snarl); - snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); + size_t length = distance_index.minimum_length(snarl); + snarl_code.length = length == std::numeric_limits::max() ? 0 + : length+1; //Is the child of the snarl reversed in the snarl #ifdef DEBUG_ZIPCODE assert(distance_index.is_chain(snarl_child)); #endif - snarl_code[REGULAR_SNARL_IS_REVERSED_OFFSET] = (distance_index.distance_in_parent(snarl, + snarl_code.is_reversed = (distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), distance_index.flip(distance_index.canonical(snarl_child))) != 0); return snarl_code; } -vector ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, +snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { - vector snarl_code (IRREGULAR_SNARL_SIZE); + snarl_code_t snarl_code; //Tag to say that it's an irregular snarl - snarl_code[SNARL_IS_REGULAR_OFFSET] = distance_index.is_dag(snarl) ? 0 : 2; + snarl_code.code_type = distance_index.is_dag(snarl) ? 
0 : 2; //The number of children - size_t child_count = 0; + snarl_code.child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { - child_count++; + snarl_code.child_count++; }); - snarl_code[SNARL_CHILD_COUNT_OFFSET] = child_count; //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); - size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - snarl_code[SNARL_OFFSET_IN_CHAIN_OFFSET] = (prefix_sum == std::numeric_limits::max() ? 0 : prefix_sum+1); - size_t component = distance_index.get_chain_component(start_node); - snarl_code[SNARL_CHAIN_COMPONENT_OFFSET] = component == std::numeric_limits::max() ? 0 : component; + snarl_code.prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); + + snarl_code.prefix_sum = snarl_code.prefix_sum == std::numeric_limits::max() ? 0 + : snarl_code.prefix_sum + 1; + + snarl_code.chain_component = distance_index.get_chain_component(start_node) ; + snarl_code.chain_component = snarl_code.chain_component == std::numeric_limits::max() ? 0 + : snarl_code.chain_component; //Length of the snarl - size_t len = distance_index.minimum_length(snarl); - snarl_code[SNARL_LENGTH_OFFSET] = (len == std::numeric_limits::max() ? 0 : len+1); + snarl_code.length = distance_index.minimum_length(snarl); + snarl_code.length = snarl_code.length == std::numeric_limits::max() ? 
0 + : snarl_code.length+1; //Record offset to look up distances in the index later - snarl_code[IRREGULAR_SNARL_RECORD_OFFSET] = (distance_index.get_record_offset(snarl)); - - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = distance_index.distance_to_parent_bound(snarl, true, snarl_child); - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = distance_index.distance_to_parent_bound(snarl, false, snarl_child); + snarl_code.record_offset = distance_index.get_record_offset(snarl); + snarl_code.distance_start_left = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); + snarl_code.distance_end_left = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); + snarl_code.distance_start_right = distance_index.distance_to_parent_bound(snarl, true, snarl_child); + snarl_code.distance_end_right = distance_index.distance_to_parent_bound(snarl, false, snarl_child); //Add 1 to values to store inf properly - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] == std::numeric_limits::max() + snarl_code.distance_start_left = + snarl_code.distance_start_left == std::numeric_limits::max() ? 0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_START_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] == std::numeric_limits::max() + : snarl_code.distance_start_left + 1; + snarl_code.distance_start_right = + snarl_code.distance_start_right == std::numeric_limits::max() ? 
0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_START_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] == std::numeric_limits::max() + : snarl_code.distance_start_right + 1; + snarl_code.distance_end_left = + snarl_code.distance_end_left == std::numeric_limits::max() ? 0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_LEFT_END_OFFSET] + 1; - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] = - snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] == std::numeric_limits::max() + : snarl_code.distance_end_left + 1; + snarl_code.distance_end_right = + snarl_code.distance_end_right == std::numeric_limits::max() ? 0 - : snarl_code[IRREGULAR_SNARL_DISTANCE_RIGHT_END_OFFSET] + 1; + : snarl_code.distance_end_right + 1; + - return snarl_code; + return snarl_code; } size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 451a7875ca3..849fc574cca 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -40,6 +40,10 @@ struct MIPayload; /// It should be unique and hashable typedef std::string net_identifier_t; +///A struct to store an unpacked version of one node/snarl/chain code +struct node_code_t; +struct chain_code_t; +struct snarl_code_t; /* Zip codes store the snarl decomposition location and distance information for a position on a graph * A zip code will contain all the information necessary to compute the minimum distance between two @@ -202,15 +206,16 @@ class ZipCode { /* Functions for getting the code for each snarl/chain/node * Distances will be stored as distance+1, 0 will be reserved for inf */ - //Return a vector of size_ts that will represent the node in the zip code - inline vector get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index); - //Return a vector of size_ts that will represent the chain in the zip code - inline vector get_chain_code(const net_handle_t& chain, const 
SnarlDistanceIndex& distance_index); + //Return a node_code_t that will represent the node in the zip code + inline node_code_t get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index); + //Return a chain_code_t that will represent the chain in the zip code + //The actual values being stored, not the raw values + inline chain_code_t get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index); //Return a vector of size_ts that will represent the snarl in the zip code - inline vector get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, + inline snarl_code_t get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); //Return a vector of size_ts that will represent the snarl in the zip code - inline vector get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); + inline snarl_code_t get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); //////////////////////////////// Stuff for decoding the zipcode @@ -369,6 +374,61 @@ class ZipCodeCollection { }; + +/** + An unpacked version of one node code +*/ +struct node_code_t { + size_t prefix_sum ; + size_t chain_component : 32; + size_t length : 31; + bool is_reversed; +}; + +/** + An unpacked version of one chain code +*/ +struct chain_code_t { + + //The length of the last component of the chain (which may be the whole chain) + size_t length; + //The rank in the parent snarl or, if it is a root chain, the identifier + size_t snarl_rank_or_identifier : 32; + size_t last_component : 16; + + //For root chain/nodes, a bitvector representing the connectivity + size_t connectivity : 4; + + bool is_looping_chain; +}; + +/** + An unpacked version of one snarl code +*/ +struct snarl_code_t { + + size_t length; + size_t prefix_sum; + + //distance 
from the left side of the child to the start of the snarl + //or, for root nodes/chains, start-start connected + //start-right and end-left are the same for root nodes/chains + size_t distance_start_left; + size_t distance_start_right; + size_t distance_end_left; + size_t distance_end_right; + + size_t record_offset : 32; + + size_t child_count : 16; + size_t chain_component : 16; + + size_t code_type : 4; + + bool is_reversed; +}; + + template<> struct wang_hash { size_t operator()(const net_identifier_t& id) const { From a600bbce9411d9accbe4c00c3b86e71df8b67553 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 17:40:43 +0200 Subject: [PATCH 113/124] Add getters and setters for unpacked codes --- src/zip_code.cpp | 151 ++++++++++++++------------------ src/zip_code.hpp | 222 +++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 270 insertions(+), 103 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index baec9b8846d..5ba7b7e3362 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -92,16 +92,16 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p #endif if (distance_index.is_node(current_ancestor)) { node_code_t node_code = get_node_code(current_ancestor, distance_index); - zipcode.add_value(node_code.prefix_sum); - zipcode.add_value(node_code.length); - zipcode.add_value(node_code.is_reversed); - zipcode.add_value(node_code.chain_component); + zipcode.add_value(node_code.get_raw_prefix_sum()); + zipcode.add_value(node_code.get_raw_length()); + zipcode.add_value(node_code.get_raw_is_reversed()); + zipcode.add_value(node_code.get_raw_chain_component()); } else if (distance_index.is_chain(current_ancestor)) { chain_code_t chain_code = get_chain_code(current_ancestor, distance_index); - zipcode.add_value(chain_code.snarl_rank_or_identifier); - zipcode.add_value(chain_code.length); - zipcode.add_value(chain_code.last_component); + zipcode.add_value(chain_code.get_raw_snarl_rank_or_identifier()); + 
zipcode.add_value(chain_code.get_raw_length()); + zipcode.add_value(chain_code.get_raw_last_component()); #ifdef DEBUG_ZIPCODE assert(to_add.size() == ZipCode::CHAIN_SIZE); #endif @@ -113,27 +113,27 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p } } else if (distance_index.is_regular_snarl(current_ancestor)) { snarl_code_t snarl_code = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); - zipcode.add_value(snarl_code.code_type); - zipcode.add_value(snarl_code.prefix_sum); - zipcode.add_value(snarl_code.length); - zipcode.add_value(snarl_code.child_count); - zipcode.add_value(snarl_code.chain_component); - zipcode.add_value(snarl_code.is_reversed); + zipcode.add_value(snarl_code.get_raw_code_type()); + zipcode.add_value(snarl_code.get_raw_prefix_sum()); + zipcode.add_value(snarl_code.get_raw_length()); + zipcode.add_value(snarl_code.get_raw_child_count()); + zipcode.add_value(snarl_code.get_raw_chain_component()); + zipcode.add_value(snarl_code.get_raw_is_reversed()); } else { #ifdef DEBUG_ZIPCODE assert(distance_index.is_snarl(current_ancestor)); #endif snarl_code_t snarl_code = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); - zipcode.add_value(snarl_code.code_type); - zipcode.add_value(snarl_code.prefix_sum); - zipcode.add_value(snarl_code.length); - zipcode.add_value(snarl_code.child_count); - zipcode.add_value(snarl_code.chain_component); - zipcode.add_value(snarl_code.record_offset); - zipcode.add_value(snarl_code.distance_start_left); - zipcode.add_value(snarl_code.distance_end_left); - zipcode.add_value(snarl_code.distance_start_right); - zipcode.add_value(snarl_code.distance_end_right); + zipcode.add_value(snarl_code.get_raw_code_type()); + zipcode.add_value(snarl_code.get_raw_prefix_sum()); + zipcode.add_value(snarl_code.get_raw_length()); + zipcode.add_value(snarl_code.get_raw_child_count()); + zipcode.add_value(snarl_code.get_raw_chain_component()); + 
zipcode.add_value(snarl_code.get_raw_record_offset()); + zipcode.add_value(snarl_code.get_raw_distance_start_left()); + zipcode.add_value(snarl_code.get_raw_distance_end_left()); + zipcode.add_value(snarl_code.get_raw_distance_start_right()); + zipcode.add_value(snarl_code.get_raw_distance_end_right()); } } if (fill_in_decoder) { @@ -935,14 +935,12 @@ node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistance //Node code is: offset in chain, length, is reversed node_code_t node_code; //Assume this node is in a regular chain - node_code.prefix_sum = distance_index.get_prefix_sum_value(node); - node_code.prefix_sum = node_code.prefix_sum == std::numeric_limits::max() ? 0 : node_code.prefix_sum+1; + node_code.set_prefix_sum(distance_index.get_prefix_sum_value(node)); - node_code.length = distance_index.minimum_length(node)+1; + node_code.set_length(distance_index.minimum_length(node)); - node_code.is_reversed = distance_index.is_reversed_in_parent(node); - node_code.chain_component = distance_index.get_chain_component(node); - node_code.chain_component = node_code.chain_component == std::numeric_limits::max() ? 0 : node_code.chain_component; + node_code.set_is_reversed(distance_index.is_reversed_in_parent(node)); + node_code.set_chain_component(distance_index.get_chain_component(node)); return node_code; @@ -950,23 +948,17 @@ node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistance chain_code_t ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { //Chain code is: rank in snarl, length chain_code_t chain_code; - chain_code.snarl_rank_or_identifier = distance_index.get_rank_in_parent(chain); + chain_code.set_snarl_rank_or_identifier(distance_index.get_rank_in_parent(chain)); - chain_code.length = distance_index.minimum_length(chain); - chain_code.length = chain_code.length == std::numeric_limits::max() ? 
0 : chain_code.length+1; + chain_code.set_length(distance_index.minimum_length(chain)); bool is_trivial = distance_index.is_trivial_chain(chain) ; - chain_code.is_looping_chain = is_trivial ? false - : distance_index.is_looping_chain(chain); + bool is_looping_chain(is_trivial ? false : distance_index.is_looping_chain(chain)); size_t component = is_trivial ? 0 : distance_index.get_chain_component(distance_index.get_bound(chain, true, false), true); - component = component == std::numeric_limits::max() ? 0 : component * 2; - if (chain_code.is_looping_chain) { - component += 1; - } - chain_code.last_component = component; + chain_code.set_last_component(component, is_looping_chain); return chain_code; @@ -976,38 +968,32 @@ snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const ne snarl_code_t snarl_code; //Tag to say that it's a regular snarl - snarl_code.code_type = 1; + snarl_code.set_code_type(1); //The number of children size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { child_count++; }); - snarl_code.child_count = child_count; + snarl_code.set_child_count(child_count); //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); - size_t prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); - snarl_code.prefix_sum = prefix_sum == std::numeric_limits::max() ? 0 - : prefix_sum+1; + snarl_code.set_prefix_sum(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); - size_t chain_component = distance_index.get_chain_component(start_node); - snarl_code.chain_component = chain_component == std::numeric_limits::max() ? 
0 - : chain_component; + snarl_code.set_chain_component(distance_index.get_chain_component(start_node)); //Length of the snarl - size_t length = distance_index.minimum_length(snarl); - snarl_code.length = length == std::numeric_limits::max() ? 0 - : length+1; + snarl_code.set_length(distance_index.minimum_length(snarl)); //Is the child of the snarl reversed in the snarl #ifdef DEBUG_ZIPCODE assert(distance_index.is_chain(snarl_child)); #endif - snarl_code.is_reversed = (distance_index.distance_in_parent(snarl, + snarl_code.set_is_reversed((distance_index.distance_in_parent(snarl, distance_index.get_bound(snarl, false, true), - distance_index.flip(distance_index.canonical(snarl_child))) != 0); + distance_index.flip(distance_index.canonical(snarl_child))) != 0)); return snarl_code; @@ -1017,63 +1003,52 @@ snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const snarl_code_t snarl_code; //Tag to say that it's an irregular snarl - snarl_code.code_type = distance_index.is_dag(snarl) ? 0 : 2; + snarl_code.set_code_type(distance_index.is_dag(snarl) ? 0 : 2); //The number of children - snarl_code.child_count = 0; + size_t child_count = 0; distance_index.for_each_child(snarl, [&] (const net_handle_t& child) { - snarl_code.child_count++; + child_count++; }); + snarl_code.set_child_count(child_count); //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); - snarl_code.prefix_sum = SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node)); + snarl_code.set_prefix_sum(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); - snarl_code.prefix_sum = snarl_code.prefix_sum == std::numeric_limits::max() ? 
0 - : snarl_code.prefix_sum + 1; - snarl_code.chain_component = distance_index.get_chain_component(start_node) ; - snarl_code.chain_component = snarl_code.chain_component == std::numeric_limits::max() ? 0 - : snarl_code.chain_component; + snarl_code.set_chain_component(distance_index.get_chain_component(start_node) ); //Length of the snarl - snarl_code.length = distance_index.minimum_length(snarl); - snarl_code.length = snarl_code.length == std::numeric_limits::max() ? 0 - : snarl_code.length+1; + snarl_code.set_length(distance_index.minimum_length(snarl)); //Record offset to look up distances in the index later - snarl_code.record_offset = distance_index.get_record_offset(snarl); - - snarl_code.distance_start_left = distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child)); - snarl_code.distance_end_left = distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child)); - snarl_code.distance_start_right = distance_index.distance_to_parent_bound(snarl, true, snarl_child); - snarl_code.distance_end_right = distance_index.distance_to_parent_bound(snarl, false, snarl_child); - - //Add 1 to values to store inf properly - snarl_code.distance_start_left = - snarl_code.distance_start_left == std::numeric_limits::max() - ? 0 - : snarl_code.distance_start_left + 1; - snarl_code.distance_start_right = - snarl_code.distance_start_right == std::numeric_limits::max() - ? 0 - : snarl_code.distance_start_right + 1; - snarl_code.distance_end_left = - snarl_code.distance_end_left == std::numeric_limits::max() - ? 0 - : snarl_code.distance_end_left + 1; - snarl_code.distance_end_right = - snarl_code.distance_end_right == std::numeric_limits::max() - ? 
0 - : snarl_code.distance_end_right + 1; + snarl_code.set_record_offset(distance_index.get_record_offset(snarl)); + snarl_code.set_distance_start_left(distance_index.distance_to_parent_bound(snarl, true, distance_index.flip(snarl_child))); + snarl_code.set_distance_end_left(distance_index.distance_to_parent_bound(snarl, false, distance_index.flip(snarl_child))); + snarl_code.set_distance_start_right(distance_index.distance_to_parent_bound(snarl, true, snarl_child)); + snarl_code.set_distance_end_right(distance_index.distance_to_parent_bound(snarl, false, snarl_child)); return snarl_code; } +node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { + node_code_t node_code; + if (zipcode_level == 0) { + } else { + + size_t zip_index = decoder[zipcode_level].offset; + size_t zip_value; + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + } + return node_code; + +} + size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit, bool undirected_distance, const HandleGraph* graph){ diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 849fc574cca..77dd078ffa8 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -218,6 +218,18 @@ class ZipCode { inline snarl_code_t get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); + /* Functions to get the values out of the zipcode for one code + The decoded code might not have all the values set*/ + + // Get a node_code_t for the given level + node_code_t unpack_node_code(size_t zipcode_level); + //Return a chain_code_t that will represent the chain in the zip code + //The actual values being stored, not the raw values + chain_code_t unpack_chain_code(size_t zipcode_level); + //Return a vector of size_ts that will represent the snarl in the zip code + snarl_code_t unpack_snarl_code(size_t zipcode_level); + + 
//////////////////////////////// Stuff for decoding the zipcode public: @@ -377,55 +389,235 @@ class ZipCodeCollection { /** An unpacked version of one node code + The values actually stored are the same ones that get stored in the zipcode + This has getters and setters for getting the actual value, + and getters and setters for getting the raw values */ struct node_code_t { + private: size_t prefix_sum ; size_t chain_component : 32; size_t length : 31; bool is_reversed; + + public: + + ////// Raw getters + size_t get_raw_prefix_sum() {return prefix_sum;} + size_t get_raw_chain_component() {return chain_component;} + size_t get_raw_length() {return length;} + bool get_raw_is_reversed() {return is_reversed;} + + ///// Raw setters + void set_raw_prefix_sum(size_t val) {prefix_sum = val;} + void set_raw_chain_component(size_t val) {chain_component = val;} + void set_raw_length(size_t val) {length = val;} + void set_raw_is_reversed(bool val) {is_reversed = val;} + + //// Real value setters + size_t get_prefix_sum() {return prefix_sum == 0 ? numeric_limits::max() : prefix_sum-1;} + size_t get_chain_component() {return chain_component;} + size_t get_length() {return length-1;} + bool get_is_reversed() {return is_reversed;} + + ////Real value getters + void set_prefix_sum(size_t val) {prefix_sum = val == std::numeric_limits::max() ? 0 : val+1;} + void set_chain_component(size_t val) {chain_component = val == std::numeric_limits::max() ? 
0 : val;} + void set_length(size_t val) {length = val+1;} + void set_is_reversed(bool val) {is_reversed = val;} }; /** An unpacked version of one chain code + The values actually stored are the same ones that get stored in the zipcode + This has getters and setters for getting the actual value, + and getters and setters for getting the raw values */ struct chain_code_t { + + private: //The length of the last component of the chain (which may be the whole chain) size_t length; //The rank in the parent snarl or, if it is a root chain, the identifier size_t snarl_rank_or_identifier : 32; + + //This stores the component and is_looping_chain size_t last_component : 16; //For root chain/nodes, a bitvector representing the connectivity size_t connectivity : 4; - bool is_looping_chain; + + public: + size_t get_raw_length() {return length;} + size_t get_raw_snarl_rank_or_identifier() {return snarl_rank_or_identifier;} + size_t get_raw_last_component() {return last_component;} + size_t get_raw_connectivity() {return connectivity;} + void set_raw_length(size_t val) {length = val;} + void set_raw_snarl_rank_or_identifier(size_t val) {snarl_rank_or_identifier = val;} + void set_raw_last_component(size_t val) {last_component = val;} + void set_raw_connectivity (size_t val){connectivity = val;} + + size_t get_length() { + return length == 0 ? std::numeric_limits::max() : length-1; + } + size_t get_snarl_rank_or_identifier() {return snarl_rank_or_identifier;} + size_t get_last_component() { + if (last_component % 2 ) { + return (last_component-1) / 2; + } else { + return last_component / 2; + } + } + + size_t get_connectivity() {return connectivity;} + bool get_is_looping_chain() {return last_component % 2;} + + void set_length(size_t val) { + length = val == std::numeric_limits::max() ? 
0 : val+1; + } + void set_snarl_rank_or_identifier(size_t val) { + snarl_rank_or_identifier = val; + } + void set_last_component(size_t comp, bool loops) { + comp = comp == std::numeric_limits::max() ? 0 : comp*2; + if (loops) { comp ++;} + last_component = comp; + } + void set_connectivity(size_t val) {connectivity = val;} }; /** An unpacked version of one snarl code + The values actually stored are the same ones that get stored in the zipcode + This has getters and setters for getting the actual value, + and getters and setters for getting the raw values */ struct snarl_code_t { - size_t length; - size_t prefix_sum; + private: + size_t length; + size_t prefix_sum; - //distance from the left side of the child to the start of the snarl - //or, for root nodes/chains, start-start connected - //start-right and end-left are the same for root nodes/chains - size_t distance_start_left; - size_t distance_start_right; - size_t distance_end_left; - size_t distance_end_right; + size_t distance_start_left; + size_t distance_start_right; + size_t distance_end_left; + size_t distance_end_right; - size_t record_offset : 32; + size_t record_offset : 32; - size_t child_count : 16; - size_t chain_component : 16; + size_t child_count : 16; + size_t chain_component : 16; - size_t code_type : 4; + size_t code_type : 4; + + bool is_reversed; + + public: + //We use getters and setters to deal with things that are max() but stored as 0 + //and getters and setters for the raw values. 
These are sometimes redundant + + size_t get_raw_length() {return length;} + size_t get_raw_prefix_sum () {return prefix_sum;} + size_t get_raw_distance_start_left () {return distance_start_left;} + size_t get_raw_distance_start_right () {return distance_start_right;} + size_t get_raw_distance_end_left () {return distance_end_left;} + size_t get_raw_distance_end_right () {return distance_end_right;} + size_t get_raw_record_offset () { return record_offset;} + size_t get_raw_child_count() {return child_count;} + size_t get_raw_chain_component() {return chain_component;} + size_t get_raw_code_type() {return code_type;} + bool get_raw_is_reversed() {return is_reversed;} + + void set_raw_length(size_t val) {length = val;} + void set_raw_prefix_sum (size_t val) {prefix_sum = val;} + void set_raw_distance_start_left (size_t val) {distance_start_left = val;} + void set_raw_distance_start_right (size_t val) {distance_start_right = val;} + void set_raw_distance_end_left (size_t val) {distance_end_left = val;} + void set_raw_distance_end_right (size_t val) {distance_end_right = val;} + void set_raw_record_offset (size_t val) { record_offset = val;} + void set_raw_child_count(size_t val) {child_count = val;} + void set_raw_chain_component(size_t val) {chain_component = val;} + void set_raw_code_type(size_t val) {code_type = val;} + void set_raw_is_reversed(bool val) {is_reversed = val;} + + + + //// Getters + size_t get_length() { + return length == 0 ? std::numeric_limits::max() : length-1; + } + size_t get_prefix_sum() { + return prefix_sum == 0 ? std::numeric_limits::max() : prefix_sum-1; + } + + //distance from the left side of the child to the start of the snarl + //or, for root nodes/chains, start-start connected + //start-right and end-left are the same for root nodes/chains + size_t get_distance_start_left() { + return distance_start_left == 0 ? 
std::numeric_limits::max() : distance_start_left-1; + } + size_t get_distance_start_right() { + return distance_start_right == 0 ? std::numeric_limits::max() : distance_start_right-1; + } + size_t get_distance_end_left() { + return distance_end_left == 0 ? std::numeric_limits::max() : distance_end_left-1; + } + size_t get_distance_end_right() { + return distance_end_right == 0 ? std::numeric_limits::max() : distance_end_right-1; + } + + size_t get_record_offset() {return record_offset;} + + size_t get_child_count() {return child_count;} + size_t get_chain_component() {return chain_component;} + + size_t get_code_type() {return code_type;} + + bool get_is_reversed() {return is_reversed;} + + //////// Setters + void set_length(size_t val) { + length = val == std::numeric_limits::max() ? 0 : val+1; + } + void set_prefix_sum(size_t val) { + prefix_sum = val == std::numeric_limits::max() ? 0 : val+1; + } + + void set_distance_start_left(size_t val) { + distance_start_left = val == std::numeric_limits::max() ? 0 : val+1; + } + void set_distance_start_right(size_t val) { + distance_start_right = val == std::numeric_limits::max() ? 0 : val+1; + } + void set_distance_end_left(size_t val) { + distance_end_left = val == std::numeric_limits::max() ? 0 : val+1; + } + void set_distance_end_right(size_t val) { + distance_end_right = val == std::numeric_limits::max() ? 0 : val+1; + } + + void set_record_offset(size_t val) { + record_offset = val; + } + + void set_child_count(size_t val) { + child_count = val; + } + + void set_chain_component(size_t val) { + chain_component = val == std::numeric_limits::max() ? 
0 : val; + } + + void set_code_type(size_t val) { + code_type = val; + } + + void set_is_reversed(bool val) { + is_reversed = val; + } - bool is_reversed; }; From 3cbd7be9cd86225fa3b6da6f436b0195d888b05d Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 21:25:37 +0200 Subject: [PATCH 114/124] Add functions for interpreting one level of the zipcode --- src/zip_code.cpp | 171 +++++++++++++++++++++++++++++++++++++++++++++-- src/zip_code.hpp | 27 ++++---- 2 files changed, 180 insertions(+), 18 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 5ba7b7e3362..740ebc61a28 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -92,7 +92,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p #endif if (distance_index.is_node(current_ancestor)) { node_code_t node_code = get_node_code(current_ancestor, distance_index); - zipcode.add_value(node_code.get_raw_prefix_sum()); + zipcode.add_value(node_code.get_raw_prefix_sum_or_identifier()); zipcode.add_value(node_code.get_raw_length()); zipcode.add_value(node_code.get_raw_is_reversed()); zipcode.add_value(node_code.get_raw_chain_component()); @@ -114,7 +114,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p } else if (distance_index.is_regular_snarl(current_ancestor)) { snarl_code_t snarl_code = get_regular_snarl_code(current_ancestor, ancestors[i-1], distance_index); zipcode.add_value(snarl_code.get_raw_code_type()); - zipcode.add_value(snarl_code.get_raw_prefix_sum()); + zipcode.add_value(snarl_code.get_raw_prefix_sum_or_identifier()); zipcode.add_value(snarl_code.get_raw_length()); zipcode.add_value(snarl_code.get_raw_child_count()); zipcode.add_value(snarl_code.get_raw_chain_component()); @@ -125,7 +125,7 @@ void ZipCode::fill_in_zipcode (const SnarlDistanceIndex& distance_index, const p #endif snarl_code_t snarl_code = get_irregular_snarl_code(current_ancestor, ancestors[i-1], distance_index); 
zipcode.add_value(snarl_code.get_raw_code_type()); - zipcode.add_value(snarl_code.get_raw_prefix_sum()); + zipcode.add_value(snarl_code.get_raw_prefix_sum_or_identifier()); zipcode.add_value(snarl_code.get_raw_length()); zipcode.add_value(snarl_code.get_raw_child_count()); zipcode.add_value(snarl_code.get_raw_chain_component()); @@ -935,7 +935,7 @@ node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistance //Node code is: offset in chain, length, is reversed node_code_t node_code; //Assume this node is in a regular chain - node_code.set_prefix_sum(distance_index.get_prefix_sum_value(node)); + node_code.set_prefix_sum_or_identifier(distance_index.get_prefix_sum_value(node)); node_code.set_length(distance_index.minimum_length(node)); @@ -980,7 +980,7 @@ snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const ne //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); - snarl_code.set_prefix_sum(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); + snarl_code.set_prefix_sum_or_identifier(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); snarl_code.set_chain_component(distance_index.get_chain_component(start_node)); @@ -1015,7 +1015,7 @@ snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const //Chain prefix sum value for the start of the snarl, which is the prefix sum of the start node + length of the start node net_handle_t start_node = distance_index.get_node_from_sentinel(distance_index.get_bound(snarl, false, false)); - snarl_code.set_prefix_sum(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); + 
snarl_code.set_prefix_sum_or_identifier(SnarlDistanceIndex::sum(distance_index.get_prefix_sum_value(start_node), distance_index.minimum_length(start_node))); snarl_code.set_chain_component(distance_index.get_chain_component(start_node) ); @@ -1039,16 +1039,175 @@ snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { node_code_t node_code; if (zipcode_level == 0) { + throw std::runtime_error("error: Unpacking a root node. Use a chain instead"); } else { size_t zip_index = decoder[zipcode_level].offset; size_t zip_value; + //Prefix sum std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + node_code.set_raw_prefix_sum_or_identifier(zip_value); + //Length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + node_code.set_raw_length(zip_value); + + //Is reversed + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + node_code.set_raw_is_reversed(zip_value); + //Chain component + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + node_code.set_raw_chain_component(zip_value); } return node_code; } +chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { + chain_code_t chain_code; + size_t zip_index = decoder[zipcode_level].offset; + size_t zip_value; + if (zipcode_level == 0 && decoder.size() == 1) { + //Root node + //is_chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Identifier + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_snarl_rank_or_identifier(zip_value); + + //Node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_length(zip_value); + + //Connectivity + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_connectivity (zip_value); + + //No component + 
chain_code.set_last_component(0, false); + + } else if (zipcode_level == 0) { + //Root chain + //is_chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Identifier + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_snarl_rank_or_identifier(zip_value); + + //Component count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_last_component(zip_value); + + //Connectivity + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_connectivity (zip_value); + + //No Node length + chain_code.set_length(std::numeric_limits::max()); + } else { + //Nested chain + //Rank in snarl + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_snarl_rank_or_identifier(zip_value); + + //Node length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_length(zip_value); + + + //Component count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + chain_code.set_raw_last_component(0); + + //No connectivity + chain_code.set_connectivity (0); + + } + + return chain_code; +} + +snarl_code_t ZipCode::unpack_snarl_code(size_t zipcode_level) { + snarl_code_t snarl_code; + size_t zip_index = decoder[zipcode_level].offset; + size_t zip_value; + if (zipcode_level == 0) { + //Root snarl + //is_chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + //Identifier + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_prefix_sum_or_identifier(zip_value); + + //Nothing else gets stored so set everything else to inf + snarl_code.set_length(std::numeric_limits::max()); + snarl_code.set_distance_start_left(std::numeric_limits::max()); + snarl_code.set_distance_start_right(std::numeric_limits::max()); + 
snarl_code.set_distance_end_left(std::numeric_limits::max()); + snarl_code.set_distance_end_right(std::numeric_limits::max()); + snarl_code.set_record_offset(std::numeric_limits::max()); + snarl_code.set_child_count(std::numeric_limits::max()); + snarl_code.set_chain_component(std::numeric_limits::max()); + snarl_code.set_code_type(std::numeric_limits::max()); + + } else { + //Nested snarl + + //Snarl is regular + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_code_type(zip_value); + + //Offset in chain + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_prefix_sum_or_identifier(zip_value); + + //Length + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_length(zip_value); + + //Child count + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_child_count(zip_value); + + //Chain component + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_chain_component(zip_value); + + if (snarl_code.get_code_type() == 1) { + //Regular snarl + + //Is-reversed + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_is_reversed(zip_value); + } else { + //Irregular/cyclic snarl + + //Record offset + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_record_offset(zip_value); + + //distance left start + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_distance_start_left(zip_value); + + //distance left end + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_distance_end_left(zip_value); + + //distance right start + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_distance_start_right(zip_value); + + 
//Distance right end + std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); + snarl_code.set_raw_distance_end_right(zip_value); + } + + } + return snarl_code; +} + + + size_t ZipCode::minimum_distance_between(ZipCode& zip1, const pos_t& pos1, ZipCode& zip2, const pos_t& pos2, const SnarlDistanceIndex& distance_index, size_t distance_limit, bool undirected_distance, const HandleGraph* graph){ diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 77dd078ffa8..939909c45da 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -222,6 +222,7 @@ class ZipCode { The decoded code might not have all the values set*/ // Get a node_code_t for the given level + //For a root node, use a chain node_code_t unpack_node_code(size_t zipcode_level); //Return a chain_code_t that will represent the chain in the zip code //The actual values being stored, not the raw values @@ -395,7 +396,8 @@ class ZipCodeCollection { */ struct node_code_t { private: - size_t prefix_sum ; + //Prefix sum for a nested node, address for a root node + size_t prefix_sum_or_identifier ; size_t chain_component : 32; size_t length : 31; bool is_reversed; @@ -403,25 +405,25 @@ struct node_code_t { public: ////// Raw getters - size_t get_raw_prefix_sum() {return prefix_sum;} + size_t get_raw_prefix_sum_or_identifier() {return prefix_sum_or_identifier;} size_t get_raw_chain_component() {return chain_component;} size_t get_raw_length() {return length;} bool get_raw_is_reversed() {return is_reversed;} ///// Raw setters - void set_raw_prefix_sum(size_t val) {prefix_sum = val;} + void set_raw_prefix_sum_or_identifier(size_t val) {prefix_sum_or_identifier = val;} void set_raw_chain_component(size_t val) {chain_component = val;} void set_raw_length(size_t val) {length = val;} void set_raw_is_reversed(bool val) {is_reversed = val;} //// Real value setters - size_t get_prefix_sum() {return prefix_sum == 0 ? 
numeric_limits::max() : prefix_sum-1;} + size_t get_prefix_sum_or_identifier() {return prefix_sum_or_identifier == 0 ? numeric_limits::max() : prefix_sum_or_identifier-1;} size_t get_chain_component() {return chain_component;} size_t get_length() {return length-1;} bool get_is_reversed() {return is_reversed;} ////Real value getters - void set_prefix_sum(size_t val) {prefix_sum = val == std::numeric_limits::max() ? 0 : val+1;} + void set_prefix_sum_or_identifier(size_t val) {prefix_sum_or_identifier = val == std::numeric_limits::max() ? 0 : val+1;} void set_chain_component(size_t val) {chain_component = val == std::numeric_limits::max() ? 0 : val;} void set_length(size_t val) {length = val+1;} void set_is_reversed(bool val) {is_reversed = val;} @@ -498,7 +500,8 @@ struct snarl_code_t { private: size_t length; - size_t prefix_sum; + //Prefix sum for a nested snarl, identifier for a root snarl + size_t prefix_sum_or_identifier; size_t distance_start_left; size_t distance_start_right; @@ -519,7 +522,7 @@ struct snarl_code_t { //and getters and setters for the raw values. 
These are sometimes redundant size_t get_raw_length() {return length;} - size_t get_raw_prefix_sum () {return prefix_sum;} + size_t get_raw_prefix_sum_or_identifier () {return prefix_sum_or_identifier;} size_t get_raw_distance_start_left () {return distance_start_left;} size_t get_raw_distance_start_right () {return distance_start_right;} size_t get_raw_distance_end_left () {return distance_end_left;} @@ -531,7 +534,7 @@ struct snarl_code_t { bool get_raw_is_reversed() {return is_reversed;} void set_raw_length(size_t val) {length = val;} - void set_raw_prefix_sum (size_t val) {prefix_sum = val;} + void set_raw_prefix_sum_or_identifier (size_t val) {prefix_sum_or_identifier = val;} void set_raw_distance_start_left (size_t val) {distance_start_left = val;} void set_raw_distance_start_right (size_t val) {distance_start_right = val;} void set_raw_distance_end_left (size_t val) {distance_end_left = val;} @@ -548,8 +551,8 @@ struct snarl_code_t { size_t get_length() { return length == 0 ? std::numeric_limits::max() : length-1; } - size_t get_prefix_sum() { - return prefix_sum == 0 ? std::numeric_limits::max() : prefix_sum-1; + size_t get_prefix_sum_or_identifier() { + return prefix_sum_or_identifier == 0 ? std::numeric_limits::max() : prefix_sum_or_identifier-1; } //distance from the left side of the child to the start of the snarl @@ -581,8 +584,8 @@ struct snarl_code_t { void set_length(size_t val) { length = val == std::numeric_limits::max() ? 0 : val+1; } - void set_prefix_sum(size_t val) { - prefix_sum = val == std::numeric_limits::max() ? 0 : val+1; + void set_prefix_sum_or_identifier(size_t val) { + prefix_sum_or_identifier = val == std::numeric_limits::max() ? 
0 : val+1; } void set_distance_start_left(size_t val) { From 72025a02f31bdd6cf373e477b6a8f1990f8a0c00 Mon Sep 17 00:00:00 2001 From: Xian Date: Tue, 13 Aug 2024 22:15:21 +0200 Subject: [PATCH 115/124] Add unit tests for unpacked codes --- src/unittest/zip_code.cpp | 37 +++++++++++++++++++++++++++++++++++++ src/zip_code.cpp | 16 ++++++++-------- src/zip_code.hpp | 17 ++++++++++------- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/src/unittest/zip_code.cpp b/src/unittest/zip_code.cpp index c42ea1086a1..6e6344a4105 100644 --- a/src/unittest/zip_code.cpp +++ b/src/unittest/zip_code.cpp @@ -81,6 +81,15 @@ using namespace std; distance_index) == 3); } + SECTION("unpacked root node") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); + + ZipCode::chain_code_t unpacked_chain = zipcode.unpack_chain_code(0); + REQUIRE(unpacked_chain.get_snarl_rank_or_identifier() == 0); + REQUIRE(unpacked_chain.get_length() == 11); + REQUIRE(unpacked_chain.get_connectivity() == 0); + } } TEST_CASE("Simple chain zipcode", "[zipcode]") { //Snarl 1-3, snarl 3-6 @@ -279,6 +288,34 @@ using namespace std; REQUIRE(zipcode.get_code_type(2) == ZipCode::CHAIN); REQUIRE(zipcode.get_is_reversed_in_parent(2) == is_rev); } + SECTION ("unpacked zip code for node in simple snarl") { + ZipCode zipcode; + zipcode.fill_in_zipcode(distance_index, make_pos_t(n4->id(), 0, false)); + + + net_handle_t chain4 = distance_index.get_parent(distance_index.get_node_net_handle(n4->id())); + net_handle_t snarl36 = distance_index.get_parent(chain4); + net_handle_t chain1 = distance_index.get_parent(snarl36); + + + ZipCode::chain_code_t chain_code = zipcode.unpack_chain_code(0); + REQUIRE(chain_code.get_snarl_rank_or_identifier() == 0); + + ZipCode::snarl_code_t snarl_code = zipcode.unpack_snarl_code(1); + //values for the snarl + REQUIRE(snarl_code.get_length() == distance_index.minimum_length(snarl36)); + REQUIRE(snarl_code.get_prefix_sum_or_identifier() == 
(chain_is_reversed ? 5 : 6)); + REQUIRE(snarl_code.get_code_type() == 1); + bool is_rev = distance_index.distance_in_parent(snarl36, distance_index.get_bound(snarl36, false, true), + distance_index.flip(chain4)) != 0; + REQUIRE(snarl_code.get_is_reversed() == is_rev); + + + ZipCode::chain_code_t node_code = zipcode.unpack_chain_code(2); + //values for the chain + REQUIRE(node_code.get_length() == distance_index.minimum_length(chain4)); + REQUIRE(node_code.get_snarl_rank_or_identifier() == distance_index.get_rank_in_parent(chain4)); + } SECTION("Distances") { ZipCode zip1; zip1.fill_in_zipcode(distance_index, make_pos_t(n1->id(), 0, false)); diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 740ebc61a28..845c8c6f3fc 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -927,7 +927,7 @@ std::ostream& operator<<(std::ostream& out, const ZipCode& zip) { } -node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { +ZipCode::node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistanceIndex& distance_index) { #ifdef DEBUG_ZIPCODE assert(!distance_index.is_trivial_chain(node)); assert((distance_index.is_chain(distance_index.get_parent(node)) || distance_index.is_root(distance_index.get_parent(node)))); @@ -945,7 +945,7 @@ node_code_t ZipCode::get_node_code(const net_handle_t& node, const SnarlDistance return node_code; } -chain_code_t ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { +ZipCode::chain_code_t ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDistanceIndex& distance_index) { //Chain code is: rank in snarl, length chain_code_t chain_code; chain_code.set_snarl_rank_or_identifier(distance_index.get_rank_in_parent(chain)); @@ -963,7 +963,7 @@ chain_code_t ZipCode::get_chain_code(const net_handle_t& chain, const SnarlDista return chain_code; } -snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& 
snarl_child, const SnarlDistanceIndex& distance_index) { +ZipCode::snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { //Regular snarl code is 1, offset in chain, length, is reversed snarl_code_t snarl_code; @@ -998,7 +998,7 @@ snarl_code_t ZipCode::get_regular_snarl_code(const net_handle_t& snarl, const ne return snarl_code; } -snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, +ZipCode::snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index) { snarl_code_t snarl_code; @@ -1036,7 +1036,7 @@ snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snarl, const return snarl_code; } -node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { +ZipCode::node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { node_code_t node_code; if (zipcode_level == 0) { throw std::runtime_error("error: Unpacking a root node. 
Use a chain instead"); @@ -1062,7 +1062,7 @@ node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { } -chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { +ZipCode::chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { chain_code_t chain_code; size_t zip_index = decoder[zipcode_level].offset; size_t zip_value; @@ -1116,7 +1116,7 @@ chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { //Component count std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - chain_code.set_raw_last_component(0); + chain_code.set_raw_last_component(zip_value); //No connectivity chain_code.set_connectivity (0); @@ -1126,7 +1126,7 @@ chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { return chain_code; } -snarl_code_t ZipCode::unpack_snarl_code(size_t zipcode_level) { +ZipCode::snarl_code_t ZipCode::unpack_snarl_code(size_t zipcode_level) { snarl_code_t snarl_code; size_t zip_index = decoder[zipcode_level].offset; size_t zip_value; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 939909c45da..279f7ec2014 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -40,10 +40,6 @@ struct MIPayload; /// It should be unique and hashable typedef std::string net_identifier_t; -///A struct to store an unpacked version of one node/snarl/chain code -struct node_code_t; -struct chain_code_t; -struct snarl_code_t; /* Zip codes store the snarl decomposition location and distance information for a position on a graph * A zip code will contain all the information necessary to compute the minimum distance between two @@ -51,6 +47,12 @@ struct snarl_code_t; */ class ZipCode { + ///structs to store an unpacked version of one node/snarl/chain code + public: + struct node_code_t; + struct chain_code_t; + struct snarl_code_t; + /// The type of codes that can be stored in the zipcode /// Trivial chains that are children of snarls get saved as a chain with no child node @@ -217,6 +219,7 @@ class ZipCode { //Return a vector of 
size_ts that will represent the snarl in the zip code inline snarl_code_t get_irregular_snarl_code(const net_handle_t& snarl, const net_handle_t& snarl_child, const SnarlDistanceIndex& distance_index); + public: /* Functions to get the values out of the zipcode for one code The decoded code might not have all the values set*/ @@ -394,7 +397,7 @@ class ZipCodeCollection { This has getters and setters for getting the actual value, and getters and setters for getting the raw values */ -struct node_code_t { +struct ZipCode::node_code_t { private: //Prefix sum for a nested node, address for a root node size_t prefix_sum_or_identifier ; @@ -435,7 +438,7 @@ struct node_code_t { This has getters and setters for getting the actual value, and getters and setters for getting the raw values */ -struct chain_code_t { +struct ZipCode::chain_code_t { private: @@ -496,7 +499,7 @@ struct chain_code_t { This has getters and setters for getting the actual value, and getters and setters for getting the raw values */ -struct snarl_code_t { +struct ZipCode::snarl_code_t { private: size_t length; From 0356878ee83989339cc2b92e64c3ca1184e50dfb Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 12:09:32 +0200 Subject: [PATCH 116/124] Make unpacking const --- src/zip_code.cpp | 6 +++--- src/zip_code.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 1e78bd1b79b..358dd9f4c32 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -1051,7 +1051,7 @@ ZipCode::snarl_code_t ZipCode::get_irregular_snarl_code(const net_handle_t& snar return snarl_code; } -ZipCode::node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { +ZipCode::node_code_t ZipCode::unpack_node_code(size_t zipcode_level) const { node_code_t node_code; if (zipcode_level == 0) { throw std::runtime_error("error: Unpacking a root node. 
Use a chain instead"); @@ -1077,7 +1077,7 @@ ZipCode::node_code_t ZipCode::unpack_node_code(size_t zipcode_level) { } -ZipCode::chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { +ZipCode::chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) const { chain_code_t chain_code; size_t zip_index = decoder[zipcode_level].offset; size_t zip_value; @@ -1141,7 +1141,7 @@ ZipCode::chain_code_t ZipCode::unpack_chain_code(size_t zipcode_level) { return chain_code; } -ZipCode::snarl_code_t ZipCode::unpack_snarl_code(size_t zipcode_level) { +ZipCode::snarl_code_t ZipCode::unpack_snarl_code(size_t zipcode_level) const { snarl_code_t snarl_code; size_t zip_index = decoder[zipcode_level].offset; size_t zip_value; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 8e5071b0d0a..fc1cb7ac809 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -228,12 +228,12 @@ class ZipCode { // Get a node_code_t for the given level //For a root node, use a chain - node_code_t unpack_node_code(size_t zipcode_level); + node_code_t unpack_node_code(size_t zipcode_level) const; //Return a chain_code_t that will represent the chain in the zip code //The actual values being stored, not the raw values - chain_code_t unpack_chain_code(size_t zipcode_level); + chain_code_t unpack_chain_code(size_t zipcode_level) const; //Return a vector of size_ts that will represent the snarl in the zip code - snarl_code_t unpack_snarl_code(size_t zipcode_level); + snarl_code_t unpack_snarl_code(size_t zipcode_level) const; //////////////////////////////// Stuff for decoding the zipcode From dca72e77d1f867090dfa708e2e73f661f604e6e3 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 12:09:42 +0200 Subject: [PATCH 117/124] Used unpacked zipcode for getting chain values --- src/snarl_seed_clusterer.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 3f29ca1708a..a104490b7a8 100644 --- 
a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -294,10 +294,11 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a chain void set_chain_values(const SnarlDistanceIndex& distance_index) { - is_looping_chain = seed->seed->zipcode.get_is_looping_chain(zipcode_depth); + ZipCode::chain_code_t chain_code = seed->seed->zipcode.unpack_chain_code(zipcode_depth); + is_looping_chain = chain_code.get_is_looping_chain(); node_length = zipcode_depth == 0 ? distance_index.chain_minimum_length(containing_net_handle) - : seed->seed->zipcode.get_length(zipcode_depth, &distance_index, true); - chain_component_end = seed->seed->zipcode.get_last_chain_component(zipcode_depth, true); + : chain_code.get_length(); + chain_component_end = chain_code.get_last_component(); is_reversed_in_parent = seed->seed->zipcode.get_is_reversed_in_parent(zipcode_depth); } From 01e7b4d3741bc6c4c231cd03492e719bbd0e0185 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 13:26:30 +0200 Subject: [PATCH 118/124] Use unpacked zipcode for getting snarl values and fix bug I missed finding the minimum distance --- src/snarl_seed_clusterer.cpp | 10 ++++++---- src/snarl_seed_clusterer.hpp | 15 ++++++++------- 2 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index bd7d0bae16d..06bb335fa6a 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -716,7 +716,7 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { cerr << "Should be: " << distance_index.net_handle_as_string(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle))) << endl; - assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == parent); + 
assert(distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) == distance_index.start_end_traversal_of(parent)); } #endif ZipCode::code_type_t parent_type = chain_problem->zipcode_depth == 0 @@ -1698,9 +1698,10 @@ void SnarlDistanceIndexClusterer::cluster_one_snarl(ClusteringProblem& clusterin } } else { if (child_problem.is_reversed_in_parent) { + size_t old_best_right = snarl_problem->read_best_right.second; snarl_problem->read_best_right.second = std::min(snarl_problem->read_best_left.second, child_problem.read_best_left.second); - snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_right.second, + snarl_problem->read_best_left.second = std::min(old_best_right, child_problem.read_best_right.second); } else { snarl_problem->read_best_left.second = std::min(snarl_problem->read_best_left.second, @@ -2658,6 +2659,7 @@ void SnarlDistanceIndexClusterer::add_snarl_to_chain_problem(ClusteringProblem& net_handle_t& chain_handle = chain_problem->containing_net_handle; SnarlTreeNodeProblem& child_problem = clustering_problem.all_node_problems.at( clustering_problem.net_handle_to_node_problem_index.at(current_child.net_handle)); + //Skip this child if its seeds are all too far away bool skip_snarl = false; @@ -2802,8 +2804,8 @@ cerr << "\tDistance to get to the end of the chain: " << distance_from_current_e size_t read_num = cluster_head.first; pair dists (clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_left, clustering_problem.all_seeds->at(read_num)->at(cluster_head.second).distance_right); - size_t dist_left = child_problem.is_reversed_in_parent ? dists.second : dists.first; - size_t dist_right = child_problem.is_reversed_in_parent ? dists.first : dists.second; + size_t dist_left = child_is_reversed ? dists.second : dists.first; + size_t dist_right = child_is_reversed ? 
dists.first : dists.second; //Distances to the start of the chain, and the end of this node //If this is the last thing in the chain, then the distance to the end of the chain diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index a104490b7a8..939c73cabaa 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -304,20 +304,21 @@ class SnarlDistanceIndexClusterer { //Set the values needed to cluster a snarl void set_snarl_values(const SnarlDistanceIndex& distance_index) { - node_length = seed->seed->zipcode.get_length(zipcode_depth, &distance_index); - net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); - net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); - chain_component_start = seed->seed->zipcode.get_chain_component(zipcode_depth); + ZipCode::snarl_code_t snarl_code = seed->seed->zipcode.unpack_snarl_code(zipcode_depth); + node_length = snarl_code.get_length(); + chain_component_start = snarl_code.get_chain_component(); chain_component_end = node_length == std::numeric_limits::max() ? 
chain_component_start+1 : chain_component_start; - prefix_sum_value = SnarlDistanceIndex::sum( - distance_index.get_prefix_sum_value(start_in), - distance_index.minimum_length(start_in)); + prefix_sum_value = snarl_code.get_prefix_sum_or_identifier(); + + net_handle_t start_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, false, true)); + net_handle_t end_in = distance_index.get_node_from_sentinel(distance_index.get_bound(containing_net_handle, true, true)); loop_right = SnarlDistanceIndex::sum(distance_index.get_forward_loop_value(end_in), 2*distance_index.minimum_length(end_in)); //Distance to go backward in the chain and back loop_left = SnarlDistanceIndex::sum(distance_index.get_reverse_loop_value(start_in), 2*distance_index.minimum_length(start_in)); + is_reversed_in_parent = false; } From 62f6c38afbbe6a551696b22284c6a7d5ac5390e4 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 15:06:32 +0200 Subject: [PATCH 119/124] Get parent with distance index if its faster --- src/snarl_seed_clusterer.cpp | 9 +++++++-- src/snarl_seed_clusterer.hpp | 1 - src/zip_code.cpp | 8 +++++++- src/zip_code.hpp | 3 ++- 4 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 06bb335fa6a..2ae6308f649 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -637,7 +637,10 @@ void SnarlDistanceIndexClusterer::cluster_snarl_level(ClusteringProblem& cluster net_handle_t snarl_parent = snarl_problem->has_parent_handle ? 
snarl_problem->parent_net_handle - : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), snarl_problem->zipcode_depth-1, &distance_index)); + : distance_index.start_end_traversal_of(snarl_problem->seed->seed->zipcode.get_net_handle_slow(id(snarl_problem->seed->seed->pos), + snarl_problem->zipcode_depth-1, + &distance_index, + &(snarl_problem->containing_net_handle))); bool new_parent = false; if (clustering_problem.net_handle_to_node_problem_index.count(snarl_parent) == 0) { new_parent = true; @@ -711,7 +714,9 @@ void SnarlDistanceIndexClusterer::cluster_chain_level(ClusteringProblem& cluster ? chain_problem->parent_net_handle : (chain_problem->zipcode_depth == 0 ? distance_index.get_root() - : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos),chain_problem->zipcode_depth-1, &distance_index))); + : distance_index.start_end_traversal_of(chain_problem->seed->seed->zipcode.get_net_handle_slow(id(chain_problem->seed->seed->pos), + chain_problem->zipcode_depth-1, &distance_index, + &(chain_problem->containing_net_handle)))); #ifdef DEBUG_CLUSTER cerr << "Chain parent: " << distance_index.net_handle_as_string(parent) << endl; if ((distance_index.start_end_traversal_of(distance_index.get_parent(chain_handle)) != parent)) { diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 939c73cabaa..4166c57fb63 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -318,7 +318,6 @@ class SnarlDistanceIndexClusterer { //Distance to go backward in the chain and back loop_left = SnarlDistanceIndex::sum(distance_index.get_reverse_loop_value(start_in), 2*distance_index.minimum_length(start_in)); - is_reversed_in_parent = false; } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 358dd9f4c32..3d5b15d99c3 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -714,7 +714,7 @@ 
net_handle_t ZipCode::get_net_handle(const size_t& depth, const SnarlDistanceInd } } -net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const { +net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child_handle) const { //This is just copying get_net_handle except adding a slower version for the things we don't remember if (depth == 0) { @@ -728,6 +728,9 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } else if (decoder[depth].is_chain) { //If this is a chain/node + if (child_handle != nullptr) { + return distance_index->get_parent(*child_handle); + } net_handle_t n = distance_index->get_node_net_handle(id); for (size_t d = max_depth() ; d > depth ; d--) { @@ -748,6 +751,9 @@ net_handle_t ZipCode::get_net_handle_slow(nid_t id, const size_t& depth, const S } if (zip_value == 1) { //If this is a regular snarl + if (child_handle != nullptr) { + return distance_index->get_parent(*child_handle); + } net_handle_t n = distance_index->get_node_net_handle(id); for (size_t d = max_depth() ; d > depth ; d--) { diff --git a/src/zip_code.hpp b/src/zip_code.hpp index fc1cb7ac809..f8d7095844b 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -315,7 +315,8 @@ class ZipCode { ///Get the handle of the thing at the given depth. This can be used for anything but is slow, /// even for roots and irregular/cyclic snarls. 
It's a separate function to make sure I /// remember that it's slow - net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index) const; + ///If the child handle is given, get the net handle as the parent of the child, if the address isn't stored + net_handle_t get_net_handle_slow(nid_t id, const size_t& depth, const SnarlDistanceIndex* distance_index, const net_handle_t* child_handle=nullptr) const; ///Get the information that was stored to get the address in the distance index ///This is the connected component number for a root structure, or the address of From b0c02343e978b063c8a9ef018f397c724e583de0 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 15:30:37 +0200 Subject: [PATCH 120/124] Make a map from connected component number to net handle --- src/snarl_seed_clusterer.cpp | 10 ++++++++-- src/zip_code.cpp | 36 +++++++++++++++++++++++++++++------- src/zip_code.hpp | 3 ++- 3 files changed, 39 insertions(+), 10 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 2ae6308f649..f3c3868ee0c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -29,13 +29,16 @@ vector SnarlDistanceIndexClusterer::cluste //Wrapper for single ended vector seed_caches(seeds.size()); + + //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index + hash_map component_to_net_handle; for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER assert (seeds[i].zipcode.byte_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index, &component_to_net_handle); } } vector*> all_seed_caches = {&seed_caches}; @@ -70,6 +73,9 @@ vector> 
SnarlDistanceIndexClusterer vector> all_seed_caches; all_seed_caches.reserve(all_seeds.size()); + //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index + hash_map component_to_net_handle; + for (size_t read_num = 0 ; read_num < all_seeds.size() ; read_num++) { all_seed_caches.emplace_back(all_seeds[read_num].size()); for (size_t i = 0 ; i < all_seeds[read_num].size() ; i++) { @@ -79,7 +85,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index, &component_to_net_handle); } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 3d5b15d99c3..53717f03472 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2201,7 +2201,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const { +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle) const { MIPayload payload; if (decoder_length() == 1) { @@ -2216,9 +2216,16 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); //root_identifier std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); - payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), + if (component_to_net_handle!= nullptr && component_to_net_handle->count(zip_value)) { + 
payload.node_handle = component_to_net_handle->at(zip_value); + } else { + payload.node_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE); + if (component_to_net_handle!= nullptr) { + component_to_net_handle->emplace(zip_value, payload.node_handle); + } + } //Root node length std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -2247,7 +2254,14 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& if (decoder_length() == 2) { //If the node is a child of the root chain - payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + if (component_to_net_handle!= nullptr && component_to_net_handle->count(zip_value)) { + payload.parent_handle = component_to_net_handle->at(zip_value); + } else { + payload.parent_handle = distance_index.start_end_traversal_of(distance_index.get_handle_from_connected_component(zip_value)); + if (component_to_net_handle!= nullptr) { + component_to_net_handle->emplace(zip_value, payload.parent_handle); + } + } payload.parent_type = ZipCode::ROOT_CHAIN; payload.parent_is_root = true; std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); @@ -2298,10 +2312,18 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& //Identifier for root snarl std::tie(zip_value, zip_index) = zipcode.get_value_and_next_index(zip_index); payload.node_handle = payload.parent_handle; - payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); - payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, - SnarlDistanceIndex::START_END, - SnarlDistanceIndex::ROOT_HANDLE); + if (component_to_net_handle!= nullptr && 
component_to_net_handle->count(zip_value)) { + payload.parent_handle = component_to_net_handle->at(zip_value); + payload.parent_record_offset = distance_index.get_record_offset(payload.parent_handle); + } else { + payload.parent_record_offset = distance_index.get_record_offset(distance_index.get_handle_from_connected_component(zip_value)); + payload.parent_handle = distance_index.get_net_handle_from_values(payload.parent_record_offset, + SnarlDistanceIndex::START_END, + SnarlDistanceIndex::ROOT_HANDLE); + if (component_to_net_handle!= nullptr) { + component_to_net_handle->emplace(zip_value, payload.parent_handle); + } + } payload.parent_type = ZipCode::ROOT_SNARL; } else { zip_index = decoder[max_depth()-1].offset; diff --git a/src/zip_code.hpp b/src/zip_code.hpp index f8d7095844b..23b73c9dc5d 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -347,7 +347,8 @@ class ZipCode { //TODO: I want to make a struct for holding all values of a code as real values ///Fill in a payload with values from the zipcode - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index) const; + ///Remember how to get the net handle from the connected component number. + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle=nullptr) const; /// Get an identifier for the snarl tree node at this depth. 
If the snarl tree node at this depth /// would be the node, also include the node id From a4410a98d84b6022a312ef01a67cbe30bb5597af Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 15:38:33 +0200 Subject: [PATCH 121/124] Make map from node id to net handle --- src/snarl_seed_clusterer.cpp | 6 ++++-- src/zip_code.cpp | 21 ++++++++++++++++++--- src/zip_code.hpp | 2 +- 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index f3c3868ee0c..0a89673c557 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -32,13 +32,14 @@ vector SnarlDistanceIndexClusterer::cluste //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index hash_map component_to_net_handle; + hash_map id_to_net_handle; for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER assert (seeds[i].zipcode.byte_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index, &component_to_net_handle); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index, &component_to_net_handle, &id_to_net_handle); } } vector*> all_seed_caches = {&seed_caches}; @@ -75,6 +76,7 @@ vector> SnarlDistanceIndexClusterer //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index hash_map component_to_net_handle; + hash_map id_to_net_handle; for (size_t read_num = 0 ; read_num < all_seeds.size() ; read_num++) { all_seed_caches.emplace_back(all_seeds[read_num].size()); @@ -85,7 +87,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = 
all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index, &component_to_net_handle); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index, &component_to_net_handle, &id_to_net_handle); } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 53717f03472..5b6215f8b81 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2201,7 +2201,8 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle) const { +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle, + hash_map* id_to_net_handle) const { MIPayload payload; if (decoder_length() == 1) { @@ -2239,7 +2240,14 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else if (decoder[max_depth() - 1].is_chain) { //If the parent is a chain - payload.node_handle = distance_index.get_node_net_handle(id); + if (id_to_net_handle != nullptr && id_to_net_handle->count(id) != 0) { + payload.node_handle = id_to_net_handle->at(id); + } else { + payload.node_handle = distance_index.get_node_net_handle(id); + if (id_to_net_handle != nullptr) { + id_to_net_handle->emplace(id, payload.node_handle); + } + } payload.parent_is_chain = true; payload.parent_is_root = false; @@ -2293,7 +2301,14 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else { //If the node is a child of a snarl - payload.node_handle = distance_index.get_node_net_handle(id); + if (id_to_net_handle != nullptr && id_to_net_handle->count(id) != 0) { + payload.node_handle = id_to_net_handle->at(id); + } else { + payload.node_handle = distance_index.get_node_net_handle(id); + if (id_to_net_handle != nullptr) { + id_to_net_handle->emplace(id, 
payload.node_handle); + } + } payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE, diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 23b73c9dc5d..51709b6cbf0 100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -348,7 +348,7 @@ class ZipCode { ///Fill in a payload with values from the zipcode ///Remember how to get the net handle from the connected component number. - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle=nullptr) const; + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle=nullptr, hash_map* id_to_net_handle=nullptr) const; /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth /// would be the node, also include the node id From 45cba1435a5214deddda0081727fdf6da4b7d393 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 16:45:55 +0200 Subject: [PATCH 122/124] Revert "Make map from node id to net handle" This reverts commit a4410a98d84b6022a312ef01a67cbe30bb5597af. 
--- src/snarl_seed_clusterer.cpp | 6 ++---- src/zip_code.cpp | 21 +++------------------ src/zip_code.hpp | 2 +- 3 files changed, 6 insertions(+), 23 deletions(-) diff --git a/src/snarl_seed_clusterer.cpp b/src/snarl_seed_clusterer.cpp index 0a89673c557..f3c3868ee0c 100644 --- a/src/snarl_seed_clusterer.cpp +++ b/src/snarl_seed_clusterer.cpp @@ -32,14 +32,13 @@ vector SnarlDistanceIndexClusterer::cluste //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index hash_map component_to_net_handle; - hash_map id_to_net_handle; for (size_t i = 0 ; i < seeds.size() ; i++) { #ifdef DEBUG_CLUSTER assert (seeds[i].zipcode.byte_count() != 0) ; #endif seed_caches[i].seed = &(seeds[i]); if (seeds[i].zipcode.byte_count() != 0) { - seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index, &component_to_net_handle, &id_to_net_handle); + seed_caches[i].payload = seeds[i].zipcode.get_payload_from_zipcode(id(seeds[i].pos), distance_index, &component_to_net_handle); } } vector*> all_seed_caches = {&seed_caches}; @@ -76,7 +75,6 @@ vector> SnarlDistanceIndexClusterer //Remember how to get the net handle from the connected component number so we don't need to look it up in the distance index hash_map component_to_net_handle; - hash_map id_to_net_handle; for (size_t read_num = 0 ; read_num < all_seeds.size() ; read_num++) { all_seed_caches.emplace_back(all_seeds[read_num].size()); @@ -87,7 +85,7 @@ vector> SnarlDistanceIndexClusterer #endif all_seed_caches[read_num][i].seed = &(all_seeds[read_num][i]); if (all_seeds[read_num][i].zipcode.byte_count() != 0) { - all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), distance_index, &component_to_net_handle, &id_to_net_handle); + all_seed_caches[read_num][i].payload = all_seeds[read_num][i].zipcode.get_payload_from_zipcode(id(all_seeds[read_num][i].pos), 
distance_index, &component_to_net_handle); } } } diff --git a/src/zip_code.cpp b/src/zip_code.cpp index 5b6215f8b81..53717f03472 100644 --- a/src/zip_code.cpp +++ b/src/zip_code.cpp @@ -2201,8 +2201,7 @@ void ZipCodeCollection::deserialize(std::istream& in) { } } -MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle, - hash_map* id_to_net_handle) const { +MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle) const { MIPayload payload; if (decoder_length() == 1) { @@ -2240,14 +2239,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else if (decoder[max_depth() - 1].is_chain) { //If the parent is a chain - if (id_to_net_handle != nullptr && id_to_net_handle->count(id) != 0) { - payload.node_handle = id_to_net_handle->at(id); - } else { - payload.node_handle = distance_index.get_node_net_handle(id); - if (id_to_net_handle != nullptr) { - id_to_net_handle->emplace(id, payload.node_handle); - } - } + payload.node_handle = distance_index.get_node_net_handle(id); payload.parent_is_chain = true; payload.parent_is_root = false; @@ -2301,14 +2293,7 @@ MIPayload ZipCode::get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& } else { //If the node is a child of a snarl - if (id_to_net_handle != nullptr && id_to_net_handle->count(id) != 0) { - payload.node_handle = id_to_net_handle->at(id); - } else { - payload.node_handle = distance_index.get_node_net_handle(id); - if (id_to_net_handle != nullptr) { - id_to_net_handle->emplace(id, payload.node_handle); - } - } + payload.node_handle = distance_index.get_node_net_handle(id); payload.parent_handle = distance_index.get_net_handle_from_values(distance_index.get_record_offset(payload.node_handle), SnarlDistanceIndex::START_END, SnarlDistanceIndex::CHAIN_HANDLE, diff --git a/src/zip_code.hpp b/src/zip_code.hpp index 51709b6cbf0..23b73c9dc5d 
100644 --- a/src/zip_code.hpp +++ b/src/zip_code.hpp @@ -348,7 +348,7 @@ class ZipCode { ///Fill in a payload with values from the zipcode ///Remember how to get the net handle from the connected component number. - MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle=nullptr, hash_map* id_to_net_handle=nullptr) const; + MIPayload get_payload_from_zipcode(nid_t id, const SnarlDistanceIndex& distance_index, hash_map* component_to_net_handle=nullptr) const; /// Get an identifier for the snarl tree node at this depth. If the snarl tree node at this depth /// would be the node, also include the node id From 17120fbcc2d4c9dbda1f234f1cc3742d24d73f38 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 19:04:16 +0200 Subject: [PATCH 123/124] Reserve memory for children --- src/snarl_seed_clusterer.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 4166c57fb63..1f08043e830 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -276,6 +276,7 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); + children.reserve(seed_count); } //Constructor for a node or trivial chain, used to remember information from the cache SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, @@ -290,6 +291,7 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); + children.reserve(seed_count); } //Set the values needed to cluster a chain From b0f1f7088aef8feb5377277dd592065e1913f962 Mon Sep 17 00:00:00 2001 From: Xian Date: Wed, 14 Aug 2024 21:29:57 +0200 Subject: [PATCH 124/124] Revert "Reserve memory for children" This reverts commit 17120fbcc2d4c9dbda1f234f1cc3742d24d73f38. 
--- src/snarl_seed_clusterer.hpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/snarl_seed_clusterer.hpp b/src/snarl_seed_clusterer.hpp index 1f08043e830..4166c57fb63 100644 --- a/src/snarl_seed_clusterer.hpp +++ b/src/snarl_seed_clusterer.hpp @@ -276,7 +276,6 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); - children.reserve(seed_count); } //Constructor for a node or trivial chain, used to remember information from the cache SnarlTreeNodeProblem( net_handle_t net, size_t read_count, size_t seed_count, bool is_reversed_in_parent, @@ -291,7 +290,6 @@ class SnarlDistanceIndexClusterer { seed(seed), zipcode_depth(zipcode_depth) { read_cluster_heads.reserve(seed_count); - children.reserve(seed_count); } //Set the values needed to cluster a chain