diff --git a/Cargo.lock b/Cargo.lock index ad8a5b4..8f841c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,74 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + [[package]] name = "general-sam" version = "0.2.0" +dependencies = [ + "rand", +] + +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "libc" +version = "0.2.149" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a08173bc88b7955d1b3145aa561539096c421ac8debde8cbc3612ec635fee29b" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
diff --git a/Cargo.toml b/Cargo.toml
index 96d5f6c..2bb03a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -14,3 +14,6 @@ exclude = ["release-plz.toml", "cliff.tolm"]
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 [lib]
 name = "general_sam"
+
+[dev-dependencies]
+rand = "0.8.5"
diff --git a/src/tests.rs b/src/tests.rs
index fbeab67..8aa703d 100644
--- a/src/tests.rs
+++ b/src/tests.rs
@@ -1,4 +1,10 @@
-use crate::{sam::GeneralSAM, trie::Trie};
+use rand::{
+    distributions::{Alphanumeric, DistString},
+    rngs::StdRng,
+    Rng, SeedableRng,
+};
+
+use crate::{sam::GeneralSAM, trie::Trie, SAM_ROOT_NODE_ID};
 
 #[test]
 fn test_example_from_chars() {
@@ -165,3 +171,76 @@ fn test_simple_trie_suffix() {
     let vocab = ["ac", "bb", "b", "cc", "aabb", "a", "ba", "c", "aa"];
     test_trie_suffix(&vocab);
 }
+
+/// Randomized check of the order returned by
+/// `GeneralSAM::get_topo_and_suf_len_sorted_node_ids`.
+///
+/// Builds 10000 tries from seeded pseudo-random alphanumeric strings, constructs
+/// a SAM from each, and asserts that the returned node order is non-decreasing
+/// in `max_suffix_len`, places every node before the targets of its transitions,
+/// and places every non-root node after its suffix parent.
+#[test]
+fn test_topo_and_suf_len_sorted_order() {
+    // A fixed seed keeps the generated cases reproducible across runs.
+    let mut rng = StdRng::seed_from_u64(1134759173975);
+    for _ in 0..10000 {
+        let mut trie = Trie::default();
+        for _ in 0..rng.gen_range(0..32) {
+            let len = rng.gen_range(0..9);
+            let string = Alphanumeric.sample_string(&mut rng, len);
+            trie.insert_ref_iter(string.as_bytes().iter());
+        }
+
+        let sam: GeneralSAM = GeneralSAM::construct_from_trie(trie.get_root_state());
+
+        let order = sam.get_topo_and_suf_len_sorted_node_ids();
+
+        // rank[id] is the position of node `id` within `order`.
+        let mut rank = vec![0; sam.num_of_nodes()];
+        for (pos, node_id) in order.iter().enumerate() {
+            rank[*node_id] = pos;
+        }
+
+        // verify that max suffix lengths are sorted; `windows(2)` yields nothing
+        // for fewer than two nodes, so there is no `len() - 1` underflow
+        for pair in order.windows(2) {
+            assert!(
+                sam.get_node(pair[0]).unwrap().max_suffix_len()
+                    <= sam.get_node(pair[1]).unwrap().max_suffix_len(),
+                "max suffix length decreases from node {} to node {}",
+                pair[0],
+                pair[1]
+            );
+        }
+
+        // verify topological ordering
+        for node_id in order.iter() {
+            let node = sam.get_node(*node_id).unwrap();
+
+            for next_node_id in node.get_trans().values() {
+                assert!(
+                    rank[*next_node_id] > rank[*node_id],
+                    "transition {} -> {} violates the topological order",
+                    node_id,
+                    next_node_id
+                );
+            }
+        }
+
+        // verify suffix parent tree depth ordering
+        for node_id in order.iter() {
+            let node = sam.get_node(*node_id).unwrap();
+
+            // the root node is exempt from the suffix-parent check
+            if *node_id != SAM_ROOT_NODE_ID {
+                let parent_id = node.get_suffix_parent_id();
+                assert!(
+                    rank[parent_id] < rank[*node_id],
+                    "suffix parent {} is not ordered before node {}",
+                    parent_id,
+                    node_id
+                );
+            }
+        }
+    }
+}