diff --git a/src/tests/test_bound.cpp b/src/tests/test_bound.cpp index 0f5604d..2e717ba 100644 --- a/src/tests/test_bound.cpp +++ b/src/tests/test_bound.cpp @@ -38,7 +38,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" typedef int tag_t; const tag_t tag_num = 4; diff --git a/src/tests/test_deepwalk.cpp b/src/tests/test_deepwalk.cpp index 62f03af..d429a7c 100644 --- a/src/tests/test_deepwalk.cpp +++ b/src/tests/test_deepwalk.cpp @@ -39,7 +39,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/deepwalk.hpp" template @@ -64,7 +64,7 @@ void test_deepwalk(vertex_id_t v_num, int worker_number) Edge *std_edges; edge_id_t std_edge_num; read_graph(test_data_file, 0, 1, std_edges, std_edge_num); - check_static_first_order_random_walk(v_num, std_edges, std_edge_num, rw_sequences); + check_static_random_walk(v_num, std_edges, std_edge_num, rw_sequences); } } diff --git a/src/tests/test_metapath.cpp b/src/tests/test_metapath.cpp index da6cf7d..ada56b0 100644 --- a/src/tests/test_metapath.cpp +++ b/src/tests/test_metapath.cpp @@ -38,7 +38,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/metapath.hpp" const int edge_type_num = 5; diff --git a/src/tests/test_node2vec.cpp b/src/tests/test_node2vec.cpp index 6a1f703..02ccd17 100644 --- a/src/tests/test_node2vec.cpp +++ b/src/tests/test_node2vec.cpp @@ -39,9 +39,115 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/node2vec.hpp" +template +void get_node2vec_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > &trans_mat) +{ + std::vector > > graph(v_num); + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + graph[edges[e_i].src].push_back(edges[e_i]); + } + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + std::sort(graph[v_i].begin(), graph[v_i].end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); + } + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + vertex_id_t src = edges[e_i].src; + vertex_id_t dst = edges[e_i].dst; + assert(src != dst); + //must be undirected graph + assert(graph[dst].size() != 0); + for (auto e : graph[dst]) + { + if (e.dst == src) + { + trans_mat[e_i][e.dst] += 1 / p * get_edge_trans_weight(e); + } else if (std::binary_search(graph[src].begin(), graph[src].end(), e, [](const Edge a, const Edge b){return a.dst < b.dst;})) + { + trans_mat[e_i][e.dst] += 1 * get_edge_trans_weight(e); + } else + { + trans_mat[e_i][e.dst] += 1 / q * get_edge_trans_weight(e); + } + } + } + mat_normalization(trans_mat); +} + +template +void check_node2vec_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > rw_sequences) +{ + std::vector > trans_mat(e_num); + for (auto &vec : trans_mat) + { + vec.resize(v_num, 0.0); + } + get_node2vec_trans_matrix(v_num, edges, e_num, p, q, trans_mat); + + //check if sequences are legal + std::vector out_degree(v_num, 0); + std::vector > adj_mat(v_num); + for (auto &vec : adj_mat) + { + vec.resize(v_num, false); + } + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + adj_mat[edges[e_i].src][edges[e_i].dst] = true; + out_degree[edges[e_i].src]++; + } + for (auto &s : rw_sequences) + { + if (out_degree[s[0]] == 0) + { + for (auto v : s) + { + ASSERT_EQ(v, s[0]); + } + } else + { + for (size_t v_i = 0; v_i + 1 < s.size(); v_i++) + { + if (adj_mat[s[v_i]][s[v_i + 1]] == false) + { + printf("fault %u %u\n", s[v_i], s[v_i + 1]); + } + ASSERT_TRUE(adj_mat[s[v_i]][s[v_i + 1]]); + } + } + } + + std::map, edge_id_t> dict; + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + std::pair key = std::pair(edges[e_i].src, edges[e_i].dst); + assert(dict.find(key) == dict.end()); + dict[key] = e_i; + } + + std::vector > real_trans_mat(e_num); + for (auto &vec : real_trans_mat) + { + vec.resize(v_num, 0.0); + } + for (auto &s : rw_sequences) + { + if (out_degree[s[0]] != 0) + { + for (size_t v_i = 0; v_i + 2 < s.size(); v_i++) + { + real_trans_mat[dict[std::pair(s[v_i], s[v_i + 1])]][s[v_i + 2]] += 1; + } + } + } + mat_normalization(real_trans_mat); + cmp_trans_matrix(real_trans_mat, trans_mat, 10.0); +} + template void test_node2vec(vertex_id_t v_num, int worker_number) { diff --git a/src/tests/test_outlier.cpp b/src/tests/test_outlier.cpp index 0859c26..b986e55 100644 --- a/src/tests/test_outlier.cpp +++ b/src/tests/test_outlier.cpp @@ -38,7 +38,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" typedef int tag_t; const tag_t tag_num = 4; @@ -368,52 +368,6 @@ void check_tagwalk_random_walk(vertex_id_t v_num, Edge *edges, edge real_trans_mat[current_v][state_id][edge_idx] += 1.0; } } - /* - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - printf("%u: ", v_i); - for (auto e : graph[v_i]) - { - printf("(%u %d) ", e.dst, e.data.tag); - } - printf("\n"); - } - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - for (tag_t walker_tag = 0; walker_tag < tag_num; walker_tag++) - { - for (tag_t pv_tag = 0; pv_tag < tag_num; pv_tag++) - { - size_t state = get_state_id(walker_tag, pv_tag); - if (!vis[v_i][state]) - { - continue; - } - printf("%u %d %d:\n", v_i, walker_tag, pv_tag); - double sum = 0; - for (auto x : std_trans_mat[v_i][state]) - { - sum += x; - } - for (auto x : std_trans_mat[v_i][state]) - { - printf("%lf ", x / sum); - } - printf("\n"); - sum = 0; - for (auto x : real_trans_mat[v_i][state]) - { - sum += x; - } - for (auto x : real_trans_mat[v_i][state]) - { - printf("%lf ", x / sum); - } - printf("\n"); - } - } - } - */ auto get_flat_mat = [] (std::vector > > &three_d_mat) { std::vector > two_d_mat; diff --git a/src/tests/test_ppr.cpp b/src/tests/test_ppr.cpp index 6dc2229..5e4648c 100644 --- a/src/tests/test_ppr.cpp +++ b/src/tests/test_ppr.cpp @@ -39,7 +39,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/ppr.hpp" template @@ -62,7 +62,7 @@ void test_ppr(vertex_id_t v_num, int worker_number) Edge *std_edges; edge_id_t std_edge_num; read_graph(test_data_file, 0, 1, std_edges, std_edge_num); - check_static_first_order_random_walk(v_num, std_edges, std_edge_num, rw_sequences); + check_static_random_walk(v_num, std_edges, std_edge_num, rw_sequences); } } diff --git a/src/tests/test_rw.hpp b/src/tests/test_walk.hpp similarity index 56% rename from src/tests/test_rw.hpp rename to src/tests/test_walk.hpp index 4129634..edd257b 100644 --- a/src/tests/test_rw.hpp +++ b/src/tests/test_walk.hpp @@ -128,7 +128,7 @@ double get_edge_trans_weight(Edge &e) } template -void get_static_first_order_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > &trans_mat) +void get_static_walk_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > &trans_mat) { std::vector weight_sum(v_num, 0.0); for (edge_id_t e_i = 0; e_i < e_num; e_i++) @@ -150,14 +150,14 @@ void get_static_first_order_trans_matrix(vertex_id_t v_num, Edge *e } template -void check_static_first_order_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > rw_sequences) +void check_static_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > rw_sequences) { std::vector > trans_mat(v_num); for (auto &vec : trans_mat) { vec.resize(v_num, 0.0); } - get_static_first_order_trans_matrix(v_num, edges, e_num, trans_mat); + get_static_walk_trans_matrix(v_num, edges, e_num, trans_mat); //check if sequences are legal for (auto &s : rw_sequences) @@ -184,109 +184,3 @@ void check_static_first_order_random_walk(vertex_id_t v_num, Edge * //check if trans_mat is obeyed during random walk cmp_trans_matrix(real_trans_mat, trans_mat); } - -template -void get_node2vec_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > &trans_mat) -{ - std::vector > > graph(v_num); - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - graph[edges[e_i].src].push_back(edges[e_i]); - } - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - std::sort(graph[v_i].begin(), graph[v_i].end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); - } - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - vertex_id_t src = edges[e_i].src; - vertex_id_t dst = edges[e_i].dst; - assert(src != dst); - //must be undirected graph - assert(graph[dst].size() != 0); - for (auto e : graph[dst]) - { - if (e.dst == src) - { - trans_mat[e_i][e.dst] += 1 / p * get_edge_trans_weight(e); - } else if (std::binary_search(graph[src].begin(), graph[src].end(), e, [](const Edge a, const Edge b){return a.dst < b.dst;})) - { - trans_mat[e_i][e.dst] += 1 * get_edge_trans_weight(e); - } else - { - trans_mat[e_i][e.dst] += 1 / q * get_edge_trans_weight(e); - } - } - } - mat_normalization(trans_mat); -} - -template -void check_node2vec_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > rw_sequences) -{ - std::vector > trans_mat(e_num); - for (auto &vec : trans_mat) - { - vec.resize(v_num, 0.0); - } - get_node2vec_trans_matrix(v_num, edges, e_num, p, q, trans_mat); - - //check if sequences are legal - std::vector out_degree(v_num, 0); - std::vector > adj_mat(v_num); - for (auto &vec : adj_mat) - { - vec.resize(v_num, false); - } - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - adj_mat[edges[e_i].src][edges[e_i].dst] = true; - out_degree[edges[e_i].src]++; - } - for (auto &s : rw_sequences) - { - if (out_degree[s[0]] == 0) - { - for (auto v : s) - { - ASSERT_EQ(v, s[0]); - } - } else - { - for (size_t v_i = 0; v_i + 1 < s.size(); v_i++) - { - if (adj_mat[s[v_i]][s[v_i + 1]] == false) - { - printf("fault %u %u\n", s[v_i], s[v_i + 1]); - } - ASSERT_TRUE(adj_mat[s[v_i]][s[v_i + 1]]); - } - } - } - - std::map, edge_id_t> dict; - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - std::pair key = std::pair(edges[e_i].src, edges[e_i].dst); - assert(dict.find(key) == dict.end()); - dict[key] = e_i; - } - - std::vector > real_trans_mat(e_num); - for (auto &vec : real_trans_mat) - { - vec.resize(v_num, 0.0); - } - for (auto &s : rw_sequences) - { - if (out_degree[s[0]] != 0) - { - for (size_t v_i = 0; v_i + 2 < s.size(); v_i++) - { - real_trans_mat[dict[std::pair(s[v_i], s[v_i + 1])]][s[v_i + 2]] += 1; - } - } - } - mat_normalization(real_trans_mat); - cmp_trans_matrix(real_trans_mat, trans_mat, 10.0); -} diff --git a/src/tests/test_walker.cpp b/src/tests/test_walker.cpp index 910cb4d..b47ce03 100644 --- a/src/tests/test_walker.cpp +++ b/src/tests/test_walker.cpp @@ -39,7 +39,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/static_comp.hpp" typedef uint64_t hash_t;