diff --git a/.gitmodules b/.gitmodules index d36e86a..7077052 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "ext/gemini"] path = ext/gemini url = https://github.com/thu-pacman/GeminiGraph.git +[submodule "ext/gtest"] + path = ext/gtest + url = https://github.com/google/googletest.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 0badb45..42d5287 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required(VERSION 2.8) project(KnightKing) -set(WITH_UNIT_TEST TRUE) set(KTK_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) +option(WITH_TESTS "Build unit test programs" OFF) include(knightking.cmake) @@ -21,12 +21,12 @@ if(MPI_FOUND) include_directories(${MPI_INCLUDE_PATH}) endif() -if(WITH_UNIT_TEST) +if(WITH_TESTS) enable_testing() - find_package(GTest REQUIRED) - if (GTEST_FOUND) - include_directories(${GTEST_INCLUDE_DIRS}) - endif() + include_directories("${CMAKE_CURRENT_SOURCE_DIR}/ext/gtest/googletest/include") + add_subdirectory(ext/gtest) + link_directories(${CMAKE_BINARY_DIR}/lib) + set(GTEST_LIBRARIES "gtest" "gtest_main") endif() #use c++11 diff --git a/README.md b/README.md index e027401..fe4e36b 100644 --- a/README.md +++ b/README.md @@ -57,6 +57,14 @@ The compiled executable files will be installed at the "bin" directory: ls ./bin ``` +[Optional] Run unit tests: + +``` +cmake -DWITH_TESTS=on .. +ctest +``` +note that the unit test may take about one hour. + ### Run Built-in Applications Here we take node2vec as an example to show how to run the built-in applications. The usage of other three applications is quite similar. diff --git a/ext/gtest b/ext/gtest new file mode 160000 index 0000000..f73898f --- /dev/null +++ b/ext/gtest @@ -0,0 +1 @@ +Subproject commit f73898f3ffd4005de534edec1139387457d5853c diff --git a/include/graph.hpp b/include/graph.hpp index 31dce4a..41710e4 100644 --- a/include/graph.hpp +++ b/include/graph.hpp @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/include/walk.hpp b/include/walk.hpp index 576ea54..967c090 100644 --- a/include/walk.hpp +++ b/include/walk.hpp @@ -118,7 +118,9 @@ class WalkEngine : public GraphEngine #ifdef COLLECT_WALK_SEQUENCE std::vector > footprints; #endif - +#ifdef COLLECT_WALKER_INIT_STATE + std::vector > walker_init_state; +#endif template T* alloc_walker_array() { @@ -185,6 +187,9 @@ class WalkEngine : public GraphEngine local_walker_num = 0; #ifdef COLLECT_WALK_SEQUENCE footprints.resize(this->worker_num); +#endif +#ifdef COLLECT_WALKER_INIT_STATE + walker_init_state.clear(); #endif this->set_msg_buffer(walker_num, sizeof(walker_msg_t)); this->template distributed_execute( @@ -217,6 +222,13 @@ class WalkEngine : public GraphEngine walker_init_state_func(local_walkers[w_i].data, local_walkers[w_i].dst_vertex_id); } } + #ifdef COLLECT_WALKER_INIT_STATE + walker_init_state.clear(); + for (walker_id_t w_i = 0; w_i < local_walker_num; w_i ++) + { + walker_init_state.push_back(local_walkers[w_i].data); + } + #endif } void free_walkers( @@ -686,11 +698,11 @@ class WalkEngine : public GraphEngine } } + walker.step ++; if (walker_update_state_func != nullptr) { walker_update_state_func(walker, current_v, ac_edge); } - walker.step ++; if (output_flag) { @@ -976,8 +988,8 @@ class WalkEngine : public GraphEngine { if (this->is_local_vertex(cd->candidate->neighbour)) { - walker_update_state_func(p->data, current_v, cd->candidate); p->data.step ++; + walker_update_state_func(p->data, current_v, cd->candidate); p->dst_vertex_id = cd->candidate->neighbour; if (output_flag) @@ -1086,8 +1098,8 @@ class WalkEngine : public GraphEngine { if (cd->accepted || cd->randval <= dynamic_comp_func(walker, remote_response_cache[walker_idx], current_v, cd->candidate)) { - walker_update_state_func(walker, current_v, cd->candidate); walker.step ++; + walker_update_state_func(walker, current_v, cd->candidate); this->emit(cd->candidate->neighbour, walker, worker_id); if (output_flag) @@ -1204,4 +1216,31 @@ class WalkEngine : public GraphEngine send_thread.join(); } #endif +#ifdef COLLECT_WALKER_INIT_STATE + void collect_walker_init_state(std::vector > &output) + { + if (get_mpi_rank() != 0) + { + MPI_Send(walker_init_state.data(), walker_init_state.size() * sizeof(Walker), get_mpi_data_type(), 0, Tag_Msg, MPI_COMM_WORLD); + } else + { + output = walker_init_state; + for (partition_id_t p_i = 1; p_i < get_mpi_size(); p_i++) + { + int recv_size = 0; + MPI_Status recv_status; + MPI_Probe(p_i, Tag_Msg, MPI_COMM_WORLD, &recv_status); + MPI_Get_count(&recv_status, get_mpi_data_type(), &recv_size); + int recv_n = recv_size / sizeof(Walker); + std::vector > recv_data(recv_n); + MPI_Recv(recv_data.data(), recv_size, get_mpi_data_type(), p_i, Tag_Msg, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + for (auto x : recv_data) + { + output.push_back(x); + } + } + std::sort(output.begin(), output.end(), [](const Walker a, const Walker b) { return a.id < b.id; }); + } + } +#endif }; diff --git a/knightking.cmake b/knightking.cmake index 79d5ece..ad63d59 100644 --- a/knightking.cmake +++ b/knightking.cmake @@ -17,7 +17,7 @@ foreach(prog "test_storage") add_test("${prog}" "${KTK_RUNTIME_OUTPUT_DIRECTORY}/${prog}") endforeach(prog) -foreach(prog "test_graph" "test_path" "test_outlier" "test_deepwalk" "test_ppr" "test_node2vec") +foreach(prog "test_graph" "test_path" "test_walker" "test_bound" "test_outlier" "test_deepwalk" "test_ppr" "test_metapath" "test_node2vec") add_test("${prog}" "./bin/${prog}") add_test("distributed_${prog}" "mpirun" "-n" "2" "${KTK_RUNTIME_OUTPUT_DIRECTORY}/${prog}") endforeach(prog) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9aa159f..97c6d22 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -1,4 +1,4 @@ -IF(WITH_UNIT_TEST) +IF(WITH_TESTS) add_subdirectory(tests) ENDIF() add_subdirectory(apps) diff --git a/src/apps/metapath.cpp b/src/apps/metapath.cpp index b7d5391..8850dea 100644 --- a/src/apps/metapath.cpp +++ b/src/apps/metapath.cpp @@ -74,7 +74,7 @@ int main(int argc, char** argv) run(&graph, &opt); } else if(opt.static_comp.compare("unweighted") == 0) { - WalkEngine graph; + WalkEngine graph; run(&graph, &opt); } else { diff --git a/src/apps/metapath.hpp b/src/apps/metapath.hpp index 573427a..6d4ab7e 100644 --- a/src/apps/metapath.hpp +++ b/src/apps/metapath.hpp @@ -53,7 +53,7 @@ std::vector > get_scheme_mask(std::vector -std::function*)> get_metapath_static_comp(WalkEngine *graph) +std::function*)> get_metapath_static_comp(WalkEngine *graph) { return nullptr; } @@ -67,16 +67,6 @@ std::function*)> get_metapath_stat return static_comp; } -int get_edge_meta(AdjUnit *edge) -{ - return edge->data; -} - -int get_edge_meta(AdjUnit *edge) -{ - return edge->data.meta_info; -} - template void metapath(WalkEngine *graph, std::vector > > schemes, walker_id_t walker_num, step_t walk_length) { @@ -91,7 +81,7 @@ void metapath(WalkEngine *graph, std::vectorcsr->adj_lists[v_id].begin; p < graph->csr->adj_lists[v_id].end; p++) { - vertex_masks[v_id] |= (1 << get_edge_meta(p)); + vertex_masks[v_id] |= (1 << p->data.get_meta()); } return 0; } @@ -116,7 +106,7 @@ void metapath(WalkEngine *graph, std::vector &walker, vertex_id_t current_v, AdjUnit *edge) { - if (schemes[walker.data.scheme_id][walker.data.state][get_edge_meta(edge)]) + if (schemes[walker.data.scheme_id][walker.data.state][edge->data.get_meta()]) { return 1.0; } else diff --git a/src/apps/metascheme.hpp b/src/apps/metascheme.hpp index d1d269f..68c2564 100644 --- a/src/apps/metascheme.hpp +++ b/src/apps/metascheme.hpp @@ -30,16 +30,40 @@ #include "type.hpp" +typedef int scheme_id_t; +typedef int meta_state_t; + struct MetapathState { - int scheme_id; - int state; + scheme_id_t scheme_id; + meta_state_t state; }; struct WeightedMetaData { real_t weight; - int meta_info; + meta_state_t meta_info; + meta_state_t get_meta() + { + return meta_info; + } + real_t get_weight() + { + return weight; + } +}; + +struct UnweightedMetaData +{ + meta_state_t meta_info; + meta_state_t get_meta() + { + return meta_info; + } + real_t get_weight() + { + return 1.0; + } }; std::vector > > read_metapath_schemes(const char* path) diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5bd93f4..13a26a2 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -1,13 +1,17 @@ SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCMAKE_BUILD_TYPE=Debug") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUNIT_TEST") SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCOLLECT_WALK_SEQUENCE") -SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ggdb") +SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCOLLECT_WALKER_INIT_STATE") add_test_exec(test_storage) add_test_exec(test_graph) add_test_exec(test_path) +add_test_exec(test_walker) add_test_exec(test_outlier) +add_test_exec(test_bound) add_test_exec(test_deepwalk) add_test_exec(test_ppr) add_test_exec(test_node2vec) +add_test_exec(test_metapath) diff --git a/src/tests/test_bound.cpp b/src/tests/test_bound.cpp new file mode 100644 index 0000000..2e717ba --- /dev/null +++ b/src/tests/test_bound.cpp @@ -0,0 +1,500 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 Ke Yang, Tsinghua University + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "storage.hpp" +#include "graph.hpp" +#include "walk.hpp" +#include "util.hpp" +#include "test.hpp" +#include "test_walk.hpp" + +typedef int tag_t; +const tag_t tag_num = 4; + +struct TagWalkState +{ + tag_t tag; + union + { + vertex_id_t previous_vertex; + tag_t previous_vertex_tag; + }; +}; + +struct WeightedTag +{ + tag_t tag; + real_t weight; + real_t get_weight() + { + return weight; + } +}; + +struct UnweightedTag +{ + tag_t tag; + real_t get_weight() + { + return 1.0; + } +}; + +struct TagWalkConf +{ + walker_id_t walker_num; + step_t walk_length; + tag_t tag_num; + real_t vertex_tag_weight; +}; + +template +std::function*)> get_trivial_static_comp() +{ + printf("[error] Undefined trivial static component\n"); + exit(1); +} + +template<> +std::function*)> get_trivial_static_comp() +{ + return nullptr; +} + +template<> +std::function*)> get_trivial_static_comp() +{ + auto static_comp = [&] (vertex_id_t v, AdjUnit *edge) { + return edge->data.weight; + }; + return static_comp; +} + +template +void tagwalk(WalkEngine* graph, TagWalkConf conf, tag_t* vertex_tag, int order) +{ + graph->set_walkers( + conf.walker_num, + [&] (Walker &walker, vertex_id_t start_vertex) + { + walker.data.tag = walker.id % tag_num; + }, + [&] (Walker &walker, vertex_id_t current_v, AdjUnit *edge) + { + walker.data.tag = (walker.data.tag + 1) % tag_num; + walker.data.previous_vertex = current_v; + } + ); + auto extension_comp = [&] (Walker &walker, vertex_id_t current_v) + { + return walker.step >= conf.walk_length ? 0.0 : 1.0; + }; + auto static_comp = get_trivial_static_comp(); + auto upper_bound_func = [&] (vertex_id_t v_id, AdjList *adj_lists) + { + return (real_t)tag_num + vertex_tag[v_id] * conf.vertex_tag_weight; + }; + auto lower_bound_func = [&] (vertex_id_t v_id, AdjList *adj_lists) + { + return (real_t)vertex_tag[v_id] * conf.vertex_tag_weight; + }; + if (order == 1) + { + graph->random_walk( + extension_comp, + static_comp, + [&] (Walker& walker, vertex_id_t vertex, AdjUnit *edge) + { + if (walker.step == 0) + { + return (real_t)tag_num; + } else + { + real_t temp = (real_t)1 + (vertex_tag[walker.data.previous_vertex] + walker.data.tag + edge->data.tag) % tag_num; + temp += vertex_tag[vertex] * conf.vertex_tag_weight; + return temp; + } + }, + upper_bound_func, + lower_bound_func + ); + } else + { + graph->template second_order_random_walk ( + extension_comp, + static_comp, + [&] (Walker &walker, walker_id_t walker_idx, vertex_id_t current_v, AdjUnit *edge) + { + if (walker.step != 0) + { + stateQuery query; + query.src_v = current_v; + query.walker_idx = walker_idx; + graph->emit(walker.data.previous_vertex, query); + } + }, + [&] (vertex_id_t vtx, stateQuery query, AdjList* adj_list) + { + stateResponse response; + response.walker_idx = query.walker_idx; + response.data = vertex_tag[vtx]; + graph->emit(query.src_v, response); + }, + [&] (Walker &walker, stateResponse &response, vertex_id_t current_v, AdjUnit *edge) + { + if (walker.step == 0) + { + return (real_t)tag_num; + } else + { + real_t temp = (real_t)1 + (response.data + walker.data.tag + edge->data.tag) % tag_num; + temp += vertex_tag[current_v] * conf.vertex_tag_weight; + return temp; + } + }, + upper_bound_func, + lower_bound_func + ); + } +} + +template +void check_tagwalk_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, TagWalkConf conf, tag_t* vertex_tag, std::vector > &seq) +{ + const size_t max_state_num = tag_num * tag_num; + auto get_state_id = [&] (tag_t walker_tag, tag_t previous_vertex_tag) + { + return (size_t)walker_tag * tag_num + previous_vertex_tag; + }; + std::vector > > graph(v_num); + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + graph[edges[e_i].src].push_back(edges[e_i]); + } + for (auto &adj : graph) + { + std::sort(adj.begin(), adj.end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); + } + auto get_edge_idx = [&] (vertex_id_t src, vertex_id_t dst) + { + Edge target; + target.dst = dst; + auto p = std::lower_bound(graph[src].begin(), graph[src].end(), target, [](const Edge &a, const Edge &b) { return a.dst < b.dst; }); + assert(p != graph[src].end()); + return p - graph[src].begin(); + }; + for (auto &s : seq) + { + vertex_id_t start = s[0]; + assert((graph[start].size() == 0 && s.size() == 1) || (s.size() == conf.walk_length + 1)); + } + std::vector > vis(v_num); + for (auto &x : vis) + { + x.resize(max_state_num, false); + } + struct QueueItem + { + tag_t wk_tag; + vertex_id_t vertex; + }; + for (walker_id_t w_i = 0; w_i < seq.size(); w_i ++) + { + std::queue q; + auto expand_func = [&] (QueueItem current) + { + for (auto edge : graph[current.vertex]) + { + QueueItem next; + next.wk_tag = (current.wk_tag + 1) % tag_num; + next.vertex = edge.dst; + tag_t pv_tag = vertex_tag[current.vertex]; + size_t state_id = get_state_id(next.wk_tag, pv_tag); + if (!vis[next.vertex][state_id]) + { + vis[next.vertex][state_id] = true; + q.push(next); + } + } + + }; + QueueItem start; + start.vertex = seq[w_i][0]; + start.wk_tag = w_i % tag_num; + expand_func(start); + while (q.empty() == false) + { + QueueItem current = q.front(); + q.pop(); + expand_func(current); + } + } + std::vector > > std_trans_mat(v_num); + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + std_trans_mat[v_i].resize(max_state_num); + for (tag_t walker_tag = 0; walker_tag < tag_num; walker_tag++) + { + for (tag_t pv_tag = 0; pv_tag < tag_num; pv_tag++) + { + size_t s_i = get_state_id(walker_tag, pv_tag); + auto &dist = std_trans_mat[v_i][s_i]; + dist.resize(graph[v_i].size(), 0.0); + if (!vis[v_i][s_i]) + { + continue; + } + for (size_t e_i = 0; e_i < graph[v_i].size(); e_i++) + { + auto &edge = graph[v_i][e_i]; + double val = (real_t)1 + (pv_tag + walker_tag + edge.data.tag) % tag_num; + val += vertex_tag[v_i] * conf.vertex_tag_weight; + val *= edge.data.get_weight(); + dist[e_i] = val; + } + } + } + } + std::vector > > real_trans_mat(v_num); + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + real_trans_mat[v_i].resize(max_state_num); + for (size_t s_i = 0; s_i < max_state_num; s_i++) + { + real_trans_mat[v_i][s_i].resize(graph[v_i].size(), 0.0); + } + } + for (walker_id_t w_i = 0; w_i < seq.size(); w_i++) + { + for (step_t s_i = 1; s_i + 1 < seq[w_i].size(); s_i++) + { + vertex_id_t current_v = seq[w_i][s_i]; + size_t state_id = get_state_id((w_i + s_i) % tag_num, vertex_tag[seq[w_i][s_i - 1]]); + size_t edge_idx = get_edge_idx(seq[w_i][s_i], seq[w_i][s_i + 1]); + real_trans_mat[current_v][state_id][edge_idx] += 1.0; + } + } + /* + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + printf("%u: ", v_i); + for (auto e : graph[v_i]) + { + printf("(%u %d) ", e.dst, e.data.tag); + } + printf("\n"); + } + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + for (tag_t walker_tag = 0; walker_tag < tag_num; walker_tag++) + { + for (tag_t pv_tag = 0; pv_tag < tag_num; pv_tag++) + { + size_t state = get_state_id(walker_tag, pv_tag); + if (!vis[v_i][state]) + { + continue; + } + printf("%u %d %d:\n", v_i, walker_tag, pv_tag); + double sum = 0; + for (auto x : std_trans_mat[v_i][state]) + { + sum += x; + } + for (auto x : std_trans_mat[v_i][state]) + { + printf("%lf ", x / sum); + } + printf("\n"); + sum = 0; + for (auto x : real_trans_mat[v_i][state]) + { + sum += x; + } + for (auto x : real_trans_mat[v_i][state]) + { + printf("%lf ", x / sum); + } + printf("\n"); + } + } + } + */ + auto get_flat_mat = [] (std::vector > > &three_d_mat) + { + std::vector > two_d_mat; + for (auto &x : three_d_mat) + { + for (auto &y : x) + { + two_d_mat.push_back(y); + } + } + return two_d_mat; + }; + auto std_flat_mat = get_flat_mat(std_trans_mat); + auto real_flat_mat = get_flat_mat(real_trans_mat); + mat_normalization(std_flat_mat); + mat_normalization(real_flat_mat); + cmp_trans_matrix(real_flat_mat, std_flat_mat); +} + +template +void test_bound(vertex_id_t v_num, int worker_number, int order) +{ + WalkEngine graph; + graph.set_concurrency(worker_number); + graph.load_graph(v_num, test_data_file); + + TagWalkConf conf; + conf.walk_length = 80 + rand() % 20; + conf.walker_num = graph.get_vertex_num() * 500 + graph.get_edge_num() * 500 + rand() % 100; + conf.tag_num = 3 + rand() % 5; + conf.vertex_tag_weight = 1.0 / (1 + rand() % 3); + MPI_Bcast(&conf, sizeof(conf), get_mpi_data_type(), 0, MPI_COMM_WORLD); + + tag_t* vertex_tag = graph.template alloc_vertex_array(); + if (get_mpi_rank() == 0) + { + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + vertex_tag[v_i] = graph.get_thread_local_rand_gen()->gen(tag_num); + } + } + MPI_Bcast(vertex_tag, v_num, get_mpi_data_type(), 0, MPI_COMM_WORLD); + + tagwalk(&graph, conf, vertex_tag, order); + + std::vector > rw_sequences; + graph.collect_walk_sequence(rw_sequences); + + if (get_mpi_rank() == 0) + { + Edge *std_edges; + edge_id_t std_edge_num; + read_graph(test_data_file, 0, 1, std_edges, std_edge_num); + check_tagwalk_random_walk(v_num, std_edges, std_edge_num, conf, vertex_tag, rw_sequences); + } + graph.dealloc_vertex_array(vertex_tag); +} + +template +std::function get_tag_gen_func() +{ + printf("[error] Undefined edge data generator\n"); + exit(1); +} + +template<> +std::function get_tag_gen_func() +{ + auto func = [] (UnweightedTag &data) + { + data.tag = rand() % tag_num; + }; + return func; +} + +template<> +std::function get_tag_gen_func() +{ + auto func = [] (WeightedTag &data) + { + data.tag = rand() % tag_num; + gen_rand_edge_data(data.weight); + }; + return func; +} + +template +void test_bound(int order) +{ + edge_id_t e_nums_arr[] = {100, 200, 300, 400, 500, 600}; + vertex_id_t v_num = 50 + rand() % 25; + std::vector e_nums(e_nums_arr, e_nums_arr + 6); + /* + size_t e_nums_arr[] = {30}; + vertex_id_t v_num = 10; + std::vector e_nums(e_nums_arr, e_nums_arr + 1); + */ + + MPI_Bcast(&v_num, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + + for (auto &e_num : e_nums_arr) + { + if (get_mpi_rank() == 0) + { + gen_undirected_graph_file(v_num, e_num, get_tag_gen_func()); + } + MPI_Barrier(MPI_COMM_WORLD); + int worker_number = rand() % 8 + 1; + MPI_Bcast(&worker_number, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + test_bound(v_num, worker_number, order); + } + if (get_mpi_rank() == 0) + { + rm_test_graph_temp_file(); + } +} + +TEST(Bound, UnbiasedFirstOrder) +{ + test_bound(1); +} + +TEST(Bound, BiasedFirstOrder) +{ + test_bound(1); +} + +TEST(Bound, UnbiasedSecondOrder) +{ + test_bound(2); +} + +TEST(Bound, BiasedSecondOrder) +{ + test_bound(2); +} + + +GTEST_API_ int main(int argc, char *argv[]) +{ + MPI_Instance mpi_instance(&argc, &argv); + ::testing::InitGoogleTest(&argc, argv); + mute_nonroot_gtest_events(); + int result = RUN_ALL_TESTS(); + return result; +} diff --git a/src/tests/test_deepwalk.cpp b/src/tests/test_deepwalk.cpp index 62f03af..d429a7c 100644 --- a/src/tests/test_deepwalk.cpp +++ b/src/tests/test_deepwalk.cpp @@ -39,7 +39,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/deepwalk.hpp" template @@ -64,7 +64,7 @@ void test_deepwalk(vertex_id_t v_num, int worker_number) Edge *std_edges; edge_id_t std_edge_num; read_graph(test_data_file, 0, 1, std_edges, std_edge_num); - check_static_first_order_random_walk(v_num, std_edges, std_edge_num, rw_sequences); + check_static_random_walk(v_num, std_edges, std_edge_num, rw_sequences); } } diff --git a/src/tests/test_metapath.cpp b/src/tests/test_metapath.cpp new file mode 100644 index 0000000..ada56b0 --- /dev/null +++ b/src/tests/test_metapath.cpp @@ -0,0 +1,341 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 Ke Yang, Tsinghua University + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "storage.hpp" +#include "graph.hpp" +#include "walk.hpp" +#include "util.hpp" +#include "test.hpp" +#include "test_walk.hpp" +#include "../apps/metapath.hpp" + +const int edge_type_num = 5; + +template +void check_metapath_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, walker_id_t walker_num, step_t walk_lenght, std::vector > > schemes, std::vector > &seq, std::vector > &walker_init_state) +{ + size_t max_state_num = 0; + for (auto &sch : schemes) + { + max_state_num += sch.size(); + } + auto get_state_id = [&] (scheme_id_t scheme_id, meta_state_t meta_state) + { + assert(scheme_id < schemes.size()); + size_t state_id = 0; + for (scheme_id_t s_i = 0; s_i < scheme_id; s_i++) + { + state_id += schemes[s_i].size(); + } + return state_id + meta_state; + }; + std::vector > > graph(v_num); + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + graph[edges[e_i].src].push_back(edges[e_i]); + } + for (auto &adj : graph) + { + std::sort(adj.begin(), adj.end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); + } + auto get_edge_idx = [&] (vertex_id_t src, vertex_id_t dst) + { + Edge target; + target.dst = dst; + auto p = std::lower_bound(graph[src].begin(), graph[src].end(), target, [](const Edge &a, const Edge &b) { return a.dst < b.dst; }); + assert(p != graph[src].end()); + return p - graph[src].begin(); + }; + std::vector > vis(v_num); + for (auto &x : vis) + { + x.resize(max_state_num, false); + } + struct QueueItem + { + scheme_id_t sch_id; + meta_state_t state; + vertex_id_t vertex; + }; + for (walker_id_t v_i = 0; v_i < v_num; v_i ++) + { + for (scheme_id_t sch_i = 0; sch_i < schemes.size(); sch_i++) + { + std::queue q; + auto expand_func = [&] (QueueItem current) + { + size_t state_id = get_state_id(current.sch_id, current.state); + if (!vis[current.vertex][state_id]) + { + vis[current.vertex][state_id] = true; + for (auto edge : graph[current.vertex]) + { + if (!schemes[current.sch_id][current.state][edge.data.get_meta()]) + { + continue; + } + QueueItem next; + next.sch_id = current.sch_id; + next.state = (current.state + 1) % schemes[current.sch_id].size(); + next.vertex = edge.dst; + q.push(next); + } + } + + }; + QueueItem start; + start.vertex = v_i; + start.sch_id = sch_i; + start.state = 0; + expand_func(start); + while (q.empty() == false) + { + QueueItem current = q.front(); + q.pop(); + expand_func(current); + } + } + } + std::vector > > std_trans_mat(v_num); + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + std_trans_mat[v_i].resize(max_state_num); + for (scheme_id_t sch_i = 0; sch_i < schemes.size(); sch_i++) + { + for (meta_state_t ms_i = 0; ms_i < schemes[sch_i].size(); ms_i ++) + { + size_t state_id = get_state_id(sch_i, ms_i); + auto &dist = std_trans_mat[v_i][state_id]; + dist.resize(graph[v_i].size(), 0.0); + if (!vis[v_i][state_id]) + { + continue; + } + for (size_t e_i = 0; e_i < graph[v_i].size(); e_i++) + { + auto &edge = graph[v_i][e_i]; + double val = schemes[sch_i][ms_i][edge.data.get_meta()]? 1.0 : 0.0; + val *= edge.data.get_weight(); + dist[e_i] = val; + } + } + } + } + std::vector > > real_trans_mat(v_num); + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + real_trans_mat[v_i].resize(max_state_num); + for (size_t s_i = 0; s_i < max_state_num; s_i++) + { + real_trans_mat[v_i][s_i].resize(graph[v_i].size(), 0.0); + } + } + for (walker_id_t w_i = 0; w_i < seq.size(); w_i++) + { + meta_state_t init_meta = walker_init_state[w_i].data.state; + meta_state_t scheme_id = walker_init_state[w_i].data.scheme_id; + for (step_t s_i = 0; s_i + 1 < seq[w_i].size(); s_i++) + { + vertex_id_t current_v = seq[w_i][s_i]; + meta_state_t ms = (init_meta + s_i) % schemes[scheme_id].size(); + size_t state_id = get_state_id(scheme_id, ms); + size_t edge_idx = get_edge_idx(seq[w_i][s_i], seq[w_i][s_i + 1]); + real_trans_mat[current_v][state_id][edge_idx] += 1.0; + ASSERT_TRUE(schemes[scheme_id][ms][graph[current_v][edge_idx].data.get_meta()]); + } + } + auto get_flat_mat = [] (std::vector > > &three_d_mat) + { + std::vector > two_d_mat; + for (auto &x : three_d_mat) + { + for (auto &y : x) + { + two_d_mat.push_back(y); + } + } + return two_d_mat; + }; + auto std_flat_mat = get_flat_mat(std_trans_mat); + auto real_flat_mat = get_flat_mat(real_trans_mat); + mat_normalization(std_flat_mat); + mat_normalization(real_flat_mat); + cmp_trans_matrix(real_flat_mat, std_flat_mat); +} + +template +void test_metapath(vertex_id_t v_num, int worker_number) +{ + WalkEngine graph; + graph.set_concurrency(worker_number); + graph.load_graph(v_num, test_data_file); + + step_t walk_length = 80 + rand() % 20; + walker_id_t walker_num = graph.get_vertex_num() * 500 + graph.get_edge_num() * 500 + rand() % 100; + MPI_Bcast(&walk_length, sizeof(walk_length), get_mpi_data_type(), 0, MPI_COMM_WORLD); + MPI_Bcast(&walker_num, sizeof(walker_num), get_mpi_data_type(), 0, MPI_COMM_WORLD); + std::vector > > schemes; + if (get_mpi_rank() == 0) + { + int scheme_num = 3 + rand() % 2; + for (int s_i = 0; s_i < scheme_num; s_i++) + { + int scheme_length = 3 + rand() % 2; + std::vector > sch; + for (int l_i = 0; l_i < scheme_length; l_i++) + { + std::vector val; + for (int v_i = 0; v_i < edge_type_num; v_i++) + { + val.push_back((bool)(rand() % 2)); + } + sch.push_back(val); + } + schemes.push_back(sch); + } + } + size_t scheme_num = schemes.size(); + MPI_Bcast(&scheme_num, sizeof(scheme_num), get_mpi_data_type(), 0, MPI_COMM_WORLD); + schemes.resize(scheme_num); + for (int s_i = 0; s_i < scheme_num; s_i++) + { + size_t scheme_length = schemes[s_i].size(); + MPI_Bcast(&scheme_length, sizeof(scheme_length), get_mpi_data_type(), 0, MPI_COMM_WORLD); + schemes[s_i].resize(scheme_length); + for (int l_i = 0; l_i < scheme_length; l_i++) + { + schemes[s_i][l_i].resize(edge_type_num); + for (int v_i = 0; v_i < edge_type_num; v_i++) + { + bool val = schemes[s_i][l_i][v_i]; + MPI_Bcast(&val, sizeof(val), get_mpi_data_type(), 0, MPI_COMM_WORLD); + schemes[s_i][l_i][v_i] = val; + } + } + } + + metapath(&graph, schemes, walker_num, walk_length); + + std::vector > rw_sequences; + std::vector > walker_init_state; + graph.collect_walk_sequence(rw_sequences); + graph.collect_walker_init_state(walker_init_state); + + if (get_mpi_rank() == 0) + { + Edge *std_edges; + edge_id_t std_edge_num; + read_graph(test_data_file, 0, 1, std_edges, std_edge_num); + check_metapath_random_walk(v_num, std_edges, std_edge_num, walker_num, walk_length, schemes, rw_sequences, walker_init_state); + } +} + +template +std::function get_edge_data_gen_func() +{ + printf("[error] Undefined edge data generator\n"); + exit(1); +} + +template<> +std::function get_edge_data_gen_func() +{ + auto func = [] (UnweightedMetaData &data) + { + data.meta_info = rand() % edge_type_num; + }; + return func; +} + +template<> +std::function get_edge_data_gen_func() +{ + auto func = [] (WeightedMetaData &data) + { + data.meta_info = rand() % edge_type_num; + gen_rand_edge_data(data.weight); + }; + return func; +} + +template +void test_metapath() +{ + edge_id_t e_nums_arr[] = {100, 200, 300, 400, 500, 600}; + vertex_id_t v_num = 50 + rand() % 20; + std::vector e_nums(e_nums_arr, e_nums_arr + 6); + /* + size_t e_nums_arr[] = {30}; + vertex_id_t v_num = 10; + std::vector e_nums(e_nums_arr, e_nums_arr + 1); + */ + + MPI_Bcast(&v_num, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + + for (auto &e_num : e_nums_arr) + { + if (get_mpi_rank() == 0) + { + gen_undirected_graph_file(v_num, e_num, get_edge_data_gen_func()); + } + MPI_Barrier(MPI_COMM_WORLD); + int worker_number = rand() % 8 + 1; + MPI_Bcast(&worker_number, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + test_metapath(v_num, worker_number); + } + if (get_mpi_rank() == 0) + { + rm_test_graph_temp_file(); + } +} + +TEST(Metapath, Unbiased) +{ + test_metapath(); +} + +TEST(Outlier, Biased) +{ + test_metapath(); +} + +GTEST_API_ int main(int argc, char *argv[]) +{ + MPI_Instance mpi_instance(&argc, &argv); + ::testing::InitGoogleTest(&argc, argv); + mute_nonroot_gtest_events(); + int result = RUN_ALL_TESTS(); + return result; +} diff --git a/src/tests/test_node2vec.cpp b/src/tests/test_node2vec.cpp index 6a1f703..02ccd17 100644 --- a/src/tests/test_node2vec.cpp +++ b/src/tests/test_node2vec.cpp @@ -39,9 +39,115 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/node2vec.hpp" +template +void get_node2vec_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > &trans_mat) +{ + std::vector > > graph(v_num); + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + graph[edges[e_i].src].push_back(edges[e_i]); + } + for (vertex_id_t v_i = 0; v_i < v_num; v_i++) + { + std::sort(graph[v_i].begin(), graph[v_i].end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); + } + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + vertex_id_t src = edges[e_i].src; + vertex_id_t dst = edges[e_i].dst; + assert(src != dst); + //must be undirected graph + assert(graph[dst].size() != 0); + for (auto e : graph[dst]) + { + if (e.dst == src) + { + trans_mat[e_i][e.dst] += 1 / p * get_edge_trans_weight(e); + } else if (std::binary_search(graph[src].begin(), graph[src].end(), e, [](const Edge a, const Edge b){return a.dst < b.dst;})) + { + trans_mat[e_i][e.dst] += 1 * get_edge_trans_weight(e); + } else + { + trans_mat[e_i][e.dst] += 1 / q * get_edge_trans_weight(e); + } + } + } + mat_normalization(trans_mat); +} + +template +void check_node2vec_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > rw_sequences) +{ + std::vector > trans_mat(e_num); + for (auto &vec : trans_mat) + { + vec.resize(v_num, 0.0); + } + get_node2vec_trans_matrix(v_num, edges, e_num, p, q, trans_mat); + + //check if sequences are legal + std::vector out_degree(v_num, 0); + std::vector > adj_mat(v_num); + for (auto &vec : adj_mat) + { + vec.resize(v_num, false); + } + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + adj_mat[edges[e_i].src][edges[e_i].dst] = true; + out_degree[edges[e_i].src]++; + } + for (auto &s : rw_sequences) + { + if (out_degree[s[0]] == 0) + { + for (auto v : s) + { + ASSERT_EQ(v, s[0]); + } + } else + { + for (size_t v_i = 0; v_i + 1 < s.size(); v_i++) + { + if (adj_mat[s[v_i]][s[v_i + 1]] == false) + { + printf("fault %u %u\n", s[v_i], s[v_i + 1]); + } + ASSERT_TRUE(adj_mat[s[v_i]][s[v_i + 1]]); + } + } + } + + std::map, edge_id_t> dict; + for (edge_id_t e_i = 0; e_i < e_num; e_i++) + { + std::pair key = std::pair(edges[e_i].src, edges[e_i].dst); + assert(dict.find(key) == dict.end()); + dict[key] = e_i; + } + + std::vector > real_trans_mat(e_num); + for (auto &vec : real_trans_mat) + { + vec.resize(v_num, 0.0); + } + for (auto &s : rw_sequences) + { + if (out_degree[s[0]] != 0) + { + for (size_t v_i = 0; v_i + 2 < s.size(); v_i++) + { + real_trans_mat[dict[std::pair(s[v_i], s[v_i + 1])]][s[v_i + 2]] += 1; + } + } + } + mat_normalization(real_trans_mat); + cmp_trans_matrix(real_trans_mat, trans_mat, 10.0); +} + template void test_node2vec(vertex_id_t v_num, int worker_number) { diff --git a/src/tests/test_outlier.cpp b/src/tests/test_outlier.cpp index aa3cfe9..b986e55 100644 --- a/src/tests/test_outlier.cpp +++ b/src/tests/test_outlier.cpp @@ -38,8 +38,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" -#include "../apps/static_comp.hpp" +#include "test_walk.hpp" typedef int tag_t; const tag_t tag_num = 4; @@ -369,52 +368,6 @@ void check_tagwalk_random_walk(vertex_id_t v_num, Edge *edges, edge real_trans_mat[current_v][state_id][edge_idx] += 1.0; } } - /* - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - printf("%u: ", v_i); - for (auto e : graph[v_i]) - { - printf("(%u %d) ", e.dst, e.data.tag); - } - printf("\n"); - } - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - for (tag_t walker_tag = 0; walker_tag < tag_num; walker_tag++) - { - for (tag_t pv_tag = 0; pv_tag < tag_num; pv_tag++) - { - size_t state = get_state_id(walker_tag, pv_tag); - if (!vis[v_i][state]) - { - continue; - } - printf("%u %d %d:\n", v_i, walker_tag, pv_tag); - double sum = 0; - for (auto x : std_trans_mat[v_i][state]) - { - sum += x; - } - for (auto x : std_trans_mat[v_i][state]) - { - printf("%lf ", x / sum); - } - printf("\n"); - sum = 0; - for (auto x : real_trans_mat[v_i][state]) - { - sum += x; - } - for (auto x : real_trans_mat[v_i][state]) - { - printf("%lf ", x / sum); - } - printf("\n"); - } - } - } - */ auto get_flat_mat = [] (std::vector > > &three_d_mat) { std::vector > two_d_mat; @@ -443,7 +396,7 @@ void test_outlier(vertex_id_t v_num, int worker_number, int order) TagWalkConf conf; conf.walk_length = 80 + rand() % 20; - conf.walker_num = graph.get_vertex_num() * 300 + graph.get_edge_num() * 200 + rand() % 100; + conf.walker_num = graph.get_vertex_num() * 500 + graph.get_edge_num() * 500 + rand() % 100; conf.tag_num = 3 + rand() % 5; conf.outlier_amplifier = 1.0 + 5.0 / (rand() % 10 + 1); MPI_Bcast(&conf, sizeof(conf), get_mpi_data_type(), 0, MPI_COMM_WORLD); diff --git a/src/tests/test_ppr.cpp b/src/tests/test_ppr.cpp index 6dc2229..5e4648c 100644 --- a/src/tests/test_ppr.cpp +++ b/src/tests/test_ppr.cpp @@ -39,7 +39,7 @@ #include "walk.hpp" #include "util.hpp" #include "test.hpp" -#include "test_rw.hpp" +#include "test_walk.hpp" #include "../apps/ppr.hpp" template @@ -62,7 +62,7 @@ void test_ppr(vertex_id_t v_num, int worker_number) Edge *std_edges; edge_id_t std_edge_num; read_graph(test_data_file, 0, 1, std_edges, std_edge_num); - check_static_first_order_random_walk(v_num, std_edges, std_edge_num, rw_sequences); + check_static_random_walk(v_num, std_edges, std_edge_num, rw_sequences); } } diff --git a/src/tests/test_rw.hpp b/src/tests/test_walk.hpp similarity index 56% rename from src/tests/test_rw.hpp rename to src/tests/test_walk.hpp index 4129634..edd257b 100644 --- a/src/tests/test_rw.hpp +++ b/src/tests/test_walk.hpp @@ -128,7 +128,7 @@ double get_edge_trans_weight(Edge &e) } template -void get_static_first_order_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > &trans_mat) +void get_static_walk_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > &trans_mat) { std::vector weight_sum(v_num, 0.0); for (edge_id_t e_i = 0; e_i < e_num; e_i++) @@ -150,14 +150,14 @@ void get_static_first_order_trans_matrix(vertex_id_t v_num, Edge *e } template -void check_static_first_order_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > rw_sequences) +void check_static_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, std::vector > rw_sequences) { std::vector > trans_mat(v_num); for (auto &vec : trans_mat) { vec.resize(v_num, 0.0); } - get_static_first_order_trans_matrix(v_num, edges, e_num, trans_mat); + get_static_walk_trans_matrix(v_num, edges, e_num, trans_mat); //check if sequences are legal for (auto &s : rw_sequences) @@ -184,109 +184,3 @@ void check_static_first_order_random_walk(vertex_id_t v_num, Edge * //check if trans_mat is obeyed during random walk cmp_trans_matrix(real_trans_mat, trans_mat); } - -template -void get_node2vec_trans_matrix(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > &trans_mat) -{ - std::vector > > graph(v_num); - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - graph[edges[e_i].src].push_back(edges[e_i]); - } - for (vertex_id_t v_i = 0; v_i < v_num; v_i++) - { - std::sort(graph[v_i].begin(), graph[v_i].end(), [](const Edge a, const Edge b){return a.dst < b.dst;}); - } - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - vertex_id_t src = edges[e_i].src; - vertex_id_t dst = edges[e_i].dst; - assert(src != dst); - //must be undirected graph - assert(graph[dst].size() != 0); - for (auto e : graph[dst]) - { - if (e.dst == src) - { - trans_mat[e_i][e.dst] += 1 / p * get_edge_trans_weight(e); - } else if (std::binary_search(graph[src].begin(), graph[src].end(), e, [](const Edge a, const Edge b){return a.dst < b.dst;})) - { - trans_mat[e_i][e.dst] += 1 * get_edge_trans_weight(e); - } else - { - trans_mat[e_i][e.dst] += 1 / q * get_edge_trans_weight(e); - } - } - } - mat_normalization(trans_mat); -} - -template -void check_node2vec_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, double p, double q, std::vector > rw_sequences) -{ - std::vector > trans_mat(e_num); - for (auto &vec : trans_mat) - { - vec.resize(v_num, 0.0); - } - get_node2vec_trans_matrix(v_num, edges, e_num, p, q, trans_mat); - - //check if sequences are legal - std::vector out_degree(v_num, 0); - std::vector > adj_mat(v_num); - for (auto &vec : adj_mat) - { - vec.resize(v_num, false); - } - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - adj_mat[edges[e_i].src][edges[e_i].dst] = true; - out_degree[edges[e_i].src]++; - } - for (auto &s : rw_sequences) - { - if (out_degree[s[0]] == 0) - { - for (auto v : s) - { - ASSERT_EQ(v, s[0]); - } - } else - { - for (size_t v_i = 0; v_i + 1 < s.size(); v_i++) - { - if (adj_mat[s[v_i]][s[v_i + 1]] == false) - { - printf("fault %u %u\n", s[v_i], s[v_i + 1]); - } - ASSERT_TRUE(adj_mat[s[v_i]][s[v_i + 1]]); - } - } - } - - std::map, edge_id_t> dict; - for (edge_id_t e_i = 0; e_i < e_num; e_i++) - { - std::pair key = std::pair(edges[e_i].src, edges[e_i].dst); - assert(dict.find(key) == dict.end()); - dict[key] = e_i; - } - - std::vector > real_trans_mat(e_num); - for (auto &vec : real_trans_mat) - { - vec.resize(v_num, 0.0); - } - for (auto &s : rw_sequences) - { - if (out_degree[s[0]] != 0) - { - for (size_t v_i = 0; v_i + 2 < s.size(); v_i++) - { - real_trans_mat[dict[std::pair(s[v_i], s[v_i + 1])]][s[v_i + 2]] += 1; - } - } - } - mat_normalization(real_trans_mat); - cmp_trans_matrix(real_trans_mat, trans_mat, 10.0); -} diff --git a/src/tests/test_walker.cpp b/src/tests/test_walker.cpp new file mode 100644 index 0000000..b47ce03 --- /dev/null +++ b/src/tests/test_walker.cpp @@ -0,0 +1,316 @@ +/* + * The MIT License (MIT) + * + * Copyright (c) 2019 Ke Yang, Tsinghua University + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include "storage.hpp" +#include "graph.hpp" +#include "walk.hpp" +#include "util.hpp" +#include "test.hpp" +#include "test_walk.hpp" +#include "../apps/static_comp.hpp" + +typedef uint64_t hash_t; + +struct HashWalkState +{ + hash_t hash; + vertex_id_t previous_vertex; +}; + +struct HashWalkConf +{ + walker_id_t walker_num; + step_t walk_length; + const real_t upper_bound = 3; + const real_t lower_bound = 1; + const hash_t magic_num = 10000007; + hash_t get_walker_init_hash(walker_id_t walker, vertex_id_t vertex) + { + return walker * magic_num + vertex; + } + hash_t get_walker_new_hash(hash_t old_hash, walker_id_t walker, vertex_id_t next_vertex) + { + return old_hash * magic_num + walker ^ next_vertex; + } + real_t get_dynamic_comp(hash_t hash, vertex_id_t current, vertex_id_t next) + { + real_t temp = hash % 3 + (current ^ next); + return lower_bound + (real_t) fmod(temp, upper_bound - lower_bound); + } +}; + +template +void hashwalk(WalkEngine* graph, HashWalkConf conf, int order, std::vector &walker_hash) +{ + struct WHItem + { + walker_id_t walker; + hash_t hash; + step_t step; + WHItem() : walker(0), hash(0), step(0) {} + WHItem(walker_id_t _walker, hash_t _hash, step_t _step) : walker(_walker), hash(_hash), step(_step) {} + }; + std::vector > wh_collector(graph->get_worker_num()); + graph->set_walkers( + conf.walker_num, + [&] (Walker &walker, vertex_id_t start_vertex) + { + walker.data.hash = conf.get_walker_init_hash(walker.id, start_vertex); + wh_collector[omp_get_thread_num()].push_back(WHItem(walker.id, walker.data.hash, walker.step)); + }, + [&] (Walker &walker, vertex_id_t current_v, AdjUnit *edge) + { + walker.data.hash = conf.get_walker_new_hash(walker.data.hash, walker.id, edge->neighbour); + walker.data.previous_vertex = current_v; + wh_collector[omp_get_thread_num()].push_back(WHItem(walker.id, walker.data.hash, walker.step)); + } + ); + auto extension_comp = [&] (Walker &walker, vertex_id_t current_v) + { + return walker.step >= conf.walk_length ? 0.0 : 1.0; + }; + auto static_comp = get_trivial_static_comp(graph); + auto upper_bound_func = [&] (vertex_id_t v_id, AdjList *adj_lists) + { + return conf.upper_bound; + }; + auto lower_bound_func = [&] (vertex_id_t v_id, AdjList *adj_lists) + { + return conf.lower_bound; + }; + if (order == 1) + { + graph->random_walk( + extension_comp, + static_comp, + [&] (Walker& walker, vertex_id_t vertex, AdjUnit *edge) + { + if (walker.step == 0) + { + return conf.upper_bound; + } else + { + return conf.get_dynamic_comp(walker.data.hash, walker.data.previous_vertex, edge->neighbour); + } + }, + upper_bound_func, + lower_bound_func + ); + } else + { + graph->template second_order_random_walk ( + extension_comp, + static_comp, + [&] (Walker &walker, walker_id_t walker_idx, vertex_id_t current_v, AdjUnit *edge) + { + if (walker.step != 0) + { + stateQuery query; + query.src_v = current_v; + query.walker_idx = walker_idx; + graph->emit(walker.data.previous_vertex, query); + } + }, + [&] (vertex_id_t vtx, stateQuery query, AdjList* adj_list) + { + stateResponse response; + response.walker_idx = query.walker_idx; + response.data = vtx; + graph->emit(query.src_v, response); + }, + [&] (Walker &walker, stateResponse &response, vertex_id_t current_v, AdjUnit *edge) + { + if (walker.step == 0) + { + return conf.upper_bound; + } else + { + return conf.get_dynamic_comp(walker.data.hash, response.data, edge->neighbour); + } + }, + upper_bound_func, + lower_bound_func + ); + } + + std::thread send_thread([&]() { + for (int t_i = 0; t_i < graph->get_worker_num(); t_i++) + { + MPI_Send(wh_collector[t_i].data(), wh_collector[t_i].size() * sizeof(WHItem), get_mpi_data_type(), 0, 0, MPI_COMM_WORLD); + } + }); + if (get_mpi_rank() == 0) + { + std::vector wh_items; + for (partition_id_t p_i = 0; p_i < get_mpi_size(); p_i++) + { + for (int t_i = 0; t_i < graph->get_worker_num(); t_i++) + { + int recv_size = 0; + MPI_Status recv_status; + MPI_Probe(p_i, 0, MPI_COMM_WORLD, &recv_status); + MPI_Get_count(&recv_status, get_mpi_data_type(), &recv_size); + int recv_n = recv_size / sizeof(WHItem); + size_t old_size = wh_items.size(); + wh_items.resize(old_size + recv_n); + MPI_Recv(wh_items.data() + old_size, recv_size, get_mpi_data_type(), p_i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + + } + } + walker_hash.clear(); + walker_hash.resize(conf.walker_num, 0); + std::vector max_step(conf.walker_num, 0); + for (auto wh : wh_items) + { + if (wh.step >= max_step[wh.walker]) + { + walker_hash[wh.walker] = wh.hash; + max_step[wh.walker] = wh.step; + } + } + } + send_thread.join(); +} + +template +void check_hashwalk_random_walk(vertex_id_t v_num, Edge *edges, edge_id_t e_num, HashWalkConf conf, std::vector real_walker_hash, std::vector > &seq) +{ + std::vector std_walker_hash; + for (walker_id_t w_i = 0; w_i < seq.size(); w_i++) + { + hash_t hash = conf.get_walker_init_hash(w_i, seq[w_i][0]); + for (size_t s_i = 1; s_i < seq[w_i].size(); s_i++) + { + hash = conf.get_walker_new_hash(hash, w_i, seq[w_i][s_i]); + } + std_walker_hash.push_back(hash); + } + std::sort(real_walker_hash.begin(), real_walker_hash.end()); + std::sort(std_walker_hash.begin(), std_walker_hash.end()); + ASSERT_EQ(real_walker_hash.size(), std_walker_hash.size()); + for (size_t h_i = 0; h_i < std_walker_hash.size(); h_i++) + { + EXPECT_EQ(real_walker_hash[h_i], std_walker_hash[h_i]); + } +} + +template +void test_walker(vertex_id_t v_num, int worker_number, int order) +{ + WalkEngine graph; + graph.set_concurrency(worker_number); + graph.load_graph(v_num, test_data_file); + + HashWalkConf conf; + conf.walk_length = 20 + rand() % 20; + conf.walker_num = graph.get_vertex_num() * 100 + graph.get_edge_num() * 100 + rand() % 100; + MPI_Bcast(&conf, sizeof(conf), get_mpi_data_type(), 0, MPI_COMM_WORLD); + + std::vector walker_hash; + hashwalk(&graph, conf, order, walker_hash); + + std::vector > rw_sequences; + graph.collect_walk_sequence(rw_sequences); + + if (get_mpi_rank() == 0) + { + Edge *std_edges; + edge_id_t std_edge_num; + read_graph(test_data_file, 0, 1, std_edges, std_edge_num); + check_hashwalk_random_walk(v_num, std_edges, std_edge_num, conf, walker_hash, rw_sequences); + } +} + +template +void test_walker(int order) +{ + edge_id_t e_nums_arr[] = {200, 400, 600, 800, 1000, 1200, 1400, 1600}; + vertex_id_t v_num = 100 + rand() % 50; + std::vector e_nums(e_nums_arr, e_nums_arr + 8); + /* + size_t e_nums_arr[] = {30}; + vertex_id_t v_num = 10; + std::vector e_nums(e_nums_arr, e_nums_arr + 1); + */ + + MPI_Bcast(&v_num, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + + for (auto &e_num : e_nums_arr) + { + if (get_mpi_rank() == 0) + { + gen_undirected_graph_file(v_num, e_num); + } + MPI_Barrier(MPI_COMM_WORLD); + int worker_number = rand() % 8 + 1; + MPI_Bcast(&worker_number, 1, get_mpi_data_type(), 0, MPI_COMM_WORLD); + test_walker(v_num, worker_number, order); + } + if (get_mpi_rank() == 0) + { + rm_test_graph_temp_file(); + } +} + +TEST(Walker, UnbiasedFirstOrder) +{ + test_walker(1); +} + +TEST(Walker, BiasedFirstOrder) +{ + test_walker(1); +} + +TEST(Walker, UnbiasedSecondOrder) +{ + test_walker(2); +} + +TEST(Walker, BiasedSecondOrder) +{ + test_walker(2); +} + + +GTEST_API_ int main(int argc, char *argv[]) +{ + MPI_Instance mpi_instance(&argc, &argv); + ::testing::InitGoogleTest(&argc, argv); + mute_nonroot_gtest_events(); + int result = RUN_ALL_TESTS(); + return result; +}