Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Data structures for approximate queries. #88

Closed
wants to merge 27 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
c6543e8
Initial generic count-min-sketch implementation.
ujvl Mar 12, 2018
bd73a85
Bug fixes, count-min-sketch test.
ujvl Mar 12, 2018
b220632
* Count-min-sketch now thread safe.
ujvl Mar 12, 2018
7eae4d0
Count-sketch implementation, using writh median finding algorithm.
ujvl Mar 20, 2018
9eaf74a
Convencience constructors for monologs, hashing changes.
ujvl Mar 27, 2018
830afed
Universal monitor.
ujvl Mar 27, 2018
2996b33
Predictable behavior from count-sketch.
ujvl Jul 17, 2018
d56e5ca
Universal Sketch working.
ujvl Jul 17, 2018
26da3d1
Complete universal sketch (correct & predictable behavior)
ujvl Jul 19, 2018
e0af582
Sketch tests.
ujvl Jul 19, 2018
05ea229
Minor changes.
ujvl Jul 23, 2018
3d46249
Test with zipf, change error measurements.
ujvl Jul 23, 2018
8c033c2
Testing universal sketch adding layers behavior
ujvl Jul 23, 2018
dd5d33b
Universal sketch customized for multilog.
ujvl Jul 24, 2018
895570f
CMakeLists cleanup.
ujvl Jul 24, 2018
00e8022
Integration with atomic_multilog
ujvl Jul 24, 2018
33e2708
Naming fixes.
ujvl Jul 24, 2018
8b7a084
Bug fix attempt with header/source separation.
ujvl Jul 24, 2018
0bc97dd
Separated out code into separate files, used common substream_summary.
ujvl Jul 24, 2018
70e33e6
Allow > 1 increments.
ujvl Jul 24, 2018
e433397
Bug fix: missing param in sketch update.
ujvl Jul 25, 2018
e2280ea
Sketch experiment (temp)
ujvl Jul 25, 2018
b641e01
Changes to make testing easier.
ujvl Jul 27, 2018
a3709bc
Experiment
ujvl Jul 27, 2018
0350fc5
Cleaned up, added convenience functions for storage size.
ujvl Jul 29, 2018
a22bdd7
Switched param order.
ujvl Jul 29, 2018
8ec9193
Sketch experiment updates.
ujvl Jul 29, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} -Wall -ped
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-strict-aliasing")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -ldl")

set(CMAKE_BUILD_TYPE RelWithDebInfo)
if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE "RelWithDebInfo" CACHE STRING "" FORCE)
endif()

if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
message(FATAL_ERROR "In-source builds are not allowed.")
Expand Down Expand Up @@ -69,23 +71,23 @@ add_subdirectory(libconfluo)

if (BUILD_RPC)
# RPC Framework
add_subdirectory(librpc)
#add_subdirectory(librpc)

# Python Client
if (WITH_PY_CLIENT)
add_subdirectory(pyclient)
endif()

# Java Client
if (WITH_JAVA_CLIENT)
add_subdirectory(javaclient)
endif()
# if (WITH_PY_CLIENT)
# add_subdirectory(pyclient)
# endif()
#
# # Java Client
# if (WITH_JAVA_CLIENT)
# add_subdirectory(javaclient)
# endif()
endif()

# Confluo examples
if (BUILD_EXAMPLES)
add_subdirectory(examples/libtimeseries)
add_subdirectory(examples/libstreaming)
# add_subdirectory(examples/libtimeseries)
# add_subdirectory(examples/libstreaming)
endif()

if (BUILD_DOC)
Expand Down
2 changes: 1 addition & 1 deletion build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ set -e

mkdir -p build
cd build
cmake ..
cmake "$@" ..

START=$(date +%s)
make
Expand Down
29 changes: 24 additions & 5 deletions libconfluo/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter -I/Users/Ujval/dev/research/univmon_extension/simulator/V0.3/")
if (NOT APPLE AND UNIX)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--no-as-needed -ldl")
endif ()
Expand Down Expand Up @@ -95,6 +95,14 @@ add_library(confluo STATIC
confluo/container/monolog/monolog_linear.h
confluo/container/monolog/monolog_linear_bucket.h
confluo/container/radix_tree.h
confluo/container/sketch
confluo/container/sketch/confluo_universal_sketch.h
confluo/container/sketch/count_sketch.h
confluo/container/sketch/hash_manager.h
confluo/container/sketch/priority_queue.h
confluo/container/sketch/sketch_utils.h
confluo/container/sketch/substream_summary.h
confluo/container/sketch/universal_sketch.h
confluo/schema/field.h
confluo/schema/record.h
confluo/schema/record_batch.h
Expand All @@ -119,8 +127,9 @@ add_library(confluo STATIC
confluo/conf
confluo/conf/configuration_params.h
confluo/conf/defaults.h
confluo/filter.h
confluo/alert.h
confluo/filter.h
confluo/univ_sketch_log.h
src/confluo_store.cc
src/atomic_multilog.cc
src/filter.cc
Expand All @@ -140,6 +149,7 @@ add_library(confluo STATIC
src/container/cursor/alert_cursor.cc
src/container/cursor/offset_cursors.cc
src/container/cursor/record_cursors.cc
src/container/sketch/hash_manager.cc
src/parser/aggregate_parser.cc
src/parser/expression_compiler.cc
src/parser/expression_parser.cc
Expand Down Expand Up @@ -185,10 +195,17 @@ add_library(confluo STATIC
src/types/raw_data.cc
src/types/numeric.cc
src/types/type_properties.cc
src/types/type_manager.cc)
src/types/type_manager.cc )

target_link_libraries(confluo confluoutils ${CMAKE_THREAD_LIBS_INIT} ${lz4_STATIC_LIB})
add_dependencies(confluo lz4)

add_executable(usexperiment
test/sketch_experiment.cc
)
find_package(Boost 1.45.0 COMPONENTS program_options)
target_link_libraries(usexperiment confluo ${Boost_LIBRARIES})

if (BUILD_TESTS)
# Build test
add_executable(ctest
Expand Down Expand Up @@ -222,6 +239,8 @@ if (BUILD_TESTS)
test/container/cursor/batched_cursor_test.h
test/container/bitmap/bitmap_test.h
test/container/bitmap/bitmap_array_test.h
test/container/sketch/count_sketch_test.h
test/container/sketch/universal_sketch_test.h
test/container/bitmap/delta_encoded_array_test.h
test/container/stream_test.h
test/container/string_map_test.h
Expand All @@ -238,8 +257,8 @@ if (BUILD_TESTS)
test/compression/lz4_encode_test.h
test/compression/delta_encode_test.h
test/aggregated_reflog_test.h
test/confluo_store_test.h)
target_link_libraries(ctest confluo gtest gtest_main)
test/confluo_store_test.h test/container/sketch/priority_queue_test.h)
target_link_libraries(ctest confluo gtest gtest_main) #alan_univmon)
add_dependencies(ctest googletest)

file(GLOB_RECURSE test_sources test/*.cc)
Expand Down
32 changes: 32 additions & 0 deletions libconfluo/confluo/atomic_multilog.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "container/cursor/record_cursors.h"
#include "container/cursor/alert_cursor.h"
#include "container/monolog/monolog.h"
#include "container/sketch/confluo_universal_sketch.h"
#include "container/radix_tree.h"
#include "container/string_map.h"
#include "filter.h"
Expand All @@ -42,9 +43,11 @@
#include "string_utils.h"
#include "threads/periodic_task.h"
#include "threads/task_pool.h"
#include "univ_sketch_log.h"

using namespace ::confluo::archival;
using namespace ::confluo::monolog;
using namespace ::confluo::sketch;
using namespace ::confluo::index;
using namespace ::confluo::monitor;
using namespace ::confluo::parser;
Expand Down Expand Up @@ -78,6 +81,11 @@ class atomic_multilog {
*/
typedef metadata_writer metadata_writer_type;

/**
* Identifier for universal sketch
*/
typedef size_t univ_sketch_id_t;

/**
* Identifier for filter
*/
Expand Down Expand Up @@ -174,6 +182,19 @@ class atomic_multilog {
*/
bool is_indexed(const std::string &field_name);

/**
* Adds a universal sketch ton monitor a field.
* @param name The name of the sketch
* @param field_name The name of the field to monitor.
* @param epsilon margin of error
*/
void add_universal_sketch(const std::string &name, const std::string &field_name, double epsilon = 0.01);

void remove_universal_sketch(const std::string &name);

template<typename g_ret_t>
g_ret_t evaluate_frequency_fn(const std::string &name, std::function<g_ret_t(int64_t)> &g_fn);

/**
* Adds filter to the atomic multilog
* @param name The name of the filter
Expand Down Expand Up @@ -434,6 +455,13 @@ class atomic_multilog {
*/
void remove_index_task(const std::string &field_name, optional<management_exception> &ex);

void add_universal_sketch_task(const std::string &name,
const std::string &field_name,
double epsilon,
optional<management_exception> &ex);

void remove_universal_sketch_task(const std::string &name, optional<management_exception> &ex);

/**
* Adds a filter to be executed on the data
*
Expand Down Expand Up @@ -534,9 +562,13 @@ class atomic_multilog {
filter_log filters_;
/** The list of indexes */
index_log indexes_;
/** The list of universal sketches */
univ_sketch_log univ_sketches_;
/** The list of alerts */
alert_index alerts_;

/** A map from id to universal_sketch */
string_map<univ_sketch_id_t> univ_sketch_map_;
/** A map from id to filter */
string_map<filter_id_t> filter_map_;
/** A map from id to aggregate */
Expand Down
19 changes: 17 additions & 2 deletions libconfluo/confluo/container/monolog/monolog_exp2.h
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,23 @@ class monolog_exp2 : public monolog_exp2_base<T, NBUCKETS> {
/**
* Constructs a default monolog iterator
*/
monolog_exp2()
: tail_(0) {
monolog_exp2(size_t count = 0)
: tail_(count) {
}

monolog_exp2(const monolog_exp2& other) {
atomic::store(&tail_, other.size());
for (size_t i = 0; i < this->size(); i++) {
this->set(i, other.at(i));
}
}

monolog_exp2& operator=(const monolog_exp2& other) {
atomic::store(&tail_, other.size());
for (size_t i = 0; i < this->size(); i++) {
this->set(i, other.at(i));
}
return *this;
}

/**
Expand Down
19 changes: 17 additions & 2 deletions libconfluo/confluo/container/monolog/monolog_exp2_linear.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,23 @@ class monolog_exp2_linear : public monolog_exp2_linear_base<T, NCONTAINERS, BUCK
/**
* Constructs a relaxed monolog implementation
*/
monolog_exp2_linear()
: tail_(0) {
monolog_exp2_linear(size_t count = 0)
: tail_(count) {
}

monolog_exp2_linear(const monolog_exp2_linear& other) {
atomic::store(&tail_, other.size());
for (size_t i = 0; i < this->size(); i++) {
this->set(i, other.at(i));
}
}

monolog_exp2_linear& operator=(const monolog_exp2_linear& other) {
atomic::store(&tail_, other.size());
for (size_t i = 0; i < this->size(); i++) {
this->set(i, other.at(i));
}
return *this;
}

/**
Expand Down
Loading