Skip to content

Commit

Permalink
Merge pull request emqx#12780 from keynslug/ft/EMQX-11979/snapshot-tr…
Browse files Browse the repository at this point in the history
…ansfer

feat(dsrepl): transfer storage snapshot during ra snapshot recovery
  • Loading branch information
keynslug authored Apr 2, 2024
2 parents abdd86c + 778e897 commit 879709e
Show file tree
Hide file tree
Showing 12 changed files with 1,168 additions and 212 deletions.
23 changes: 21 additions & 2 deletions apps/emqx_durable_storage/src/emqx_ds_builtin_db_sup.erl
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@
-behaviour(supervisor).

%% API:
-export([start_db/2, start_shard/1, start_egress/1, stop_shard/1, ensure_shard/1, ensure_egress/1]).
-export([
start_db/2,
start_shard/1,
start_egress/1,
stop_shard/1,
terminate_storage/1,
restart_storage/1,
ensure_shard/1,
ensure_egress/1
]).
-export([which_shards/1]).

%% behaviour callbacks:
Expand Down Expand Up @@ -64,12 +73,22 @@ start_shard({DB, Shard}) ->
%% Start a child described by `egress_spec(DB, Shard)' under the supervisor
%% addressed by `#?egress_sup{db = DB}'. The result of
%% `supervisor:start_child/2' is returned unchanged.
start_egress({DB, Shard}) ->
    EgressSup = ?via(#?egress_sup{db = DB}),
    ChildSpec = egress_spec(DB, Shard),
    supervisor:start_child(EgressSup, ChildSpec).

-spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok | {error, _}.
%% Terminate the shard's child under the DB-wide shards supervisor and then
%% delete its child spec. Both supervisor calls are asserted to return `ok';
%% any other result crashes the caller with a `badmatch'.
-spec stop_shard(emqx_ds_storage_layer:shard_id()) -> ok.
stop_shard({DB, _} = ShardId) ->
    ShardsSup = ?via(#?shards_sup{db = DB}),
    ok = supervisor:terminate_child(ShardsSup, ShardId),
    ok = supervisor:delete_child(ShardsSup, ShardId).

%% Terminate the `{Shard, storage}' child of the supervisor addressed by
%% `#?shard_sup{db = DB, shard = Shard}'. The child spec is left in place
%% (only `supervisor:terminate_child/2' is called), and that call's result
%% is passed through as is.
-spec terminate_storage(emqx_ds_storage_layer:shard_id()) -> ok | {error, _Reason}.
terminate_storage({DB, Shard}) ->
    supervisor:terminate_child(
        ?via(#?shard_sup{db = DB, shard = Shard}),
        {Shard, storage}
    ).

%% Restart the `{Shard, storage}' child of the supervisor addressed by
%% `#?shard_sup{db = DB, shard = Shard}'.
%%
%% The result of `supervisor:restart_child/2' is returned unchanged. Besides
%% `{ok, Child}' and `{error, Reason}', that call is documented to return
%% `{ok, Child, Info}' when the child's start function does so; the spec
%% lists that shape so that callers and Dialyzer see the full contract.
-spec restart_storage(emqx_ds_storage_layer:shard_id()) ->
    {ok, _Child} | {ok, _Child, _Info} | {error, _Reason}.
restart_storage({DB, Shard}) ->
    Sup = ?via(#?shard_sup{db = DB, shard = Shard}),
    supervisor:restart_child(Sup, {Shard, storage}).

-spec ensure_shard(emqx_ds_storage_layer:shard_id()) ->
ok | {error, _Reason}.
ensure_shard(Shard) ->
Expand Down
6 changes: 4 additions & 2 deletions apps/emqx_durable_storage/src/emqx_ds_lts.erl
Original file line number Diff line number Diff line change
Expand Up @@ -263,12 +263,14 @@ trie_insert(#trie{trie = Trie, stats = Stats, persist = Persist}, State, Token,
end.

-spec get_id_for_key(trie(), state(), edge()) -> static_key().
get_id_for_key(#trie{static_key_size = Size}, _State, _Token) ->
get_id_for_key(#trie{static_key_size = Size}, State, Token) when Size =< 32 ->
%% Requirements for the return value:
%%
%% It should be globally unique for the `{State, Token}` pair. Other
%% than that, there are no requirements. The return value doesn't even
%% have to be deterministic, since the states are saved in the trie.
%% Yet, it helps a lot if it is, so that applying the same sequence
%% of topics to different tries will result in the same trie state.
%%
%% The generated value becomes the ID of the topic in the durable
%% storage. Its size should be relatively small to reduce the
Expand All @@ -277,7 +279,7 @@ get_id_for_key(#trie{static_key_size = Size}, _State, _Token) ->
%% If we want to impress computer science crowd, sorry, I mean to
%% minimize storage requirements, we can even employ Huffman coding
%% based on the frequency of messages.
<<Int:(Size * 8)>> = crypto:strong_rand_bytes(Size),
<<Int:(Size * 8), _/bytes>> = crypto:hash(sha256, term_to_binary([State | Token])),
Int.

%% erlfmt-ignore
Expand Down
48 changes: 37 additions & 11 deletions apps/emqx_durable_storage/src/emqx_ds_replication_layer.erl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@
-export([
%% RPC Targets:
do_drop_db_v1/1,
do_store_batch_v1/4,
do_get_streams_v1/4,
do_get_streams_v2/4,
do_make_iterator_v2/5,
Expand All @@ -53,19 +52,21 @@
do_get_delete_streams_v4/4,
do_make_delete_iterator_v4/5,
do_delete_next_v4/5,
%% Unused:
do_drop_generation_v3/3,
%% Obsolete:
do_store_batch_v1/4,
do_make_iterator_v1/5,
do_add_generation_v2/1,
do_drop_generation_v3/3,

%% Egress API:
ra_store_batch/3
]).

-export([
init/1,
apply/3
apply/3,

snapshot_module/0
]).

-export_type([
Expand All @@ -80,6 +81,10 @@
batch/0
]).

-export_type([
ra_state/0
]).

-include_lib("emqx_utils/include/emqx_message.hrl").
-include("emqx_ds_replication_layer.hrl").

Expand Down Expand Up @@ -133,13 +138,29 @@

-type message_id() :: emqx_ds:message_id().

%% TODO: this type is obsolete and is kept only for compatibility with
%% BPAPIs. Remove it when emqx_ds_proto_v4 is gone (EMQX 5.6)
-type batch() :: #{
?tag := ?BATCH,
?batch_messages := [emqx_types:message()]
}.

-type generation_rank() :: {shard_id(), term()}.

%% Core state of the replication, i.e. the state of ra machine.
%% `db_shard' holds the `{DB, Shard}' pair the machine was initialized with,
%% and `latest' starts out as 0 (see init/1).
-type ra_state() :: #{
db_shard := {emqx_ds:db(), shard_id()},
latest := timestamp_us()
}.

%% Command. Each command is an entry in the replication log.
%% Every command carries a `?tag' naming its kind; any further keys are
%% left unconstrained by this type.
-type ra_command() :: #{
?tag := ?BATCH | add_generation | update_config | drop_generation,
_ => _
}.

%% NOTE(review): presumably a timestamp in microseconds, going by the name
%% and by timeus_to_timestamp/1 dividing by 1000 -- confirm.
-type timestamp_us() :: non_neg_integer().

%%================================================================================
%% API functions
%%================================================================================
Expand Down Expand Up @@ -380,10 +401,9 @@ do_drop_db_v1(DB) ->
batch(),
emqx_ds:message_store_opts()
) ->
emqx_ds:store_batch_result().
do_store_batch_v1(DB, Shard, #{?tag := ?BATCH, ?batch_messages := Messages}, Options) ->
Batch = [{emqx_message:timestamp(Message), Message} || Message <- Messages],
emqx_ds_storage_layer:store_batch({DB, Shard}, Batch, Options).
no_return().
%% Obsolete RPC target: all arguments are ignored and the call always
%% raises `error:obsolete_api'.
do_store_batch_v1(_DB, _Shard, _Batch, _Options) ->
    erlang:error(obsolete_api).

%% Remove me in EMQX 5.6
-dialyzer({nowarn_function, do_get_streams_v1/4}).
Expand Down Expand Up @@ -496,9 +516,9 @@ do_list_generations_with_lifetimes_v3(DB, Shard) ->
).

-spec do_drop_generation_v3(emqx_ds:db(), shard_id(), emqx_ds_storage_layer:gen_id()) ->
ok | {error, _}.
do_drop_generation_v3(DB, ShardId, GenId) ->
emqx_ds_storage_layer:drop_generation({DB, ShardId}, GenId).
no_return().
%% Obsolete RPC target: all arguments are ignored and the call always
%% raises `error:obsolete_api'.
do_drop_generation_v3(_DB, _ShardId, _GenId) ->
    erlang:error(obsolete_api).

-spec do_get_delete_streams_v4(
emqx_ds:db(), emqx_ds_replication_layer:shard_id(), emqx_ds:topic_filter(), emqx_ds:time()
Expand Down Expand Up @@ -635,9 +655,12 @@ ra_drop_shard(DB, Shard) ->

%%

%% Build the initial machine state from the `db' and `shard' keys of the
%% argument map: the pair is stored under `db_shard' and `latest' starts
%% at 0. Crashes with `function_clause' if either key is missing.
-spec init(_Args :: map()) -> ra_state().
init(#{db := DB, shard := Shard}) ->
    DBShard = {DB, Shard},
    #{
        db_shard => DBShard,
        latest => 0
    }.

-spec apply(ra_machine:command_meta_data(), ra_command(), ra_state()) ->
{ra_state(), _Reply, _Effects}.
apply(
#{index := RaftIdx},
#{
Expand Down Expand Up @@ -717,3 +740,6 @@ timestamp_to_timeus(TimestampMs) ->

%% Integer-divide the given value by 1000. `div' truncates toward zero,
%% so any remainder below 1000 is dropped.
timeus_to_timestamp(TimeUs) ->
    Timestamp = TimeUs div 1000,
    Timestamp.

%% Name the module that handles snapshots for this machine. Exported
%% together with init/1 and apply/3; always returns
%% `emqx_ds_replication_snapshot'.
-spec snapshot_module() -> module().
snapshot_module() ->
    emqx_ds_replication_snapshot.
18 changes: 10 additions & 8 deletions apps/emqx_durable_storage/src/emqx_ds_replication_layer_shard.erl
Original file line number Diff line number Diff line change
Expand Up @@ -147,19 +147,21 @@ start_shard(DB, Shard, #{replication_options := ReplicationOpts}) ->
Bootstrap = false;
{error, name_not_registered} ->
Bootstrap = true,
Machine = {module, emqx_ds_replication_layer, #{db => DB, shard => Shard}},
LogOpts = maps:with(
[
snapshot_interval,
resend_window
],
ReplicationOpts
),
ok = ra:start_server(DB, #{
id => LocalServer,
uid => <<ClusterName/binary, "_", Site/binary>>,
cluster_name => ClusterName,
initial_members => Servers,
machine => {module, emqx_ds_replication_layer, #{db => DB, shard => Shard}},
log_init_args => maps:with(
[
snapshot_interval,
resend_window
],
ReplicationOpts
)
machine => Machine,
log_init_args => LogOpts
})
end,
case Servers of
Expand Down
Loading

0 comments on commit 879709e

Please sign in to comment.