Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-voters and automatic promotion #375

Merged
merged 8 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 20 additions & 15 deletions src/ra.erl
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ start_cluster(System, [#{cluster_name := ClusterName} | _] = ServerConfigs,

%% @doc Starts a new distributed ra cluster.
%% @param ClusterName the name of the cluster.
%% @param ServerId the ra_server_id() of the server
%% @param ServerId the ra_server_id() of the server, or a map with server id and settings.
%% @param Machine The {@link ra_machine:machine/0} configuration.
%% @param ServerIds a list of initial (seed) server configurations
%% @returns
Expand All @@ -470,19 +470,21 @@ start_cluster(System, [#{cluster_name := ClusterName} | _] = ServerConfigs,
%% forcefully deleted.
%% @see start_server/1
%% @end
-spec start_server(atom(), ra_cluster_name(), ra_server_id(),
-spec start_server(atom(), ra_cluster_name(), ra_server_id() | ra_new_server(),
ra_server:machine_conf(), [ra_server_id()]) ->
ok | {error, term()}.
start_server(System, ClusterName, {_, _} = ServerId, Machine, ServerIds)
start_server(System, ClusterName, {_, _} = ServerId, Machine, ServerIds) ->
start_server(System, ClusterName, #{id => ServerId}, Machine, ServerIds);
start_server(System, ClusterName, #{id := {_, _}} = Conf0, Machine, ServerIds)
when is_atom(System) ->
UId = new_uid(ra_lib:to_binary(ClusterName)),
UId = maps:get(uid, Conf0,
new_uid(ra_lib:to_binary(ClusterName))),
Conf = #{cluster_name => ClusterName,
id => ServerId,
uid => UId,
initial_members => ServerIds,
log_init_args => #{uid => UId},
machine => Machine},
start_server(System, Conf).
start_server(System, maps:merge(Conf0, Conf)).

%% @doc Starts a ra server in the default system
%% @param Conf a ra_server_config() configuration map.
Expand Down Expand Up @@ -558,9 +560,10 @@ delete_cluster(ServerIds, Timeout) ->
%% affect said cluster's availability characteristics (by increasing quorum node count).
%%
%% @param ServerLoc the ra server or servers to try to send the command to
%% @param ServerId the ra server id of the new server.
%% @param ServerId the ra server id of the new server, or a map with server id and settings.
%% @end
-spec add_member(ra_server_id() | [ra_server_id()], ra_server_id()) ->
-spec add_member(ra_server_id() | [ra_server_id()],
ra_server_id() | ra_new_server()) ->
ra_cmd_ret() |
{error, already_member} |
{error, cluster_change_not_permitted}.
Expand All @@ -571,7 +574,8 @@ add_member(ServerLoc, ServerId) ->
%% @see add_member/2
%% @end
-spec add_member(ra_server_id() | [ra_server_id()],
ra_server_id(), timeout()) ->
ra_server_id() | ra_new_server(),
timeout()) ->
ra_cmd_ret() |
{error, already_member} |
{error, cluster_change_not_permitted}.
Expand All @@ -580,7 +584,6 @@ add_member(ServerLoc, ServerId, Timeout) ->
{'$ra_join', ServerId, after_log_append},
Timeout).


%% @doc Removes a server from the cluster's membership configuration.
%% This function returns after appending a cluster membership change
%% command to the log.
Expand Down Expand Up @@ -716,7 +719,6 @@ new_uid(Source) when is_binary(Source) ->
Prefix = ra_lib:derive_safe_string(Source, 6),
ra_lib:make_uid(string:uppercase(Prefix)).


%% @doc Returns a map of overview data of the default Ra system on the current Erlang
%% node.
%% DEPRECATED: user overview/1
Expand Down Expand Up @@ -1132,13 +1134,16 @@ key_metrics({Name, N} = ServerId) when N == node() ->
end,
case whereis(Name) of
undefined ->
Counters#{state => noproc};
Counters#{state => noproc,
membership => unknown};
_ ->
case ets:lookup(ra_state, Name) of
[] ->
Counters#{state => unknown};
[{_, State}] ->
Counters#{state => State}
Counters#{state => unknown,
membership => unknown};
[{_, State, Membership}] ->
Counters#{state => State,
membership => Membership}
end
end;
key_metrics({_, N} = ServerId) ->
Expand Down
25 changes: 23 additions & 2 deletions src/ra.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -39,24 +39,45 @@
%% after node restart). Pids are not stable in this sense.
-type ra_server_id() :: {Name :: atom(), Node :: node()}.

%% Specifies server configuration for a new cluster member.
%% Subset of ra_server:ra_server_config().
%% Both `ra:add_member` and `ra:start_server` must be called with the same values.
-type ra_new_server() :: #{id := ra_server_id(),
% Defaults to `voter` if absent.
membership => ra_membership(),
% Required for `promotable` in the above.
uid => ra_uid()}.

-type ra_peer_status() :: normal |
{sending_snapshot, pid()} |
suspended |
disconnected.

-type ra_membership() :: voter | promotable | non_voter | unknown.

-type ra_voter_status() :: #{membership => ra_membership(),
uid => ra_uid(),
target => ra_index()}.

-type ra_peer_state() :: #{next_index := non_neg_integer(),
match_index := non_neg_integer(),
query_index := non_neg_integer(),
% the commit index last sent
% used for evaluating pipeline status
commit_index_sent := non_neg_integer(),
%% Whether the peer is part of the consensus.
%% Defaults to "yes" if absent.
voter_status => ra_voter_status(),
%% indicates that a snapshot is being sent
%% to the peer
status := ra_peer_status()}.

-type ra_cluster() :: #{ra_server_id() => ra_peer_state()}.

-type ra_cluster_servers() :: [ra_server_id()].
%% Dehydrated cluster:
-type ra_cluster_servers() :: [ra_server_id()]. % Deprecated
-type ra_peer_snapshot() :: #{voter_status => ra_voter_status()}.
-type ra_cluster_snapshot() :: #{ra_server_id() => ra_peer_snapshot()}.

%% represent a unique entry in the ra log
-type log_entry() :: {ra_index(), ra_term(), term()}.
Expand Down Expand Up @@ -138,7 +159,7 @@

-type snapshot_meta() :: #{index := ra_index(),
term := ra_term(),
cluster := ra_cluster_servers(),
cluster := ra_cluster_snapshot(),
machine_version := ra_machine:version()}.

-record(install_snapshot_rpc,
Expand Down
10 changes: 8 additions & 2 deletions src/ra_directory.erl
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,20 @@ overview(System) when is_atom(System) ->
#{directory := Tbl,
directory_rev := _TblRev} = get_names(System),
Dir = ets:tab2list(Tbl),
States = maps:from_list(ets:tab2list(ra_state)),
Rows = lists:map(fun({K, S, V}) ->
{K, {S, V}}
end,
ets:tab2list(ra_state)),
States = maps:from_list(Rows),
Snaps = maps:from_list(ets:tab2list(ra_log_snapshot_state)),
lists:foldl(fun ({UId, Pid, Parent, ServerName, ClusterName}, Acc) ->
{S, V} = maps:get(ServerName, States, {undefined, undefined}),
Acc#{ServerName =>
#{uid => UId,
pid => Pid,
parent => Parent,
state => maps:get(ServerName, States, undefined),
state => S,
membership => V,
cluster_name => ClusterName,
snapshot_state => maps:get(UId, Snaps,
undefined)}}
Expand Down
4 changes: 3 additions & 1 deletion src/ra_log.erl
Original file line number Diff line number Diff line change
Expand Up @@ -637,7 +637,9 @@ update_release_cursor0(Idx, Cluster, MacVersion, MacState,
#?MODULE{cfg = #cfg{snapshot_interval = SnapInter},
reader = Reader,
snapshot_state = SnapState} = State0) ->
ClusterServerIds = maps:keys(Cluster),
ClusterServerIds = maps:map(fun (_, V) ->
maps:with([voter_status], V)
end, Cluster),
SnapLimit = case ra_snapshot:current(SnapState) of
undefined -> SnapInter;
{I, _} -> I + SnapInter
Expand Down
Loading
Loading