Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Non-voters and automatic promotion #375

Merged
merged 8 commits into from
Sep 25, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 18 additions & 14 deletions src/ra.erl
Original file line number Diff line number Diff line change
Expand Up @@ -455,7 +455,7 @@ start_cluster(System, [#{cluster_name := ClusterName} | _] = ServerConfigs,

%% @doc Starts a new distributed ra cluster.
%% @param ClusterName the name of the cluster.
%% @param ServerId the ra_server_id() of the server
%% @param ServerId the ra_server_id() of the server, or a map with server id and settings.
%% @param Machine The {@link ra_machine:machine/0} configuration.
%% @param ServerIds a list of the initial (seed) server ids of the cluster
%% @returns
Expand All @@ -470,19 +470,20 @@ start_cluster(System, [#{cluster_name := ClusterName} | _] = ServerConfigs,
%% forcefully deleted.
%% @see start_server/1
%% @end
-spec start_server(atom(), ra_cluster_name(), ra_server_id(),
-spec start_server(atom(), ra_cluster_name(), ra_server_id() | ra_new_server(),
ra_server:machine_conf(), [ra_server_id()]) ->
ok | {error, term()}.
start_server(System, ClusterName, {_, _} = ServerId, Machine, ServerIds)
start_server(System, ClusterName, {_, _} = ServerId, Machine, ServerIds) ->
start_server(System, ClusterName, #{id => ServerId}, Machine, ServerIds);
start_server(System, ClusterName, #{id := {_, _}} = Conf0, Machine, ServerIds)
when is_atom(System) ->
UId = new_uid(ra_lib:to_binary(ClusterName)),
illotum marked this conversation as resolved.
Show resolved Hide resolved
Conf = #{cluster_name => ClusterName,
id => ServerId,
uid => UId,
initial_members => ServerIds,
log_init_args => #{uid => UId},
machine => Machine},
start_server(System, Conf).
start_server(System, maps:merge(Conf, Conf0)).

%% @doc Starts a ra server in the default system
%% @param Conf a ra_server_config() configuration map.
Expand Down Expand Up @@ -558,9 +559,10 @@ delete_cluster(ServerIds, Timeout) ->
%% affect said cluster's availability characteristics (by increasing quorum node count).
%%
%% @param ServerLoc the ra server or servers to try to send the command to
%% @param ServerId the ra server id of the new server.
%% @param ServerId the ra server id of the new server, or a map with server id and settings.
%% @end
-spec add_member(ra_server_id() | [ra_server_id()], ra_server_id()) ->
-spec add_member(ra_server_id() | [ra_server_id()],
ra_server_id() | ra_new_server()) ->
ra_cmd_ret() |
{error, already_member} |
{error, cluster_change_not_permitted}.
Expand All @@ -571,7 +573,8 @@ add_member(ServerLoc, ServerId) ->
%% @see add_member/2
%% @end
-spec add_member(ra_server_id() | [ra_server_id()],
ra_server_id(), timeout()) ->
ra_server_id() | ra_new_server(),
timeout()) ->
ra_cmd_ret() |
{error, already_member} |
{error, cluster_change_not_permitted}.
Expand All @@ -580,7 +583,6 @@ add_member(ServerLoc, ServerId, Timeout) ->
{'$ra_join', ServerId, after_log_append},
Timeout).


%% @doc Removes a server from the cluster's membership configuration.
%% This function returns after appending a cluster membership change
%% command to the log.
Expand Down Expand Up @@ -716,7 +718,6 @@ new_uid(Source) when is_binary(Source) ->
Prefix = ra_lib:derive_safe_string(Source, 6),
ra_lib:make_uid(string:uppercase(Prefix)).


%% @doc Returns a map of overview data of the default Ra system on the current Erlang
%% node.
%% DEPRECATED: use overview/1
Expand Down Expand Up @@ -1132,13 +1133,16 @@ key_metrics({Name, N} = ServerId) when N == node() ->
end,
case whereis(Name) of
undefined ->
Counters#{state => noproc};
Counters#{state => noproc,
non_voter => noproc};
illotum marked this conversation as resolved.
Show resolved Hide resolved
_ ->
case ets:lookup(ra_state, Name) of
[] ->
Counters#{state => unknown};
[{_, State}] ->
Counters#{state => State}
Counters#{state => unknown,
non_voter => unknown};
[{_, State, NonVoter}] ->
Counters#{state => State,
non_voter => NonVoter}
end
end;
key_metrics({_, N} = ServerId) ->
Expand Down
17 changes: 17 additions & 0 deletions src/ra.hrl
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,33 @@
%% after node restart). Pids are not stable in this sense.
-type ra_server_id() :: {Name :: atom(), Node :: node()}.

%% Specifies server configuration for a new cluster member.
%% Subset of ra_server:ra_server_config().
%% Both `ra:add_member` and `ra:start_server` must be called with the same values.
-type ra_new_server() :: #{id := ra_server_id(),

%% If set, server will start as non-voter until later promoted by the
%% leader.
non_voter => boolean(),
illotum marked this conversation as resolved.
Show resolved Hide resolved
uid => ra_uid()}.

%% Status of a peer, as stored under the `status' key of ra_peer_state().
%% NOTE(review): the pid in `{sending_snapshot, pid()}' is presumably the
%% process sending the snapshot to the peer -- confirm against ra_server.
-type ra_peer_status() :: normal |
{sending_snapshot, pid()} |
suspended |
disconnected.

%% Voter status of a peer, as stored under the `voter_status' key of
%% ra_peer_state(). All keys are optional.
%% NOTE(review): `target' is presumably the log index a non-voter has to
%% reach before it can be promoted -- confirm against ra_server.
-type ra_voter_status() :: #{non_voter => boolean(),
uid => ra_uid(),
target => ra_index()}.

-type ra_peer_state() :: #{next_index := non_neg_integer(),
match_index := non_neg_integer(),
query_index := non_neg_integer(),
% the commit index last sent
% used for evaluating pipeline status
commit_index_sent := non_neg_integer(),
%% whether the peer is part of the consensus
voter_status := ra_voter_status(),
illotum marked this conversation as resolved.
Show resolved Hide resolved
%% indicates that a snapshot is being sent
%% to the peer
status := ra_peer_status()}.
Expand Down Expand Up @@ -139,6 +155,7 @@
-type snapshot_meta() :: #{index := ra_index(),
term := ra_term(),
cluster := ra_cluster_servers(),
cluster_state => ra_cluster(), %% TODO replace `cluster`
illotum marked this conversation as resolved.
Show resolved Hide resolved
machine_version := ra_machine:version()}.

-record(install_snapshot_rpc,
Expand Down
10 changes: 8 additions & 2 deletions src/ra_directory.erl
Original file line number Diff line number Diff line change
Expand Up @@ -175,14 +175,20 @@ overview(System) when is_atom(System) ->
#{directory := Tbl,
directory_rev := _TblRev} = get_names(System),
Dir = ets:tab2list(Tbl),
States = maps:from_list(ets:tab2list(ra_state)),
Rows = lists:map(fun({K, S, V}) ->
{K, {S, V}}
end,
ets:tab2list(ra_state)),
States = maps:from_list(Rows),
Snaps = maps:from_list(ets:tab2list(ra_log_snapshot_state)),
lists:foldl(fun ({UId, Pid, Parent, ServerName, ClusterName}, Acc) ->
{S, V} = maps:get(ServerName, States, {undefined, undefined}),
Acc#{ServerName =>
#{uid => UId,
pid => Pid,
parent => Parent,
state => maps:get(ServerName, States, undefined),
state => S,
non_voter => V,
cluster_name => ClusterName,
snapshot_state => maps:get(UId, Snaps,
undefined)}}
Expand Down
1 change: 1 addition & 0 deletions src/ra_log.erl
Original file line number Diff line number Diff line change
Expand Up @@ -644,6 +644,7 @@ update_release_cursor0(Idx, Cluster, MacVersion, MacState,
end,
Meta = #{index => Idx,
cluster => ClusterServerIds,
cluster_state => Cluster,
machine_version => MacVersion},
% The release cursor index is the last entry _not_ contributing
% to the current state. I.e. the last entry that can be discarded.
Expand Down
Loading