From f166330e01d50ccf820c6b1e1c2a2790b7dc047a Mon Sep 17 00:00:00 2001 From: Karl Nilsson Date: Mon, 22 Jul 2024 11:45:58 +0100 Subject: [PATCH] Be more lenient during pre init. Ra currently expects that any registered server in names.dets also has a valid directory. As these two are not updated in a transaction, it is possible for them to diverge; hence this change introduces more leniency: a registered server for which no directory exists is no longer considered an error that will stop the ra system from starting. --- src/ra_log_pre_init.erl | 27 +++++++++++++++++++++------ test/ra_log_2_SUITE.erl | 32 +++++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/src/ra_log_pre_init.erl b/src/ra_log_pre_init.erl index dc04fcc2..366757d8 100644 --- a/src/ra_log_pre_init.erl +++ b/src/ra_log_pre_init.erl @@ -42,7 +42,7 @@ init([System]) -> ?INFO("ra system '~ts' running pre init for ~b registered servers", [System, length(Regd)]), _ = [begin - try pre_init(System, Name, UId) of + try pre_init(System, UId) of ok -> ok catch _:Err -> ?ERROR("pre_init failed in system ~s for UId ~ts with name ~ts" @@ -73,7 +73,7 @@ code_change(_OldVsn, State, _Extra) -> %%% Internal functions %%%=================================================================== -pre_init(System, Name, UId) -> +pre_init(System, UId) -> case ets:lookup(?ETSTBL, UId) of [{_, _}] -> %% already initialised @@ -83,10 +83,25 @@ pre_init(System, Name, UId) -> undefined -> {error, system_not_started}; SysCfg -> - {ok, #{log_init_args := Log}} = - ra_server_sup_sup:recover_config(System, Name), - ok = ra_log:pre_init(Log#{system_config => SysCfg}), - ok + %% check if the server dir exists, if not + %% then just log and return instead of failing. 
+ Dir = ra_env:server_data_dir(System, UId), + case ra_lib:is_dir(Dir) of + true -> + case ra_log:read_config(Dir) of + {ok, #{log_init_args := Log}} -> + ok = ra_log:pre_init(Log#{system_config => SysCfg}), + ok; + {error, Err} -> + ?ERROR("pre_init failed to read config file for UId '~ts', Err ~p", + [UId, Err]), + exit({pre_init_failed, Err}) + end; + false -> + ?INFO("pre_init UId '~ts' is registered but no data directory was found", + [UId]), + ok + end end end. diff --git a/test/ra_log_2_SUITE.erl b/test/ra_log_2_SUITE.erl index 19bf6d1e..a03ff2c4 100644 --- a/test/ra_log_2_SUITE.erl +++ b/test/ra_log_2_SUITE.erl @@ -31,7 +31,8 @@ all_tests() -> last_index_reset, last_index_reset_before_written, recovery, - recover_bigly, + recover_many, + recovery_with_missing_directory, wal_crash_recover, wal_down_read_availability, wal_down_append_throws, @@ -74,6 +75,11 @@ end_per_suite(Config) -> init_per_group(G, Config) -> DataDir = filename:join(?config(priv_dir, Config), G), + ra_env:configure_logger(logger), + LogFile = filename:join(DataDir, "ra.log"), + logger:set_primary_config(level, debug), + logger:add_handler(ra_handler, logger_std_h, + #{config => #{file => LogFile}}), [{access_pattern, G}, {work_dir, DataDir} | Config]. @@ -83,7 +89,6 @@ end_per_group(_, Config) -> init_per_testcase(TestCase, Config) -> ok = start_ra(Config), - ra_env:configure_logger(logger), DataDir = ?config(work_dir, Config), UId = <<(atom_to_binary(TestCase, utf8))/binary, (atom_to_binary(?config(access_pattern, Config)))/binary>>, @@ -609,7 +614,7 @@ recovery(Config) -> ok. -recover_bigly(Config) -> +recover_many(Config) -> Log0 = ra_log_init(Config), Log1 = write_n(1, 10000, 1, Log0), Pred = fun (L) -> @@ -626,6 +631,27 @@ recover_bigly(Config) -> ra_log:close(Log), ok. +recovery_with_missing_directory(Config) -> + %% checking that the ra system can be restarted even if a directory + %% has been deleted with a ra_directory entry still in place. 
+ logger:set_primary_config(level, debug), + UId = ?config(uid, Config), + Log0 = ra_log_init(Config), + ra_log:close(Log0), + + ServerDataDir = ra_env:server_data_dir(default, UId), + ok = ra_lib:recursive_delete(ServerDataDir), + ?assertNot(filelib:is_dir(ServerDataDir)), + + application:stop(ra), + start_ra(Config), + + Log5 = ra_log_init(Config), + ra_log:close(Log5), + ok = ra_lib:recursive_delete(ServerDataDir), + ?assertNot(filelib:is_dir(ServerDataDir)), + + ok. resend_write(Config) -> % logger:set_primary_config(level, debug),