Skip to content

Commit

Permalink
squash w/ startup/shutdown. Improve config reload logic. Remove depen…
Browse files Browse the repository at this point in the history
…dency on irods::environment_properties interface.
  • Loading branch information
korydraughn committed Nov 11, 2024
1 parent 89a8467 commit 4500497
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 62 deletions.
6 changes: 3 additions & 3 deletions lib/core/include/irods/irods_environment_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ namespace irods {
static environment_properties& instance();

// Returns a copy of the underlying configuration.
std::unordered_map<std::string, boost::any> make_backup()
static std::unordered_map<std::string, boost::any> copy_configuration()
{
return config_props_.map();
return instance().config_props_.map();
} // copy_configuration

void set_configuration(std::unordered_map<std::string, boost::any> _config)
Expand All @@ -39,7 +39,7 @@ namespace irods {
/**
* @brief Read environment configuration and fill environment_properties::properties
*/
void capture( );
void capture();

/**
* @brief Get a property from the map if it exists. catch the exception in the case where
Expand Down
9 changes: 5 additions & 4 deletions lib/core/include/irods/irods_server_properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,11 +59,12 @@ namespace irods
void init(const std::string& _path);

// Returns a copy of the underlying configuration.
nlohmann::json make_backup()
static nlohmann::json copy_configuration()
{
auto lock = acquire_read_lock();
return config_props_;
} // make_backup
auto& inst = instance();
auto lock = inst.acquire_read_lock();
return inst.config_props_;
} // copy_configuration

/// @brief Read server configuration and fill server_properties::properties
void capture();
Expand Down
127 changes: 72 additions & 55 deletions server/main_server/src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -934,67 +934,83 @@ Environment Variables:
log_server::info("{}: Received configuration reload instruction. Reloading configuration.", __func__);

if (!validate_configuration()) {
log_server::error("{}: Invalid configuration. Continuing to run with previous configuration.", __func__);
log_server::error("{}: Invalid configuration. Server will continue with existing configuration.", __func__);
return;
}

nlohmann::json bkup_server_config;
std::unordered_map<std::string, boost::any> bkup_env_config;
nlohmann::json previous_server_config;

try {
log_server::info("{}: Creating backup of server and environment configurations.", __func__);
bkup_server_config = irods::server_properties::instance().make_backup();
bkup_env_config = irods::environment_properties::instance().make_backup();
log_server::info("{}: Creating backup of server configuration.", __func__);
previous_server_config = irods::server_properties::copy_configuration();
}
catch (const irods::exception& e) {
log_server::error("{}: Error creating backups of configuration for main server process: {}", __func__, e.client_display_what());
log_server::info("{}: Continuing with existing configuration.", __func__);
log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.client_display_what());
return;
}
catch (const std::exception& e) {
log_server::error("{}: Error creating backups of configuration for main server process: {}", __func__, e.what());
log_server::info("{}: Continuing with existing configuration.", __func__);
log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.what());
return;
}

try {
// Loading the config files must be treated as an atomic operation. If either operation
// fails, the server may be in an invalid state.
log_server::info("{}: Reloading server configuration for main server process.", __func__);
log_server::info("{}: Reloading server configuration.", __func__);
irods::server_properties::instance().reload();
irods::environment_properties::instance().capture();
}
catch (const irods::exception& e) {
// If an exception is thrown while reading server_config.json or the irods_environment.json,
// the server should rollback the changes.
log_server::error("{}: Error reloading configuration for main server process: {}, restoring backup.", __func__, e.client_display_what());
irods::server_properties::instance().set_configuration(std::move(bkup_server_config));
irods::environment_properties::instance().set_configuration(std::move(bkup_env_config));
log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.client_display_what());
irods::server_properties::instance().set_configuration(std::move(previous_server_config));
return;
}
catch (const std::exception& e) {
log_server::error("{}: Error reloading configuration for main server process: {}, restoring backup.", __func__, e.what());
irods::server_properties::instance().set_configuration(std::move(bkup_server_config));
irods::environment_properties::instance().set_configuration(std::move(bkup_env_config));
log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.what());
irods::server_properties::instance().set_configuration(std::move(previous_server_config));
return;
}

//
// At this point, the new configuration has been read into memory. Now the server must update
// its state and replace its child processes.
//

try {
// Update the logger for the main server process.
log_server::set_level(log_ns::get_level_from_config(irods::KW_CFG_LOG_LEVEL_CATEGORY_SERVER));
log_ns::legacy::set_level(log_ns::get_level_from_config(irods::KW_CFG_LOG_LEVEL_CATEGORY_LEGACY));
log_ns::set_server_zone(irods::get_server_property<std::string>(irods::KW_CFG_ZONE_NAME));
log_ns::set_server_hostname(boost::asio::ip::host_name());
log_ns::set_server_hostname(irods::get_server_property<std::string>(irods::KW_CFG_HOST));
}
catch (const std::exception&) {
// If we end up here, ignore it. It's inconvenient that there was a failure, but this doesn't
// hurt the server's ability to do real work. For that reason, we can advise the administrator
// to try reloading the configuration again.
log_server::warn("{}: The reload encountered an unexpected error while updating the log's state. "
"Consider reloading the configuration again to clear it up.", __func__);
catch (const irods::exception& e) {
log_server::warn("{}: Could not update the logger's state: {}. Continuing with reload.", __func__, e.client_display_what());
}
catch (const std::exception& e) {
log_server::warn("{}: Could not update the logger's state: {}. Continuing with reload.", __func__, e.what());
}

// Get the host and port information from the server configuration before stopping the agent factory
// and delay server. Doing this before stopping the child processes protects the main server from
// exceptions being thrown when extracting the host and zone port from the configuration.
std::string local_server_host;
std::string local_server_port_string;
try {
// The host property in server_config.json defines the true identity of the local server.
// We cannot use localhost or the loopback address because the computer may have multiple
// network interfaces and/or hostnames which map to different IPs.
local_server_host = irods::get_server_property<std::string>(irods::KW_CFG_HOST);

// Instruct the agent factory and delay server to gracefully stop.
// Use the zone port property from server_config.json. This property defines the port for
// server-to-server connections within the zone.
const auto local_server_port = irods::get_server_property<int>(irods::KW_CFG_ZONE_PORT);
local_server_port_string = std::to_string(local_server_port);
}
catch (const irods::exception& e) {
log_server::error("{}: Reload error: {}", __func__, e.client_display_what());
return;
}
catch (const std::exception& e) {
log_server::error("{}: Reload error: {}", __func__, e.what());
return;
}

if (g_pid_ds > 0) {
log_server::info("{}: Sending SIGTERM to delay server.", __func__);
Expand All @@ -1004,34 +1020,35 @@ Environment Variables:
log_server::info("{}: Sending SIGQUIT to agent factory.", __func__);
kill(g_pid_af, SIGQUIT);

// Reset this variable so that the delay server migration logic can handle
// Reset the variable holding the delay server's PID so the delay server migration logic can handle
// the relaunching of the delay server for us.
g_pid_ds = 0;

// Wait for the previous agent factory to close its listening socket before launching
// the new agent factory.
try {
// The server assumes the service account's irods_environment.json defines the true identity of
// the local server. We cannot use localhost or the loopback address because the computer may have
// multiple network interfaces and/or hostnames which map to different IPs.
const auto local_server_host = irods::get_environment_property<std::string>(irods::KW_CFG_IRODS_HOST);
while (true) {
// If this loop cannot make progress for some reason, allow the admin to stop the server
// without needing SIGKILL.
if (g_terminate || g_terminate_graceful) {
log_server::info("{}: Received shutdown instruction. Ending reload operation.", __func__);
return;
}

// Use the zone port property from server_config.json. This property defines the port for
// server-to-server connections within the zone.
const auto local_server_port = irods::get_server_property<int>(irods::KW_CFG_ZONE_PORT);
const auto local_server_port_string = std::to_string(local_server_port);
try {
if (is_server_listening_for_connections(local_server_host, local_server_port_string) == 0) {
log_server::info("{}: Waiting for previous agent factory to close its listening socket.", __func__);
std::this_thread::sleep_for(std::chrono::seconds(1));
continue;
}

// This will loop until the agent factory terminates.
while (is_server_listening_for_connections(local_server_host, local_server_port_string) == 0) {
log_server::info("{}: Waiting for previous agent factory to close its listening socket.", __func__);
std::this_thread::sleep_for(std::chrono::seconds(1));
break;
}
catch (const irods::exception& e) {
log_server::debug("{}: Unexpected error while waiting for previous agent factory to close its listening socket: {}", __func__, e.client_display_what());
}
catch (const std::exception& e) {
log_server::debug("{}: Unexpected error while waiting for previous agent factory to close its listening socket: {}", __func__, e.what());
}
}
catch (const irods::exception& e) {
log_server::error("{}: Error reloading configuration for main server process: {}", __func__, e.client_display_what());
}
catch (const std::exception& e) {
log_server::error("{}: Error reloading configuration for main server process: {}", __func__, e.what());
}

// Launch a new agent factory to serve client requests.
Expand Down Expand Up @@ -1109,10 +1126,10 @@ Environment Variables:

// Defer the launch of the delay server if the agent factory isn't listening.
try {
// The server assumes the service account's irods_environment.json defines the true identity of
// the local server. We cannot use localhost or the loopback address because the computer may have
// multiple network interfaces and/or hostnames which map to different IPs.
const auto local_server_host = irods::get_environment_property<std::string>(irods::KW_CFG_IRODS_HOST);
// The host property in server_config.json defines the true identity of the local server.
// We cannot use localhost or the loopback address because the computer may have multiple
// network interfaces and/or hostnames which map to different IPs.
const auto local_server_host = irods::get_server_property<std::string>(irods::KW_CFG_HOST);

// Use the zone port property from server_config.json. This property defines the port for
// server-to-server connections within the zone.
Expand Down Expand Up @@ -1401,7 +1418,7 @@ Environment Variables:
const std::string_view msg(buffer.data(), stream.gcount());
log_server::debug("{}: Received [{}] from server.", __func__, msg);
if (msg != "HEARTBEAT") {
log_server::debug("{}: Heartbeat Error: Did not get expected response. Deferring launch of Delay Server.", __func__);
log_server::debug("{}: Heartbeat Error: Did not get expected response.", __func__);
return -1;
}

Expand Down

0 comments on commit 4500497

Please sign in to comment.