diff --git a/lib/core/include/irods/irods_environment_properties.hpp b/lib/core/include/irods/irods_environment_properties.hpp index db03cae10c..e21023db55 100644 --- a/lib/core/include/irods/irods_environment_properties.hpp +++ b/lib/core/include/irods/irods_environment_properties.hpp @@ -26,9 +26,9 @@ namespace irods { static environment_properties& instance(); // Returns a copy of the underyling configuration. - std::unordered_map make_backup() + static std::unordered_map copy_configuration() { - return config_props_.map(); + return instance().config_props_.map(); } // make_backup void set_configuration(std::unordered_map _config) @@ -39,7 +39,7 @@ namespace irods { /** * @brief Read environment configuration and fill environment_properties::properties */ - void capture( ); + void capture(); /** * @brief Get a property from the map if it exists. catch the exception in the case where diff --git a/lib/core/include/irods/irods_server_properties.hpp b/lib/core/include/irods/irods_server_properties.hpp index 804842e814..f9208a318f 100644 --- a/lib/core/include/irods/irods_server_properties.hpp +++ b/lib/core/include/irods/irods_server_properties.hpp @@ -59,11 +59,12 @@ namespace irods void init(const std::string& _path); // Returns a copy of the underyling configuration. - nlohmann::json make_backup() + static nlohmann::json copy_configuration() { - auto lock = acquire_read_lock(); - return config_props_; - } // make_backup + auto& inst = instance(); + auto lock = inst.acquire_read_lock(); + return inst.config_props_; + } // copy_configuration /// @brief Read server configuration and fill server_properties::properties void capture(); diff --git a/server/main_server/src/main.cpp b/server/main_server/src/main.cpp index 664b1d3f44..b374fca495 100644 --- a/server/main_server/src/main.cpp +++ b/server/main_server/src/main.cpp @@ -934,67 +934,83 @@ Environment Variables: log_server::info("{}: Received configuration reload instruction. 
Reloading configuration.", __func__); if (!validate_configuration()) { - log_server::error("{}: Invalid configuration. Continuing to run with previous configuration.", __func__); + log_server::error("{}: Invalid configuration. Server will continue with existing configuration.", __func__); return; } - nlohmann::json bkup_server_config; - std::unordered_map bkup_env_config; + nlohmann::json previous_server_config; try { - log_server::info("{}: Creating backup of server and environment configurations.", __func__); - bkup_server_config = irods::server_properties::instance().make_backup(); - bkup_env_config = irods::environment_properties::instance().make_backup(); + log_server::info("{}: Creating backup of server configuration.", __func__); + previous_server_config = irods::server_properties::copy_configuration(); } catch (const irods::exception& e) { - log_server::error("{}: Error creating backups of configuration for main server process: {}", __func__, e.client_display_what()); - log_server::info("{}: Continuing with existing configuration.", __func__); + log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.client_display_what()); return; } catch (const std::exception& e) { - log_server::error("{}: Error creating backups of configuration for main server process: {}", __func__, e.what()); - log_server::info("{}: Continuing with existing configuration.", __func__); + log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.what()); return; } try { - // Loading the config files must be treated as an atomic operation. If either operation - // fails, the server may be in an invalid state. 
- log_server::info("{}: Reloading server configuration for main server process.", __func__); + log_server::info("{}: Reloading server configuration.", __func__); irods::server_properties::instance().reload(); - irods::environment_properties::instance().capture(); } catch (const irods::exception& e) { - // If an exception is thrown while reading server_config.json or the irods_environment.json, - // the server should rollback the changes. - log_server::error("{}: Error reloading configuration for main server process: {}, restoring backup.", __func__, e.client_display_what()); - irods::server_properties::instance().set_configuration(std::move(bkup_server_config)); - irods::environment_properties::instance().set_configuration(std::move(bkup_env_config)); + log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.client_display_what()); + irods::server_properties::instance().set_configuration(std::move(previous_server_config)); return; } catch (const std::exception& e) { - log_server::error("{}: Error reloading configuration for main server process: {}, restoring backup.", __func__, e.what()); - irods::server_properties::instance().set_configuration(std::move(bkup_server_config)); - irods::environment_properties::instance().set_configuration(std::move(bkup_env_config)); + log_server::error("{}: Reload error: {}. Server will continue with existing configuration.", __func__, e.what()); + irods::server_properties::instance().set_configuration(std::move(previous_server_config)); return; } + // + // At this point, the new configuration has been read into memory. Now the server must update + // its state and replace its child processes. + // + try { // Update the logger for the main server process. 
log_server::set_level(log_ns::get_level_from_config(irods::KW_CFG_LOG_LEVEL_CATEGORY_SERVER)); log_ns::legacy::set_level(log_ns::get_level_from_config(irods::KW_CFG_LOG_LEVEL_CATEGORY_LEGACY)); log_ns::set_server_zone(irods::get_server_property(irods::KW_CFG_ZONE_NAME)); - log_ns::set_server_hostname(boost::asio::ip::host_name()); + log_ns::set_server_hostname(irods::get_server_property(irods::KW_CFG_HOST)); } - catch (const std::exception&) { - // If we end up here, ignore it. It's inconvenient that there was a failure, but this doesn't - // hurt the server's ability to do real work. For that reason, we can advise the administrator - // to try reloading the configuration again. - log_server::warn("{}: The reload encountered an unexpected error while updating the log's state. " - "Consider reloading the configuration again to clear it up.", __func__); + catch (const irods::exception& e) { + log_server::warn("{}: Could not update the logger's state: {}. Continuing with reload.", __func__, e.client_display_what()); } + catch (const std::exception& e) { + log_server::warn("{}: Could not update the logger's state: {}. Continuing with reload.", __func__, e.what()); + } + + // Get the host and port information from the server configuration before stopping the agent factory + // and delay server. Doing this before stopping the child processes protects the main server from + // exceptions being thrown when extracting the host and zone port from the configuration. + std::string local_server_host; + std::string local_server_port_string; + try { + // The host property in server_config.json defines the true identity of the local server. + // We cannot use localhost or the loopback address because the computer may have multiple + // network interfaces and/or hostnames which map to different IPs. + local_server_host = irods::get_server_property(irods::KW_CFG_HOST); - // Instruct the agent factory and delay server to gracefully stop. 
+ // Use the zone port property from server_config.json. This property defines the port for + // server-to-server connections within the zone. + const auto local_server_port = irods::get_server_property(irods::KW_CFG_ZONE_PORT); + local_server_port_string = std::to_string(local_server_port); + } + catch (const irods::exception& e) { + log_server::error("{}: Reload error: {}", __func__, e.client_display_what()); + return; + } + catch (const std::exception& e) { + log_server::error("{}: Reload error: {}", __func__, e.what()); + return; + } if (g_pid_ds > 0) { log_server::info("{}: Sending SIGTERM to delay server.", __func__); @@ -1004,34 +1020,35 @@ Environment Variables: log_server::info("{}: Sending SIGQUIT to agent factory.", __func__); kill(g_pid_af, SIGQUIT); - // Reset this variable so that the delay server migration logic can handle + // Reset the variable holding the delay server's PID so the delay server migration logic can handle // the relaunching of the delay server for us. g_pid_ds = 0; // Wait for the previous agent factory to close its listening socket before launching // the new agent factory. - try { - // The server assumes the service account's irods_environment.json defines the true identity of - // the local server. We cannot use localhost or the loopback address because the computer may have - // multiple network interfaces and/or hostnames which map to different IPs. - const auto local_server_host = irods::get_environment_property(irods::KW_CFG_IRODS_HOST); + while (true) { + // If this loop cannot make progress for some reason, allow the admin to stop the server + // without needing SIGKILL. + if (g_terminate || g_terminate_graceful) { + log_server::info("{}: Received shutdown instruction. Ending reload operation.", __func__); + return; + } - // Use the zone port property from server_config.json. This property defines the port for - // server-to-server connections within the zone. 
- const auto local_server_port = irods::get_server_property(irods::KW_CFG_ZONE_PORT); - const auto local_server_port_string = std::to_string(local_server_port); + try { + if (is_server_listening_for_connections(local_server_host, local_server_port_string) == 0) { + log_server::info("{}: Waiting for previous agent factory to close its listening socket.", __func__); + std::this_thread::sleep_for(std::chrono::seconds(1)); + continue; + } - // This will loop until the agent factory terminates. - while (is_server_listening_for_connections(local_server_host, local_server_port_string) == 0) { - log_server::info("{}: Waiting for previous agent factory to close its listening socket.", __func__); - std::this_thread::sleep_for(std::chrono::seconds(1)); + break; + } + catch (const irods::exception& e) { + log_server::debug("{}: Unexpected error while waiting for previous agent factory to close its listening socket: {}", __func__, e.client_display_what()); + } + catch (const std::exception& e) { + log_server::debug("{}: Unexpected error while waiting for previous agent factory to close its listening socket: {}", __func__, e.what()); } - } - catch (const irods::exception& e) { - log_server::error("{}: Error reloading configuration for main server process: {}", __func__, e.client_display_what()); - } - catch (const std::exception& e) { - log_server::error("{}: Error reloading configuration for main server process: {}", __func__, e.what()); } // Launch a new agent factory to serve client requests. @@ -1109,10 +1126,10 @@ Environment Variables: // Defer the launch of the delay server if the agent factory isn't listening. try { - // The server assumes the service account's irods_environment.json defines the true identity of - // the local server. We cannot use localhost or the loopback address because the computer may have - // multiple network interfaces and/or hostnames which map to different IPs. 
- const auto local_server_host = irods::get_environment_property(irods::KW_CFG_IRODS_HOST); + // The host property in server_config.json defines the true identity of the local server. + // We cannot use localhost or the loopback address because the computer may have multiple + // network interfaces and/or hostnames which map to different IPs. + const auto local_server_host = irods::get_server_property(irods::KW_CFG_HOST); // Use the zone port property from server_config.json. This property defines the port for // server-to-server connections within the zone. @@ -1401,7 +1418,7 @@ Environment Variables: const std::string_view msg(buffer.data(), stream.gcount()); log_server::debug("{}: Received [{}] from server.", __func__, msg); if (msg != "HEARTBEAT") { - log_server::debug("{}: Heartbeat Error: Did not get expected response. Deferring launch of Delay Server.", __func__); + log_server::debug("{}: Heartbeat Error: Did not get expected response.", __func__); return -1; }