Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[develop] Reuse the rotation script inside the munge resource #2471

Merged
merged 17 commits into from
Oct 12, 2023
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 0 additions & 35 deletions cookbooks/aws-parallelcluster-slurm/libraries/helpers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,25 +65,13 @@ def enable_munge_service
end
end

def restart_munge_service
service "munge" do
supports restart: true
action :restart
retries 5
retry_delay 10
end
end

def setup_munge_head_node
# Generate munge key or get it's value from secrets manager
munge_key_manager 'manage_munge_key' do
munge_key_secret_arn lazy {
node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn)
}
end

enable_munge_service
share_munge_head_node
end

def update_munge_head_node
Expand All @@ -92,29 +80,6 @@ def update_munge_head_node
action :update_munge_key
only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
end

restart_munge_service
share_munge_head_node
end

def share_munge_key_to_dir(shared_dir)
bash 'share_munge_key' do
user 'root'
group 'root'
code <<-SHARE_MUNGE_KEY
set -e
mkdir -p #{shared_dir}/.munge
# Copy key to shared dir
cp /etc/munge/munge.key #{shared_dir}/.munge/.munge.key
chmod 0700 #{shared_dir}/.munge
chmod 0600 #{shared_dir}/.munge/.munge.key
SHARE_MUNGE_KEY
end
end

def share_munge_head_node
share_munge_key_to_dir(node['cluster']['shared_dir'])
share_munge_key_to_dir(node['cluster']['shared_dir_login'])
end

def setup_munge_key(shared_dir)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,29 @@
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Copy pcluster config generator and templates
remote_directory "#{node['cluster']['scripts_dir']}/slurm" do
source 'head_node_slurm/slurm'
mode '0755'
action :create
recursive true
end
jdeamicis marked this conversation as resolved.
Show resolved Hide resolved

template "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh" do
source 'slurm/head_node/update_munge_key.sh.erb'
owner 'root'
group 'root'
mode '0700'
variables(
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) },
region: node['cluster']['region'],
munge_user: node['cluster']['munge']['user'],
munge_group: node['cluster']['munge']['group'],
shared_directory_compute: node['cluster']['shared_dir'],
shared_directory_login: node['cluster']['shared_dir_login_nodes']
)
end

include_recipe 'aws-parallelcluster-slurm::config_munge_key'

# Export /opt/slurm
Expand Down Expand Up @@ -218,18 +241,3 @@
retries 5
retry_delay 2
end unless redhat_on_docker?

template "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh" do
source 'slurm/head_node/update_munge_key.sh.erb'
owner 'root'
group 'root'
mode '0700'
variables(
munge_key_secret_arn: lazy { node['cluster']['config'].dig(:DevSettings, :MungeKeySettings, :MungeKeySecretArn) },
region: node['cluster']['region'],
munge_user: node['cluster']['munge']['user'],
munge_group: node['cluster']['munge']['group'],
shared_directory_compute: node['cluster']['shared_dir'],
shared_directory_login: node['cluster']['shared_dir_login']
)
end
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ def update_nodes_in_queue(strategy, queues)
munge_user: node['cluster']['munge']['user'],
munge_group: node['cluster']['munge']['group'],
shared_directory_compute: node['cluster']['shared_dir'],
shared_directory_login: node['cluster']['shared_dir_login']
shared_directory_login: node['cluster']['shared_dir_login_nodes']
)
only_if { ::File.exist?(node['cluster']['previous_cluster_config_path']) && is_custom_munge_key_updated? }
end
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,35 +23,43 @@

default_action :setup_munge_key

def fetch_and_decode_munge_key(munge_key_secret_arn)
declare_resource(:bash, 'fetch_and_decode_munge_key') do
def restart_munge_service
declare_resource(:service, "munge") do
supports restart: true
action :restart
retries 5
retry_delay 10
end
end

def share_munge_key_to_dir(shared_dir)
declare_resource(:bash, 'share_munge_key') do
user 'root'
group 'root'
cwd '/tmp'
code <<-FETCH_AND_DECODE
code <<-SHARE_MUNGE_KEY
set -e
# Get encoded munge key from secrets manager
encoded_key=$(aws secretsmanager get-secret-value --secret-id #{munge_key_secret_arn} --query 'SecretString' --output text --region #{node['cluster']['region']})
# If encoded_key doesn't have a value, error and exit
if [ -z "$encoded_key" ]; then
echo "Error fetching munge key from Secrets Manager or the key is empty"
exit 1
fi
mkdir -p #{shared_dir}/.munge
# Copy key to shared dir
cp /etc/munge/munge.key #{shared_dir}/.munge/.munge.key
chmod 0700 #{shared_dir}/.munge
chmod 0600 #{shared_dir}/.munge/.munge.key
SHARE_MUNGE_KEY
end
end

# Decode munge key and write to /etc/munge/munge.key
decoded_key=$(echo $encoded_key | base64 -d)
if [ $? -ne 0 ]; then
echo "Error decoding the munge key with base64"
exit 1
fi
def share_munge
share_munge_key_to_dir(node['cluster']['shared_dir'])
share_munge_key_to_dir(node['cluster']['shared_dir_login_nodes'])
end

echo "$decoded_key" > /etc/munge/munge.key
def fetch_and_decode_munge_key
script_path = "#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh"

# Set ownership on the key
chown #{node['cluster']['munge']['user']}:#{node['cluster']['munge']['group']} /etc/munge/munge.key
# Enforce correct permission on the key
chmod 0600 /etc/munge/munge.key
FETCH_AND_DECODE
declare_resource(:execute, 'fetch_and_decode_munge_key') do
jdeamicis marked this conversation as resolved.
Show resolved Hide resolved
user 'root'
group 'root'
cwd ::File.dirname(script_path)
command "./#{::File.basename(script_path)} -d"
end
end
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe you can simplify this block:

def fetch_and_decode_munge_key
  declare_resource(:execute, 'fetch_and_decode_munge_key') do
    user 'root'
    group 'root'
    command "/#{node['cluster']['scripts_dir']}/slurm/update_munge_key.sh -d"
  end
end

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great comment! Thank you Jacopo.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we actually need the first / character in the command variable. Though, in principle scripts_dir will always be an absolute path, so it shouldn't hurt.


jdeamicis marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -66,12 +74,15 @@ def generate_munge_key
chmod 0600 /etc/munge/munge.key
GENERATE_KEY
end

restart_munge_service
share_munge
end

action :setup_munge_key do
if new_resource.munge_key_secret_arn
# This block will fetch the munge key from Secrets Manager
fetch_and_decode_munge_key(new_resource.munge_key_secret_arn)
fetch_and_decode_munge_key
else
# This block will randomly generate a munge key
generate_munge_key
Expand All @@ -81,7 +92,7 @@ def generate_munge_key
action :update_munge_key do
if new_resource.munge_key_secret_arn
# This block will fetch the munge key from Secrets Manager and replace the previous munge key
fetch_and_decode_munge_key(new_resource.munge_key_secret_arn)
fetch_and_decode_munge_key
else
# This block will randomly generate a munge key and replace the previous munge key
generate_munge_key
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,25 @@ MUNGE_USER="<%= @munge_user %>"
MUNGE_GROUP="<%= @munge_group %>"
SHARED_DIRECTORY_COMPUTE="<%= @shared_directory_compute %>"
SHARED_DIRECTORY_LOGIN="<%= @shared_directory_login %>"
DISABLE_RESOURCE_CHECK=false

while getopts "d" opt; do
case $opt in
d) DISABLE_RESOURCE_CHECK=true;;
*)
echo "Usage: $0 [-d]" >&2
exit 1
;;
esac
done

# Check compute fleet status
compute_fleet_status=$(get-compute-fleet-status.sh)
if ! echo "$compute_fleet_status" | grep -q '"status": "STOPPED"'; then
echo "Compute fleet is not stopped. Please stop it before updating the munge key."
exit 1
if ! $DISABLE_RESOURCE_CHECK; then
jdeamicis marked this conversation as resolved.
Show resolved Hide resolved
# Check compute fleet status
compute_fleet_status=$(get-compute-fleet-status.sh)
if ! echo "$compute_fleet_status" | grep -q '"status": "STOPPED"'; then
echo "Compute fleet is not stopped. Please stop it before updating the munge key."
exit 1
fi
jdeamicis marked this conversation as resolved.
Show resolved Hide resolved
fi

# If SECRET_ARN is provided, fetch the munge key from Secrets Manager
Expand Down
Loading