Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow specifying arbitrary SSH configuration for nodes #1024

Closed
wants to merge 12 commits into from
2 changes: 1 addition & 1 deletion nix/eval-machine-info.nix
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ rec {

machines =
flip mapAttrs nodes (n: v': let v = scrubOptionValue v'; in
{ inherit (v.config.deployment) targetEnv targetPort targetHost encryptedLinksTo storeKeysOnMachine alwaysActivate owners keys hasFastConnection;
{ inherit (v.config.deployment) targetEnv targetPort targetHost sshConfigOptionsFile encryptedLinksTo storeKeysOnMachine alwaysActivate owners keys hasFastConnection;
nixosRelease = v.config.system.nixos.release or v.config.system.nixosRelease or (removeSuffix v.config.system.nixosVersionSuffix v.config.system.nixosVersion);
azure = optionalAttrs (v.config.deployment.targetEnv == "azure") v.config.deployment.azure;
ec2 = optionalAttrs (v.config.deployment.targetEnv == "ec2") v.config.deployment.ec2;
Expand Down
6 changes: 6 additions & 0 deletions nix/options.nix
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ in
'';
};

deployment.sshConfigOptionsFile = mkOption {
type = types.nullOr types.path;
default = null;
description = "Arbitrary SSH configuration options file.";
};

deployment.alwaysActivate = mkOption {
type = types.bool;
default = true;
Expand Down
6 changes: 6 additions & 0 deletions nix/ssh-tunnel.nix
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@ with lib;
type = types.int;
description = "Port number that SSH listens to on the remote machine.";
};
sshConfigOptionsFile = mkOption {
type = types.nullOr types.path;
default = null;
description = "Arbitrary SSH configuration option file.";
};
privateKey = mkOption {
type = types.path;
description = "Path to the private key file used to connect to the remote machine.";
Expand Down Expand Up @@ -91,6 +96,7 @@ with lib;
+ " -o PermitLocalCommand=yes"
+ " -o ServerAliveInterval=20"
+ " -o LocalCommand='${localCommand}'"
+ (if v.sshConfigOptionsFile == null then "" else " -F ${v.sshConfigOptionsFile}")
+ " -w ${toString v.localTunnel}:${toString v.remoteTunnel}"
+ " ${v.target} -p ${toString v.targetPort}"
+ " '${remoteCommand}'";
Expand Down
31 changes: 24 additions & 7 deletions nixops/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ def __init__(self, xml, config={}):
self.encrypted_links_to = set([e.get("value") for e in xml.findall("attrs/attr[@name='encryptedLinksTo']/list/string")])
self.store_keys_on_machine = xml.find("attrs/attr[@name='storeKeysOnMachine']/bool").get("value") == "true"
self.ssh_port = int(xml.find("attrs/attr[@name='targetPort']/int").get("value"))
ssh_options_file_elem = xml.find("attrs/attr[@name='sshConfigOptionsFile']/path")

if ssh_options_file_elem is not None:
self.ssh_config_options_file = ssh_options_file_elem.get('value')
else:
self.ssh_config_options_file = None
self.always_activate = xml.find("attrs/attr[@name='alwaysActivate']/bool").get("value") == "true"
self.owners = [e.get("value") for e in xml.findall("attrs/attr[@name='owners']/list/string")]
self.has_fast_connection = xml.find("attrs/attr[@name='hasFastConnection']/bool").get("value") == "true"
Expand Down Expand Up @@ -44,6 +50,7 @@ class MachineState(nixops.resources.ResourceState):
has_fast_connection = nixops.util.attr_property("hasFastConnection", False, bool)
ssh_pinged = nixops.util.attr_property("sshPinged", False, bool)
ssh_port = nixops.util.attr_property("targetPort", 22, int)
ssh_config_options_file = nixops.util.attr_property("sshConfigOptionsFile", None, str)
public_vpn_key = nixops.util.attr_property("publicVpnKey", None)
store_keys_on_machine = nixops.util.attr_property("storeKeysOnMachine", False, bool)
keys = nixops.util.attr_property("keys", {}, 'json')
Expand Down Expand Up @@ -86,6 +93,7 @@ def set_common_state(self, defn):
self.store_keys_on_machine = defn.store_keys_on_machine
self.keys = defn.keys
self.ssh_port = defn.ssh_port
self.ssh_config_options_file = defn.ssh_config_options_file
self.has_fast_connection = defn.has_fast_connection
if not self.has_fast_connection:
self.ssh.enable_compression()
Expand Down Expand Up @@ -190,10 +198,10 @@ def reboot_sync(self, hard=False):
"""Reboot this machine and wait until it's up again."""
self.reboot(hard=hard)
self.log_start("waiting for the machine to finish rebooting...")
nixops.util.wait_for_tcp_port(self.get_ssh_name(), self.ssh_port, open=False, callback=lambda: self.log_continue("."))
while self.try_ssh():
self.log_continue(".")
self.log_continue("[down]")
nixops.util.wait_for_tcp_port(self.get_ssh_name(), self.ssh_port, callback=lambda: self.log_continue("."))
self.log_end("[up]")
self.wait_for_ssh()
self.state = self.UP
self.ssh_pinged = True
self._ssh_pinged_this_time = True
Expand Down Expand Up @@ -275,11 +283,17 @@ def get_ssh_name(self):
assert False

def get_ssh_flags(self, scp=False):
if scp:
return ["-P", str(self.ssh_port)]
if self.ssh_config_options_file:
options = [
"-F", self.ssh_config_options_file
]
else:
return ["-p", str(self.ssh_port)]
options = []

if scp:
return options + ["-P", str(self.ssh_port)]
else:
return options + ["-p", str(self.ssh_port)]

def get_ssh_password(self):
return None
Expand All @@ -299,11 +313,14 @@ def address_to(self, r):
"""Return the IP address to be used to access resource "r" from this machine."""
return r.public_ipv4

def try_ssh(self):
    """Probe this machine once over SSH.

    Delegates to ``self.ssh.try_ssh()`` and hands its result back unchanged.
    """
    reachable = self.ssh.try_ssh()
    return reachable

def wait_for_ssh(self, check=False):
"""Wait until the SSH port is open on this machine."""
if self.ssh_pinged and (not check or self._ssh_pinged_this_time): return
self.log_start("waiting for SSH...")
nixops.util.wait_for_tcp_port(self.get_ssh_name(), self.ssh_port, callback=lambda: self.log_continue("."))
self.ssh.wait_for_ssh(callback=lambda: self.log_continue("."))
self.log_end("")
if self.state != self.RESCUE:
self.state = self.UP
Expand Down
1 change: 0 additions & 1 deletion nixops/backends/azure_vm.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import nixops
from nixops import known_hosts
from nixops.util import wait_for_tcp_port, ping_tcp_port
from nixops.util import attr_property, create_key_pair, generate_random_string, check_wait
from nixops.nix_expr import Call, RawValue

Expand Down
12 changes: 7 additions & 5 deletions nixops/backends/hetzner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from hetzner.robot import Robot

from nixops import known_hosts
from nixops.util import wait_for_tcp_port, ping_tcp_port
from nixops.util import attr_property, create_key_pair, xml_expr_to_python
from nixops.ssh_util import SSHCommandFailed
from nixops.backends import MachineDefinition, MachineState
Expand Down Expand Up @@ -188,9 +187,10 @@ def _wait_for_rescue(self, ip):
# systems.
self.log_start("waiting for rescue system...")
dotlog = lambda: self.log_continue(".") # NOQA
wait_for_tcp_port(ip, 22, open=False, callback=dotlog)
self.wait_for_ssh(callback=dotlog, up=False)
self.log_continue("[down]")
wait_for_tcp_port(ip, 22, callback=dotlog)

self.wait_for_ssh(callback=dotlog, up=True)
self.log_end("[up]")
self.state = self.RESCUE

Expand Down Expand Up @@ -650,7 +650,9 @@ def _wait_stop(self):
"""
self.log_start("waiting for system to shutdown... ")
dotlog = lambda: self.log_continue(".") # NOQA
wait_for_tcp_port(self.main_ipv4, 22, open=False, callback=dotlog)
while self.try_ssh():
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a bit racy, because the timeout for run_command is only the SSH connect timeout, so if the machine is no longer reachable after the key exchange try_ssh could hang for minutes.

Btw. the same problem could occur in #857, hence cc @nh2.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we switched to Python 3.x (I think it's long overdue), we could use subprocess.run with the timeout argument for logged_exec and in SSHMaster, but in the interim it's still possible in Python 3.4 and lower using wait and kill. We already have a timeout argument, so I'd either rename that argument or add another command_timeout argument. Note that we can't simply get rid of the connect timeout and just use the command timeout, because there might be long-running operations, like switching to a new configuration.

self.log_continue(".")

self.log_continue("[down]")

self.state = self.STOPPED
Expand Down Expand Up @@ -684,7 +686,7 @@ def _check(self, res):
return

if self.state in (self.STOPPED, self.STOPPING):
res.is_up = ping_tcp_port(self.main_ipv4, 22)
res.is_up = self.try_ssh()
if not res.is_up:
self.state = self.STOPPED
res.is_reachable = False
Expand Down
2 changes: 1 addition & 1 deletion nixops/backends/none.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def _check(self, res):
res.exists = False
return
res.exists = True
res.is_up = nixops.util.ping_tcp_port(self.target_host, self.ssh_port)
res.is_up = self.try_ssh()
if res.is_up:
MachineState._check(self, res)

Expand Down
1 change: 1 addition & 0 deletions nixops/deployment.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,7 @@ def do_machine(m):
('networking', 'p2pTunnels', 'ssh', m2.name): {
'target': '{0}-unencrypted'.format(m2.name),
'targetPort': m2.ssh_port,
'sshConfigOptionsFile': m2.ssh_config_options_file,
'localTunnel': local_tunnel,
'remoteTunnel': remote_tunnel,
'localIPv4': local_ipv4,
Expand Down
20 changes: 20 additions & 0 deletions nixops/ssh_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,5 +299,25 @@ def run_command(self, command, flags=[], timeout=None, logged=True,
else:
return res

def try_ssh(self, user=None, timeout=1):
    """Run the no-op command ``true`` over SSH and report whether it worked.

    user: forwarded to ``run_command`` as ``user``.
    timeout: forwarded to ``run_command`` as ``timeout``.

    Returns True when ``run_command`` completes, False when it raises
    ``SSHConnectionFailed``.  Any other exception propagates to the caller.
    """
    try:
        self.run_command('true', timeout=timeout, user=user, flags=['-q'])
    except nixops.ssh_util.SSHConnectionFailed:
        return False
    else:
        return True

def wait_for_ssh(self, user=None, attempts=-1, timeout=5, callback=None, up=True):
    """Wait until the remote's SSH is up or down based on the "up" parameter.

    Calls ``self.try_ssh(user=user, timeout=timeout)`` repeatedly until its
    result equals ``up``.

    user: forwarded to ``try_ssh`` and to ``_get_target`` for the error text.
    attempts: number of unsuccessful probes after which to give up; the
        default of -1 retries indefinitely.
    timeout: per-probe timeout forwarded to ``try_ssh``.
    callback: optional zero-argument callable invoked after each
        unsuccessful probe that does not exhaust ``attempts``.
    up: the probe result being waited for (True or False).

    Returns True once a probe matches ``up``.  Raises Exception when
    ``attempts`` probes in a row did not match.
    """
    attempt = 0
    while self.try_ssh(user=user, timeout=timeout) != up:
        attempt += 1
        if attempts != -1 and attempt >= attempts:
            # Only one value is passed to format(), so the placeholder must
            # be index 0; index 1 made this line raise IndexError instead
            # of the intended timeout error.
            raise Exception("timed out waiting for SSH on ‘{0}’".format(
                self._get_target(user)))
        if callback:
            callback()
    return True

def enable_compression(self):
self._compress = True