From 30b8f16c89c7d8ba835ff484033497f809d1e573 Mon Sep 17 00:00:00 2001 From: Michael McGee Date: Mon, 10 Jun 2024 19:55:23 +0000 Subject: [PATCH] fdctl: fix sysctl configuration --- book/guide/initializing.md | 24 +++---- solana | 2 +- src/app/fdctl/configure/sysctl.c | 110 ++++++++++++++++++++++--------- src/app/fdctl/main.c | 1 + src/app/fdctl/run/run.c | 16 +++-- 5 files changed, 102 insertions(+), 51 deletions(-) diff --git a/book/guide/initializing.md b/book/guide/initializing.md index 06f2d5082e..eb727ea15e 100644 --- a/book/guide/initializing.md +++ b/book/guide/initializing.md @@ -84,18 +84,18 @@ for best performance. The `sysctl` stage will check and configure these parameters. The stage will only increase values to meet the minimum, and will not decrease them if the minimum is already met. -| Sysctl | Minimum | Description -|-----------------------------------------| ----------- | ----------- -| /proc/sys/net/core/rmem_max | 134217728 | Solana Labs network performance tuning. -| /proc/sys/net/core/rmem_default | 134217728 | Solana Labs network performance tuning. -| /proc/sys/net/core/wmem_max | 134217728 | Solana Labs network performance tuning. -| /proc/sys/net/core/wmem_default | 134217728 | Solana Labs network performance tuning. -| /proc/sys/vm/max_map_count | 1000000 | Solana Labs accounts database requires mapping many files. -| /proc/sys/fs/file-max | 1024000 | Solana Labs accounts database requires opening many files. -| /proc/sys/fs/nr_open | 1024000 | Solana Labs accounts database requires opening many files. -| /proc/sys/net/core/bpf_jit_enable | 1 | Firedancer uses BPF for kernel bypass networking. BPF JIT makes this faster. -| /proc/sys/net/ipv4/conf/lo/rp_filter | 2 | Enable loose mode for reverse path filtering on the loopback interface. Loose mode is required for the XSK socket to successfully send packets to loopback. -| /proc/sys/net/ipv4/conf/lo/accept_local | 1 | Accept packets with local source addresses on the loopback interface. This is required for the XSK socket to successfully send packets to loopback. +| Sysctl | Minimum | Required | Description +|-----------------------------------------| ----------- | -------- | ----------- +| /proc/sys/vm/max_map_count | 1000000 | Yes | Agave accounts database requires mapping many files. +| /proc/sys/fs/file-max | 1024000 | Yes | Agave accounts database requires opening many files. +| /proc/sys/fs/nr_open | 1024000 | Yes | Agave accounts database requires opening many files. +| /proc/sys/net/ipv4/conf/lo/rp_filter | 2 | Yes | If sending QUIC transactions to Firedancer over loopback, this must be enabled to receive a response. Otherwise Linux will drop response packets due to limitations in the kernel eBPF networking stack. The sendTransaction RPC call will send over loopback. +| /proc/sys/net/ipv4/conf/lo/accept_local | 1 | Yes | If sending QUIC transactions to Firedancer over loopback, this must be enabled to receive a response. Otherwise Linux will drop response packets due to limitations in the kernel eBPF networking stack. The sendTransaction RPC call will send over loopback. +| /proc/sys/net/core/bpf_jit_enable | 1 | No | Firedancer uses BPF for kernel bypass networking. BPF JIT makes this faster. +| /proc/sys/kernel/numa_balancing | 0 | No | Firedancer assigns all memory to the right NUMA node, and rebalancing will make the system slower. + +Sysctls that are not required will produce a warning if they are not set +correctly, but configuration will proceed and exit normally. The `init` mode requires either `root` privileges, or to be run with `CAP_SYS_ADMIN`. The `fini` mode does nothing and kernel parameters diff --git a/solana b/solana index f112068fc7..f1d429252c 160000 --- a/solana +++ b/solana @@ -1 +1 @@ -Subproject commit f112068fc76fee5d14f5fa815c8b07e4f07fc566 +Subproject commit f1d429252c9d55ec51092c3bc667442bc4c76d66 diff --git a/src/app/fdctl/configure/sysctl.c b/src/app/fdctl/configure/sysctl.c index 708aaefdfd..b461a4a96c 100644 --- a/src/app/fdctl/configure/sysctl.c +++ b/src/app/fdctl/configure/sysctl.c @@ -12,56 +12,104 @@ init_perm( fd_caps_ctx_t * caps, fd_caps_check_capability( caps, NAME, CAP_SYS_ADMIN, "set kernel parameters in `/proc/sys`" ); } -static const char * params[] = { - "/proc/sys/net/core/rmem_max", - "/proc/sys/net/core/rmem_default", - "/proc/sys/net/core/wmem_max", - "/proc/sys/net/core/wmem_default", - "/proc/sys/vm/max_map_count", - "/proc/sys/net/core/bpf_jit_enable", - "/proc/sys/fs/file-max", - "/proc/sys/fs/nr_open", - "/proc/sys/net/ipv4/conf/lo/rp_filter", - "/proc/sys/net/ipv4/conf/lo/accept_local", -}; +#define ENFORCE_MINIMUM 0 +#define WARN_MINIMUM 1 +#define WARN_EXACT 2 + +typedef struct { + char const * path; + uint value; + int mode; +} sysctl_param_t; -static uint limits[] = { - 134217728, - 134217728, - 134217728, - 134217728, - 1000000, - 1, - CONFIGURE_NR_OPEN_FILES, - CONFIGURE_NR_OPEN_FILES, - 2, - 1, +static const sysctl_param_t params[] = { + { + "/proc/sys/vm/max_map_count", + 1000000, + ENFORCE_MINIMUM, + }, + { + "/proc/sys/fs/file-max", + CONFIGURE_NR_OPEN_FILES, + ENFORCE_MINIMUM, + }, + { + "/proc/sys/fs/nr_open", + CONFIGURE_NR_OPEN_FILES, + ENFORCE_MINIMUM, + }, + { + "/proc/sys/net/ipv4/conf/lo/rp_filter", + 2, + ENFORCE_MINIMUM, + }, + { + "/proc/sys/net/ipv4/conf/lo/accept_local", + 1, + ENFORCE_MINIMUM, + }, + { + "/proc/sys/net/core/bpf_jit_enable", + 1, + WARN_MINIMUM, + }, + { + "/proc/sys/kernel/numa_balancing", + 0, + WARN_EXACT, + } }; static const char * ERR_MSG = "system might not support configuring sysctl,"; -/* These sysctl limits are needed for the Solana Labs client, not Firedancer. - We set them on their behalf to make configuration easier for users. */ +/* Some of these sysctl limits are needed for the Agave client, not + Firedancer. We set them on their behalf to make configuration easier + for users. */ + static void init( config_t * const config ) { (void)config; for( ulong i=0; i %s`", params[ i ].value, params[ i ].path ) ); + write_uint_file( params[ i ].path, params[ i ].value ); + } + break; + default: + break; + } } } static configure_result_t check( config_t * const config ) { + static int has_warned = 0; + (void)config; for( ulong i=0; itopo ); - fd_caps_check_resource( caps, NAME, RLIMIT_MEMLOCK, mlock_limit, "increase `RLIMIT_MEMLOCK` to lock the workspace in memory with `mlock(2)`" ); + fd_caps_check_resource( caps, NAME, RLIMIT_MEMLOCK, mlock_limit, "call `rlimit(2)` to increase `RLIMIT_MEMLOCK` so all memory can be locked with `mlock(2)`" ); fd_caps_check_resource( caps, NAME, RLIMIT_NICE, 40, "call `setpriority(2)` to increase thread priorities" ); fd_caps_check_resource( caps, NAME, RLIMIT_NOFILE, CONFIGURE_NR_OPEN_FILES, - "increase `RLIMIT_NOFILE` to allow more open files for Solana Labs" ); - fd_caps_check_capability( caps, NAME, CAP_NET_RAW, "call `bind(2)` to bind to a socket with `SOCK_RAW`" ); - fd_caps_check_capability( caps, NAME, CAP_SYS_ADMIN, "initialize XDP by calling `bpf_obj_get`" ); + "call `rlimit(2) to increase `RLIMIT_NOFILE` to allow more open files for Agave" ); + fd_caps_check_capability( caps, NAME, CAP_NET_RAW, "call `socket(2)` to bind to a raw socket for use by XDP" ); + fd_caps_check_capability( caps, NAME, CAP_SYS_ADMIN, "call `bpf(2)` with the `BPF_OBJ_GET` command to initialize XDP" ); if( FD_LIKELY( getuid() != config->uid ) ) - fd_caps_check_capability( caps, NAME, CAP_SETUID, "switch uid by calling `setuid(2)`" ); + fd_caps_check_capability( caps, NAME, CAP_SETUID, "call `setresuid(2)` to switch uid" ); if( FD_LIKELY( getgid() != config->gid ) ) - fd_caps_check_capability( caps, NAME, CAP_SETGID, "switch gid by calling `setgid(2)`" ); + fd_caps_check_capability( caps, NAME, CAP_SETGID, "call `setresgid(2)` to switch gid" ); if( FD_UNLIKELY( config->development.netns.enabled ) ) - fd_caps_check_capability( caps, NAME, CAP_SYS_ADMIN, "enter a network namespace by calling `setns(2)`" ); + fd_caps_check_capability( caps, NAME, CAP_SYS_ADMIN, "call `setns(2)` to enter a network namespace" ); + if( FD_UNLIKELY( config->tiles.metric.prometheus_listen_port<1024 ) ) + fd_caps_check_capability( caps, NAME, CAP_NET_BIND_SERVICE, "call `bind(2)` to bind to a privileged port for serving metrics" ); } struct pidns_clone_args {