Last active
July 6, 2024 23:21
-
-
Save al3xtjames/1993a722b2ab9a9fece0477e179c468e to your computer and use it in GitHub Desktop.
https://github.com/djmdjm/openssh-wip/pull/29 rebased on 9.8p1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From ea9fe977031941be2dcb018d1c9d0117b92ab5f9 Mon Sep 17 00:00:00 2001 | |
From: Damien Miller <djm@mindrot.org> | |
Date: Wed, 21 Feb 2024 20:26:29 +1100 | |
Subject: [PATCH] Improve OpenSSH performance on high BDP links | |
The SSH protocol performs flow control using a channel receive | |
window that may only be set at the time a channel is created and | |
cannot be subsequently modified. | |
The current default window size is sufficient for most uses, but | |
on high bandwidth x delay links it is not enough. Increasing the | |
default window size is possible, but carries the cost of additional | |
worst-case memory usage if the output side is slow. | |
This implements a protocol extension to vary the maximum channel | |
window after the channel has been created, and uses this extension | |
to grow the window if the TCP receive window increases in size. | |
TCP has had a half-century of optimisation, so we should copy | |
its homework wherever possible. | |
This is based on work by Denzel Strauss and Sally Sun during their | |
STEP iternship at Google. | |
--- | |
channels.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++-- | |
channels.h | 2 + | |
clientloop.c | 3 ++ | |
kex.c | 20 +++++++- | |
kex.h | 1 + | |
packet.c | 38 ++++++++++++++ | |
packet.h | 1 + | |
serverloop.c | 3 ++ | |
8 files changed, 203 insertions(+), 5 deletions(-) | |
diff --git a/channels.c b/channels.c | |
index 3ee694717..8c71affa2 100644 | |
--- a/channels.c | |
+++ b/channels.c | |
@@ -86,6 +86,7 @@ | |
#include "authfd.h" | |
#include "pathnames.h" | |
#include "match.h" | |
+#include "kex.h" | |
/* XXX remove once we're satisfied there's no lurking bugs */ | |
/* #define DEBUG_CHANNEL_POLL 1 */ | |
@@ -933,6 +934,23 @@ channel_still_open(struct ssh *ssh) | |
return 0; | |
} | |
+ | |
+static u_int | |
+channel_count_open(struct ssh *ssh) | |
+{ | |
+ u_int i, n; | |
+ Channel *c; | |
+ | |
+ for (i = n = 0; i < ssh->chanctxt->channels_alloc; i++) { | |
+ if ((c = ssh->chanctxt->channels[i]) == NULL || | |
+ c->type != SSH_CHANNEL_OPEN || | |
+ c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD)) | |
+ continue; | |
+ n++; | |
+ } | |
+ return n; | |
+} | |
+ | |
/* Returns true if a channel with a TTY is open. */ | |
int | |
channel_tty_open(struct ssh *ssh) | |
@@ -2371,7 +2389,7 @@ channel_check_window(struct ssh *ssh, Channel *c) | |
(r = sshpkt_send(ssh)) != 0) { | |
fatal_fr(r, "channel %i", c->self); | |
} | |
- debug2("channel %d: window %d sent adjust %d", c->self, | |
+ debug3_f("channel %d: window %d sent adjust %d", c->self, | |
c->local_window, c->local_consumed); | |
c->local_window += c->local_consumed; | |
c->local_consumed = 0; | |
@@ -2379,12 +2397,80 @@ channel_check_window(struct ssh *ssh, Channel *c) | |
return 1; | |
} | |
+/* | |
+ * Try to update local_window_max when the TCP receive window grows, | |
+ * if the peer supports the channel-window-max@openssh.com extension. | |
+ * NB. must be called before channel_check_window(), as that clobbers | |
+ * c->local_consumed. | |
+ */ | |
+static void | |
+channel_check_max_window(struct ssh *ssh, Channel *c) | |
+{ | |
+ int r, tcp_rwin; | |
+ u_int new_window_max; | |
+ | |
+ if ((ssh->kex->flags & KEX_HAS_CHANNEL_MAX_WINDOW) == 0) | |
+ return; /* no protocol extension support */ | |
+ if (c->local_consumed == 0) | |
+ return; /* only modify window for active channels */ | |
+ if (c->type != SSH_CHANNEL_OPEN || | |
+ (c->flags & (CHAN_CLOSE_SENT|CHAN_CLOSE_RCVD)) != 0) | |
+ return; /* channel not in open state */ | |
+ if ((tcp_rwin = ssh_packet_get_tcp_rwin(ssh)) == -1) | |
+ return; /* no TCP window recorded */ | |
+ if (c->local_window_max >= sshbuf_max_size(c->output)) | |
+ return; /* Already at max */ | |
+ | |
+ /* | |
+ * XXX the following rules are all heuristics with factors that | |
+ * I just made up - djm | |
+ * XXX this increases the SSH window to the TCP window in | |
+ * one shot. Is this what we want? Alternately we could | |
+ * increase by a fraction of the difference, so the SSH window | |
+ * successively approximates the TCP window. | |
+ * XXX there is only a naive fair-share the TCP window across | |
+ * channel windows. It treats all channel identically (e.g. an | |
+ * agent connection vs a shell) and doesn't try very hard to only | |
+ * increase the window for channels that need it. | |
+ * XXX maybe maintain a notion of "claimed TCP window" and | |
+ * give it out only to channels that are active? | |
+ * XXX regardless, a bit of oversubscription of the TCP window | |
+ * is probably desirable as it's likely that not all channels will | |
+ * be fully utilising their windows simultaneously. | |
+ * XXX this never shrinks the window. This might be desirable for | |
+ * idle channels if other are busy, but would be a fair bit of work. | |
+ */ | |
+ if (sshbuf_len(c->output) > c->local_window_max / 4) | |
+ return; /* Don't increase window if output is slow/filling */ | |
+ | |
+ /* Allow some oversubscription of the TCP window across channels */ | |
+ new_window_max = (3 * (u_int)tcp_rwin) / channel_count_open(ssh); | |
+ if (new_window_max > (u_int)tcp_rwin) | |
+ new_window_max = (u_int)tcp_rwin; | |
+ if (new_window_max > sshbuf_max_size(c->output)) | |
+ new_window_max = sshbuf_max_size(c->output); | |
+ | |
+ if (new_window_max <= c->local_window_max) | |
+ return; /* don't shrink */ | |
+ | |
+ channel_request_start(ssh, c->self, | |
+ "max-window@openssh.com", 0); | |
+ if ((r = sshpkt_put_u32(ssh, new_window_max)) != 0 || | |
+ (r = sshpkt_send(ssh)) != 0) | |
+ fatal_fr(r, "channel %i", c->self); | |
+ debug2_f("channel %d: sent max-window@openssh.com " | |
+ "%d -> %d (+%d), peer TCP rwin %d", c->self, c->local_window_max, | |
+ new_window_max, new_window_max - c->local_window_max, tcp_rwin); | |
+ c->local_window_max = new_window_max; | |
+} | |
+ | |
static void | |
channel_post_open(struct ssh *ssh, Channel *c) | |
{ | |
channel_handle_rfd(ssh, c); | |
channel_handle_wfd(ssh, c); | |
channel_handle_efd(ssh, c); | |
+ channel_check_max_window(ssh, c); | |
channel_check_window(ssh, c); | |
} | |
@@ -3582,7 +3668,7 @@ channel_input_open_confirmation(int type, u_int32_t seq, struct ssh *ssh) | |
} | |
c->have_remote_id = 1; | |
- c->remote_window = remote_window; | |
+ c->remote_window = c->remote_window_max = remote_window; | |
c->remote_maxpacket = remote_maxpacket; | |
c->type = SSH_CHANNEL_OPEN; | |
if (c->open_confirm) { | |
@@ -3649,6 +3735,53 @@ channel_input_open_failure(int type, u_int32_t seq, struct ssh *ssh) | |
return 0; | |
} | |
+int | |
+channel_input_max_window(struct ssh *ssh, Channel *c) | |
+{ | |
+ u_int old_window, new_max_window; | |
+ int r; | |
+ | |
+ if (c->type != SSH_CHANNEL_OPEN) { | |
+ debug_f("channel %d: not open", c->self); | |
+ return SSH_ERR_INVALID_ARGUMENT; | |
+ } | |
+ if ((r = sshpkt_get_u32(ssh, &new_max_window)) != 0 || | |
+ (r = sshpkt_get_end(ssh)) != 0) { | |
+ error_fr(r, "channel %d: parse", c->self); | |
+ return r; | |
+ } | |
+ old_window = c->remote_window; | |
+ if (new_max_window > c->remote_window_max) { | |
+ /* Usual case: remote grows max window */ | |
+ c->remote_window += new_max_window - c->remote_window_max; | |
+ } else if (new_max_window < c->remote_window_max) { | |
+ /* | |
+ * Only allow shrinking the window if there is | |
+ * sufficient capacity to do so. | |
+ */ | |
+ if (c->remote_window_max - new_max_window > c->remote_window) { | |
+ error_f("channel %d: remote requested invalid remote " | |
+ "window size", c->self); | |
+ /* XXX what to do? close chan? set remote_window=0? */ | |
+ /* XXX could gracefully ratchet window back */ | |
+ /* XXX maybe better to just ban shrinking? */ | |
+ return SSH_ERR_INVALID_ARGUMENT; | |
+ } | |
+ c->remote_window -= c->remote_window_max - new_max_window; | |
+ } else { | |
+ /* Same size shouldn't happen */ | |
+ debug_f("channel %d: useless max-window update", c->self); | |
+ } | |
+ debug2_f("channel %d: new remote window max %d -> %d (%+d)", | |
+ c->self, c->remote_window_max, new_max_window, | |
+ new_max_window - c->remote_window_max); | |
+ debug3_f("channel %d: implicit window adjust %d -> %d (%+d)", | |
+ c->self, old_window, c->remote_window, | |
+ c->remote_window - old_window); | |
+ c->remote_window_max = new_max_window; | |
+ return 0; | |
+} | |
+ | |
int | |
channel_input_window_adjust(int type, u_int32_t seq, struct ssh *ssh) | |
{ | |
@@ -3670,11 +3803,12 @@ channel_input_window_adjust(int type, u_int32_t seq, struct ssh *ssh) | |
error_fr(r, "parse adjust"); | |
ssh_packet_disconnect(ssh, "Invalid window adjust message"); | |
} | |
- debug2("channel %d: rcvd adjust %u", c->self, adjust); | |
if ((new_rwin = c->remote_window + adjust) < c->remote_window) { | |
fatal("channel %d: adjust %u overflows remote window %u", | |
c->self, adjust, c->remote_window); | |
} | |
+ debug3_f("channel %d: rcvd adjust %u window %u/%u", c->self, adjust, | |
+ c->remote_window, c->remote_window_max); | |
c->remote_window = new_rwin; | |
return 0; | |
} | |
diff --git a/channels.h b/channels.h | |
index fdea9f574..631f90fcb 100644 | |
--- a/channels.h | |
+++ b/channels.h | |
@@ -167,6 +167,7 @@ struct Channel { | |
char *remote_name; /* remote hostname */ | |
u_int remote_window; | |
+ u_int remote_window_max; | |
u_int remote_maxpacket; | |
u_int local_window; | |
u_int local_window_exceeded; | |
@@ -326,6 +327,7 @@ int channel_input_open_confirmation(int, u_int32_t, struct ssh *); | |
int channel_input_open_failure(int, u_int32_t, struct ssh *); | |
int channel_input_window_adjust(int, u_int32_t, struct ssh *); | |
int channel_input_status_confirm(int, u_int32_t, struct ssh *); | |
+int channel_input_max_window(struct ssh *ssh, Channel *c); | |
/* file descriptor handling (read/write) */ | |
struct pollfd; | |
diff --git a/clientloop.c b/clientloop.c | |
index 8ed8b1c34..3b1aed840 100644 | |
--- a/clientloop.c | |
+++ b/clientloop.c | |
@@ -1977,6 +1977,9 @@ client_input_channel_req(int type, u_int32_t seq, struct ssh *ssh) | |
if ((r = sshpkt_get_end(ssh)) != 0) | |
goto out; | |
chan_rcvd_eow(ssh, c); | |
+ } else if (strcmp(rtype, "max-window@openssh.com") == 0) { | |
+ if ((r = channel_input_max_window(ssh, c)) == 0) | |
+ success = 1; | |
} else if (strcmp(rtype, "exit-status") == 0) { | |
if ((r = sshpkt_get_u32(ssh, &exitval)) != 0) | |
goto out; | |
diff --git a/kex.c b/kex.c | |
index 63aae5d71..a2a8659c9 100644 | |
--- a/kex.c | |
+++ b/kex.c | |
@@ -301,13 +301,16 @@ kex_compose_ext_info_server(struct ssh *ssh, struct sshbuf *m) | |
if (ssh->kex->server_sig_algs == NULL && | |
(ssh->kex->server_sig_algs = sshkey_alg_list(0, 1, 1, ',')) == NULL) | |
return SSH_ERR_ALLOC_FAIL; | |
- if ((r = sshbuf_put_u32(m, 3)) != 0 || | |
+ if ((r = sshbuf_put_u32(m, 4)) != 0 || | |
(r = sshbuf_put_cstring(m, "server-sig-algs")) != 0 || | |
(r = sshbuf_put_cstring(m, ssh->kex->server_sig_algs)) != 0 || | |
(r = sshbuf_put_cstring(m, | |
"publickey-hostbound@openssh.com")) != 0 || | |
(r = sshbuf_put_cstring(m, "0")) != 0 || | |
(r = sshbuf_put_cstring(m, "ping@openssh.com")) != 0 || | |
+ (r = sshbuf_put_cstring(m, "0")) != 0 || | |
+ (r = sshbuf_put_cstring(m, | |
+ "channel-max-window@openssh.com")) != 0 || | |
(r = sshbuf_put_cstring(m, "0")) != 0) { | |
error_fr(r, "compose"); | |
return r; | |
@@ -320,8 +323,11 @@ kex_compose_ext_info_client(struct ssh *ssh, struct sshbuf *m) | |
{ | |
int r; | |
- if ((r = sshbuf_put_u32(m, 1)) != 0 || | |
+ if ((r = sshbuf_put_u32(m, 2)) != 0 || | |
(r = sshbuf_put_cstring(m, "ext-info-in-auth@openssh.com")) != 0 || | |
+ (r = sshbuf_put_cstring(m, "0")) != 0 || | |
+ (r = sshbuf_put_cstring(m, | |
+ "channel-max-window@openssh.com")) != 0 || | |
(r = sshbuf_put_cstring(m, "0")) != 0) { | |
error_fr(r, "compose"); | |
goto out; | |
@@ -451,6 +457,11 @@ kex_ext_info_client_parse(struct ssh *ssh, const char *name, | |
"0", KEX_HAS_PING)) != 0) { | |
return r; | |
} | |
+ } else if (strcmp(name, "channel-max-window@openssh.com") == 0) { | |
+ if ((r = kex_ext_info_check_ver(ssh->kex, name, value, vlen, | |
+ "0", KEX_HAS_CHANNEL_MAX_WINDOW)) != 0) { | |
+ return r; | |
+ } | |
} else | |
debug_f("%s (unrecognised)", name); | |
@@ -468,6 +479,11 @@ kex_ext_info_server_parse(struct ssh *ssh, const char *name, | |
"0", KEX_HAS_EXT_INFO_IN_AUTH)) != 0) { | |
return r; | |
} | |
+ } else if (strcmp(name, "channel-max-window@openssh.com") == 0) { | |
+ if ((r = kex_ext_info_check_ver(ssh->kex, name, value, vlen, | |
+ "0", KEX_HAS_CHANNEL_MAX_WINDOW)) != 0) { | |
+ return r; | |
+ } | |
} else | |
debug_f("%s (unrecognised)", name); | |
return 0; | |
diff --git a/kex.h b/kex.h | |
index 34665eb20..7a9432754 100644 | |
--- a/kex.h | |
+++ b/kex.h | |
@@ -113,6 +113,7 @@ enum kex_exchange { | |
#define KEX_RSA_SHA2_512_SUPPORTED 0x0010 /* only set in server for now */ | |
#define KEX_HAS_PING 0x0020 | |
#define KEX_HAS_EXT_INFO_IN_AUTH 0x0040 | |
+#define KEX_HAS_CHANNEL_MAX_WINDOW 0x0080 | |
struct sshenc { | |
char *name; | |
diff --git a/packet.c b/packet.c | |
index 4fca8d660..4dd3f46fd 100644 | |
--- a/packet.c | |
+++ b/packet.c | |
@@ -219,6 +219,9 @@ struct session_state { | |
/* One-off warning about weak ciphers */ | |
int cipher_warning_done; | |
+ /* Last observed TCP receive window size */ | |
+ int tcp_rwin; | |
+ | |
/* Hook for fuzzing inbound packets */ | |
ssh_packet_hook_fn *hook_in; | |
void *hook_in_ctx; | |
@@ -248,6 +251,7 @@ ssh_alloc_session_state(void) | |
state->max_packet_size = 32768; | |
state->packet_timeout_ms = -1; | |
state->p_send.packets = state->p_read.packets = 0; | |
+ state->tcp_rwin = -1; | |
state->initialized = 1; | |
/* | |
* ssh_packet_send2() needs to queue packets until | |
@@ -668,6 +672,37 @@ ssh_packet_rdomain_in(struct ssh *ssh) | |
return ssh->rdomain_in; | |
} | |
+static void | |
+ssh_packet_update_tcp_rwin(struct ssh *ssh) | |
+{ | |
+ int rwin; | |
+ socklen_t len = sizeof(rwin); | |
+ | |
+ if (ssh->state == NULL || | |
+ !ssh_packet_connection_is_on_socket(ssh)) | |
+ return; | |
+ if (getsockopt(ssh->state->connection_in, SOL_SOCKET, SO_RCVBUF, | |
+ &rwin, &len) != 0) { | |
+ debug_f("getsockopt(%d SO_RCVBUF): %s", | |
+ ssh->state->connection_in, strerror(errno)); | |
+ return; | |
+ } | |
+ if (ssh->state->tcp_rwin != -1 && rwin != ssh->state->tcp_rwin) { | |
+ debug3_f("TCP receive window %d -> %d (%+d)", | |
+ ssh->state->tcp_rwin, rwin, rwin - ssh->state->tcp_rwin); | |
+ } | |
+ ssh->state->tcp_rwin = rwin; | |
+} | |
+ | |
+int | |
+ssh_packet_get_tcp_rwin(struct ssh *ssh) | |
+{ | |
+ if (ssh == NULL || ssh->state == NULL || | |
+ !ssh_packet_connection_is_on_socket(ssh)) | |
+ return -1; | |
+ return ssh->state->tcp_rwin; | |
+} | |
+ | |
/* Closes the connection and clears and frees internal data structures. */ | |
static void | |
@@ -1501,6 +1536,8 @@ ssh_packet_read_seqnr(struct ssh *ssh, u_char *typep, u_int32_t *seqnr_p) | |
/* Append it to the buffer. */ | |
if ((r = ssh_packet_process_incoming(ssh, buf, len)) != 0) | |
goto out; | |
+ | |
+ ssh_packet_update_tcp_rwin(ssh); | |
} | |
out: | |
return r; | |
@@ -1917,6 +1954,7 @@ ssh_packet_process_read(struct ssh *ssh, int fd) | |
if ((r = sshbuf_read(fd, state->input, PACKET_MAX_SIZE, &rlen)) != 0) | |
return r; | |
+ ssh_packet_update_tcp_rwin(ssh); | |
if (state->packet_discard) { | |
if ((r = sshbuf_consume_end(state->input, rlen)) != 0) | |
diff --git a/packet.h b/packet.h | |
index 5ab1f409a..5568ff8b1 100644 | |
--- a/packet.h | |
+++ b/packet.h | |
@@ -163,6 +163,7 @@ int ssh_remote_port(struct ssh *); | |
const char *ssh_local_ipaddr(struct ssh *); | |
int ssh_local_port(struct ssh *); | |
const char *ssh_packet_rdomain_in(struct ssh *); | |
+int ssh_packet_get_tcp_rwin(struct ssh *); | |
char *ssh_remote_hostname(struct ssh *); | |
void ssh_packet_set_rekey_limits(struct ssh *, u_int64_t, u_int32_t); | |
diff --git a/serverloop.c b/serverloop.c | |
index 757cc6f02..5e2f87be6 100644 | |
--- a/serverloop.c | |
+++ b/serverloop.c | |
@@ -855,6 +855,9 @@ server_input_channel_req(int type, u_int32_t seq, struct ssh *ssh) | |
if ((r = sshpkt_get_end(ssh)) != 0) | |
sshpkt_fatal(ssh, r, "%s: parse packet", __func__); | |
chan_rcvd_eow(ssh, c); | |
+ } else if (strcmp(rtype, "max-window@openssh.com") == 0) { | |
+ if ((r = channel_input_max_window(ssh, c)) == 0) | |
+ success = 1; | |
} else if ((c->type == SSH_CHANNEL_LARVAL || | |
c->type == SSH_CHANNEL_OPEN) && strcmp(c->ctype, "session") == 0) | |
success = session_input_channel_req(ssh, c, rtype); | |
-- | |
2.45.1 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
From ffa09d2845bacb21399b2e94109dab7e1c576ccd Mon Sep 17 00:00:00 2001 | |
From: Alex James <git@alextjam.es> | |
Date: Sat, 6 Jul 2024 17:24:13 -0500 | |
Subject: [PATCH] sandbox-seccomp-filter: allow getpeername and getsockopt | |
The BDP performance improvement patch added calls to getsockopt and | |
getpeername, which fails if the seccomp sandbox is enabled. Fix it by | |
adding them to the list of allowed syscalls. | |
--- | |
sandbox-seccomp-filter.c | 6 ++++++ | |
1 file changed, 6 insertions(+) | |
diff --git a/sandbox-seccomp-filter.c b/sandbox-seccomp-filter.c | |
index 23b40b643..a6b16b4f1 100644 | |
--- a/sandbox-seccomp-filter.c | |
+++ b/sandbox-seccomp-filter.c | |
@@ -289,6 +289,9 @@ static const struct sock_filter preauth_insns[] = { | |
#ifdef __NR_geteuid32 | |
SC_ALLOW(__NR_geteuid32), | |
#endif | |
+#ifdef __NR_getpeername | |
+ SC_ALLOW(__NR_getpeername), | |
+#endif | |
#ifdef __NR_getpgid | |
SC_ALLOW(__NR_getpgid), | |
#endif | |
@@ -298,6 +301,9 @@ static const struct sock_filter preauth_insns[] = { | |
#ifdef __NR_getrandom | |
SC_ALLOW(__NR_getrandom), | |
#endif | |
+#ifdef __NR_getsockopt | |
+ SC_ALLOW(__NR_getsockopt), | |
+#endif | |
#ifdef __NR_gettid | |
SC_ALLOW(__NR_gettid), | |
#endif | |
-- | |
2.45.1 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment