diff --git a/examples/echo.c b/examples/echo.c index 3c368938..a740e487 100644 --- a/examples/echo.c +++ b/examples/echo.c @@ -215,7 +215,7 @@ static int run_loop(int fd, quicly_conn_t *client) tv.tv_sec = delta / 1000; tv.tv_usec = (delta % 1000) * 1000; } else { - tv.tv_sec = 1000; + tv.tv_sec = 0; tv.tv_usec = 0; } FD_ZERO(&readfds); diff --git a/include/quicly.h b/include/quicly.h index 3eb9fafa..d12795cf 100644 --- a/include/quicly.h +++ b/include/quicly.h @@ -192,10 +192,11 @@ typedef struct st_quicly_crypto_engine_t { * header protection using `header_protect_ctx`. Quicly does not read or write the content of the UDP datagram payload after * this function is called. Therefore, an engine might retain the information provided by this function, and protect the packet * and the header at a later moment (e.g., hardware crypto offload). + * @param dcid specifies the CID sequence number for encrypting Multipath QUIC packets; will always be zero in QUIC v1 */ void (*encrypt_packet)(struct st_quicly_crypto_engine_t *engine, quicly_conn_t *conn, ptls_cipher_context_t *header_protect_ctx, ptls_aead_context_t *packet_protect_ctx, ptls_iovec_t datagram, size_t first_byte_at, - size_t payload_from, uint64_t packet_number, int coalesced); + size_t payload_from, uint64_t dcid, uint64_t packet_number, int coalesced); } quicly_crypto_engine_t; /** @@ -261,6 +262,10 @@ typedef struct st_quicly_transport_parameters_t { * */ uint8_t disable_active_migration : 1; + /** + * + */ + uint8_t enable_multipath : 1; /** * */ @@ -338,6 +343,10 @@ struct st_quicly_context_t { * expand client hello so that it does not fit into one datagram */ unsigned expand_client_hello : 1; + /** + * whether to use ECN on the send side; ECN is always on on the receive side + */ + unsigned enable_ecn : 1; /** * */ @@ -462,6 +471,14 @@ struct st_quicly_conn_streamgroup_state_t { * Total number of packets received out of order. 
\ */ \ uint64_t received_out_of_order; \ + /** \ + * connection-wide counters for ECT(0), ECT(1), CE \ + */ \ + uint64_t received_ecn_counts[3]; \ + /** \ + * connection-wide ack-received counters for ECT(0), ECT(1), CE \ + */ \ + uint64_t acked_ecn_counts[3]; \ /** \ * Total number of packets sent on promoted paths. \ */ \ @@ -522,6 +539,14 @@ struct st_quicly_conn_streamgroup_state_t { * number of alternate paths that were closed due to Connection ID being unavailable \ */ \ uint64_t closed_no_dcid; \ + /** \ + * number of paths that were ECN-capable \ + */ \ + uint64_t ecn_validated; \ + /** \ + * number of paths that were deemed as ECN black holes \ + */ \ + uint64_t ecn_failed; \ } num_paths; \ /** \ * Total number of each frame being sent / received. \ @@ -530,7 +555,7 @@ struct st_quicly_conn_streamgroup_state_t { uint64_t padding, ping, ack, reset_stream, stop_sending, crypto, new_token, stream, max_data, max_stream_data, \ max_streams_bidi, max_streams_uni, data_blocked, stream_data_blocked, streams_blocked, new_connection_id, \ retire_connection_id, path_challenge, path_response, transport_close, application_close, handshake_done, datagram, \ - ack_frequency; \ + ack_frequency, ack_mp, path_abandon, path_status; \ } num_frames_sent, num_frames_received; \ /** \ * Total number of PTOs observed during the connection. 
\ @@ -848,6 +873,10 @@ typedef struct st_quicly_decoded_packet_t { uint64_t pn; uint64_t key_phase; } decrypted; + /** + * ECN bits + */ + uint8_t ecn : 2; /** * */ @@ -1065,6 +1094,10 @@ size_t quicly_send_retry(quicly_context_t *ctx, ptls_aead_context_t *token_encry */ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *src, struct iovec *datagrams, size_t *num_datagrams, void *buf, size_t bufsize); +/** + * returns ECN bits to be set for the packets built by the last invocation of `quicly_send` + */ +uint8_t quicly_send_get_ecn_bits(quicly_conn_t *conn); /** * */ @@ -1128,10 +1161,20 @@ int quicly_connect(quicly_conn_t **conn, quicly_context_t *ctx, const char *serv int quicly_accept(quicly_conn_t **conn, quicly_context_t *ctx, struct sockaddr *dest_addr, struct sockaddr *src_addr, quicly_decoded_packet_t *packet, quicly_address_token_plaintext_t *address_token, const quicly_cid_plaintext_t *new_cid, ptls_handshake_properties_t *handshake_properties, void *appdata); +/** + * Adds a new path. Only usable when running as a client. Local must contain sufficient information to distinguish between the paths + * being established; i.e., either the port number should be different or if one port is shared then the IP addresses of each + * local address must be different. + */ +int quicly_add_path(quicly_conn_t *conn, struct sockaddr *local); /** * */ ptls_t *quicly_get_tls(quicly_conn_t *conn); +/** + * + */ +int quicly_is_multipath(quicly_conn_t *conn); /** * Resumes an async TLS handshake, and returns a pointer to the QUIC connection or NULL if the corresponding QUIC connection has * been discarded. See `quicly_async_handshake_t`. @@ -1226,6 +1269,12 @@ int quicly_set_cc(quicly_conn_t *conn, quicly_cc_type_t *cc); * */ void quicly_amend_ptls_context(ptls_context_t *ptls); +/** + * Builds the IV prefix used to encrypt / decrypt Multipath QUIC packets. Size of the supplied buffer (`iv`) must be no less than + * `PTLS_MAX_IV_SIZE`.
Once the IV is built, it should be applied to AEAD using `ptls_aead_xor_iv` prior to calling the encryption + function. After that, `ptls_aead_xor_iv` should be called again with the same arguments to negate the changes to IV. + */ +static size_t quicly_build_multipath_iv(ptls_aead_algorithm_t *algo, uint64_t sequence, void *iv); /** * Encrypts an address token by serializing the plaintext structure and appending an authentication tag. * @@ -1417,6 +1466,21 @@ inline void quicly_stream_set_receive_window(quicly_stream_t *stream, uint32_t w stream->_recv_aux.window = window; } +inline size_t quicly_build_multipath_iv(ptls_aead_algorithm_t *algo, uint64_t sequence, void *_iv) +{ + size_t len = algo->iv_size - 8; + uint8_t *iv = (uint8_t *)_iv; + + for (size_t i = 0; i + 4 < len; ++i) + *iv++ = 0; + *iv++ = (uint8_t)(sequence >> 24); + *iv++ = (uint8_t)(sequence >> 16); + *iv++ = (uint8_t)(sequence >> 8); + *iv++ = (uint8_t)sequence; + + return len; +} + inline int quicly_stream_is_client_initiated(quicly_stream_id_t stream_id) { if (stream_id < 0) diff --git a/include/quicly/cc.h b/include/quicly/cc.h index ca0c0f26..bc1669c8 100644 --- a/include/quicly/cc.h +++ b/include/quicly/cc.h @@ -61,6 +61,10 @@ typedef struct st_quicly_cc_t { * Packet number indicating end of recovery period, if in recovery. */ uint64_t recovery_end; + /** + * If the most recent loss episode was signalled by ECN only (i.e., no packet loss). + */ + unsigned episode_by_ecn : 1; /** * State information specific to the congestion controller implementation. */ @@ -130,9 +134,13 @@ typedef struct st_quicly_cc_t { */ uint32_t cwnd_maximum; /** - * Total number of number of loss episodes (congestion window reductions). + * Total number of loss episodes (congestion window reductions). */ uint32_t num_loss_episodes; + /** + * Total number of loss episodes that were reported only by ECN (hence no packet loss).
+ */ + uint32_t num_ecn_loss_episodes; } quicly_cc_t; struct st_quicly_cc_type_t { @@ -150,8 +158,9 @@ struct st_quicly_cc_type_t { void (*cc_on_acked)(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, uint64_t largest_acked, uint32_t inflight, uint64_t next_pn, int64_t now, uint32_t max_udp_payload_size); /** - * Called when a packet is detected as lost. |next_pn| is the next unsent packet number, - * used for setting the recovery window. + * Called when a packet is detected as lost. + * @param bytes bytes declared lost, or zero iff ECN_CE is observed + * @param next_pn the next unsent packet number, used for setting the recovery window */ void (*cc_on_lost)(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, uint64_t lost_pn, uint64_t next_pn, int64_t now, uint32_t max_udp_payload_size); @@ -192,6 +201,27 @@ void quicly_cc_reno_on_lost(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t int64_t now, uint32_t max_udp_payload_size); void quicly_cc_reno_on_persistent_congestion(quicly_cc_t *cc, const quicly_loss_t *loss, int64_t now); void quicly_cc_reno_on_sent(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, int64_t now); +/** + * Updates ECN counter when loss is observed. + */ +static void quicly_cc__update_ecn_episodes(quicly_cc_t *cc, uint32_t lost_bytes, uint64_t lost_pn); + +/* inline definitions */ + +inline void quicly_cc__update_ecn_episodes(quicly_cc_t *cc, uint32_t lost_bytes, uint64_t lost_pn) +{ + /* when it is a new loss episode, initially assume that all losses are due to ECN signalling ... */ + if (lost_pn >= cc->recovery_end) { + ++cc->num_ecn_loss_episodes; + cc->episode_by_ecn = 1; + } + + /* ... 
but if a loss is observed, decrement the ECN loss episode counter */ + if (lost_bytes != 0 && cc->episode_by_ecn) { + --cc->num_ecn_loss_episodes; + cc->episode_by_ecn = 0; + } +} #ifdef __cplusplus } diff --git a/include/quicly/constants.h b/include/quicly/constants.h index 151af360..86c622e2 100644 --- a/include/quicly/constants.h +++ b/include/quicly/constants.h @@ -97,6 +97,7 @@ extern "C" { #define QUICLY_TRANSPORT_ERROR_KEY_UPDATE QUICLY_ERROR_FROM_TRANSPORT_ERROR_CODE(0xe) #define QUICLY_TRANSPORT_ERROR_AEAD_LIMIT_REACHED QUICLY_ERROR_FROM_TRANSPORT_ERROR_CODE(0xf) #define QUICLY_TRANSPORT_ERROR_TLS_ALERT_BASE QUICLY_ERROR_FROM_TRANSPORT_ERROR_CODE(0x100) +#define QUICLY_TRANSPORT_ERROR_MP_PROTOCOL_VIOLATION QUICLY_ERROR_FROM_TRANSPORT_ERROR_CODE(0xba01) /* internal error codes, used purely for signaling status to the application */ #define QUICLY_ERROR_PACKET_IGNORED 0xff01 @@ -108,6 +109,7 @@ extern "C" { #define QUICLY_ERROR_STATE_EXHAUSTION 0xff07 #define QUICLY_ERROR_INVALID_INITIAL_VERSION 0xff08 #define QUICLY_ERROR_DECRYPTION_FAILED 0xff09 +#define QUICLY_ERROR_INVALID_PARAMETERS 0xff0a typedef int64_t quicly_stream_id_t; diff --git a/include/quicly/frame.h b/include/quicly/frame.h index 04afd0b7..91061ca4 100644 --- a/include/quicly/frame.h +++ b/include/quicly/frame.h @@ -60,6 +60,10 @@ extern "C" { #define QUICLY_FRAME_TYPE_DATAGRAM_NOLEN 48 #define QUICLY_FRAME_TYPE_DATAGRAM_WITHLEN 49 #define QUICLY_FRAME_TYPE_ACK_FREQUENCY 0xaf +#define QUICLY_FRAME_TYPE_ACK_MP 0x15228c00 +#define QUICLY_FRAME_TYPE_ACK_MP_ECN 0x15228c01 +#define QUICLY_FRAME_TYPE_PATH_ABANDON 0x15228c05 +#define QUICLY_FRAME_TYPE_PATH_STATUS 0x15228c06 #define QUICLY_FRAME_TYPE_STREAM_BITS 0x7 #define QUICLY_FRAME_TYPE_STREAM_BIT_OFF 0x4 @@ -77,8 +81,10 @@ extern "C" { #define QUICLY_STOP_SENDING_FRAME_CAPACITY (1 + 8 + 8) #define QUICLY_ACK_MAX_GAPS 256 #define QUICLY_ACK_FRAME_CAPACITY (1 + 8 + 8 + 1) +#define QUICLY_ACK_MP_FRAME_CAPACITY (4 + 8 + 8 + 8 + 1) #define 
QUICLY_PATH_CHALLENGE_FRAME_CAPACITY (1 + 8) #define QUICLY_STREAM_FRAME_CAPACITY (1 + 8 + 8 + 1) +#define QUICLY_PATH_STATUS_FRAME_CAPACITY (4 + 8 + 8 + 1) /** * maximum number of ACK blocks (inclusive) @@ -98,6 +104,8 @@ static size_t quicly_encodev_capacity(uint64_t v); static unsigned quicly_clz32(uint32_t v); static unsigned quicly_clz64(uint64_t v); +static ptls_iovec_t quicly__frame_decode_length_value_pair(const uint8_t **src, const uint8_t *end); + typedef struct st_quicly_stream_frame_t { uint64_t stream_id; unsigned is_fin : 1; @@ -234,18 +242,21 @@ typedef struct st_quicly_stop_sending_frame_t { static int quicly_decode_stop_sending_frame(const uint8_t **src, const uint8_t *end, quicly_stop_sending_frame_t *frame); -uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, quicly_ranges_t *ranges, uint64_t ack_delay); +uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, uint64_t multipath_cid, quicly_ranges_t *ranges, + uint64_t *ecn_counts, uint64_t ack_delay); typedef struct st_quicly_ack_frame_t { + uint64_t multipath_cid; uint64_t largest_acknowledged; uint64_t smallest_acknowledged; uint64_t ack_delay; uint64_t num_gaps; uint64_t ack_block_lengths[QUICLY_ACK_MAX_GAPS + 1]; uint64_t gaps[QUICLY_ACK_MAX_GAPS]; + uint64_t ecn_counts[3]; } quicly_ack_frame_t; -int quicly_decode_ack_frame(const uint8_t **src, const uint8_t *end, quicly_ack_frame_t *frame, int is_ack_ecn); +int quicly_decode_ack_frame(uint64_t frame_type, const uint8_t **src, const uint8_t *end, quicly_ack_frame_t *frame); static size_t quicly_new_token_frame_capacity(ptls_iovec_t token); static uint8_t *quicly_encode_new_token_frame(uint8_t *dst, ptls_iovec_t token); @@ -282,6 +293,23 @@ static uint8_t *quicly_encode_ack_frequency_frame(uint8_t *dst, uint64_t sequenc uint64_t max_ack_delay, int ignore_order); static int quicly_decode_ack_frequency_frame(const uint8_t **src, const uint8_t *end, quicly_ack_frequency_frame_t *frame); +typedef struct 
st_quicly_path_abandon_frame_t { + uint64_t dcid; + uint64_t error_code; + ptls_iovec_t reason_phrase; +} quicly_path_abandon_frame_t; + +static int quicly_decode_path_abandon_frame(const uint8_t **src, const uint8_t *end, quicly_path_abandon_frame_t *frame); + +typedef struct st_quicly_path_status_frame_t { + uint64_t dcid; + uint64_t sequence; + uint8_t available : 1; +} quicly_path_status_frame_t; + +static uint8_t *quicly_encode_path_status_frame(uint8_t *dst, uint64_t dcid, uint64_t sequence, int available); +static int quicly_decode_path_status_frame(const uint8_t **src, const uint8_t *end, quicly_path_status_frame_t *frame); + /* inline definitions */ inline uint16_t quicly_decode16(const uint8_t **src) @@ -342,6 +370,22 @@ inline uint8_t *quicly_encode64(uint8_t *p, uint64_t v) return p; } +inline ptls_iovec_t quicly__frame_decode_length_value_pair(const uint8_t **src, const uint8_t *end) +{ + ptls_iovec_t vec; + uint64_t len; + + if ((len = quicly_decodev(src, end)) == UINT64_MAX) + goto Fail; + if ((uint64_t)(end - *src) < len) + goto Fail; + vec = ptls_iovec_init(*src, len); + *src += len; + return vec; +Fail: + return ptls_iovec_init(NULL, 0); +} + inline size_t quicly_encodev_capacity(uint64_t v) { if (v > 63) { @@ -492,19 +536,15 @@ inline int quicly_decode_application_close_frame(const uint8_t **src, const uint inline int quicly_decode_transport_close_frame(const uint8_t **src, const uint8_t *end, quicly_transport_close_frame_t *frame) { - uint64_t error_code, reason_len; + uint64_t error_code; if ((error_code = quicly_decodev(src, end)) == UINT64_MAX) goto Error; frame->error_code = (uint16_t)error_code; if ((frame->frame_type = quicly_decodev(src, end)) == UINT64_MAX) goto Error; - if ((reason_len = quicly_decodev(src, end)) == UINT64_MAX) - goto Error; - if ((uint64_t)(end - *src) < reason_len) + if ((frame->reason_phrase = quicly__frame_decode_length_value_pair(src, end)).base == NULL) goto Error; - frame->reason_phrase = ptls_iovec_init(*src, 
reason_len); - *src += reason_len; return 0; Error: return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; @@ -825,6 +865,54 @@ inline int quicly_decode_ack_frequency_frame(const uint8_t **src, const uint8_t return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; } +inline int quicly_decode_path_abandon_frame(const uint8_t **src, const uint8_t *end, quicly_path_abandon_frame_t *frame) +{ + if ((frame->dcid = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + if ((frame->error_code = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + if ((frame->reason_phrase = quicly__frame_decode_length_value_pair(src, end)).base == NULL) + goto Error; + return 0; +Error: + return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; +} + +inline uint8_t *quicly_encode_path_status_frame(uint8_t *dst, uint64_t dcid, uint64_t sequence, int available) +{ + PTLS_BUILD_ASSERT(QUICLY_FRAME_TYPE_PATH_STATUS < 0x40000000 && "othewise adjust frame capacity"); + dst = quicly_encodev(dst, QUICLY_FRAME_TYPE_PATH_STATUS); + dst = quicly_encodev(dst, dcid); + dst = quicly_encodev(dst, sequence); + *dst++ = available ? 
2 : 1; + return dst; +} + +inline int quicly_decode_path_status_frame(const uint8_t **src, const uint8_t *end, quicly_path_status_frame_t *frame) +{ + uint64_t status; + + if ((frame->dcid = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + if ((frame->sequence = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + if ((status = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + switch (status) { + case 1: + frame->available = 0; + break; + case 2: + frame->available = 1; + break; + default: + goto Error; + } + return 0; +Error: + return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; +} + #ifdef __cplusplus } #endif diff --git a/include/quicly/local_cid.h b/include/quicly/local_cid.h index e901be0f..7dc8d2b9 100644 --- a/include/quicly/local_cid.h +++ b/include/quicly/local_cid.h @@ -55,6 +55,9 @@ typedef struct st_quicly_local_cid_t { uint64_t sequence; quicly_cid_t cid; uint8_t stateless_reset_token[QUICLY_STATELESS_RESET_TOKEN_LEN]; + struct { + struct st_quicly_pn_space_t *space; + } multipath; } quicly_local_cid_t; /** @@ -122,7 +125,14 @@ int quicly_local_cid_on_lost(quicly_local_cid_set_t *set, uint64_t sequence); * This makes one slot for CIDs empty. The CID generator callback is then called to fill the slot with a new CID. 
* @return 0 if the request was legal, otherwise an error code */ -int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int *has_pending); +int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int *has_pending, + struct st_quicly_pn_space_t **multipath_space); +/** + * iterates through CIDs being issued + * @param index set to -1 to start new iteration + * @return index of the next entry that has been issued, or -1 if the iteration ended + */ +ssize_t quicly_local_cid_get_next(quicly_local_cid_set_t *set, ssize_t index); /* inline definitions */ diff --git a/include/quicly/loss.h b/include/quicly/loss.h index de6d15a5..bb25efcd 100644 --- a/include/quicly/loss.h +++ b/include/quicly/loss.h @@ -159,7 +159,8 @@ typedef struct quicly_loss_t { quicly_sentmap_t sentmap; } quicly_loss_t; -typedef void (*quicly_loss_on_detect_cb)(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold); +typedef void (*quicly_loss_on_detect_cb)(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold, + struct st_quicly_conn_t *conn); typedef enum quicly_loss_ack_received_kind_t { QUICLY_LOSS_ACK_RECEIVED_KIND_NON_ACK_ELICITING = 0, @@ -185,17 +186,18 @@ static void quicly_loss_on_ack_received(quicly_loss_t *r, uint64_t largest_newly * and then call quicly_loss_update_alarm and update the alarm */ static int quicly_loss_on_alarm(quicly_loss_t *r, int64_t now, uint32_t max_ack_delay, int is_1rtt_only, - size_t *min_packets_to_send, int *restrict_sending, quicly_loss_on_detect_cb on_loss_detected); + size_t *min_packets_to_send, int *restrict_sending, struct st_quicly_conn_t *conn, + quicly_loss_on_detect_cb on_loss_detected); /** * */ -int quicly_loss_detect_loss(quicly_loss_t *r, int64_t now, uint32_t max_ack_delay, int is_1rtt_only, +int quicly_loss_detect_loss(quicly_loss_t *r, int64_t now, uint32_t max_ack_delay, int is_1rtt_only, struct st_quicly_conn_t *conn, quicly_loss_on_detect_cb 
on_loss_detected); /** * initializes the sentmap iterator, evicting the entries considered too old. */ int quicly_loss_init_sentmap_iter(quicly_loss_t *loss, quicly_sentmap_iter_t *iter, int64_t now, uint32_t max_ack_delay, - int is_closing); + int is_closing, struct st_quicly_conn_t *conn); /** * Returns the timeout for sentmap entries. This timeout is also used as the duration of CLOSING / DRAINING state, and therefore be * longer than 3PTO. At the moment, the value is 4PTO. @@ -373,14 +375,15 @@ inline void quicly_loss_on_ack_received(quicly_loss_t *r, uint64_t largest_newly } inline int quicly_loss_on_alarm(quicly_loss_t *r, int64_t now, uint32_t max_ack_delay, int is_1rtt_only, - size_t *min_packets_to_send, int *restrict_sending, quicly_loss_on_detect_cb on_loss_detected) + size_t *min_packets_to_send, int *restrict_sending, quicly_conn_t *conn, + quicly_loss_on_detect_cb on_loss_detected) { r->alarm_at = INT64_MAX; *min_packets_to_send = 1; if (r->loss_time != INT64_MAX) { /* Time threshold loss detection. Send at least 1 packet, but no restrictions on sending otherwise. */ *restrict_sending = 0; - return quicly_loss_detect_loss(r, now, max_ack_delay, is_1rtt_only, on_loss_detected); + return quicly_loss_detect_loss(r, now, max_ack_delay, is_1rtt_only, conn, on_loss_detected); } /* PTO. Send at least and at most 1 packet during speculative probing and 2 packets otherwise. 
*/ ++r->pto_count; diff --git a/include/quicly/sentmap.h b/include/quicly/sentmap.h index 63ea9ea7..ae976d4d 100644 --- a/include/quicly/sentmap.h +++ b/include/quicly/sentmap.h @@ -60,6 +60,10 @@ typedef struct st_quicly_sent_packet_t { * if sent on a promoted path */ uint8_t promoted_path : 1; + /** + * key phase bit used on 1-RTT packets + */ + uint8_t key_phase_bit : 1; /** * number of bytes in-flight for the packet, from the context of CC (becomes zero when deemed lost, but not when PTO fires) */ @@ -93,7 +97,8 @@ typedef enum en_quicly_sentmap_event_t { * @param acked true if acked, false if the information has to be scheduled for retransmission * @param data data */ -typedef int (*quicly_sent_acked_cb)(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *data); +typedef int (*quicly_sent_acked_cb)(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *data, + struct st_quicly_conn_t *conn); struct st_quicly_sent_ack_additional_t { uint8_t gap; @@ -110,17 +115,19 @@ struct st_quicly_sent_t { union { quicly_sent_packet_t packet; /** - * ACK frame. Represents up to 8 ack ranges. If not full, `additional` list is terminated by .gap = 0. + * ACK frame. Represents up to 7 ack ranges. If not full, `additional` list is terminated by .gap = 0. 
*/ struct { uint64_t start; union { struct { uint64_t start_length; - struct st_quicly_sent_ack_additional_t additional[4]; + uint8_t path_id; + struct st_quicly_sent_ack_additional_t additional[3]; } ranges64; struct { uint8_t start_length; + uint8_t path_id; struct st_quicly_sent_ack_additional_t additional[7]; } ranges8; }; @@ -256,7 +263,7 @@ int quicly_sentmap_prepare(quicly_sentmap_t *map, uint64_t packet_number, int64_ /** * commits a write */ -static void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_flight, int promoted_path); +static void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_flight, int promoted_path, int key_phase_bit); /** * Allocates a slot to contain a callback for a frame. The function MUST be called after _prepare but before _commit. */ @@ -277,10 +284,12 @@ void quicly_sentmap_skip(quicly_sentmap_iter_t *iter); /** * updates the state of the packet being pointed to by the iterator, _and advances to the next packet_ */ -int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, quicly_sentmap_event_t event); +int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, quicly_sentmap_event_t event, + struct st_quicly_conn_t *conn); struct st_quicly_sent_block_t *quicly_sentmap__new_block(quicly_sentmap_t *map); -int quicly_sentmap__type_packet(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent); +int quicly_sentmap__type_packet(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + struct st_quicly_conn_t *conn); /* inline definitions */ @@ -294,7 +303,7 @@ inline int quicly_sentmap_is_open(quicly_sentmap_t *map) return map->_pending_packet != NULL; } -inline void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_flight, int promoted_path) +inline void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_flight, int promoted_path, int key_phase_bit) { 
assert(quicly_sentmap_is_open(map)); @@ -304,8 +313,9 @@ inline void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_fligh map->bytes_in_flight += bytes_in_flight; } map->_pending_packet->data.packet.frames_in_flight = 1; - if (promoted_path) - map->_pending_packet->data.packet.promoted_path = 1; + map->_pending_packet->data.packet.promoted_path = promoted_path; + map->_pending_packet->data.packet.key_phase_bit = key_phase_bit; + map->_pending_packet = NULL; ++map->num_packets; diff --git a/lib/cc-cubic.c b/lib/cc-cubic.c index e1b848d8..c1edecbd 100644 --- a/lib/cc-cubic.c +++ b/lib/cc-cubic.c @@ -105,6 +105,8 @@ static void cubic_on_acked(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t static void cubic_on_lost(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, uint64_t lost_pn, uint64_t next_pn, int64_t now, uint32_t max_udp_payload_size) { + quicly_cc__update_ecn_episodes(cc, bytes, lost_pn); + /* Nothing to do if loss is in recovery window. */ if (lost_pn < cc->recovery_end) return; diff --git a/lib/cc-pico.c b/lib/cc-pico.c index 2e520829..07a8817c 100644 --- a/lib/cc-pico.c +++ b/lib/cc-pico.c @@ -95,6 +95,8 @@ static void pico_on_acked(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t b static void pico_on_lost(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, uint64_t lost_pn, uint64_t next_pn, int64_t now, uint32_t max_udp_payload_size) { + quicly_cc__update_ecn_episodes(cc, bytes, lost_pn); + /* Nothing to do if loss is in recovery window. 
*/ if (lost_pn < cc->recovery_end) return; diff --git a/lib/cc-reno.c b/lib/cc-reno.c index 720a5269..3152d881 100644 --- a/lib/cc-reno.c +++ b/lib/cc-reno.c @@ -53,6 +53,8 @@ static void reno_on_acked(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t b void quicly_cc_reno_on_lost(quicly_cc_t *cc, const quicly_loss_t *loss, uint32_t bytes, uint64_t lost_pn, uint64_t next_pn, int64_t now, uint32_t max_udp_payload_size) { + quicly_cc__update_ecn_episodes(cc, bytes, lost_pn); + /* Nothing to do if loss is in recovery window. */ if (lost_pn < cc->recovery_end) return; diff --git a/lib/defaults.c b/lib/defaults.c index eef6b54f..48dbc907 100644 --- a/lib/defaults.c +++ b/lib/defaults.c @@ -21,6 +21,7 @@ */ #include #include "quicly/defaults.h" +#include "quicly.h" #define DEFAULT_INITIAL_EGRESS_MAX_UDP_PAYLOAD_SIZE 1280 #define DEFAULT_MAX_UDP_PAYLOAD_SIZE 1472 @@ -54,6 +55,7 @@ const quicly_context_t quicly_spec_context = {NULL, DEFAULT_MAX_PROBE_PACKETS, DEFAULT_MAX_PATH_VALIDATION_FAILURES, 0, /* enlarge_client_hello */ + 1, /* enable_ecn */ NULL, NULL, /* on_stream_open */ &quicly_default_stream_scheduler, @@ -86,6 +88,7 @@ const quicly_context_t quicly_performant_context = {NULL, DEFAULT_MAX_PROBE_PACKETS, DEFAULT_MAX_PATH_VALIDATION_FAILURES, 0, /* enlarge_client_hello */ + 1, /* enable_ecn */ NULL, NULL, /* on_stream_open */ &quicly_default_stream_scheduler, @@ -429,15 +432,25 @@ static int default_setup_cipher(quicly_crypto_engine_t *engine, quicly_conn_t *c static void default_finalize_send_packet(quicly_crypto_engine_t *engine, quicly_conn_t *conn, ptls_cipher_context_t *header_protect_ctx, ptls_aead_context_t *packet_protect_ctx, - ptls_iovec_t datagram, size_t first_byte_at, size_t payload_from, uint64_t packet_number, - int coalesced) + ptls_iovec_t datagram, size_t first_byte_at, size_t payload_from, uint64_t dcid, + uint64_t packet_number, int coalesced) { + assert(dcid != UINT64_MAX); + ptls_aead_supplementary_encryption_t supp = {.ctx = 
header_protect_ctx, .input = datagram.base + payload_from - QUICLY_SEND_PN_SIZE + QUICLY_MAX_PN_SIZE}; + uint8_t multipath_iv[PTLS_MAX_IV_SIZE]; + size_t multipath_iv_len = 0; + if (dcid != 0) { + multipath_iv_len = quicly_build_multipath_iv(packet_protect_ctx->algo, dcid, multipath_iv); + ptls_aead_xor_iv(packet_protect_ctx, multipath_iv, multipath_iv_len); + } ptls_aead_encrypt_s(packet_protect_ctx, datagram.base + payload_from, datagram.base + payload_from, datagram.len - payload_from - packet_protect_ctx->algo->tag_size, packet_number, datagram.base + first_byte_at, payload_from - first_byte_at, &supp); + if (dcid != 0) + ptls_aead_xor_iv(packet_protect_ctx, multipath_iv, multipath_iv_len); datagram.base[first_byte_at] ^= supp.output[0] & (QUICLY_PACKET_IS_LONG_HEADER(datagram.base[first_byte_at]) ? 0xf : 0x1f); for (size_t i = 0; i != QUICLY_SEND_PN_SIZE; ++i) diff --git a/lib/frame.c b/lib/frame.c index bfcec178..62ad8347 100644 --- a/lib/frame.c +++ b/lib/frame.c @@ -31,7 +31,8 @@ uint8_t *quicly_encode_path_challenge_frame(uint8_t *dst, int is_response, const return dst; } -uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, quicly_ranges_t *ranges, uint64_t ack_delay) +uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, uint64_t multipath_cid, quicly_ranges_t *ranges, + uint64_t *ecn_counts, uint64_t ack_delay) { #define WRITE_BLOCK(start, end) \ do { \ @@ -42,12 +43,19 @@ uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, quicly_ranges_t dst = quicly_encodev(dst, _end - _start - 1); \ } while (0) + /* emit ACK_ECN frame if any of the three ECN counts are non-zero */ + int has_ecn = (ecn_counts[0] | ecn_counts[1] | ecn_counts[2]) != 0; size_t range_index = ranges->num_ranges - 1; assert(ranges->num_ranges != 0); /* number of bytes being emitted without space check are 1 + 8 + 8 + 1 bytes (as defined in QUICLY_ACK_FRAME_CAPACITY) */ - *dst++ = QUICLY_FRAME_TYPE_ACK; + if (multipath_cid != UINT64_MAX) { + dst = 
quicly_encodev(dst, has_ecn ? QUICLY_FRAME_TYPE_ACK_MP_ECN : QUICLY_FRAME_TYPE_ACK_MP); + dst = quicly_encodev(dst, multipath_cid); + } else { + *dst++ = has_ecn ? QUICLY_FRAME_TYPE_ACK_ECN : QUICLY_FRAME_TYPE_ACK; + } dst = quicly_encodev(dst, ranges->ranges[range_index].end - 1); /* largest acknowledged */ dst = quicly_encodev(dst, ack_delay); /* ack delay */ PTLS_BUILD_ASSERT(QUICLY_MAX_ACK_BLOCKS - 1 <= 63); @@ -60,15 +68,55 @@ uint8_t *quicly_encode_ack_frame(uint8_t *dst, uint8_t *dst_end, quicly_ranges_t WRITE_BLOCK(ranges->ranges[range_index].end, ranges->ranges[range_index + 1].start); } + if (has_ecn) { + uint8_t buf[24], *p = buf; + for (size_t i = 0; i < 3; ++i) + p = quicly_encodev(p, ecn_counts[i]); + size_t len = p - buf; + if (dst_end - dst < len) + return NULL; + memcpy(dst, buf, len); + dst += len; + } + return dst; #undef WRITE_BLOCK } -int quicly_decode_ack_frame(const uint8_t **src, const uint8_t *end, quicly_ack_frame_t *frame, int is_ack_ecn) +int quicly_decode_ack_frame(uint64_t frame_type, const uint8_t **src, const uint8_t *end, quicly_ack_frame_t *frame) { uint64_t i, num_gaps, gap, ack_range; + int is_ack_ecn = 0, is_multipath = 0; + + switch (frame_type) { + case QUICLY_FRAME_TYPE_ACK: + is_ack_ecn = 0; + is_multipath = 0; + break; + case QUICLY_FRAME_TYPE_ACK_ECN: + is_ack_ecn = 1; + is_multipath = 0; + break; + case QUICLY_FRAME_TYPE_ACK_MP: + is_ack_ecn = 0; + is_multipath = 1; + break; + case QUICLY_FRAME_TYPE_ACK_MP_ECN: + is_ack_ecn = 1; + is_multipath = 1; + break; + default: + assert(!"logic flaw"); + break; + } + if (is_multipath) { + if ((frame->multipath_cid = quicly_decodev(src, end)) == UINT64_MAX) + goto Error; + } else { + frame->multipath_cid = UINT64_MAX; + } if ((frame->largest_acknowledged = quicly_decodev(src, end)) == UINT64_MAX) goto Error; if ((frame->ack_delay = quicly_decodev(src, end)) == UINT64_MAX) @@ -100,11 +148,14 @@ int quicly_decode_ack_frame(const uint8_t **src, const uint8_t *end, quicly_ack_ } if 
(is_ack_ecn) { - /* just skip ECT(0), ECT(1), ECT-CE counters for the time being */ - for (i = 0; i != 3; ++i) - if (quicly_decodev(src, end) == UINT64_MAX) + for (i = 0; i < PTLS_ELEMENTSOF(frame->ecn_counts); ++i) + if ((frame->ecn_counts[i] = quicly_decodev(src, end)) == UINT64_MAX) goto Error; + } else { + for (i = 0; i < PTLS_ELEMENTSOF(frame->ecn_counts); ++i) + frame->ecn_counts[i] = 0; } + return 0; Error: return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; diff --git a/lib/local_cid.c b/lib/local_cid.c index 7d0ee5a6..b67d7e00 100644 --- a/lib/local_cid.c +++ b/lib/local_cid.c @@ -35,6 +35,7 @@ static int generate_cid(quicly_local_cid_set_t *set, size_t idx) return 0; set->_encryptor->encrypt_cid(set->_encryptor, &set->cids[idx].cid, set->cids[idx].stateless_reset_token, &set->plaintext); + set->cids[idx].multipath.space = NULL; set->cids[idx].sequence = set->plaintext.path_id++; return 1; @@ -179,8 +180,11 @@ int quicly_local_cid_on_lost(quicly_local_cid_set_t *set, uint64_t sequence) return 1; } -int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int *_has_pending) +int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int *_has_pending, + struct st_quicly_pn_space_t **multipath_space) { + *multipath_space = NULL; + /* find the CID to be retired, also check if there is at least one CID that has been issued */ size_t retired_at = set->_size; int becomes_empty = 1; @@ -208,6 +212,8 @@ int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int /* retire given CID */ set->cids[retired_at].state = QUICLY_LOCAL_CID_STATE_IDLE; set->cids[retired_at].sequence = UINT64_MAX; + *multipath_space = set->cids[retired_at].multipath.space; + set->cids[retired_at].multipath.space = NULL; /* move following PENDING CIDs to front */ for (size_t i = retired_at + 1; i < set->_size; i++) { @@ -227,3 +233,13 @@ int quicly_local_cid_retire(quicly_local_cid_set_t *set, uint64_t sequence, int return 0; } + +ssize_t 
quicly_local_cid_get_next(quicly_local_cid_set_t *set, ssize_t index) +{ + while (++index < quicly_local_cid_get_size(set)) { + if (set->cids[index].state == QUICLY_LOCAL_CID_STATE_IDLE) + continue; + return index; + } + return -1; +} diff --git a/lib/loss.c b/lib/loss.c index eb5da86f..e60ab806 100644 --- a/lib/loss.c +++ b/lib/loss.c @@ -22,7 +22,7 @@ #include "quicly/loss.h" int quicly_loss_init_sentmap_iter(quicly_loss_t *loss, quicly_sentmap_iter_t *iter, int64_t now, uint32_t max_ack_delay, - int is_closing) + int is_closing, struct st_quicly_conn_t *conn) { quicly_sentmap_init_iter(&loss->sentmap, iter); @@ -41,7 +41,7 @@ int quicly_loss_init_sentmap_iter(quicly_loss_t *loss, quicly_sentmap_iter_t *it quicly_sentmap_skip(iter); continue; } - if ((ret = quicly_sentmap_update(&loss->sentmap, iter, QUICLY_SENTMAP_EVENT_EXPIRED)) != 0) + if ((ret = quicly_sentmap_update(&loss->sentmap, iter, QUICLY_SENTMAP_EVENT_EXPIRED, conn)) != 0) return ret; } @@ -52,7 +52,7 @@ int quicly_loss_init_sentmap_iter(quicly_loss_t *loss, quicly_sentmap_iter_t *it } int quicly_loss_detect_loss(quicly_loss_t *loss, int64_t now, uint32_t max_ack_delay, int is_1rtt_only, - quicly_loss_on_detect_cb on_loss_detected) + struct st_quicly_conn_t *conn, quicly_loss_on_detect_cb on_loss_detected) { /* This function ensures that the value returned in loss_time is when the next application timer should be set for loss * detection. if no timer is required, loss_time is set to INT64_MAX. 
*/ @@ -72,7 +72,7 @@ int quicly_loss_detect_loss(quicly_loss_t *loss, int64_t now, uint32_t max_ack_d loss->loss_time = INT64_MAX; - if ((ret = quicly_loss_init_sentmap_iter(loss, &iter, now, max_ack_delay, 0)) != 0) + if ((ret = quicly_loss_init_sentmap_iter(loss, &iter, now, max_ack_delay, 0, conn)) != 0) return ret; /* Mark packets as lost if they are smaller than the largest_acked and outside either time-threshold or packet-threshold @@ -81,8 +81,8 @@ int quicly_loss_detect_loss(quicly_loss_t *loss, int64_t now, uint32_t max_ack_d int64_t largest_acked_signed = loss->largest_acked_packet_plus1[sent->ack_epoch] - 1; if ((int64_t)sent->packet_number < largest_acked_signed && (CHECK_TIME_THRESHOLD(sent) || CHECK_PACKET_THRESHOLD(sent))) { if (sent->cc_bytes_in_flight != 0) { - on_loss_detected(loss, sent, !CHECK_PACKET_THRESHOLD(sent)); - if ((ret = quicly_sentmap_update(&loss->sentmap, &iter, QUICLY_SENTMAP_EVENT_LOST)) != 0) + on_loss_detected(loss, sent, !CHECK_PACKET_THRESHOLD(sent), conn); + if ((ret = quicly_sentmap_update(&loss->sentmap, &iter, QUICLY_SENTMAP_EVENT_LOST, conn)) != 0) return ret; } else { quicly_sentmap_skip(&iter); @@ -100,7 +100,7 @@ int quicly_loss_detect_loss(quicly_loss_t *loss, int64_t now, uint32_t max_ack_d #undef CHECK_PACKET_THRESHOLD if (!is_1rtt_only) { - if ((ret = quicly_loss_init_sentmap_iter(loss, &iter, now, max_ack_delay, 0)) != 0) + if ((ret = quicly_loss_init_sentmap_iter(loss, &iter, now, max_ack_delay, 0, conn)) != 0) return ret; sent = quicly_sentmap_get(&iter); } diff --git a/lib/quicly.c b/lib/quicly.c index 889f8b95..42803729 100644 --- a/lib/quicly.c +++ b/lib/quicly.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,7 @@ #define QUICLY_TRANSPORT_PARAMETER_ID_RETRY_SOURCE_CONNECTION_ID 16 #define QUICLY_TRANSPORT_PARAMETER_ID_MAX_DATAGRAM_FRAME_SIZE 0x20 #define QUICLY_TRANSPORT_PARAMETER_ID_MIN_ACK_DELAY 0xff03de1a +#define 
QUICLY_TRANSPORT_PARAMETER_ID_ENABLE_MULTIPATH 0x0f739bbc1b666d05 /** * maximum size of token that quicly accepts @@ -89,8 +91,10 @@ KHASH_MAP_INIT_INT64(quicly_stream_t, quicly_stream_t *) QUICLY_##label(_conn, __VA_ARGS__); \ QUICLY_TRACER(label, _conn, __VA_ARGS__); \ } while (0) +#define QUICLY_PROBE_ENABLED(label) (QUICLY_##label##_ENABLED()) #else #define QUICLY_PROBE(label, conn, ...) QUICLY_TRACER(label, conn, __VA_ARGS__) +#define QUICLY_PROBE_ENABLED(label) (0) #endif #define QUICLY_PROBE_HEXDUMP(s, l) \ ({ \ @@ -113,6 +117,10 @@ struct st_quicly_pn_space_t { * acks to be sent to remote peer */ quicly_ranges_t ack_queue; + /** + * when to send an ACK + */ + int64_t send_ack_at; /** * time at when the largest pn in the ack_queue has been received (or INT64_MAX if none) */ @@ -125,6 +133,10 @@ struct st_quicly_pn_space_t { * number of ACK-eliciting packets that have not been ACKed yet */ uint32_t unacked_count; + /** + * ECN in the order of ECT(0), ECT(1), CE + */ + uint64_t ecn_counts[3]; /** * maximum number of ACK-eliciting packets to be queued before sending an ACK */ @@ -132,7 +144,11 @@ struct st_quicly_pn_space_t { /** * boolean indicating if reorder should NOT trigger an immediate ack */ - uint8_t ignore_order; + uint8_t ignore_order : 1; + /** + * + */ + uint8_t delayed_free : 1; }; struct st_quicly_handshake_space_t { @@ -144,8 +160,30 @@ struct st_quicly_handshake_space_t { uint16_t largest_ingress_udp_payload_size; }; +struct st_quicly_application_space_t; + +struct st_quicly_path_vtable_t { + /** + * if this is a MP-QUIC connection + */ + unsigned multipath : 1; + /** + * Returns pn space. If multipath is not used, the only one application space is always returned. If multipath is used, space + * corresponding to given path ID is returned, or if the corresponding path no longer exists, PACKET_IGNORED is returned. Other + * error codes indicate fatal error to the connection. 
+ */ + int (*get_space)(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t path_id); + /** + * iteration; see quicly_local_cid_get_next + */ + ssize_t (*foreach_space)(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t *path_id, ssize_t index); + /** + * returns IV to be provided to `ptls_aead_xor_iv` + */ + size_t (*get_iv)(struct st_quicly_application_space_t *space, uint8_t *iv, uint64_t path_id); +}; + struct st_quicly_application_space_t { - struct st_quicly_pn_space_t super; struct { struct { struct { @@ -162,19 +200,52 @@ struct st_quicly_application_space_t { struct st_quicly_cipher_context_t key; uint8_t secret[PTLS_MAX_DIGEST_SIZE]; uint64_t key_phase; + uint64_t next_key_update_num_sent; struct { /** - * PN at which key update was initiated. Set to UINT64_MAX once key update is acked. + * stats.num_packets.sent at which key update was initiated. Set to UINT64_MAX once key update is acked. */ uint64_t last; /** - * PN at which key update should be initiated. Set to UINT64_MAX when key update cannot be initiated. + * stats.num_packets.sent at which key update should be initiated. Set to UINT64_MAX when key update cannot be + * initiated. */ uint64_t next; - } key_update_pn; + } key_update_at; } egress; } cipher; int one_rtt_writable; + const struct st_quicly_path_vtable_t *vtable; + struct { + struct st_quicly_pn_space_t *space; + } non_multipath; +}; + +struct st_quicly_path_egress_t { + /** + * loss recovery + */ + quicly_loss_t loss; + /** + * next or the currently encoding packet number + */ + uint64_t packet_number; + /** + * next PN to be skipped + */ + uint64_t next_pn_to_skip; + /** + * + */ + int64_t last_retransmittable_sent_at; + /** + * congestion control + */ + quicly_cc_t cc; + /** + * delivery rate estimator + */ + quicly_ratemeter_t ratemeter; }; struct st_quicly_conn_path_t { @@ -193,6 +264,10 @@ struct st_quicly_conn_path_t { * the same value of zero if peer CID is zero-length. 
*/ uint64_t dcid; + /** + * time when the path is created + */ + int64_t created_at; /** * Maximum number of packets being received by the connection when a packet was last received on this path. This value is used * to determine the least-recently-used path which will be recycled. @@ -214,6 +289,13 @@ struct st_quicly_conn_path_t { uint8_t send_; uint8_t data[QUICLY_PATH_CHALLENGE_DATA_LEN]; } path_response; + /** + * ECN + */ + struct { + enum en_quicly_ecn_state { QUICLY_ECN_OFF, QUICLY_ECN_ON, QUICLY_ECN_PROBING } state; + uint64_t counts[QUICLY_NUM_EPOCHS][3]; + } ecn; /** * if this path is the initial path (i.e., the one on which handshake is done) */ @@ -222,6 +304,15 @@ struct st_quicly_conn_path_t { * if only probe packets have been received (and hence have been sent) on the path */ uint8_t probe_only : 1; + /** + * + */ + uint8_t multipath_rebinding_check_complete : 1; + /** + * The most recent CID observed for packets arriving on this path. When multipath is active, this value is used for detecting + * NAT rebinding (in which case we'd want to retire the old path). + */ + uint64_t ingress_cid; /** * number of packets being sent / received on the path */ @@ -229,6 +320,10 @@ struct st_quicly_conn_path_t { uint64_t sent; uint64_t received; } num_packets; + /** + * loss detection, congestion control, packet number, etc. 
+ */ + struct st_quicly_path_egress_t *egress; }; struct st_quicly_conn_t { @@ -281,18 +376,6 @@ struct st_quicly_conn_t { * */ struct { - /** - * loss recovery - */ - quicly_loss_t loss; - /** - * next or the currently encoding packet number - */ - uint64_t packet_number; - /** - * next PN to be skipped - */ - uint64_t next_pn_to_skip; /** * */ @@ -304,6 +387,7 @@ struct st_quicly_conn_t { uint16_t error_code; uint64_t frame_type; /* UINT64_MAX if application close */ const char *reason_phrase; + uint64_t send_at; /* when to send CONNECTION_CLOSE or free the connection */ unsigned long num_packets_received; } connection_close; /** @@ -337,22 +421,6 @@ struct st_quicly_conn_t { int64_t update_at; uint64_t sequence; } ack_frequency; - /** - * - */ - int64_t last_retransmittable_sent_at; - /** - * when to send an ACK, connection close frames or to destroy the connection - */ - int64_t send_ack_at; - /** - * when a PATH_CHALLENGE or PATH_RESPONSE frame is to be sent on any path - */ - int64_t send_probe_at; - /** - * congestion control - */ - quicly_cc_t cc; /** * things to be sent at the stream-level, that are not governed by the stream scheduler */ @@ -396,9 +464,9 @@ struct st_quicly_conn_t { size_t count; } datagram_frame_payloads; /** - * delivery rate estimator + * ECN bits to be used for the datagrams built by the latest invocation of `quicly_send` */ - quicly_ratemeter_t ratemeter; + uint8_t send_ecn_bits; } egress; /** * crypto data @@ -476,6 +544,7 @@ struct st_quicly_handle_payload_state_t { const uint8_t *src, *const end; size_t epoch; size_t path_index; + struct st_quicly_pn_space_t *pn_space; uint64_t frame_type; }; @@ -653,15 +722,40 @@ static int needs_cid_auth(quicly_conn_t *conn) } } -static int64_t get_sentmap_expiration_time(quicly_conn_t *conn) +static int64_t get_sentmap_expiration_time(quicly_conn_t *conn, size_t path_index) { - return quicly_loss_get_sentmap_expiration_time(&conn->egress.loss, conn->super.remote.transport_params.max_ack_delay); 
+ return quicly_loss_get_sentmap_expiration_time(&conn->paths[path_index]->egress->loss, + conn->super.remote.transport_params.max_ack_delay); +} + +/** + * converts ECN bits to index in the order of ACK-ECN field (i.e., ECT(0) -> 0, ECT(1) -> 1, CE -> 2) + */ +static size_t get_ecn_index_from_bits(uint8_t bits) +{ + assert(1 <= bits && bits <= 3); + return (18 >> bits) & 3; +} + +static void update_ecn_state(quicly_conn_t *conn, size_t path_index, enum en_quicly_ecn_state new_state) +{ + assert(new_state == QUICLY_ECN_ON || new_state == QUICLY_ECN_OFF); + + conn->paths[path_index]->ecn.state = new_state; + if (new_state == QUICLY_ECN_ON) { + ++conn->super.stats.num_paths.ecn_validated; + } else { + ++conn->super.stats.num_paths.ecn_failed; + } + + QUICLY_PROBE(ECN_VALIDATION, conn, conn->stash.now, (int)new_state); + QUICLY_LOG_CONN(ecn_validation, conn, { PTLS_LOG_ELEMENT_SIGNED(state, (int)new_state); }); } static void ack_frequency_set_next_update_at(quicly_conn_t *conn) { if (conn->super.remote.transport_params.min_ack_delay_usec != UINT64_MAX) - conn->egress.ack_frequency.update_at = conn->stash.now + get_sentmap_expiration_time(conn); + conn->egress.ack_frequency.update_at = conn->stash.now + get_sentmap_expiration_time(conn, 0); } size_t quicly_decode_packet(quicly_context_t *ctx, quicly_decoded_packet_t *packet, const uint8_t *datagram, size_t datagram_size, @@ -677,6 +771,7 @@ size_t quicly_decode_packet(quicly_context_t *ctx, quicly_decoded_packet_t *pack packet->datagram_size = *off == 0 ? 
datagram_size : 0; packet->token = ptls_iovec_init(NULL, 0); packet->decrypted.pn = UINT64_MAX; + packet->ecn = 0; /* non-ECT */ /* move the cursor to the second byte */ src += *off + 1; @@ -797,25 +892,46 @@ uint64_t quicly_determine_packet_number(uint32_t truncated, size_t num_bits, uin return candidate; } -static void assert_consistency(quicly_conn_t *conn, int timer_must_be_in_future) +static int64_t calc_min_send_ack_at(quicly_conn_t *conn) +{ + int64_t at = INT64_MAX; + + if (conn->initial != NULL && at > conn->initial->super.send_ack_at) + at = conn->initial->super.send_ack_at; + if (conn->handshake != NULL && at > conn->handshake->super.send_ack_at) + at = conn->handshake->super.send_ack_at; + if (conn->application != NULL && conn->application->one_rtt_writable) { + struct st_quicly_pn_space_t *space; + ssize_t i = -1; + while ((i = conn->application->vtable->foreach_space(conn, &space, NULL, i)) != -1) { + if (at > space->send_ack_at) + at = space->send_ack_at; + } + } + + return at; +} + +static void assert_consistency(quicly_conn_t *conn, size_t path_index, int timer_must_be_in_future) { if (conn->super.state >= QUICLY_STATE_CLOSING) { - assert(!timer_must_be_in_future || conn->stash.now < conn->egress.send_ack_at); + assert(!timer_must_be_in_future || conn->stash.now < calc_min_send_ack_at(conn)); return; } - if (conn->egress.loss.sentmap.bytes_in_flight != 0 || conn->super.remote.address_validation.send_probe) { - assert(conn->egress.loss.alarm_at != INT64_MAX); + if (conn->paths[path_index]->egress->loss.sentmap.bytes_in_flight != 0 || conn->super.remote.address_validation.send_probe) { + assert(conn->paths[path_index]->egress->loss.alarm_at != INT64_MAX); } else { - assert(conn->egress.loss.loss_time == INT64_MAX); + assert(conn->paths[path_index]->egress->loss.loss_time == INT64_MAX); } /* Allow timers not in the future when the remote peer is not yet validated, since we may not be able to send packets even when * timers fire. 
*/ if (timer_must_be_in_future && conn->super.remote.address_validation.validated) - assert(conn->stash.now < conn->egress.loss.alarm_at); + assert(conn->stash.now < conn->paths[path_index]->egress->loss.alarm_at); } -static int on_invalid_ack(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_invalid_ack(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { if (acked) return QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION; @@ -1288,6 +1404,11 @@ ptls_t *quicly_get_tls(quicly_conn_t *conn) return conn->crypto.tls; } +int quicly_is_multipath(quicly_conn_t *conn) +{ + return conn->application != NULL && conn->application->vtable->multipath; +} + uint32_t quicly_num_streams_by_group(quicly_conn_t *conn, int uni, int locally_initiated) { int server_initiated = quicly_is_client(conn) != locally_initiated; @@ -1311,11 +1432,11 @@ int quicly_get_stats(quicly_conn_t *conn, quicly_stats_t *stats) memcpy(stats, &conn->super.stats, sizeof(conn->super.stats)); /* set or generate the non-pre-built stats fields here */ - stats->rtt = conn->egress.loss.rtt; - stats->loss_thresholds = conn->egress.loss.thresholds; - stats->cc = conn->egress.cc; - quicly_ratemeter_report(&conn->egress.ratemeter, &stats->delivery_rate); - stats->num_sentmap_packets_largest = conn->egress.loss.sentmap.num_packets_largest; + stats->rtt = conn->paths[0]->egress->loss.rtt; + stats->loss_thresholds = conn->paths[0]->egress->loss.thresholds; + stats->cc = conn->paths[0]->egress->cc; + quicly_ratemeter_report(&conn->paths[0]->egress->ratemeter, &stats->delivery_rate); + stats->num_sentmap_packets_largest = conn->paths[0]->egress->loss.sentmap.num_packets_largest; stats->handshake_confirmed_msec = conn->super.stats.handshake_confirmed_msec; return 0; @@ -1323,7 +1444,7 @@ int quicly_get_stats(quicly_conn_t *conn, quicly_stats_t *stats) int quicly_get_delivery_rate(quicly_conn_t *conn, quicly_rate_t 
*delivery_rate) { - quicly_ratemeter_report(&conn->egress.ratemeter, delivery_rate); + quicly_ratemeter_report(&conn->paths[0]->egress->ratemeter, delivery_rate); return 0; } @@ -1358,8 +1479,8 @@ static void update_idle_timeout(quicly_conn_t *conn, int is_in_receive) if (idle_msec == INT64_MAX) return; - uint32_t three_pto = 3 * quicly_rtt_get_pto(&conn->egress.loss.rtt, conn->super.ctx->transport_params.max_ack_delay, - conn->egress.loss.conf->min_pto); + uint32_t three_pto = 3 * quicly_rtt_get_pto(&conn->paths[0]->egress->loss.rtt, conn->super.ctx->transport_params.max_ack_delay, + conn->paths[0]->egress->loss.conf->min_pto); conn->idle_timeout.at = conn->stash.now + (idle_msec > three_pto ? idle_msec : three_pto); conn->idle_timeout.should_rearm_on_send = is_in_receive; } @@ -1384,27 +1505,30 @@ static int scheduler_can_send(quicly_conn_t *conn) return conn->super.ctx->stream_scheduler->can_send(conn->super.ctx->stream_scheduler, conn, conn_is_saturated); } -static void update_send_alarm(quicly_conn_t *conn, int can_send_stream_data, int is_after_send) +static void update_send_alarm(quicly_conn_t *conn, size_t path_index, int can_send_stream_data, int is_after_send) { - int has_outstanding = conn->egress.loss.sentmap.bytes_in_flight != 0 || conn->super.remote.address_validation.send_probe, + int has_outstanding = + conn->paths[path_index]->egress->loss.sentmap.bytes_in_flight != 0 || conn->super.remote.address_validation.send_probe, handshake_is_in_progress = conn->initial != NULL || conn->handshake != NULL; - quicly_loss_update_alarm(&conn->egress.loss, conn->stash.now, conn->egress.last_retransmittable_sent_at, has_outstanding, - can_send_stream_data, handshake_is_in_progress, conn->egress.max_data.sent, is_after_send); + quicly_loss_update_alarm(&conn->paths[path_index]->egress->loss, conn->stash.now, + conn->paths[path_index]->egress->last_retransmittable_sent_at, has_outstanding, can_send_stream_data, + handshake_is_in_progress, conn->egress.max_data.sent, 
is_after_send); } /** * Updates the send alarm and adjusts the delivery rate estimator. This function is called from the receive path. From the sendp * path, `update_send_alarm` is called directly. */ -static void setup_next_send(quicly_conn_t *conn) +static void setup_next_send(quicly_conn_t *conn, size_t path_index) { int can_send_stream_data = scheduler_can_send(conn); - update_send_alarm(conn, can_send_stream_data, 0); + update_send_alarm(conn, path_index, can_send_stream_data, 0); /* When the flow becomes application-limited due to receiving some information, stop collecting delivery rate samples. */ if (!can_send_stream_data) - quicly_ratemeter_not_cwnd_limited(&conn->egress.ratemeter, conn->egress.packet_number); + quicly_ratemeter_not_cwnd_limited(&conn->paths[path_index]->egress->ratemeter, + conn->paths[path_index]->egress->packet_number); } static int create_handshake_flow(quicly_conn_t *conn, size_t epoch) @@ -1438,18 +1562,22 @@ static struct st_quicly_pn_space_t *alloc_pn_space(size_t sz, uint32_t packet_to return NULL; quicly_ranges_init(&space->ack_queue); + space->send_ack_at = INT64_MAX; space->largest_pn_received_at = INT64_MAX; space->next_expected_packet_number = 0; space->unacked_count = 0; + for (size_t i = 0; i < PTLS_ELEMENTSOF(space->ecn_counts); ++i) + space->ecn_counts[i] = 0; space->packet_tolerance = packet_tolerance; space->ignore_order = 0; + space->delayed_free = 0; if (sz != sizeof(*space)) memset((uint8_t *)space + sizeof(*space), 0, sz - sizeof(*space)); return space; } -static void do_free_pn_space(struct st_quicly_pn_space_t *space) +static void free_pn_space(struct st_quicly_pn_space_t *space) { quicly_ranges_clear(&space->ack_queue); free(space); @@ -1479,8 +1607,8 @@ static int record_pn(quicly_ranges_t *ranges, uint64_t pn, int *is_out_of_order) return 0; } -static int record_receipt(struct st_quicly_pn_space_t *space, uint64_t pn, int is_ack_only, int64_t now, int64_t *send_ack_at, - uint64_t *received_out_of_order) 
+static int record_receipt(struct st_quicly_pn_space_t *space, uint64_t pn, uint8_t ecn, int is_ack_only, + int handshake_is_in_progress, int64_t now, uint64_t *received_out_of_order) { int ret, ack_now, is_out_of_order; @@ -1489,23 +1617,33 @@ static int record_receipt(struct st_quicly_pn_space_t *space, uint64_t pn, int i if (is_out_of_order) *received_out_of_order += 1; - ack_now = is_out_of_order && !space->ignore_order && !is_ack_only; + ack_now = !is_ack_only && ((is_out_of_order && !space->ignore_order) || ecn == IPTOS_ECN_CE); /* update largest_pn_received_at (TODO implement deduplication at an earlier moment?) */ if (space->ack_queue.ranges[space->ack_queue.num_ranges - 1].end == pn + 1) space->largest_pn_received_at = now; + /* increment ecn counters */ + if (ecn != 0) + space->ecn_counts[get_ecn_index_from_bits(ecn)] += 1; + /* if the received packet is ack-eliciting, update / schedule transmission of ACK */ if (!is_ack_only) { space->unacked_count++; - if (space->unacked_count >= space->packet_tolerance) + if (space->unacked_count >= space->packet_tolerance) { + ack_now = 1; + } else if (handshake_is_in_progress) { + /* ACK immediately if handshake is in progress, even if it is the application data space. This mirrors quicly's design + * as the sender that does take ack delay into consideration until both initial and handshake contexts are discarded. + * TODO induce immediate ACK on the sender side rather than having this logic at the receiver side. 
*/ ack_now = 1; + } } if (ack_now) { - *send_ack_at = now; - } else if (*send_ack_at == INT64_MAX && space->unacked_count != 0) { - *send_ack_at = now + QUICLY_DELAYED_ACK_TIMEOUT; + space->send_ack_at = now; + } else if (space->send_ack_at == INT64_MAX && space->unacked_count != 0) { + space->send_ack_at = now + QUICLY_DELAYED_ACK_TIMEOUT; } ret = 0; @@ -1520,7 +1658,7 @@ static void free_handshake_space(struct st_quicly_handshake_space_t **space) dispose_cipher(&(*space)->cipher.ingress); if ((*space)->cipher.egress.aead != NULL) dispose_cipher(&(*space)->cipher.egress); - do_free_pn_space(&(*space)->super); + free_pn_space(&(*space)->super); *space = NULL; } } @@ -1557,20 +1695,104 @@ static void free_application_space(struct st_quicly_application_space_t **space) if ((*space)->cipher.egress.key.aead != NULL) dispose_cipher(&(*space)->cipher.egress.key); ptls_clear_memory((*space)->cipher.egress.secret, sizeof((*space)->cipher.egress.secret)); - do_free_pn_space(&(*space)->super); + if ((*space)->non_multipath.space != NULL) + free_pn_space((*space)->non_multipath.space); + free(*space); *space = NULL; } } +/** + * given path ID (i.e., local CID sequence), return the corresponding ack space + */ +static int multipath_get_space(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t path_id) +{ + for (size_t i = 0; i < quicly_local_cid_get_size(&conn->super.local.cid_set); ++i) { + quicly_local_cid_t *cid = &conn->super.local.cid_set.cids[i]; + if (cid->sequence != path_id) + continue; + assert(cid->state != QUICLY_LOCAL_CID_STATE_IDLE); + if (cid->multipath.space == NULL && + (cid->multipath.space = alloc_pn_space(sizeof(*cid->multipath.space), QUICLY_DEFAULT_PACKET_TOLERANCE)) == NULL) + return PTLS_ERROR_NO_MEMORY; + *space = cid->multipath.space; + return 0; + } + return QUICLY_ERROR_PACKET_IGNORED; +} + +static ssize_t multipath_foreach_space(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t *path_id, ssize_t index) +{ + while 
((index = quicly_local_cid_get_next(&conn->super.local.cid_set, index)) != -1) { + quicly_local_cid_t *cid = &conn->super.local.cid_set.cids[index]; + if (cid->multipath.space != NULL) { + *space = cid->multipath.space; + if (path_id != NULL) + *path_id = cid->sequence; + break; + } + } + + return index; +} + +static size_t multipath_get_iv(struct st_quicly_application_space_t *space, uint8_t *iv, uint64_t path_id) +{ + assert(space->cipher.ingress.aead[0] != NULL && "slot 0 should always be available"); + return quicly_build_multipath_iv(space->cipher.ingress.aead[0]->algo, path_id, iv); +} + +static int non_multipath_get_space(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t path_id) +{ + *space = conn->application->non_multipath.space; + return 0; +} + +static ssize_t non_multipath_foreach_space(quicly_conn_t *conn, struct st_quicly_pn_space_t **space, uint64_t *path_id, + ssize_t index) +{ + if (index != -1) + return -1; + + *space = conn->application->non_multipath.space; + if (path_id != NULL) + *path_id = UINT64_MAX; + + return 0; +} + +static size_t non_multipath_get_iv(struct st_quicly_application_space_t *space, uint8_t *iv, uint64_t path_id) +{ + return 0; +} + static int setup_application_space(quicly_conn_t *conn) { - if ((conn->application = - (void *)alloc_pn_space(sizeof(struct st_quicly_application_space_t), QUICLY_DEFAULT_PACKET_TOLERANCE)) == NULL) + if ((conn->application = malloc(sizeof(*conn->application))) == NULL) return PTLS_ERROR_NO_MEMORY; + memset(conn->application, 0, sizeof(*conn->application)); + + if (conn->super.ctx->transport_params.enable_multipath && conn->super.remote.transport_params.enable_multipath) { + static const struct st_quicly_path_vtable_t callbacks = { + .multipath = 1, .get_space = multipath_get_space, .foreach_space = multipath_foreach_space, .get_iv = multipath_get_iv}; + conn->application->vtable = &callbacks; + } else { + if ((conn->application->non_multipath.space = + 
alloc_pn_space(sizeof(*conn->application->non_multipath.space), QUICLY_DEFAULT_PACKET_TOLERANCE)) == NULL) { + free(conn->application); + conn->application = NULL; + return PTLS_ERROR_NO_MEMORY; + } + static const struct st_quicly_path_vtable_t callbacks = {.multipath = 0, + .get_space = non_multipath_get_space, + .foreach_space = non_multipath_foreach_space, + .get_iv = non_multipath_get_iv}; + conn->application->vtable = &callbacks; + } /* prohibit key-update until receiving an ACK for an 1-RTT packet */ - conn->application->cipher.egress.key_update_pn.last = 0; - conn->application->cipher.egress.key_update_pn.next = UINT64_MAX; + conn->application->cipher.egress.key_update_at.last = 0; + conn->application->cipher.egress.key_update_at.next = UINT64_MAX; return create_handshake_flow(conn, QUICLY_EPOCH_1RTT); } @@ -1648,8 +1870,8 @@ static int update_1rtt_egress_key(quicly_conn_t *conn) ++space->cipher.egress.key_phase; /* signal that we are waiting for an ACK */ - space->cipher.egress.key_update_pn.last = conn->egress.packet_number; - space->cipher.egress.key_update_pn.next = UINT64_MAX; + space->cipher.egress.key_update_at.last = conn->super.stats.num_packets.sent; + space->cipher.egress.key_update_at.next = UINT64_MAX; QUICLY_PROBE(CRYPTO_SEND_KEY_UPDATE, conn, conn->stash.now, space->cipher.egress.key_phase, QUICLY_PROBE_HEXDUMP(space->cipher.egress.secret, cipher->hash->digest_size)); @@ -1707,11 +1929,12 @@ static void stringify_address(char *buf, struct sockaddr *sa) case AF_INET6: *p++ = '['; inet_ntop(AF_INET6, &((struct sockaddr_in6 *)sa)->sin6_addr, p, sizeof(LONGEST_ADDRESS_STR)); + p += strlen(p); *p++ = ']'; port = ntohs(((struct sockaddr_in *)sa)->sin_port); break; default: - assert("unexpected addres family"); + assert(!"unexpected address family"); break; } @@ -1719,68 +1942,107 @@ static void stringify_address(char *buf, struct sockaddr *sa) sprintf(p, "%" PRIu16, port); } -static int new_path(quicly_conn_t *conn, size_t path_index, struct 
sockaddr *remote_addr, struct sockaddr *local_addr) +static int new_path(quicly_conn_t *conn, size_t path_index, struct sockaddr *remote_addr, struct sockaddr *local_addr, + uint64_t ingress_cid, uint32_t initcwnd) { struct st_quicly_conn_path_t *path; assert(conn->paths[path_index] == NULL); - if ((path = malloc(sizeof(*conn->paths[path_index]))) == NULL) + if ((path = malloc(sizeof(*path))) == NULL) return PTLS_ERROR_NO_MEMORY; + *path = (struct st_quicly_conn_path_t){.created_at = conn->stash.now, .ingress_cid = ingress_cid}; + /* setup non-zero fields */ + set_address(&path->address.remote, remote_addr); + set_address(&path->address.local, local_addr); if (path_index == 0) { - /* default path used for handshake */ - *path = (struct st_quicly_conn_path_t){ - .dcid = 0, - .path_challenge.send_at = INT64_MAX, - .initial = 1, - .probe_only = 0, - }; + /* handshake path */ + path->path_challenge.send_at = INT64_MAX; + path->initial = 1; } else { - *path = (struct st_quicly_conn_path_t){ - .dcid = UINT64_MAX, - .path_challenge.send_at = 0, - .probe_only = 1, - }; + /* alternate path (requires probing) */ + path->dcid = UINT64_MAX; + path->probe_only = 1; conn->super.ctx->tls->random_bytes(path->path_challenge.data, sizeof(path->path_challenge.data)); conn->super.stats.num_paths.created += 1; } - set_address(&path->address.remote, remote_addr); - set_address(&path->address.local, local_addr); + + /* setup egress; instantiate or share ack queue, loss detection, and CC depending on if multipah is used */ + if (path_index == 0 || quicly_is_multipath(conn)) { + if ((path->egress = malloc(sizeof(*path->egress))) == NULL) { + free(path); + return PTLS_ERROR_NO_MEMORY; + } + *path->egress = (struct st_quicly_path_egress_t){}; + + quicly_loss_init(&path->egress->loss, &conn->super.ctx->loss, + conn->super.ctx->loss.default_initial_rtt /* FIXME remember initial_rtt in session ticket */, + &conn->super.remote.transport_params.max_ack_delay, + 
&conn->super.remote.transport_params.ack_delay_exponent); + path->egress->next_pn_to_skip = + calc_next_pn_to_skip(conn->super.ctx->tls, 0, initcwnd, conn->super.ctx->initial_egress_max_udp_payload_size); + conn->super.ctx->init_cc->cb(conn->super.ctx->init_cc, &path->egress->cc, initcwnd, conn->stash.now); + path->ecn.state = conn->super.ctx->enable_ecn ? QUICLY_ECN_PROBING : QUICLY_ECN_OFF; + quicly_ratemeter_init(&path->egress->ratemeter); + } else { + /* ack queue, loss detection, CC are shared */ + path->egress = conn->paths[0]->egress; + } conn->paths[path_index] = path; - if (QUICLY_NEW_PATH_ENABLED() || ptls_log.is_active) { - char remote[sizeof(LONGEST_ADDRESS_STR)]; + if (QUICLY_PROBE_ENABLED(NEW_PATH) || ptls_log.is_active) { + char remote[sizeof(LONGEST_ADDRESS_STR)], local[sizeof(LONGEST_ADDRESS_STR)]; stringify_address(remote, &path->address.remote.sa); - QUICLY_NEW_PATH(conn, conn->stash.now, path_index, remote); + if (path->address.local.sa.sa_family != AF_UNSPEC) { + stringify_address(local, &path->address.local.sa); + } else { + local[0] = '\0'; + } + QUICLY_PROBE(NEW_PATH, conn, conn->stash.now, path_index, remote, local); QUICLY_LOG_CONN(new_path, conn, { PTLS_LOG_ELEMENT_UNSIGNED(path_index, path_index); PTLS_LOG_ELEMENT_SAFESTR(remote, remote); + PTLS_LOG_ELEMENT_SAFESTR(local, local); }); } return 0; } -/** - * if is_promote is set, paths[0] (the default path) is freed and the path specified by `path_index` is promoted - * if is_promote is not_set, paths[path_index] is freed - */ -static void delete_path(quicly_conn_t *conn, int is_promote, size_t path_index) +enum delete_path_mode { + /** + * frees all data associated to given path; this mode is guaranteed to succeed + */ + DELETE_PATH_MODE_FREE, + /** + * deletes given path, marking inflight data on that path as lost + */ + DELETE_PATH_MODE_DELETE, + /** + * promotes given path as the default path (i.e., path_index zero), deleting existing path zero; this mode is used when apparent + * port 
rebinding is confirmed + */ + DELETE_PATH_MODE_PROMOTE, +}; + +static int delete_path(quicly_conn_t *conn, size_t path_index, enum delete_path_mode mode) { struct st_quicly_conn_path_t *path; + int ret; - /* fetch and detatch the path object to be freed */ - if (is_promote) { - QUICLY_PROMOTE_PATH(conn, conn->stash.now, path_index); + /* fetch and detach the path object to be freed */ + if (mode == DELETE_PATH_MODE_PROMOTE) { + assert(path_index != 0); + QUICLY_PROBE(PROMOTE_PATH, conn, conn->stash.now, path_index); QUICLY_LOG_CONN(promote_path, conn, { PTLS_LOG_ELEMENT_UNSIGNED(path_index, path_index); }); path = conn->paths[0]; conn->paths[0] = conn->paths[path_index]; conn->paths[path_index] = NULL; conn->super.stats.num_paths.promoted += 1; } else { - QUICLY_DELETE_PATH(conn, conn->stash.now, path_index); + QUICLY_PROBE(DELETE_PATH, conn, conn->stash.now, path_index); QUICLY_LOG_CONN(delete_path, conn, { PTLS_LOG_ELEMENT_UNSIGNED(path_index, path_index); }); path = conn->paths[path_index]; conn->paths[path_index] = NULL; @@ -1789,19 +2051,69 @@ static void delete_path(quicly_conn_t *conn, int is_promote, size_t path_index) } /* deinstantiate */ + if (path_index == 0 || quicly_is_multipath(conn)) { + if (mode != DELETE_PATH_MODE_FREE) { + /* before disposing the path egress, mark all data inflight on the discarded path as lost */ + quicly_sentmap_iter_t iter; + const quicly_sent_packet_t *sent; + quicly_sentmap_init_iter(&path->egress->loss.sentmap, &iter); + while ((sent = quicly_sentmap_get(&iter))->packet_number != UINT64_MAX) { + if ((ret = quicly_sentmap_update(&path->egress->loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_EXPIRED, conn)) != 0) + goto Exit; + } + } + quicly_loss_dispose(&path->egress->loss); + free(path->egress); + } if (path->dcid != UINT64_MAX && conn->super.remote.cid_set.cids[0].cid.len != 0) retire_connection_id(conn, path->dcid); free(path); + + ret = 0; + +Exit: + return ret; } -static int open_path(quicly_conn_t *conn, size_t *path_index, 
struct sockaddr *remote_addr, struct sockaddr *local_addr) +/** + * When a path sharing a same incoming DCID with other paths is validated, the other paths have to be removed, as it is an apparent + * NAT rebinding and we do not want to send through the old paths. + * + * @param path_index the path that has just been validated; this is an in-out parameter as this function might move the slot + */ +static int multipath_handle_rebinding(quicly_conn_t *conn, size_t *path_index) { int ret; - /* packets arriving from new paths will start to get ignored once the number of paths that failed to validate reaches the - * defined threshold */ - if (conn->super.stats.num_paths.validation_failed > conn->super.ctx->max_path_validation_failures) - return QUICLY_ERROR_PACKET_IGNORED; + for (size_t i = 0; i < PTLS_ELEMENTSOF(conn->paths); ++i) { + if (i == *path_index || conn->paths[i] == NULL) + continue; + if (conn->paths[i]->ingress_cid == conn->paths[*path_index]->ingress_cid) { + if (i == 0) { + if ((ret = delete_path(conn, *path_index, DELETE_PATH_MODE_PROMOTE)) != 0) + goto Exit; + *path_index = 0; + } else { + if ((ret = delete_path(conn, i, DELETE_PATH_MODE_DELETE)) != 0) + goto Exit; + } + } + } + + ret = 0; + +Exit: + return ret; +} + +static int open_path(quicly_conn_t *conn, size_t *path_index, struct sockaddr *remote_addr, struct sockaddr *local_addr, + uint64_t ingress_cid) +{ + int ret; + + if (quicly_is_client(conn)) + assert(conn->paths[0]->address.local.sa.sa_family != AF_UNSPEC && + "when running as client, at least the port number is needed to distingush between the paths"); /* choose a slot that in unused or the least-recently-used one that has completed validation */ *path_index = SIZE_MAX; @@ -1820,35 +2132,18 @@ static int open_path(quicly_conn_t *conn, size_t *path_index, struct sockaddr *r return QUICLY_ERROR_PACKET_IGNORED; /* free existing path info */ - if (conn->paths[*path_index] != NULL) - delete_path(conn, 0, *path_index); + if 
(conn->paths[*path_index] != NULL && (ret = delete_path(conn, *path_index, DELETE_PATH_MODE_DELETE)) != 0) + return ret; /* initialize new path info */ - if ((ret = new_path(conn, *path_index, remote_addr, local_addr)) != 0) + if ((ret = new_path(conn, *path_index, remote_addr, local_addr, ingress_cid, + quicly_cc_calc_initial_cwnd(conn->super.ctx->initcwnd_packets, + conn->super.ctx->transport_params.max_udp_payload_size))) != 0) return ret; - /* schedule emission of PATH_CHALLENGE */ - conn->egress.send_probe_at = 0; - return 0; } -static void recalc_send_probe_at(quicly_conn_t *conn) -{ - conn->egress.send_probe_at = INT64_MAX; - - for (size_t i = 0; i < PTLS_ELEMENTSOF(conn->paths); ++i) { - if (conn->paths[i] == NULL) - continue; - if (conn->egress.send_probe_at > conn->paths[i]->path_challenge.send_at) - conn->egress.send_probe_at = conn->paths[i]->path_challenge.send_at; - if (conn->paths[i]->path_response.send_) { - conn->egress.send_probe_at = 0; - break; - } - } -} - void quicly_free(quicly_conn_t *conn) { lock_now(conn, 0); @@ -1856,14 +2151,12 @@ void quicly_free(quicly_conn_t *conn) QUICLY_PROBE(FREE, conn, conn->stash.now); QUICLY_LOG_CONN(free, conn, {}); -#if QUICLY_USE_DTRACE - if (QUICLY_CONN_STATS_ENABLED()) { + if (QUICLY_PROBE_ENABLED(CONN_STATS)) { quicly_stats_t stats; quicly_get_stats(conn, &stats); QUICLY_PROBE(CONN_STATS, conn, conn->stash.now, &stats, sizeof(stats)); // TODO: emit stats with QUICLY_LOG_CONN() } -#endif destroy_all_streams(conn, 0, 1); update_open_count(conn->super.ctx, -1); clear_datagram_frame_payloads(conn); @@ -1871,7 +2164,6 @@ void quicly_free(quicly_conn_t *conn) quicly_maxsender_dispose(&conn->ingress.max_data.sender); quicly_maxsender_dispose(&conn->ingress.max_streams.uni); quicly_maxsender_dispose(&conn->ingress.max_streams.bidi); - quicly_loss_dispose(&conn->egress.loss); kh_destroy(quicly_stream_t, conn->streams); @@ -1887,10 +2179,11 @@ void quicly_free(quicly_conn_t *conn) 
ptls_buffer_dispose(&conn->crypto.transport_params.buf); - for (size_t i = 0; i < PTLS_ELEMENTSOF(conn->paths); ++i) { + for (size_t i = 1; i < PTLS_ELEMENTSOF(conn->paths); ++i) { if (conn->paths[i] != NULL) - delete_path(conn, 0, i); + delete_path(conn, i, DELETE_PATH_MODE_FREE); } + delete_path(conn, 0, DELETE_PATH_MODE_FREE); /* `crytpo.tls` is disposed late, because logging relies on `ptls_skip_tracing` */ if (conn->crypto.async_in_progress) { @@ -2094,6 +2387,8 @@ int quicly_encode_transport_parameter_list(ptls_buffer_t *buf, const quicly_tran } if (params->disable_active_migration) PUSH_TP(buf, QUICLY_TRANSPORT_PARAMETER_ID_DISABLE_ACTIVE_MIGRATION, {}); + if (params->enable_multipath) + PUSH_TP(buf, QUICLY_TRANSPORT_PARAMETER_ID_ENABLE_MULTIPATH, {}); if (QUICLY_LOCAL_ACTIVE_CONNECTION_ID_LIMIT != QUICLY_DEFAULT_ACTIVE_CONNECTION_ID_LIMIT) PUSH_TP(buf, QUICLY_TRANSPORT_PARAMETER_ID_ACTIVE_CONNECTION_ID_LIMIT, { ptls_buffer_push_quicint(buf, QUICLY_LOCAL_ACTIVE_CONNECTION_ID_LIMIT); }); @@ -2298,6 +2593,7 @@ int quicly_decode_transport_parameter_list(quicly_transport_parameters_t *params params->active_connection_id_limit = v; }); DECODE_TP(QUICLY_TRANSPORT_PARAMETER_ID_DISABLE_ACTIVE_MIGRATION, { params->disable_active_migration = 1; }); + DECODE_TP(QUICLY_TRANSPORT_PARAMETER_ID_ENABLE_MULTIPATH, { params->enable_multipath = 1; }); DECODE_TP(QUICLY_TRANSPORT_PARAMETER_ID_MAX_DATAGRAM_FRAME_SIZE, { uint64_t v; if ((v = ptls_decode_quicint(&src, end)) == UINT64_MAX) { @@ -2356,6 +2652,13 @@ static int collect_transport_parameters(ptls_t *tls, struct st_ptls_handshake_pr return type == get_transport_parameters_extension_id(conn->super.version); } +static int can_negotiate_multipath(quicly_conn_t *conn) +{ + if (conn->super.local.long_header_src_cid.len == 0 || conn->super.remote.cid_set.cids[0].cid.len == 0) + return 0; + return 1; +} + static quicly_conn_t *create_connection(quicly_context_t *ctx, uint32_t protocol_version, const char *server_name, struct 
sockaddr *remote_addr, struct sockaddr *local_addr, ptls_iovec_t *remote_cid, const quicly_cid_plaintext_t *local_cid, ptls_handshake_properties_t *handshake_properties, @@ -2389,7 +2692,7 @@ static quicly_conn_t *create_connection(quicly_context_t *ctx, uint32_t protocol conn->created_at = conn->stash.now; conn->super.stats.handshake_confirmed_msec = UINT64_MAX; conn->crypto.tls = tls; - if (new_path(conn, 0, remote_addr, local_addr) != 0) { + if (new_path(conn, 0, remote_addr, local_addr, UINT64_MAX, initcwnd) != 0) { unlock_now(conn); ptls_free(tls); free(conn); @@ -2420,23 +2723,15 @@ static quicly_conn_t *create_connection(quicly_context_t *ctx, uint32_t protocol quicly_maxsender_init(&conn->ingress.max_data.sender, conn->super.ctx->transport_params.max_data); quicly_maxsender_init(&conn->ingress.max_streams.uni, conn->super.ctx->transport_params.max_streams_uni); quicly_maxsender_init(&conn->ingress.max_streams.bidi, conn->super.ctx->transport_params.max_streams_bidi); - quicly_loss_init(&conn->egress.loss, &conn->super.ctx->loss, - conn->super.ctx->loss.default_initial_rtt /* FIXME remember initial_rtt in session ticket */, - &conn->super.remote.transport_params.max_ack_delay, &conn->super.remote.transport_params.ack_delay_exponent); - conn->egress.next_pn_to_skip = - calc_next_pn_to_skip(conn->super.ctx->tls, 0, initcwnd, conn->super.ctx->initial_egress_max_udp_payload_size); conn->egress.max_udp_payload_size = conn->super.ctx->initial_egress_max_udp_payload_size; + conn->egress.connection_close.send_at = INT64_MAX; init_max_streams(&conn->egress.max_streams.uni); init_max_streams(&conn->egress.max_streams.bidi); conn->egress.ack_frequency.update_at = INT64_MAX; - conn->egress.send_ack_at = INT64_MAX; - conn->egress.send_probe_at = INT64_MAX; - conn->super.ctx->init_cc->cb(conn->super.ctx->init_cc, &conn->egress.cc, initcwnd, conn->stash.now); quicly_retire_cid_init(&conn->egress.retire_cid); quicly_linklist_init(&conn->egress.pending_streams.blocked.uni); 
quicly_linklist_init(&conn->egress.pending_streams.blocked.bidi); quicly_linklist_init(&conn->egress.pending_streams.control); - quicly_ratemeter_init(&conn->egress.ratemeter); if (handshake_properties != NULL) { assert(handshake_properties->additional_extensions == NULL); assert(handshake_properties->collect_extension == NULL); @@ -2508,6 +2803,11 @@ static int client_collected_extensions(ptls_t *tls, ptls_handshake_properties_t } } + if (conn->super.ctx->transport_params.enable_multipath && params.enable_multipath && !can_negotiate_multipath(conn)) { + ret = QUICLY_TRANSPORT_ERROR_TRANSPORT_PARAMETER; + goto Exit; + } + if (properties->client.early_data_acceptance == PTLS_EARLY_DATA_ACCEPTED) { #define ZERORTT_VALIDATE(x) \ if (params.x < conn->super.remote.transport_params.x) { \ @@ -2667,6 +2967,11 @@ static int server_collected_extensions(ptls_t *tls, ptls_handshake_properties_t ret = QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION; goto Exit; } + if (conn->super.ctx->transport_params.enable_multipath && conn->super.remote.transport_params.enable_multipath && + !can_negotiate_multipath(conn)) { + ret = QUICLY_TRANSPORT_ERROR_TRANSPORT_PARAMETER; + goto Exit; + } } /* setup ack frequency */ @@ -2727,9 +3032,12 @@ static int aead_decrypt_1rtt(void *ctx, uint64_t pn, quicly_decoded_packet_t *pa { quicly_conn_t *conn = ctx; struct st_quicly_application_space_t *space = conn->application; - size_t aead_index = (packet->octets.base[0] & QUICLY_KEY_PHASE_BIT) != 0; + uint8_t multipath_iv[PTLS_MAX_IV_SIZE]; + size_t aead_index = (packet->octets.base[0] & QUICLY_KEY_PHASE_BIT) != 0, multipath_iv_len; int ret; + multipath_iv_len = space->vtable->get_iv(space, multipath_iv, packet->cid.dest.plaintext.path_id); + /* prepare key, when not available (yet) */ if (space->cipher.ingress.aead[aead_index] == NULL) { Retry_1RTT : { @@ -2754,7 +3062,13 @@ static int aead_decrypt_1rtt(void *ctx, uint64_t pn, quicly_decoded_packet_t *pa /* decrypt */ ptls_aead_context_t *aead = 
space->cipher.ingress.aead[aead_index]; - if ((*ptlen = aead_decrypt_core(aead, pn, packet, aead_off)) == SIZE_MAX) { + if (multipath_iv_len != 0) + ptls_aead_xor_iv(aead, multipath_iv, multipath_iv_len); + *ptlen = aead_decrypt_core(aead, pn, packet, aead_off); + if (multipath_iv_len != 0) + ptls_aead_xor_iv(aead, multipath_iv, multipath_iv_len); + + if (*ptlen == SIZE_MAX) { /* retry with a new key, if possible */ if (space->cipher.ingress.key_phase.decrypted == space->cipher.ingress.key_phase.prepared && space->cipher.ingress.key_phase.decrypted % 2 != aead_index) { @@ -2851,11 +3165,12 @@ static int decrypt_packet(ptls_cipher_context_t *header_protection, return 0; } -static int do_on_ack_ack(quicly_conn_t *conn, const quicly_sent_packet_t *packet, uint64_t start, uint64_t start_length, - struct st_quicly_sent_ack_additional_t *additional, size_t additional_capacity) +static int do_on_ack_ack(quicly_conn_t *conn, quicly_sentmap_t *map, const quicly_sent_packet_t *packet, uint8_t path_id, + uint64_t start, uint64_t start_length, struct st_quicly_sent_ack_additional_t *additional, + size_t additional_capacity) { - /* find the pn space */ - struct st_quicly_pn_space_t *space; + struct st_quicly_pn_space_t *space = NULL; + switch (packet->ack_epoch) { case QUICLY_EPOCH_INITIAL: space = &conn->initial->super; @@ -2864,11 +3179,13 @@ static int do_on_ack_ack(quicly_conn_t *conn, const quicly_sent_packet_t *packet space = &conn->handshake->super; break; case QUICLY_EPOCH_1RTT: - space = &conn->application->super; + /* when multipath is used, space might have been retired already, in which case PACKET_IGNORED will be returned */ + if (conn->application->vtable->get_space(conn, &space, path_id) != 0) + return 0; break; default: assert(!"FIXME"); - return QUICLY_TRANSPORT_ERROR_INTERNAL; + break; } /* subtract given ACK ranges */ @@ -2895,25 +3212,25 @@ static int do_on_ack_ack(quicly_conn_t *conn, const quicly_sent_packet_t *packet return 0; } -static int 
on_ack_ack_ranges64(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_ack_ranges64(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - /* TODO log */ - return acked ? do_on_ack_ack(conn, packet, sent->data.ack.start, sent->data.ack.ranges64.start_length, - sent->data.ack.ranges64.additional, PTLS_ELEMENTSOF(sent->data.ack.ranges64.additional)) + return acked ? do_on_ack_ack(conn, map, packet, sent->data.ack.ranges64.path_id, sent->data.ack.start, + sent->data.ack.ranges64.start_length, sent->data.ack.ranges64.additional, + PTLS_ELEMENTSOF(sent->data.ack.ranges64.additional)) : 0; } -static int on_ack_ack_ranges8(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_ack_ranges8(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - /* TODO log */ - return acked ? do_on_ack_ack(conn, packet, sent->data.ack.start, sent->data.ack.ranges8.start_length, - sent->data.ack.ranges8.additional, PTLS_ELEMENTSOF(sent->data.ack.ranges8.additional)) + return acked ? 
do_on_ack_ack(conn, map, packet, sent->data.ack.ranges8.path_id, sent->data.ack.start, + sent->data.ack.ranges8.start_length, sent->data.ack.ranges8.additional, + PTLS_ELEMENTSOF(sent->data.ack.ranges8.additional)) : 0; } @@ -2957,9 +3274,9 @@ static int on_ack_stream_ack_cached(quicly_conn_t *conn) return ret; } -static int on_ack_stream(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_stream(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); int ret; if (acked) { @@ -3013,9 +3330,9 @@ static int on_ack_stream(quicly_sentmap_t *map, const quicly_sent_packet_t *pack return 0; } -static int on_ack_max_stream_data(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_max_stream_data(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); quicly_stream_t *stream; if ((stream = quicly_get_stream(conn, sent->data.stream.stream_id)) != NULL) { @@ -3031,10 +3348,9 @@ static int on_ack_max_stream_data(quicly_sentmap_t *map, const quicly_sent_packe return 0; } -static int on_ack_max_data(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_max_data(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - if (acked) { quicly_maxsender_acked(&conn->ingress.max_data.sender, &sent->data.max_data.args); } else { @@ -3044,9 +3360,9 @@ static int on_ack_max_data(quicly_sentmap_t *map, const quicly_sent_packet_t *pa return 0; 
} -static int on_ack_max_streams(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_max_streams(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); quicly_maxsender_t *maxsender = sent->data.max_streams.uni ? &conn->ingress.max_streams.uni : &conn->ingress.max_streams.bidi; assert(maxsender != NULL); /* we would only receive an ACK if we have sent the frame */ @@ -3064,9 +3380,9 @@ static void on_ack_stream_state_sender(quicly_sender_state_t *sender_state, int *sender_state = acked ? QUICLY_SENDER_STATE_ACKED : QUICLY_SENDER_STATE_SEND; } -static int on_ack_reset_stream(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_reset_stream(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); quicly_stream_t *stream; if ((stream = quicly_get_stream(conn, sent->data.stream_state_sender.stream_id)) != NULL) { @@ -3078,9 +3394,9 @@ static int on_ack_reset_stream(quicly_sentmap_t *map, const quicly_sent_packet_t return 0; } -static int on_ack_stop_sending(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_stop_sending(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); quicly_stream_t *stream; if ((stream = quicly_get_stream(conn, sent->data.stream_state_sender.stream_id)) != NULL) { @@ -3092,9 +3408,9 @@ static int on_ack_stop_sending(quicly_sentmap_t *map, const quicly_sent_packet_t return 0; } -static int 
on_ack_streams_blocked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_streams_blocked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); struct st_quicly_max_streams_t *m = sent->data.streams_blocked.uni ? &conn->egress.max_streams.uni : &conn->egress.max_streams.bidi; @@ -3107,10 +3423,9 @@ static int on_ack_streams_blocked(quicly_sentmap_t *map, const quicly_sent_packe return 0; } -static int on_ack_handshake_done(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_handshake_done(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - /* When lost, reschedule for transmission. When acked, suppress retransmission if scheduled. 
*/ if (acked) { conn->egress.pending_flows &= ~QUICLY_PENDING_FLOW_HANDSHAKE_DONE_BIT; @@ -3120,10 +3435,9 @@ static int on_ack_handshake_done(quicly_sentmap_t *map, const quicly_sent_packet return 0; } -static int on_ack_data_blocked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_data_blocked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - if (conn->egress.max_data.permitted == sent->data.data_blocked.offset) { if (acked) { conn->egress.data_blocked = QUICLY_SENDER_STATE_ACKED; @@ -3137,9 +3451,8 @@ static int on_ack_data_blocked(quicly_sentmap_t *map, const quicly_sent_packet_t } static int on_ack_stream_data_blocked_frame(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, - quicly_sent_t *sent) + quicly_sent_t *sent, quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); quicly_stream_t *stream; if ((stream = quicly_get_stream(conn, sent->data.stream_data_blocked.stream_id)) == NULL) @@ -3157,10 +3470,9 @@ static int on_ack_stream_data_blocked_frame(quicly_sentmap_t *map, const quicly_ return 0; } -static int on_ack_new_token(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_new_token(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); - if (sent->data.new_token.is_inflight) { --conn->egress.new_token.num_inflight; sent->data.new_token.is_inflight = 0; @@ -3178,9 +3490,9 @@ static int on_ack_new_token(quicly_sentmap_t *map, const quicly_sent_packet_t *p return 0; } -static int on_ack_new_connection_id(quicly_sentmap_t 
*map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_new_connection_id(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); uint64_t sequence = sent->data.new_connection_id.sequence; if (acked) { @@ -3193,9 +3505,9 @@ static int on_ack_new_connection_id(quicly_sentmap_t *map, const quicly_sent_pac return 0; } -static int on_ack_retire_connection_id(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_ack_retire_connection_id(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { - quicly_conn_t *conn = (quicly_conn_t *)((char *)map - offsetof(quicly_conn_t, egress.loss.sentmap)); uint64_t sequence = sent->data.retire_connection_id.sequence; if (!acked) @@ -3231,7 +3543,8 @@ static inline uint64_t calc_amplification_limit_allowance(quicly_conn_t *conn) * * minimum send requirements in |min_bytes_to_send|, and * * if sending is to be restricted to the minimum, indicated in |restrict_sending| */ -static size_t calc_send_window(quicly_conn_t *conn, size_t min_bytes_to_send, uint64_t amp_window, int restrict_sending) +static size_t calc_send_window(quicly_conn_t *conn, size_t path_index, size_t min_bytes_to_send, uint64_t amp_window, + int restrict_sending) { uint64_t window = 0; if (restrict_sending) { @@ -3239,8 +3552,8 @@ static size_t calc_send_window(quicly_conn_t *conn, size_t min_bytes_to_send, ui window = min_bytes_to_send; } else { /* Limit to cwnd */ - if (conn->egress.cc.cwnd > conn->egress.loss.sentmap.bytes_in_flight) - window = conn->egress.cc.cwnd - conn->egress.loss.sentmap.bytes_in_flight; + if (conn->paths[path_index]->egress->cc.cwnd > conn->paths[path_index]->egress->loss.sentmap.bytes_in_flight) + window = 
conn->paths[path_index]->egress->cc.cwnd - conn->paths[path_index]->egress->loss.sentmap.bytes_in_flight; /* Allow at least one packet on time-threshold loss detection */ window = window > min_bytes_to_send ? window : min_bytes_to_send; } @@ -3268,16 +3581,31 @@ static int is_point5rtt_with_no_handshake_data_to_send(quicly_conn_t *conn) int64_t quicly_get_first_timeout(quicly_conn_t *conn) { if (conn->super.state >= QUICLY_STATE_CLOSING) - return conn->egress.send_ack_at; + return conn->egress.connection_close.send_at; if (should_send_datagram_frame(conn)) return 0; uint64_t amp_window = calc_amplification_limit_allowance(conn); - if (calc_send_window(conn, 0, amp_window, 0) > 0) { - if (conn->egress.pending_flows != 0) - return 0; + int has_send_window = 0; + for (size_t path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) { + if (conn->paths[path_index] == NULL || conn->paths[path_index]->path_challenge.send_at != INT64_MAX) + continue; + if (calc_send_window(conn, path_index, 0, amp_window, 0) > 0) { + has_send_window = 1; + break; + } + } + if (has_send_window) { + if (conn->egress.pending_flows != 0) { + /* crypto streams (as indicated by lower 4 bits) can be sent whenever CWND is available; other flows need application + * packet number space */ + if (conn->application != NULL && conn->application->cipher.egress.key.header_protection != NULL) + return 0; + if ((conn->egress.pending_flows & 0xf) != 0) + return 0; + } if (quicly_linklist_is_linked(&conn->egress.pending_streams.control)) return 0; if (scheduler_can_send(conn)) @@ -3286,24 +3614,31 @@ int64_t quicly_get_first_timeout(quicly_conn_t *conn) /* if something can be sent, return the earliest timeout. Otherwise return the idle timeout. 
*/ int64_t at = conn->idle_timeout.at; - if (amp_window > 0) { - if (conn->egress.loss.alarm_at < at && !is_point5rtt_with_no_handshake_data_to_send(conn)) - at = conn->egress.loss.alarm_at; - if (conn->egress.send_ack_at < at) - at = conn->egress.send_ack_at; + for (size_t path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) { + struct st_quicly_conn_path_t *path = conn->paths[path_index]; + if (path == NULL) + continue; + if (amp_window > 0) { + if (path->egress->loss.alarm_at < at && !is_point5rtt_with_no_handshake_data_to_send(conn)) + at = path->egress->loss.alarm_at; + int64_t send_ack_at = calc_min_send_ack_at(conn); + if (send_ack_at < at) + at = send_ack_at; + } + if (at > path->path_challenge.send_at) + at = path->path_challenge.send_at; + if (path->path_response.send_) + at = 0; } - if (at > conn->egress.send_probe_at) - at = conn->egress.send_probe_at; return at; } uint64_t quicly_get_next_expected_packet_number(quicly_conn_t *conn) { - if (!conn->application) + if (conn->application == NULL || conn->application->vtable->multipath) return UINT64_MAX; - - return conn->application->super.next_expected_packet_number; + return conn->application->non_multipath.space->next_expected_packet_number; /* FIXME support multipath? 
*/ } static int setup_path_dcid(quicly_conn_t *conn, size_t path_index) @@ -3434,15 +3769,13 @@ struct st_quicly_send_context_t { * DCID to be used for the path */ quicly_cid_t *dcid; - /** - * if `conn->egress.send_probe_at` should be recalculated - */ - unsigned recalc_send_probe_at : 1; }; static int commit_send_packet(quicly_conn_t *conn, quicly_send_context_t *s, int coalesced) { + struct st_quicly_conn_path_t *path = conn->paths[s->path_index]; size_t datagram_size, packet_bytes_in_flight; + uint64_t encrypt_dcid = 0; assert(s->target.cipher->aead != NULL); @@ -3473,15 +3806,17 @@ static int commit_send_packet(quicly_conn_t *conn, quicly_send_context_t *s, int break; } } else { - if (conn->egress.packet_number >= conn->application->cipher.egress.key_update_pn.next) { + if (conn->super.stats.num_packets.sent >= conn->application->cipher.egress.key_update_at.next) { int ret; if ((ret = update_1rtt_egress_key(conn)) != 0) return ret; } if ((conn->application->cipher.egress.key_phase & 1) != 0) *s->target.first_byte_at |= QUICLY_KEY_PHASE_BIT; + if (conn->application->vtable->multipath) + encrypt_dcid = path->dcid; } - quicly_encode16(s->dst_payload_from - QUICLY_SEND_PN_SIZE, (uint16_t)conn->egress.packet_number); + quicly_encode16(s->dst_payload_from - QUICLY_SEND_PN_SIZE, (uint16_t)path->egress->packet_number); /* encrypt the packet */ s->dst += s->target.cipher->aead->algo->tag_size; @@ -3491,30 +3826,31 @@ static int commit_send_packet(quicly_conn_t *conn, quicly_send_context_t *s, int conn->super.ctx->crypto_engine->encrypt_packet( conn->super.ctx->crypto_engine, conn, s->target.cipher->header_protection, s->target.cipher->aead, ptls_iovec_init(s->payload_buf.datagram, datagram_size), s->target.first_byte_at - s->payload_buf.datagram, - s->dst_payload_from - s->payload_buf.datagram, conn->egress.packet_number, coalesced); + s->dst_payload_from - s->payload_buf.datagram, encrypt_dcid, path->egress->packet_number, coalesced); /* update CC, commit sentmap */ - 
int on_promoted_path = s->path_index == 0 && !conn->paths[0]->initial; + int on_promoted_path = (quicly_is_multipath(conn) || s->path_index == 0) && !conn->paths[s->path_index]->initial; if (s->target.ack_eliciting) { packet_bytes_in_flight = s->dst - s->target.first_byte_at; s->send_window -= packet_bytes_in_flight; } else { packet_bytes_in_flight = 0; } - if (quicly_sentmap_is_open(&conn->egress.loss.sentmap)) - quicly_sentmap_commit(&conn->egress.loss.sentmap, (uint16_t)packet_bytes_in_flight, on_promoted_path); + if (quicly_sentmap_is_open(&path->egress->loss.sentmap)) + quicly_sentmap_commit(&path->egress->loss.sentmap, (uint16_t)packet_bytes_in_flight, on_promoted_path, + (*s->target.first_byte_at & QUICLY_KEY_PHASE_BIT) != 0); - conn->egress.cc.type->cc_on_sent(&conn->egress.cc, &conn->egress.loss, (uint32_t)packet_bytes_in_flight, conn->stash.now); - QUICLY_PROBE(PACKET_SENT, conn, conn->stash.now, conn->egress.packet_number, s->dst - s->target.first_byte_at, + path->egress->cc.type->cc_on_sent(&path->egress->cc, &path->egress->loss, (uint32_t)packet_bytes_in_flight, conn->stash.now); + QUICLY_PROBE(PACKET_SENT, conn, conn->stash.now, path->egress->packet_number, s->dst - s->target.first_byte_at, get_epoch(*s->target.first_byte_at), !s->target.ack_eliciting); QUICLY_LOG_CONN(packet_sent, conn, { - PTLS_LOG_ELEMENT_UNSIGNED(pn, conn->egress.packet_number); + PTLS_LOG_ELEMENT_UNSIGNED(pn, path->egress->packet_number); PTLS_LOG_ELEMENT_UNSIGNED(len, s->dst - s->target.first_byte_at); PTLS_LOG_ELEMENT_UNSIGNED(packet_type, get_epoch(*s->target.first_byte_at)); PTLS_LOG_ELEMENT_BOOL(ack_only, !s->target.ack_eliciting); }); - ++conn->egress.packet_number; + ++path->egress->packet_number; ++conn->super.stats.num_packets.sent; if (on_promoted_path) ++conn->super.stats.num_packets.sent_promoted_paths; @@ -3529,18 +3865,18 @@ static int commit_send_packet(quicly_conn_t *conn, quicly_send_context_t *s, int /* insert PN gap if necessary, registering the PN to the ack 
queue so that we'd close the connection in the event of receiving * an ACK for that gap. */ - if (conn->egress.packet_number >= conn->egress.next_pn_to_skip && !QUICLY_PACKET_IS_LONG_HEADER(s->current.first_byte) && + if (path->egress->packet_number >= path->egress->next_pn_to_skip && !QUICLY_PACKET_IS_LONG_HEADER(s->current.first_byte) && conn->super.state < QUICLY_STATE_CLOSING) { int ret; - if ((ret = quicly_sentmap_prepare(&conn->egress.loss.sentmap, conn->egress.packet_number, conn->stash.now, + if ((ret = quicly_sentmap_prepare(&path->egress->loss.sentmap, path->egress->packet_number, conn->stash.now, QUICLY_EPOCH_1RTT)) != 0) return ret; - if (quicly_sentmap_allocate(&conn->egress.loss.sentmap, on_invalid_ack) == NULL) + if (quicly_sentmap_allocate(&path->egress->loss.sentmap, on_invalid_ack) == NULL) return PTLS_ERROR_NO_MEMORY; - quicly_sentmap_commit(&conn->egress.loss.sentmap, 0, 0); - ++conn->egress.packet_number; - conn->egress.next_pn_to_skip = calc_next_pn_to_skip(conn->super.ctx->tls, conn->egress.packet_number, conn->egress.cc.cwnd, - conn->egress.max_udp_payload_size); + quicly_sentmap_commit(&path->egress->loss.sentmap, 0, 0, 0); + ++path->egress->packet_number; + path->egress->next_pn_to_skip = calc_next_pn_to_skip(conn->super.ctx->tls, path->egress->packet_number, + path->egress->cc.cwnd, conn->egress.max_udp_payload_size); } return 0; @@ -3563,6 +3899,7 @@ enum allocate_frame_type { static int do_allocate_frame(quicly_conn_t *conn, quicly_send_context_t *s, size_t min_space, enum allocate_frame_type frame_type) { + struct st_quicly_conn_path_t *path = conn->paths[s->path_index]; int coalescible, ret; assert((s->current.first_byte & QUICLY_QUIC_BIT) != 0); @@ -3621,8 +3958,10 @@ static int do_allocate_frame(quicly_conn_t *conn, quicly_send_context_t *s, size } s->target.ack_eliciting = 0; - QUICLY_PROBE(PACKET_PREPARE, conn, conn->stash.now, s->current.first_byte, QUICLY_PROBE_HEXDUMP(s->dcid->cid, s->dcid->len)); + QUICLY_PROBE(PACKET_PREPARE, 
conn, conn->stash.now, s->path_index, s->current.first_byte, + QUICLY_PROBE_HEXDUMP(s->dcid->cid, s->dcid->len)); QUICLY_LOG_CONN(packet_prepare, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(path_index, s->path_index); PTLS_LOG_ELEMENT_UNSIGNED(first_octet, s->current.first_byte); PTLS_LOG_ELEMENT_HEXDUMP(dcid, s->dcid->cid, s->dcid->len); }); @@ -3662,14 +4001,15 @@ static int do_allocate_frame(quicly_conn_t *conn, quicly_send_context_t *s, size uint8_t ack_epoch = get_epoch(s->current.first_byte); if (ack_epoch == QUICLY_EPOCH_0RTT) ack_epoch = QUICLY_EPOCH_1RTT; - if ((ret = quicly_sentmap_prepare(&conn->egress.loss.sentmap, conn->egress.packet_number, conn->stash.now, ack_epoch)) != 0) + if ((ret = quicly_sentmap_prepare(&path->egress->loss.sentmap, path->egress->packet_number, conn->stash.now, ack_epoch)) != + 0) return ret; /* adjust ack-frequency */ - if (conn->stash.now >= conn->egress.ack_frequency.update_at) { + if (s->path_index == 0 && conn->stash.now >= conn->egress.ack_frequency.update_at) { assert(conn->super.remote.transport_params.min_ack_delay_usec != UINT64_MAX); - if (conn->egress.cc.num_loss_episodes >= QUICLY_FIRST_ACK_FREQUENCY_LOSS_EPISODE && conn->initial == NULL && + if (path->egress->cc.num_loss_episodes >= QUICLY_FIRST_ACK_FREQUENCY_LOSS_EPISODE && conn->initial == NULL && conn->handshake == NULL) { - uint32_t fraction_of_cwnd = (uint32_t)((uint64_t)conn->egress.cc.cwnd * conn->super.ctx->ack_frequency / 1024); + uint32_t fraction_of_cwnd = (uint32_t)((uint64_t)path->egress->cc.cwnd * conn->super.ctx->ack_frequency / 1024); if (fraction_of_cwnd >= conn->egress.max_udp_payload_size * 3) { uint32_t packet_tolerance = fraction_of_cwnd / conn->egress.max_udp_payload_size; if (packet_tolerance > QUICLY_MAX_PACKET_TOLERANCE) @@ -3686,7 +4026,7 @@ static int do_allocate_frame(quicly_conn_t *conn, quicly_send_context_t *s, size TargetReady: if (frame_type != ALLOCATE_FRAME_TYPE_NON_ACK_ELICITING) { s->target.ack_eliciting = 1; - 
conn->egress.last_retransmittable_sent_at = conn->stash.now; + path->egress->last_retransmittable_sent_at = conn->stash.now; } return 0; } @@ -3698,14 +4038,15 @@ static int allocate_ack_eliciting_frame(quicly_conn_t *conn, quicly_send_context if ((ret = do_allocate_frame(conn, s, min_space, ALLOCATE_FRAME_TYPE_ACK_ELICITING)) != 0) return ret; - if ((*sent = quicly_sentmap_allocate(&conn->egress.loss.sentmap, acked)) == NULL) + if ((*sent = quicly_sentmap_allocate(&conn->paths[s->path_index]->egress->loss.sentmap, acked)) == NULL) return PTLS_ERROR_NO_MEMORY; return ret; } -static int send_ack(quicly_conn_t *conn, struct st_quicly_pn_space_t *space, quicly_send_context_t *s) +static int send_ack(quicly_conn_t *conn, uint64_t cid, struct st_quicly_pn_space_t *space, quicly_send_context_t *s) { + struct st_quicly_conn_path_t *path = conn->paths[s->path_index]; uint64_t ack_delay; int ret; @@ -3722,10 +4063,11 @@ static int send_ack(quicly_conn_t *conn, struct st_quicly_pn_space_t *space, qui } Emit: /* emit an ACK frame */ - if ((ret = do_allocate_frame(conn, s, QUICLY_ACK_FRAME_CAPACITY, ALLOCATE_FRAME_TYPE_NON_ACK_ELICITING)) != 0) + if ((ret = do_allocate_frame(conn, s, cid == UINT64_MAX ? 
QUICLY_ACK_FRAME_CAPACITY : QUICLY_ACK_MP_FRAME_CAPACITY, + ALLOCATE_FRAME_TYPE_NON_ACK_ELICITING)) != 0) return ret; uint8_t *dst = s->dst; - dst = quicly_encode_ack_frame(dst, s->dst_end, &space->ack_queue, ack_delay); + dst = quicly_encode_ack_frame(dst, s->dst_end, cid, &space->ack_queue, space->ecn_counts, ack_delay); /* when there's no space, retry with a new MTU-sized packet */ if (dst == NULL) { @@ -3741,15 +4083,20 @@ static int send_ack(quicly_conn_t *conn, struct st_quicly_pn_space_t *space, qui goto Emit; } - ++conn->super.stats.num_frames_sent.ack; - QUICLY_PROBE(ACK_SEND, conn, conn->stash.now, space->ack_queue.ranges[space->ack_queue.num_ranges - 1].end - 1, ack_delay); + if (cid == UINT64_MAX) { + ++conn->super.stats.num_frames_sent.ack; + } else { + ++conn->super.stats.num_frames_sent.ack_mp; + } + QUICLY_PROBE(ACK_SEND, conn, conn->stash.now, cid, space->ack_queue.ranges[space->ack_queue.num_ranges - 1].end - 1, ack_delay); QUICLY_LOG_CONN(ack_send, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(dcid_sequence_number, cid); PTLS_LOG_ELEMENT_UNSIGNED(largest_acked, space->ack_queue.ranges[space->ack_queue.num_ranges - 1].end - 1); PTLS_LOG_ELEMENT_UNSIGNED(ack_delay, ack_delay); }); /* when there are no less than QUICLY_NUM_ACK_BLOCKS_TO_INDUCE_ACKACK (8) gaps, bundle PING once every 4 packets being sent */ - if (space->ack_queue.num_ranges >= QUICLY_NUM_ACK_BLOCKS_TO_INDUCE_ACKACK && conn->egress.packet_number % 4 == 0 && + if (space->ack_queue.num_ranges >= QUICLY_NUM_ACK_BLOCKS_TO_INDUCE_ACKACK && path->egress->packet_number % 4 == 0 && dst < s->dst_end) { *dst++ = QUICLY_FRAME_TYPE_PING; ++conn->super.stats.num_frames_sent.ping; @@ -3765,18 +4112,23 @@ static int send_ack(quicly_conn_t *conn, struct st_quicly_pn_space_t *space, qui quicly_sent_t *sent; struct st_quicly_sent_ack_additional_t *additional, *additional_end; /* allocate */ - if ((sent = quicly_sentmap_allocate(&conn->egress.loss.sentmap, on_ack_ack_ranges8)) == NULL) + if ((sent = 
quicly_sentmap_allocate(&path->egress->loss.sentmap, on_ack_ack_ranges8)) == NULL) return PTLS_ERROR_NO_MEMORY; + /* make certain path_id is storable in 8-bit */ + PTLS_BUILD_ASSERT((1u << 8 * sizeof(sent->data.ack.ranges8.path_id)) > QUICLY_MAX_PATH_ID); + assert(cid == UINT64_MAX || cid < (1u << 8 * sizeof(sent->data.ack.ranges8.path_id))); /* store the first range, as well as preparing references to the additional slots */ sent->data.ack.start = space->ack_queue.ranges[range_index].start; uint64_t length = space->ack_queue.ranges[range_index].end - space->ack_queue.ranges[range_index].start; if (length <= UINT8_MAX) { sent->data.ack.ranges8.start_length = length; + sent->data.ack.ranges8.path_id = cid; additional = sent->data.ack.ranges8.additional; additional_end = additional + PTLS_ELEMENTSOF(sent->data.ack.ranges8.additional); } else { sent->acked = on_ack_ack_ranges64; sent->data.ack.ranges64.start_length = length; + sent->data.ack.ranges64.path_id = cid; additional = sent->data.ack.ranges64.additional; additional_end = additional + PTLS_ELEMENTSOF(sent->data.ack.ranges64.additional); } @@ -4144,28 +4496,32 @@ int quicly_send_stream(quicly_stream_t *stream, quicly_send_context_t *s) return 0; } -static inline int init_acks_iter(quicly_conn_t *conn, quicly_sentmap_iter_t *iter) +static inline int init_acks_iter(quicly_conn_t *conn, size_t path_index, quicly_sentmap_iter_t *iter) { - return quicly_loss_init_sentmap_iter(&conn->egress.loss, iter, conn->stash.now, + return quicly_loss_init_sentmap_iter(&conn->paths[path_index]->egress->loss, iter, conn->stash.now, conn->super.remote.transport_params.max_ack_delay, - conn->super.state >= QUICLY_STATE_CLOSING); + conn->super.state >= QUICLY_STATE_CLOSING, conn); } int discard_sentmap_by_epoch(quicly_conn_t *conn, unsigned ack_epochs) { quicly_sentmap_iter_t iter; const quicly_sent_packet_t *sent; - int ret; - - if ((ret = init_acks_iter(conn, &iter)) != 0) - return ret; + int ret = 0; - while ((sent = 
quicly_sentmap_get(&iter))->packet_number != UINT64_MAX) { - if ((ack_epochs & (1u << sent->ack_epoch)) != 0) { - if ((ret = quicly_sentmap_update(&conn->egress.loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_EXPIRED)) != 0) - return ret; - } else { - quicly_sentmap_skip(&iter); + for (size_t path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) { + if (conn->paths[path_index] == NULL) + continue; + if ((ret = init_acks_iter(conn, path_index, &iter)) != 0) + return ret; + while ((sent = quicly_sentmap_get(&iter))->packet_number != UINT64_MAX) { + if ((ack_epochs & (1u << sent->ack_epoch)) != 0) { + if ((ret = quicly_sentmap_update(&conn->paths[path_index]->egress->loss.sentmap, &iter, + QUICLY_SENTMAP_EVENT_EXPIRED, conn)) != 0) + return ret; + } else { + quicly_sentmap_skip(&iter); + } } } @@ -4175,19 +4531,20 @@ int discard_sentmap_by_epoch(quicly_conn_t *conn, unsigned ack_epochs) /** * Mark frames of given epoch as pending, until `*bytes_to_mark` becomes zero. */ -static int mark_frames_on_pto(quicly_conn_t *conn, uint8_t ack_epoch, size_t *bytes_to_mark) +static int mark_frames_on_pto(quicly_conn_t *conn, size_t path_index, uint8_t ack_epoch, size_t *bytes_to_mark) { quicly_sentmap_iter_t iter; const quicly_sent_packet_t *sent; int ret; - if ((ret = init_acks_iter(conn, &iter)) != 0) + if ((ret = init_acks_iter(conn, path_index, &iter)) != 0) return ret; while ((sent = quicly_sentmap_get(&iter))->packet_number != UINT64_MAX) { if (sent->ack_epoch == ack_epoch && sent->frames_in_flight) { *bytes_to_mark = *bytes_to_mark > sent->cc_bytes_in_flight ? 
*bytes_to_mark - sent->cc_bytes_in_flight : 0; - if ((ret = quicly_sentmap_update(&conn->egress.loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_PTO)) != 0) + if ((ret = quicly_sentmap_update(&conn->paths[path_index]->egress->loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_PTO, + conn)) != 0) return ret; assert(!sent->frames_in_flight); if (*bytes_to_mark == 0) @@ -4200,31 +4557,39 @@ static int mark_frames_on_pto(quicly_conn_t *conn, uint8_t ack_epoch, size_t *by return 0; } -static void on_loss_detected(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold) +static void notify_congestion_to_cc(quicly_conn_t *conn, struct st_quicly_path_egress_t *space, uint16_t lost_bytes, + uint64_t lost_pn) { - quicly_conn_t *conn = (void *)((char *)loss - offsetof(quicly_conn_t, egress.loss)); + space->cc.type->cc_on_lost(&space->cc, &space->loss, lost_bytes, lost_pn, space->packet_number, conn->stash.now, + conn->egress.max_udp_payload_size); + QUICLY_PROBE(CC_CONGESTION, conn, conn->stash.now, lost_pn + 1, space->loss.sentmap.bytes_in_flight, + space->cc.cwnd); + QUICLY_LOG_CONN(cc_congestion, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(max_lost_pn, lost_pn + 1); + PTLS_LOG_ELEMENT_UNSIGNED(flight, space->loss.sentmap.bytes_in_flight); + PTLS_LOG_ELEMENT_UNSIGNED(cwnd, space->cc.cwnd); + }); +} + + +static void on_loss_detected(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold, + quicly_conn_t *conn) +{ + struct st_quicly_path_egress_t *space = (void *)((char *)loss - offsetof(struct st_quicly_path_egress_t, loss)); + + assert(lost_packet->cc_bytes_in_flight != 0); ++conn->super.stats.num_packets.lost; if (is_time_threshold) ++conn->super.stats.num_packets.lost_time_threshold; conn->super.stats.num_bytes.lost += lost_packet->cc_bytes_in_flight; - conn->egress.cc.type->cc_on_lost(&conn->egress.cc, &conn->egress.loss, lost_packet->cc_bytes_in_flight, - lost_packet->packet_number, conn->egress.packet_number, conn->stash.now, - 
conn->egress.max_udp_payload_size); QUICLY_PROBE(PACKET_LOST, conn, conn->stash.now, lost_packet->packet_number, lost_packet->ack_epoch); QUICLY_LOG_CONN(packet_lost, conn, { PTLS_LOG_ELEMENT_UNSIGNED(pn, lost_packet->packet_number); PTLS_LOG_ELEMENT_UNSIGNED(packet_type, lost_packet->ack_epoch); }); - QUICLY_PROBE(CC_CONGESTION, conn, conn->stash.now, lost_packet->packet_number + 1, conn->egress.loss.sentmap.bytes_in_flight, - conn->egress.cc.cwnd); - QUICLY_LOG_CONN(cc_congestion, conn, { - PTLS_LOG_ELEMENT_UNSIGNED(max_lost_pn, lost_packet->packet_number + 1); - PTLS_LOG_ELEMENT_UNSIGNED(flight, conn->egress.loss.sentmap.bytes_in_flight); - PTLS_LOG_ELEMENT_UNSIGNED(cwnd, conn->egress.cc.cwnd); - }); - QUICLY_PROBE(QUICTRACE_CC_LOST, conn, conn->stash.now, &conn->egress.loss.rtt, conn->egress.cc.cwnd, - conn->egress.loss.sentmap.bytes_in_flight); + notify_congestion_to_cc(conn, space, lost_packet->cc_bytes_in_flight, lost_packet->packet_number); + QUICLY_PROBE(QUICTRACE_CC_LOST, conn, conn->stash.now, &space->loss.rtt, space->cc.cwnd, space->loss.sentmap.bytes_in_flight); } static int send_max_streams(quicly_conn_t *conn, int uni, quicly_send_context_t *s) @@ -4533,39 +4898,39 @@ size_t quicly_send_retry(quicly_context_t *ctx, ptls_aead_context_t *token_encry return ret == 0 ? 
buf.off : SIZE_MAX; } -static struct st_quicly_pn_space_t *setup_send_space(quicly_conn_t *conn, size_t epoch, quicly_send_context_t *s) +static int setup_send_context(quicly_conn_t *conn, size_t epoch, quicly_send_context_t *s, struct st_quicly_pn_space_t **space) { - struct st_quicly_pn_space_t *space = NULL; - switch (epoch) { case QUICLY_EPOCH_INITIAL: if (conn->initial == NULL || (s->current.cipher = &conn->initial->cipher.egress)->aead == NULL) - return NULL; + return 0; s->current.first_byte = QUICLY_PACKET_TYPE_INITIAL; - space = &conn->initial->super; + if (space != NULL) + *space = &conn->initial->super; break; case QUICLY_EPOCH_HANDSHAKE: if (conn->handshake == NULL || (s->current.cipher = &conn->handshake->cipher.egress)->aead == NULL) - return NULL; + return 0; s->current.first_byte = QUICLY_PACKET_TYPE_HANDSHAKE; - space = &conn->handshake->super; + if (space != NULL) + *space = &conn->handshake->super; break; case QUICLY_EPOCH_0RTT: case QUICLY_EPOCH_1RTT: if (conn->application == NULL || conn->application->cipher.egress.key.header_protection == NULL) - return NULL; + return 0; if ((epoch == QUICLY_EPOCH_0RTT) == conn->application->one_rtt_writable) - return NULL; + return 0; s->current.cipher = &conn->application->cipher.egress.key; s->current.first_byte = epoch == QUICLY_EPOCH_0RTT ? 
QUICLY_PACKET_TYPE_0RTT : QUICLY_QUIC_BIT; - space = &conn->application->super; + assert(space == NULL); break; default: assert(!"logic flaw"); break; } - return space; + return 1; } static int send_handshake_flow(quicly_conn_t *conn, size_t epoch, quicly_send_context_t *s, int ack_only, int send_probe) @@ -4574,13 +4939,15 @@ static int send_handshake_flow(quicly_conn_t *conn, size_t epoch, quicly_send_co int ret = 0; /* setup send epoch, or return if it's impossible to send in this epoch */ - if ((space = setup_send_space(conn, epoch, s)) == NULL) + if (!setup_send_context(conn, epoch, s, &space)) return 0; /* send ACK */ - if (space != NULL && (space->unacked_count != 0 || send_probe)) - if ((ret = send_ack(conn, space, s)) != 0) + if (space != NULL && (space->unacked_count != 0 || send_probe)) { + if ((ret = send_ack(conn, UINT64_MAX, space, s)) != 0) goto Exit; + space->send_ack_at = INT64_MAX; + } if (!ack_only) { /* send data */ @@ -4598,7 +4965,7 @@ static int send_handshake_flow(quicly_conn_t *conn, size_t epoch, quicly_send_co if ((ret = do_allocate_frame(conn, s, 1, ALLOCATE_FRAME_TYPE_ACK_ELICITING)) != 0) goto Exit; *s->dst++ = QUICLY_FRAME_TYPE_PING; - conn->egress.last_retransmittable_sent_at = conn->stash.now; + conn->paths[s->path_index]->egress->last_retransmittable_sent_at = conn->stash.now; ++conn->super.stats.num_frames_sent.ping; QUICLY_PROBE(PING_SEND, conn, conn->stash.now); QUICLY_LOG_CONN(ping_send, conn, {}); @@ -4616,7 +4983,7 @@ static int send_connection_close(quicly_conn_t *conn, size_t epoch, quicly_send_ int ret; /* setup send epoch, or return if it's impossible to send in this epoch */ - if (setup_send_space(conn, epoch, s) == NULL) + if (!setup_send_context(conn, epoch, s, NULL)) return 0; /* determine the payload, masking the application error when sending the frame using an unauthenticated epoch */ @@ -4902,9 +5269,12 @@ static int send_other_control_frames(quicly_conn_t *conn, quicly_send_context_t static int 
do_send(quicly_conn_t *conn, quicly_send_context_t *s) { + struct st_quicly_conn_path_t *path = conn->paths[s->path_index]; int restrict_sending = 0, ack_only = 0, ret; size_t min_packets_to_send = 0; + s->first_packet_number = path->egress->packet_number; + /* handle timeouts */ if (conn->idle_timeout.at <= conn->stash.now) { QUICLY_PROBE(IDLE_TIMEOUT, conn, conn->stash.now); @@ -4913,12 +5283,12 @@ static int do_send(quicly_conn_t *conn, quicly_send_context_t *s) } /* handle handshake timeouts */ if ((conn->initial != NULL || conn->handshake != NULL) && - conn->created_at + (uint64_t)conn->super.ctx->handshake_timeout_rtt_multiplier * conn->egress.loss.rtt.smoothed <= + conn->created_at + (uint64_t)conn->super.ctx->handshake_timeout_rtt_multiplier * path->egress->loss.rtt.smoothed <= conn->stash.now) { - QUICLY_PROBE(HANDSHAKE_TIMEOUT, conn, conn->stash.now, conn->stash.now - conn->created_at, conn->egress.loss.rtt.smoothed); + QUICLY_PROBE(HANDSHAKE_TIMEOUT, conn, conn->stash.now, conn->stash.now - conn->created_at, path->egress->loss.rtt.smoothed); QUICLY_LOG_CONN(handshake_timeout, conn, { PTLS_LOG_ELEMENT_SIGNED(elapsed, conn->stash.now - conn->created_at); - PTLS_LOG_ELEMENT_UNSIGNED(rtt_smoothed, conn->egress.loss.rtt.smoothed); + PTLS_LOG_ELEMENT_UNSIGNED(rtt_smoothed, path->egress->loss.rtt.smoothed); }); conn->super.stats.num_handshake_timeouts++; goto CloseNow; @@ -4930,10 +5300,10 @@ static int do_send(quicly_conn_t *conn, quicly_send_context_t *s) conn->super.stats.num_initial_handshake_exceeded++; goto CloseNow; } - if (conn->egress.loss.alarm_at <= conn->stash.now) { - if ((ret = quicly_loss_on_alarm(&conn->egress.loss, conn->stash.now, conn->super.remote.transport_params.max_ack_delay, + if (path->egress->loss.alarm_at <= conn->stash.now) { + if ((ret = quicly_loss_on_alarm(&path->egress->loss, conn->stash.now, conn->super.remote.transport_params.max_ack_delay, conn->initial == NULL && conn->handshake == NULL, &min_packets_to_send, 
&restrict_sending, - on_loss_detected)) != 0) + conn, on_loss_detected)) != 0) goto Exit; assert(min_packets_to_send > 0); assert(min_packets_to_send <= s->max_datagrams); @@ -4942,30 +5312,38 @@ static int do_send(quicly_conn_t *conn, quicly_send_context_t *s) /* PTO: when handshake is in progress, send from the very first unacknowledged byte so as to maximize the chance of * making progress. When handshake is complete, transmit new data if any, else retransmit the oldest unacknowledged data * that is considered inflight. */ - QUICLY_PROBE(PTO, conn, conn->stash.now, conn->egress.loss.sentmap.bytes_in_flight, conn->egress.cc.cwnd, - conn->egress.loss.pto_count); + QUICLY_PROBE(PTO, conn, conn->stash.now, path->egress->loss.sentmap.bytes_in_flight, path->egress->cc.cwnd, + path->egress->loss.pto_count); QUICLY_LOG_CONN(pto, conn, { - PTLS_LOG_ELEMENT_SIGNED(inflight, conn->egress.loss.sentmap.bytes_in_flight); - PTLS_LOG_ELEMENT_UNSIGNED(cwnd, conn->egress.cc.cwnd); - PTLS_LOG_ELEMENT_SIGNED(pto_count, conn->egress.loss.pto_count); + PTLS_LOG_ELEMENT_SIGNED(inflight, path->egress->loss.sentmap.bytes_in_flight); + PTLS_LOG_ELEMENT_UNSIGNED(cwnd, path->egress->cc.cwnd); + PTLS_LOG_ELEMENT_SIGNED(pto_count, path->egress->loss.pto_count); }); ++conn->super.stats.num_ptos; size_t bytes_to_mark = min_packets_to_send * conn->egress.max_udp_payload_size; - if (conn->initial != NULL && (ret = mark_frames_on_pto(conn, QUICLY_EPOCH_INITIAL, &bytes_to_mark)) != 0) - goto Exit; - if (bytes_to_mark != 0 && conn->handshake != NULL && - (ret = mark_frames_on_pto(conn, QUICLY_EPOCH_HANDSHAKE, &bytes_to_mark)) != 0) - goto Exit; + if (s->path_index == 0) { + if (conn->initial != NULL && (ret = mark_frames_on_pto(conn, 0, QUICLY_EPOCH_INITIAL, &bytes_to_mark)) != 0) + goto Exit; + if (bytes_to_mark != 0 && conn->handshake != NULL && + (ret = mark_frames_on_pto(conn, 0, QUICLY_EPOCH_HANDSHAKE, &bytes_to_mark)) != 0) + goto Exit; + } /* Mark already sent 1-RTT data for PTO only if 
there's no new data, i.e., when scheduler_can_send() return false. */ if (bytes_to_mark != 0 && !scheduler_can_send(conn) && - (ret = mark_frames_on_pto(conn, QUICLY_EPOCH_1RTT, &bytes_to_mark)) != 0) + (ret = mark_frames_on_pto(conn, s->path_index, QUICLY_EPOCH_1RTT, &bytes_to_mark)) != 0) goto Exit; } } + /* disable ECN if zero packets where acked in the first 3 PTO of the connection during which all sent packets are ECT(0) */ + if (path->ecn.state == QUICLY_ECN_PROBING && path->created_at + path->egress->loss.rtt.smoothed * 3 < conn->stash.now) { + update_ecn_state(conn, s->path_index, QUICLY_ECN_OFF); + /* TODO reset CC? */ + } + s->dcid = get_dcid(conn, s->path_index); - s->send_window = calc_send_window(conn, min_packets_to_send * conn->egress.max_udp_payload_size, + s->send_window = calc_send_window(conn, s->path_index, min_packets_to_send * conn->egress.max_udp_payload_size, calc_amplification_limit_allowance(conn), restrict_sending); if (s->send_window == 0) ack_only = 1; @@ -4982,33 +5360,36 @@ static int do_send(quicly_conn_t *conn, quicly_send_context_t *s) /* setup 0-RTT or 1-RTT send context (as the availability of the two epochs are mutually exclusive, we can try 1-RTT first as an * optimization), then send application data if that succeeds */ - if (setup_send_space(conn, QUICLY_EPOCH_1RTT, s) != NULL || setup_send_space(conn, QUICLY_EPOCH_0RTT, s) != NULL) { - { /* path_challenge / response */ - struct st_quicly_conn_path_t *path = conn->paths[s->path_index]; - assert(path != NULL); - if (path->path_challenge.send_at <= conn->stash.now) { - /* emit path challenge frame, doing exponential back off using PTO(initial_rtt) */ - if ((ret = send_path_challenge(conn, s, 0, path->path_challenge.data)) != 0) - goto Exit; - path->path_challenge.num_sent += 1; - path->path_challenge.send_at = - conn->stash.now + ((3 * conn->super.ctx->loss.default_initial_rtt) << (path->path_challenge.num_sent - 1)); - s->recalc_send_probe_at = 1; - } - if 
(path->path_response.send_) { - if ((ret = send_path_challenge(conn, s, 1, path->path_response.data)) != 0) - goto Exit; - path->path_response.send_ = 0; - s->recalc_send_probe_at = 1; - } + if (setup_send_context(conn, QUICLY_EPOCH_1RTT, s, NULL) || setup_send_context(conn, QUICLY_EPOCH_0RTT, s, NULL)) { + /* path_challenge / response */ + if (path->path_challenge.send_at <= conn->stash.now) { + /* emit path challenge frame, doing exponential back off using PTO(initial_rtt) */ + if ((ret = send_path_challenge(conn, s, 0, path->path_challenge.data)) != 0) + goto Exit; + path->path_challenge.num_sent += 1; + path->path_challenge.send_at = + conn->stash.now + ((3 * conn->super.ctx->loss.default_initial_rtt) << (path->path_challenge.num_sent - 1)); } - /* non probing frames are sent only on path zero */ - if (s->path_index == 0) { - /* acks */ - if (conn->application->one_rtt_writable && conn->egress.send_ack_at <= conn->stash.now && - conn->application->super.unacked_count != 0) { - if ((ret = send_ack(conn, &conn->application->super, s)) != 0) - goto Exit; + if (path->path_response.send_) { + if ((ret = send_path_challenge(conn, s, 1, path->path_response.data)) != 0) + goto Exit; + path->path_response.send_ = 0; + } + /* emit non-probing frames; unless multipath is used, this is done only on the active path which is one and only */ + if (s->path_index == 0 || quicly_is_multipath(conn)) { + /* acks (in case of multipath the paths on which we send acks should be stable; we use path 0 all the time) */ + if (conn->application->one_rtt_writable && s->path_index == 0) { + struct st_quicly_pn_space_t *space; + uint64_t cid; + ssize_t i = -1; + while ((i = conn->application->vtable->foreach_space(conn, &space, &cid, i)) != -1) { + if (space->send_ack_at <= conn->stash.now) { + assert(space->unacked_count != 0); + if ((ret = send_ack(conn, cid, space, s)) != 0) + goto Exit; + space->send_ack_at = INT64_MAX; + } + } } /* DATAGRAM frame. 
Notes regarding current implementation: * * Not limited by CC, nor the bytes counted by CC. @@ -5097,18 +5478,16 @@ static int do_send(quicly_conn_t *conn, quicly_send_context_t *s) } if (ret == 0) { /* update timers, start / stop delivery rate estimator */ - if (conn->application == NULL || conn->application->super.unacked_count == 0) - conn->egress.send_ack_at = INT64_MAX; /* we have sent ACKs for every epoch (or before address validation) */ int can_send_stream_data = scheduler_can_send(conn); - update_send_alarm(conn, can_send_stream_data, 1); + update_send_alarm(conn, s->path_index, can_send_stream_data, 1); if (can_send_stream_data && - (s->num_datagrams == s->max_datagrams || conn->egress.loss.sentmap.bytes_in_flight >= conn->egress.cc.cwnd)) { + (s->num_datagrams == s->max_datagrams || path->egress->loss.sentmap.bytes_in_flight >= path->egress->cc.cwnd)) { /* as the flow is CWND-limited, start delivery rate estimator */ - quicly_ratemeter_in_cwnd_limited(&conn->egress.ratemeter, s->first_packet_number); + quicly_ratemeter_in_cwnd_limited(&path->egress->ratemeter, s->first_packet_number); } else { - quicly_ratemeter_not_cwnd_limited(&conn->egress.ratemeter, conn->egress.packet_number); + quicly_ratemeter_not_cwnd_limited(&path->egress->ratemeter, path->egress->packet_number); } - conn->paths[s->path_index]->num_packets.sent += 1; + path->num_packets.sent += 1; if (s->num_datagrams != 0) update_idle_timeout(conn, 0); } @@ -5136,7 +5515,8 @@ void quicly_send_datagram_frames(quicly_conn_t *conn, ptls_iovec_t *datagrams, s int quicly_set_cc(quicly_conn_t *conn, quicly_cc_type_t *cc) { - return cc->cc_switch(&conn->egress.cc); + /* FIXME retain provided `cc`, set CC of all paths (incl. 
those instantiated in future) to the retained value */ + return cc->cc_switch(&conn->paths[0]->egress->cc); } int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *src, struct iovec *datagrams, size_t *num_datagrams, @@ -5145,8 +5525,7 @@ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *s quicly_send_context_t s = {.current = {.first_byte = -1}, .datagrams = datagrams, .max_datagrams = *num_datagrams, - .payload_buf = {.datagram = buf, .end = (uint8_t *)buf + bufsize}, - .first_packet_number = conn->egress.packet_number}; + .payload_buf = {.datagram = buf, .end = (uint8_t *)buf + bufsize}}; int ret; lock_now(conn, 0); @@ -5164,7 +5543,7 @@ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *s assert(success); } - if ((QUICLY_SEND_ENABLED() || ptls_log.is_active) && !ptls_skip_tracing(conn->crypto.tls)) { + if ((QUICLY_PROBE_ENABLED(SEND) || ptls_log.is_active) && !ptls_skip_tracing(conn->crypto.tls)) { const quicly_cid_t *dcid = get_dcid(conn, 0); QUICLY_PROBE(SEND, conn, conn->stash.now, conn->super.state, QUICLY_PROBE_HEXDUMP(dcid->cid, dcid->len)); QUICLY_LOG_CONN(send, conn, { @@ -5175,7 +5554,7 @@ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *s if (conn->super.state >= QUICLY_STATE_CLOSING) { quicly_sentmap_iter_t iter; - if ((ret = init_acks_iter(conn, &iter)) != 0) + if ((ret = init_acks_iter(conn, 0, &iter)) != 0) goto Exit; /* check if the connection can be closed now (after 3 pto) */ if (conn->super.state == QUICLY_STATE_DRAINING || @@ -5186,7 +5565,7 @@ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *s goto Exit; } } - if (conn->super.state == QUICLY_STATE_CLOSING && conn->egress.send_ack_at <= conn->stash.now) { + if (conn->super.state == QUICLY_STATE_CLOSING && conn->egress.connection_close.send_at <= conn->stash.now) { /* destroy all streams; doing so is delayed until the emission of CONNECTION_CLOSE frame 
to allow quicly_close to be * called from a stream handler */ destroy_all_streams(conn, 0, 0); @@ -5200,61 +5579,74 @@ int quicly_send(quicly_conn_t *conn, quicly_address_t *dest, quicly_address_t *s goto Exit; } /* wait at least 1ms */ - if ((conn->egress.send_ack_at = quicly_sentmap_get(&iter)->sent_at + get_sentmap_expiration_time(conn)) <= conn->stash.now) - conn->egress.send_ack_at = conn->stash.now + 1; + if ((conn->egress.connection_close.send_at = quicly_sentmap_get(&iter)->sent_at + get_sentmap_expiration_time(conn, 0)) <= + conn->stash.now) + conn->egress.connection_close.send_at = conn->stash.now + 1; ret = 0; goto Exit; } - /* try emitting one probe packet on one of the backup paths, or ... (note: API of `quicly_send` allows us to send packets on no - * more than one path at a time) */ - if (conn->egress.send_probe_at <= conn->stash.now) { - for (s.path_index = 1; s.path_index < PTLS_ELEMENTSOF(conn->paths); ++s.path_index) { - if (conn->paths[s.path_index] == NULL || conn->stash.now < conn->paths[s.path_index]->path_challenge.send_at) - continue; + /* emit packets */ + for (s.path_index = 0; s.path_index < PTLS_ELEMENTSOF(conn->paths); ++s.path_index) { + if (conn->paths[s.path_index] == NULL) { + assert(s.path_index != 0); + continue; + } + if (conn->paths[s.path_index]->path_challenge.send_at <= conn->stash.now && s.path_index != 0) { if (conn->paths[s.path_index]->path_challenge.num_sent > conn->super.ctx->max_probe_packets) { - delete_path(conn, 0, s.path_index); - s.recalc_send_probe_at = 1; + assert(s.path_index != 0); + if ((ret = delete_path(conn, s.path_index, DELETE_PATH_MODE_DELETE)) != 0) + goto Exit; continue; } - /* determine DCID to be used, if not yet been done; upon failure, this path (being secondary) is discarded */ - if (conn->paths[s.path_index]->dcid == UINT64_MAX && !setup_path_dcid(conn, s.path_index)) { - delete_path(conn, 0, s.path_index); - s.recalc_send_probe_at = 1; - conn->super.stats.num_paths.closed_no_dcid += 1; + } 
else if (!conn->paths[s.path_index]->path_response.send_) { + /* We can skip backup paths if neither PATH_CHALLENGE nor PATH_RESPONSE is to be sent. If this is an active path or if + * multipath is used, we have to run the per-path loss recovery / CC. */ + if (!(s.path_index == 0 || quicly_is_multipath(conn))) continue; - } - if ((ret = do_send(conn, &s)) != 0) + } + /* determine DCID to be used, if not yet been done; upon failure, this path (being secondary) is discarded */ + if (conn->paths[s.path_index]->dcid == UINT64_MAX && !setup_path_dcid(conn, s.path_index)) { + assert(s.path_index != 0); + if ((ret = delete_path(conn, s.path_index, DELETE_PATH_MODE_DELETE)) != 0) goto Exit; - if (s.num_datagrams != 0) - break; + conn->super.stats.num_paths.closed_no_dcid += 1; + continue; } - } - /* otherwise, emit non-probing packets */ - if (s.num_datagrams == 0) { - s.path_index = 0; if ((ret = do_send(conn, &s)) != 0) goto Exit; - } else { + /* API of `quicly_send` allows us to build packets for only one set of 4-tuple at once, therefore return what we have built. + * Successive calls to `quicly_send` will generate packets for all the paths. */ + if (s.num_datagrams != 0) + break; + } + /* if quicly_send was called for all the paths but nothing could be sent (e.g., when PTO fires but there's amplification + * limit), change the result to success and let the consistency check run for the active path */ + if (s.path_index == PTLS_ELEMENTSOF(conn->paths)) { ret = 0; + s.path_index = 0; } - assert_consistency(conn, 1); + assert_consistency(conn, s.path_index, 1); Exit: if (s.path_index == 0) clear_datagram_frame_payloads(conn); - if (s.recalc_send_probe_at) - recalc_send_probe_at(conn); if (s.num_datagrams != 0) { *dest = conn->paths[s.path_index]->address.remote; *src = conn->paths[s.path_index]->address.local; + conn->egress.send_ecn_bits = conn->paths[s.path_index]->ecn.state == QUICLY_ECN_OFF ? 
0 : 2; /* NON-ECT or ECT(0) */ } *num_datagrams = s.num_datagrams; unlock_now(conn); return ret; } +uint8_t quicly_send_get_ecn_bits(quicly_conn_t *conn) +{ + return conn->egress.send_ecn_bits; +} + size_t quicly_send_close_invalid_token(quicly_context_t *ctx, uint32_t protocol_version, ptls_iovec_t dest_cid, ptls_iovec_t src_cid, const char *err_desc, void *datagram) { @@ -5298,7 +5690,7 @@ size_t quicly_send_close_invalid_token(quicly_context_t *ctx, uint32_t protocol_ /* encrypt packet */ quicly_default_crypto_engine.encrypt_packet(&quicly_default_crypto_engine, NULL, egress.header_protection, egress.aead, ptls_iovec_init(datagram, datagram_len), 0, payload_from - (uint8_t *)datagram, 0, - 0); + 0, 0); dispose_cipher(&egress); return datagram_len; @@ -5327,7 +5719,8 @@ int quicly_send_resumption_token(quicly_conn_t *conn) return 0; } -static int on_end_closing(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_end_closing(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + quicly_conn_t *conn) { /* we stop accepting frames by the time this ack callback is being registered */ assert(!acked); @@ -5343,23 +5736,23 @@ static int enter_close(quicly_conn_t *conn, int local_is_initiating, int wait_dr /* release all inflight info, register a close timeout */ if ((ret = discard_sentmap_by_epoch(conn, ~0u)) != 0) return ret; - if ((ret = quicly_sentmap_prepare(&conn->egress.loss.sentmap, conn->egress.packet_number, conn->stash.now, + if ((ret = quicly_sentmap_prepare(&conn->paths[0]->egress->loss.sentmap, conn->paths[0]->egress->packet_number, conn->stash.now, QUICLY_EPOCH_INITIAL)) != 0) return ret; - if (quicly_sentmap_allocate(&conn->egress.loss.sentmap, on_end_closing) == NULL) + if (quicly_sentmap_allocate(&conn->paths[0]->egress->loss.sentmap, on_end_closing) == NULL) return PTLS_ERROR_NO_MEMORY; - quicly_sentmap_commit(&conn->egress.loss.sentmap, 0, 0); - 
++conn->egress.packet_number; + quicly_sentmap_commit(&conn->paths[0]->egress->loss.sentmap, 0, 0, 0); + ++conn->paths[0]->egress->packet_number; if (local_is_initiating) { conn->super.state = QUICLY_STATE_CLOSING; - conn->egress.send_ack_at = 0; + conn->egress.connection_close.send_at = 0; } else { conn->super.state = QUICLY_STATE_DRAINING; - conn->egress.send_ack_at = wait_draining ? conn->stash.now + get_sentmap_expiration_time(conn) : 0; + conn->egress.connection_close.send_at = wait_draining ? conn->stash.now + get_sentmap_expiration_time(conn, 0) : 0; } - setup_next_send(conn); + setup_next_send(conn, 0); return 0; } @@ -5528,37 +5921,50 @@ static int handle_ack_frame(quicly_conn_t *conn, struct st_quicly_handle_payload uint64_t pn; int64_t sent_at; } largest_newly_acked = {UINT64_MAX, INT64_MAX}; - size_t bytes_acked = 0; + size_t bytes_acked = 0, path_index = 0; int includes_ack_eliciting = 0, includes_late_ack = 0, ret; - if ((ret = quicly_decode_ack_frame(&state->src, state->end, &frame, state->frame_type == QUICLY_FRAME_TYPE_ACK_ECN)) != 0) + if ((ret = quicly_decode_ack_frame(state->frame_type, &state->src, state->end, &frame)) != 0) return ret; + /* multipath: check for violation, then lookup the path. Upon lookup failure, the frame is discarded. */ + if (frame.multipath_cid != UINT64_MAX) { + if (!(state->epoch == QUICLY_EPOCH_1RTT && conn->application->vtable->multipath)) + return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; + if (frame.multipath_cid > conn->super.remote.cid_set._largest_sequence_expected) /* FIXME need wrapper? 
*/ + return QUICLY_TRANSPORT_ERROR_MP_PROTOCOL_VIOLATION; + for (path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) { + if (conn->paths[path_index] == NULL) + continue; + if (conn->paths[path_index]->dcid == frame.multipath_cid) + break; + } + if (path_index == PTLS_ELEMENTSOF(conn->paths)) { + /* FIXME log */ + return 0; + } + } + uint64_t pn_acked = frame.smallest_acknowledged; - switch (state->epoch) { - case QUICLY_EPOCH_0RTT: - return QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION; - case QUICLY_EPOCH_HANDSHAKE: + if (state->epoch == QUICLY_EPOCH_HANDSHAKE) conn->super.remote.address_validation.send_probe = 0; - break; - default: - break; - } - if ((ret = init_acks_iter(conn, &iter)) != 0) + if ((ret = init_acks_iter(conn, path_index, &iter)) != 0) return ret; /* TODO log PNs being ACKed too late */ + struct st_quicly_conn_path_t *path = conn->paths[path_index]; size_t gap_index = frame.num_gaps; while (1) { assert(frame.ack_block_lengths[gap_index] != 0); /* Ack blocks are organized in the ACK frame and consequently in the ack_block_lengths array from the largest acked down. * Processing acks in packet number order requires processing the ack blocks in reverse order. 
*/ uint64_t pn_block_max = pn_acked + frame.ack_block_lengths[gap_index] - 1; - QUICLY_PROBE(ACK_BLOCK_RECEIVED, conn, conn->stash.now, pn_acked, pn_block_max); + QUICLY_PROBE(ACK_BLOCK_RECEIVED, conn, conn->stash.now, frame.multipath_cid, pn_acked, pn_block_max); QUICLY_LOG_CONN(ack_block_received, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(dcid_sequence_number, frame.multipath_cid); PTLS_LOG_ELEMENT_UNSIGNED(ack_block_begin, pn_acked); PTLS_LOG_ELEMENT_UNSIGNED(ack_block_end, pn_block_max); }); @@ -5567,6 +5973,7 @@ static int handle_ack_frame(quicly_conn_t *conn, struct st_quicly_handle_payload do { const quicly_sent_packet_t *sent = quicly_sentmap_get(&iter); uint64_t pn_sent = sent->packet_number; + uint8_t key_phase_bit_sent = sent->key_phase_bit; assert(pn_acked <= pn_sent); if (pn_acked < pn_sent) { /* set pn_acked to pn_sent; or past the end of the ack block, for use with the next ack block */ @@ -5603,16 +6010,18 @@ static int handle_ack_frame(quicly_conn_t *conn, struct st_quicly_handle_payload bytes_acked += sent->cc_bytes_in_flight; conn->super.stats.num_bytes.ack_received += sent->cc_bytes_in_flight; } - if ((ret = quicly_sentmap_update(&conn->egress.loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_ACKED)) != 0) + if ((ret = quicly_sentmap_update(&path->egress->loss.sentmap, &iter, QUICLY_SENTMAP_EVENT_ACKED, conn)) != 0) return ret; + sent = NULL; /* the object is destroyed by `quicly_sentmap_update` */ if (state->epoch == QUICLY_EPOCH_1RTT) { struct st_quicly_application_space_t *space = conn->application; - if (space->cipher.egress.key_update_pn.last <= pn_acked) { - space->cipher.egress.key_update_pn.last = UINT64_MAX; - space->cipher.egress.key_update_pn.next = conn->egress.packet_number + conn->super.ctx->max_packets_per_key; - QUICLY_PROBE(CRYPTO_SEND_KEY_UPDATE_CONFIRMED, conn, conn->stash.now, space->cipher.egress.key_update_pn.next); + if (space->cipher.egress.key_update_at.next == UINT64_MAX && + key_phase_bit_sent == (space->cipher.egress.key_phase & 
1)) { + space->cipher.egress.key_update_at.next = + space->cipher.egress.key_update_at.last + conn->super.ctx->max_packets_per_key; + QUICLY_PROBE(CRYPTO_SEND_KEY_UPDATE_CONFIRMED, conn, conn->stash.now, space->cipher.egress.key_update_at.next); QUICLY_LOG_CONN(crypto_send_key_update_confirmed, conn, - { PTLS_LOG_ELEMENT_UNSIGNED(next_pn, space->cipher.egress.key_update_pn.next); }); + { PTLS_LOG_ELEMENT_UNSIGNED(next, space->cipher.egress.key_update_at.next); }); } } ++pn_acked; @@ -5629,12 +6038,12 @@ static int handle_ack_frame(quicly_conn_t *conn, struct st_quicly_handle_payload QUICLY_PROBE(ACK_DELAY_RECEIVED, conn, conn->stash.now, frame.ack_delay); QUICLY_LOG_CONN(ack_delay_received, conn, { PTLS_LOG_ELEMENT_UNSIGNED(ack_delay, frame.ack_delay); }); - quicly_ratemeter_on_ack(&conn->egress.ratemeter, conn->stash.now, conn->super.stats.num_bytes.ack_received, + quicly_ratemeter_on_ack(&path->egress->ratemeter, conn->stash.now, conn->super.stats.num_bytes.ack_received, largest_newly_acked.pn); /* Update loss detection engine on ack. The function uses ack_delay only when the largest_newly_acked is also the largest acked * so far. So, it does not matter if the ack_delay being passed in does not apply to the largest_newly_acked. */ - quicly_loss_on_ack_received(&conn->egress.loss, largest_newly_acked.pn, state->epoch, conn->stash.now, + quicly_loss_on_ack_received(&path->egress->loss, largest_newly_acked.pn, state->epoch, conn->stash.now, largest_newly_acked.sent_at, frame.ack_delay, includes_ack_eliciting ? includes_late_ack ? 
QUICLY_LOSS_ACK_RECEIVED_KIND_ACK_ELICITING_LATE_ACK : QUICLY_LOSS_ACK_RECEIVED_KIND_ACK_ELICITING @@ -5642,31 +6051,70 @@ static int handle_ack_frame(quicly_conn_t *conn, struct st_quicly_handle_payload /* OnPacketAcked and OnPacketAckedCC */ if (bytes_acked > 0) { - conn->egress.cc.type->cc_on_acked(&conn->egress.cc, &conn->egress.loss, (uint32_t)bytes_acked, frame.largest_acknowledged, - (uint32_t)(conn->egress.loss.sentmap.bytes_in_flight + bytes_acked), - conn->egress.packet_number, conn->stash.now, conn->egress.max_udp_payload_size); - QUICLY_PROBE(QUICTRACE_CC_ACK, conn, conn->stash.now, &conn->egress.loss.rtt, conn->egress.cc.cwnd, - conn->egress.loss.sentmap.bytes_in_flight); + path->egress->cc.type->cc_on_acked(&path->egress->cc, &path->egress->loss, (uint32_t)bytes_acked, + frame.largest_acknowledged, + (uint32_t)(path->egress->loss.sentmap.bytes_in_flight + bytes_acked), + path->egress->packet_number, conn->stash.now, conn->egress.max_udp_payload_size); + QUICLY_PROBE(QUICTRACE_CC_ACK, conn, conn->stash.now, &path->egress->loss.rtt, path->egress->cc.cwnd, + path->egress->loss.sentmap.bytes_in_flight); } - QUICLY_PROBE(CC_ACK_RECEIVED, conn, conn->stash.now, frame.largest_acknowledged, bytes_acked, conn->egress.cc.cwnd, - conn->egress.loss.sentmap.bytes_in_flight); + QUICLY_PROBE(CC_ACK_RECEIVED, conn, conn->stash.now, frame.largest_acknowledged, bytes_acked, path->egress->cc.cwnd, + path->egress->loss.sentmap.bytes_in_flight); QUICLY_LOG_CONN(cc_ack_received, conn, { PTLS_LOG_ELEMENT_UNSIGNED(largest_acked, frame.largest_acknowledged); PTLS_LOG_ELEMENT_UNSIGNED(bytes_acked, bytes_acked); - PTLS_LOG_ELEMENT_UNSIGNED(cwnd, conn->egress.cc.cwnd); - PTLS_LOG_ELEMENT_UNSIGNED(inflight, conn->egress.loss.sentmap.bytes_in_flight); + PTLS_LOG_ELEMENT_UNSIGNED(cwnd, path->egress->cc.cwnd); + PTLS_LOG_ELEMENT_UNSIGNED(inflight, path->egress->loss.sentmap.bytes_in_flight); }); /* loss-detection */ - if ((ret = quicly_loss_detect_loss(&conn->egress.loss, 
conn->stash.now, conn->super.remote.transport_params.max_ack_delay, - conn->initial == NULL && conn->handshake == NULL, on_loss_detected)) != 0) + if ((ret = quicly_loss_detect_loss(&path->egress->loss, conn->stash.now, conn->super.remote.transport_params.max_ack_delay, + conn->initial == NULL && conn->handshake == NULL, conn, on_loss_detected)) != 0) return ret; - setup_next_send(conn); + + /* ECN */ + if (path->ecn.state != QUICLY_ECN_OFF && largest_newly_acked.pn != UINT64_MAX) { + /* if things look suspicious (ECT(1) count becoming non-zero), turn ECN off */ + if (frame.ecn_counts[1] != 0) + update_ecn_state(conn, path_index, QUICLY_ECN_OFF); + /* TODO: maybe compare num_packets.acked vs. sum(ecn_counts) to see if any packet has been received as NON-ECT? */ + + /* ECN validation succeeds if at least one packet is acked using one of the expected marks during the probing period */ + if (path->ecn.state == QUICLY_ECN_PROBING && frame.ecn_counts[0] + frame.ecn_counts[2] > 0) + update_ecn_state(conn, path_index, QUICLY_ECN_ON); + + /* check if congestion should be reported */ + int report_congestion = + path->ecn.state != QUICLY_ECN_OFF && frame.ecn_counts[2] > path->ecn.counts[state->epoch][2]; + + /* update counters */ + for (size_t i = 0; i < PTLS_ELEMENTSOF(frame.ecn_counts); ++i) { + if (frame.ecn_counts[i] > path->ecn.counts[state->epoch][i]) { + conn->super.stats.num_packets.acked_ecn_counts[i] += frame.ecn_counts[i] - path->ecn.counts[state->epoch][i]; + path->ecn.counts[state->epoch][i] = frame.ecn_counts[i]; + } + } + + /* report congestion */ + if (report_congestion) { + QUICLY_PROBE(ECN_CONGESTION, conn, conn->stash.now, conn->super.stats.num_packets.acked_ecn_counts[2]); + QUICLY_LOG_CONN(ecn_congestion, conn, + { PTLS_LOG_ELEMENT_UNSIGNED(ce_count, conn->super.stats.num_packets.acked_ecn_counts[2]); }); + notify_congestion_to_cc(conn, path->egress, 0, largest_newly_acked.pn); + } + } + + setup_next_send(conn, path_index); return 0; } +static int 
handle_ack_mp_frame(quicly_conn_t *conn, struct st_quicly_handle_payload_state_t *state) +{ + return handle_ack_frame(conn, state); +} + static int handle_max_stream_data_frame(quicly_conn_t *conn, struct st_quicly_handle_payload_state_t *state) { quicly_max_stream_data_frame_t frame; @@ -5814,7 +6262,6 @@ static int handle_path_challenge_frame(quicly_conn_t *conn, struct st_quicly_han struct st_quicly_conn_path_t *path = conn->paths[state->path_index]; memcpy(path->path_response.data, frame.data, QUICLY_PATH_CHALLENGE_DATA_LEN); path->path_response.send_ = 1; - conn->egress.send_probe_at = 0; return 0; } @@ -5961,28 +6408,35 @@ static int handle_version_negotiation_packet(quicly_conn_t *conn, quicly_decoded return negotiate_using_version(conn, selected_version); } -static int compare_socket_address(struct sockaddr *x, struct sockaddr *y) +static int compare_socket_address(const struct sockaddr *x, int x_allow_unspecified, const struct sockaddr *y) { #define CMP(a, b) \ if (a != b) \ return a < b ? 
-1 : 1 + if (x_allow_unspecified && x->sa_family == AF_UNSPEC) + return 0; + CMP(x->sa_family, y->sa_family); if (x->sa_family == AF_INET) { - struct sockaddr_in *xin = (void *)x, *yin = (void *)y; - CMP(ntohl(xin->sin_addr.s_addr), ntohl(yin->sin_addr.s_addr)); + const struct sockaddr_in *xin = (void *)x, *yin = (void *)y; + if (!(x_allow_unspecified && xin->sin_addr.s_addr == 0)) + CMP(ntohl(xin->sin_addr.s_addr), ntohl(yin->sin_addr.s_addr)); CMP(ntohs(xin->sin_port), ntohs(yin->sin_port)); } else if (x->sa_family == AF_INET6) { - struct sockaddr_in6 *xin6 = (void *)x, *yin6 = (void *)y; - int r = memcmp(xin6->sin6_addr.s6_addr, yin6->sin6_addr.s6_addr, sizeof(xin6->sin6_addr.s6_addr)); - if (r != 0) - return r; + struct in6_addr zeroaddr = {}; + const struct sockaddr_in6 *xin6 = (void *)x, *yin6 = (void *)y; + if (!(x_allow_unspecified && memcmp(xin6->sin6_addr.s6_addr, &zeroaddr, sizeof(xin6->sin6_addr.s6_addr)) == 0)) { + int r = memcmp(xin6->sin6_addr.s6_addr, yin6->sin6_addr.s6_addr, sizeof(xin6->sin6_addr.s6_addr)); + if (r != 0) + return r; + } CMP(ntohs(xin6->sin6_port), ntohs(yin6->sin6_port)); CMP(xin6->sin6_flowinfo, yin6->sin6_flowinfo); CMP(xin6->sin6_scope_id, yin6->sin6_scope_id); } else if (x->sa_family == AF_UNSPEC) { - return 1; + return x_allow_unspecified ? 
0 : 1; } else { assert(!"unknown sa_family"); } @@ -6021,10 +6475,9 @@ int quicly_is_destination(quicly_conn_t *conn, struct sockaddr *dest_addr, struc { if (QUICLY_PACKET_IS_LONG_HEADER(decoded->octets.base[0])) { /* long header: validate address, then consult the CID */ - if (compare_socket_address(&conn->paths[0]->address.remote.sa, src_addr) != 0) + if (compare_socket_address(&conn->paths[0]->address.remote.sa, 0, src_addr) != 0) return 0; - if (conn->paths[0]->address.local.sa.sa_family != AF_UNSPEC && - compare_socket_address(&conn->paths[0]->address.local.sa, dest_addr) != 0) + if (compare_socket_address(&conn->paths[0]->address.local.sa, 1, dest_addr) != 0) return 0; /* server may see the CID generated by the client for Initial and 0-RTT packets */ if (!quicly_is_client(conn) && decoded->cid.dest.might_be_client_generated) { @@ -6047,10 +6500,9 @@ int quicly_is_destination(quicly_conn_t *conn, struct sockaddr *dest_addr, struc if (is_stateless_reset(conn, decoded)) goto Found_StatelessReset; } else { - if (compare_socket_address(&conn->paths[0]->address.remote.sa, src_addr) == 0) + if (compare_socket_address(&conn->paths[0]->address.remote.sa, 0, src_addr) == 0) goto Found; - if (conn->paths[0]->address.local.sa.sa_family != AF_UNSPEC && - compare_socket_address(&conn->paths[0]->address.local.sa, dest_addr) != 0) + if (compare_socket_address(&conn->paths[0]->address.local.sa, 1, dest_addr) != 0) return 0; } @@ -6168,8 +6620,9 @@ static int handle_new_connection_id_frame(quicly_conn_t *conn, struct st_quicly_ static int handle_retire_connection_id_frame(quicly_conn_t *conn, struct st_quicly_handle_payload_state_t *state) { - int ret, has_pending; quicly_retire_connection_id_frame_t frame; + struct st_quicly_pn_space_t *multipath_space; + int ret, has_pending; if ((ret = quicly_decode_retire_connection_id_frame(&state->src, state->end, &frame)) != 0) return ret; @@ -6183,8 +6636,15 @@ static int handle_retire_connection_id_frame(quicly_conn_t *conn, struct 
st_quic return QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION; } - if ((ret = quicly_local_cid_retire(&conn->super.local.cid_set, frame.sequence, &has_pending)) != 0) + if ((ret = quicly_local_cid_retire(&conn->super.local.cid_set, frame.sequence, &has_pending, &multipath_space)) != 0) return ret; + if (multipath_space != NULL) { + if (state->pn_space == multipath_space) { + state->pn_space->delayed_free = 1; + } else { + free_pn_space(multipath_space); + } + } if (has_pending) conn->egress.pending_flows |= QUICLY_PENDING_FLOW_OTHERS_BIT; @@ -6208,7 +6668,7 @@ static int handle_handshake_done_frame(quicly_conn_t *conn, struct st_quicly_han conn->super.remote.address_validation.send_probe = 0; if ((ret = discard_handshake_context(conn, QUICLY_EPOCH_HANDSHAKE)) != 0) return ret; - setup_next_send(conn); + setup_next_send(conn, state->path_index); return 0; } @@ -6261,16 +6721,64 @@ static int handle_ack_frequency_frame(quicly_conn_t *conn, struct st_quicly_hand if (frame.sequence >= conn->ingress.ack_frequency.next_sequence) { conn->ingress.ack_frequency.next_sequence = frame.sequence + 1; - conn->application->super.packet_tolerance = + assert(!conn->application->vtable->multipath && "ack-frequency is incompatible with multipath"); + conn->application->non_multipath.space->packet_tolerance = (uint32_t)(frame.packet_tolerance < QUICLY_MAX_PACKET_TOLERANCE ? 
frame.packet_tolerance : QUICLY_MAX_PACKET_TOLERANCE); - conn->application->super.ignore_order = frame.ignore_order; + conn->application->non_multipath.space->ignore_order = frame.ignore_order; } return 0; } -static int handle_payload(quicly_conn_t *conn, size_t epoch, size_t path_index, const uint8_t *_src, size_t _len, - uint64_t *offending_frame_type, int *is_ack_only, int *is_probe_only) +static int handle_path_abandon_frame(quicly_conn_t *conn, struct st_quicly_handle_payload_state_t *state) +{ + quicly_path_abandon_frame_t frame; + int ret; + + if (!quicly_is_multipath(conn)) + return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; + + if ((ret = quicly_decode_path_abandon_frame(&state->src, state->end, &frame)) != 0) + return ret; + + QUICLY_PROBE(PATH_ABANDON_RECEIVE, conn, conn->stash.now, frame.dcid, frame.error_code, + QUICLY_PROBE_ESCAPE_UNSAFE_STRING(frame.reason_phrase.base, frame.reason_phrase.len)); + QUICLY_LOG_CONN(path_abandon_receive, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(dcid, frame.dcid); + PTLS_LOG_ELEMENT_UNSIGNED(error_code, frame.error_code); + PTLS_LOG_ELEMENT_UNSAFESTR(reason_phrase, (const char *)frame.reason_phrase.base, frame.reason_phrase.len); + }); + + /* TODO handle the frame */ + + return 0; +} + +static int handle_path_status_frame(quicly_conn_t *conn, struct st_quicly_handle_payload_state_t *state) +{ + quicly_path_status_frame_t frame; + int ret; + + if (!quicly_is_multipath(conn)) + return QUICLY_TRANSPORT_ERROR_FRAME_ENCODING; + + if ((ret = quicly_decode_path_status_frame(&state->src, state->end, &frame)) != 0) + return ret; + + QUICLY_PROBE(PATH_STATUS_RECEIVE, conn, conn->stash.now, frame.dcid, frame.sequence, frame.available ? 2 : 1); + QUICLY_LOG_CONN(path_status_receive, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(dcid, frame.dcid); + PTLS_LOG_ELEMENT_UNSIGNED(sequence, frame.sequence); + PTLS_LOG_ELEMENT_UNSIGNED(available, frame.available ? 
2 : 1); + }); + + /* TODO handle the frame */ + + return 0; +} + +static int handle_payload(quicly_conn_t *conn, size_t epoch, size_t path_index, struct st_quicly_pn_space_t *pn_space, + const uint8_t *_src, size_t _len, uint64_t *offending_frame_type, int *is_ack_only, int *is_probe_only) { /* clang-format off */ @@ -6352,13 +6860,17 @@ static int handle_payload(quicly_conn_t *conn, size_t epoch, size_t path_index, FRAME( DATAGRAM_NOLEN , datagram , 0 , 1, 0, 1 , 1 , 0 ), FRAME( DATAGRAM_WITHLEN , datagram , 0 , 1, 0, 1 , 1 , 0 ), FRAME( ACK_FREQUENCY , ack_frequency , 0 , 0 , 0 , 1 , 1 , 0 ), + FRAME( ACK_MP , ack_mp , 0 , 0 , 0 , 1 , 0 , 0 ), + FRAME( PATH_ABANDON , path_abandon , 0 , 0 , 0 , 1 , 1 , 0 ), + FRAME( PATH_STATUS , path_status , 0 , 0 , 0 , 1 , 1 , 0 ), /* +------------------+---------------+-------------------+---------------+---------+ */ #undef FRAME {UINT64_MAX}, }; /* clang-format on */ - struct st_quicly_handle_payload_state_t state = {.epoch = epoch, .path_index = path_index, .src = _src, .end = _src + _len}; + struct st_quicly_handle_payload_state_t state = { + .epoch = epoch, .path_index = path_index, .pn_space = pn_space, .src = _src, .end = _src + _len}; size_t num_frames_ack_eliciting = 0, num_frames_non_probing = 0; int ret; @@ -6499,8 +7011,11 @@ int quicly_accept(quicly_conn_t **conn, quicly_context_t *ctx, struct sockaddr * PTLS_LOG_ELEMENT_HEXDUMP(dcid, packet->cid.dest.encrypted.base, packet->cid.dest.encrypted.len); PTLS_LOG_ELEMENT_PTR(address_token, address_token); }); - QUICLY_PROBE(PACKET_RECEIVED, *conn, (*conn)->stash.now, pn, payload.base, payload.len, get_epoch(packet->octets.base[0])); + QUICLY_PROBE(PACKET_RECEIVED, *conn, (*conn)->stash.now, SIZE_MAX, 0, pn, payload.base, payload.len, + get_epoch(packet->octets.base[0])); QUICLY_LOG_CONN(packet_received, *conn, { + PTLS_LOG_ELEMENT_UNSIGNED(path_index, SIZE_MAX); + PTLS_LOG_ELEMENT_UNSIGNED(dcid_sequence_number, 0); PTLS_LOG_ELEMENT_UNSIGNED(pn, pn); 
PTLS_LOG_APPDATA_ELEMENT_HEXDUMP(decrypted, payload.base, payload.len); PTLS_LOG_ELEMENT_UNSIGNED(packet_type, get_epoch(packet->octets.base[0])); @@ -6508,11 +7023,13 @@ int quicly_accept(quicly_conn_t **conn, quicly_context_t *ctx, struct sockaddr * /* handle the input; we ignore is_ack_only, we consult if there's any output from TLS in response to CH anyways */ (*conn)->super.stats.num_packets.received += 1; + if (packet->ecn != 0) + (*conn)->super.stats.num_packets.received_ecn_counts[get_ecn_index_from_bits(packet->ecn)] += 1; (*conn)->super.stats.num_bytes.received += packet->datagram_size; - if ((ret = handle_payload(*conn, QUICLY_EPOCH_INITIAL, 0, payload.base, payload.len, &offending_frame_type, &is_ack_only, - &is_probe_only)) != 0) + if ((ret = handle_payload(*conn, QUICLY_EPOCH_INITIAL, 0, &(*conn)->initial->super, payload.base, payload.len, + &offending_frame_type, &is_ack_only, &is_probe_only)) != 0) goto Exit; - if ((ret = record_receipt(&(*conn)->initial->super, pn, 0, (*conn)->stash.now, &(*conn)->egress.send_ack_at, + if ((ret = record_receipt(&(*conn)->initial->super, pn, packet->ecn, 0, 1, (*conn)->stash.now, &(*conn)->super.stats.num_packets.received_out_of_order)) != 0) goto Exit; @@ -6540,8 +7057,8 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka int (*cb)(void *, uint64_t, quicly_decoded_packet_t *, size_t, size_t *); void *ctx; } aead; - struct st_quicly_pn_space_t **space; - size_t epoch, path_index; + struct st_quicly_pn_space_t *space = NULL; + size_t epoch, path_index = SIZE_MAX; ptls_iovec_t payload; uint64_t pn, offending_frame_type = QUICLY_FRAME_TYPE_PADDING; int is_ack_only, is_probe_only, ret; @@ -6561,13 +7078,13 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka /* drop packets with invalid server tuple (note: when running as a server, `dest_addr` may not be available depending on the * socket option being used */ if (quicly_is_client(conn)) { - if 
(compare_socket_address(src_addr, &conn->paths[0]->address.remote.sa) != 0) { + if (compare_socket_address(src_addr, 0, &conn->paths[0]->address.remote.sa) != 0) { ret = QUICLY_ERROR_PACKET_IGNORED; goto Exit; } } else if (dest_addr != NULL && dest_addr->sa_family != AF_UNSPEC) { assert(conn->paths[0]->address.local.sa.sa_family != AF_UNSPEC); - if (compare_socket_address(dest_addr, &conn->paths[0]->address.local.sa) != 0) { + if (compare_socket_address(&conn->paths[0]->address.local.sa, 0, dest_addr) != 0) { ret = QUICLY_ERROR_PACKET_IGNORED; goto Exit; } @@ -6586,7 +7103,7 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka ++conn->egress.connection_close.num_packets_received; /* respond with a CONNECTION_CLOSE frame using exponential back-off */ if (__builtin_popcountl(conn->egress.connection_close.num_packets_received) == 1) - conn->egress.send_ack_at = 0; + conn->egress.connection_close.send_at = 0; ret = 0; goto Exit; case QUICLY_STATE_DRAINING: @@ -6673,7 +7190,7 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka } aead.cb = aead_decrypt_fixed_key; aead.ctx = conn->initial->cipher.ingress.aead; - space = (void *)&conn->initial; + space = &conn->initial->super; epoch = QUICLY_EPOCH_INITIAL; break; case QUICLY_PACKET_TYPE_HANDSHAKE: @@ -6683,7 +7200,7 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka } aead.cb = aead_decrypt_fixed_key; aead.ctx = conn->handshake->cipher.ingress.aead; - space = (void *)&conn->handshake; + space = &conn->handshake->super; epoch = QUICLY_EPOCH_HANDSHAKE; break; case QUICLY_PACKET_TYPE_0RTT: @@ -6698,7 +7215,8 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka } aead.cb = aead_decrypt_fixed_key; aead.ctx = conn->application->cipher.ingress.aead[1]; - space = (void *)&conn->application; + if ((ret = conn->application->vtable->get_space(conn, &space, packet->cid.dest.plaintext.path_id)) != 0) + goto 
Exit; epoch = QUICLY_EPOCH_0RTT; break; default: @@ -6714,30 +7232,48 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka } aead.cb = aead_decrypt_1rtt; aead.ctx = conn; - space = (void *)&conn->application; + if ((ret = conn->application->vtable->get_space(conn, &space, packet->cid.dest.plaintext.path_id)) != 0) + goto Exit; epoch = QUICLY_EPOCH_1RTT; } /* decrypt */ - if ((ret = decrypt_packet(header_protection, aead.cb, aead.ctx, &(*space)->next_expected_packet_number, packet, &pn, - &payload)) != 0) { + if ((ret = decrypt_packet(header_protection, aead.cb, aead.ctx, &space->next_expected_packet_number, packet, &pn, &payload)) != + 0) { ++conn->super.stats.num_packets.decryption_failed; QUICLY_PROBE(PACKET_DECRYPTION_FAILED, conn, conn->stash.now, pn); goto Exit; } - QUICLY_PROBE(PACKET_RECEIVED, conn, conn->stash.now, pn, payload.base, payload.len, get_epoch(packet->octets.base[0])); + /* determine the incoming path; if it is a new path, open a new one (note: error is kept and we bail out after running the + * probe) */ + for (path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) { + struct st_quicly_conn_path_t *path = conn->paths[path_index]; + if (path != NULL && compare_socket_address(&path->address.remote.sa, 0, src_addr) == 0) { + if (compare_socket_address(&path->address.local.sa, 1, dest_addr) == 0) + break; + } + } + if (path_index == PTLS_ELEMENTSOF(conn->paths)) { + /* packets arriving from new paths will start to get ignored once the number of paths that failed to validate reaches the + * defined threshold */ + if (conn->super.stats.num_paths.validation_failed < conn->super.ctx->max_path_validation_failures) { + ret = open_path(conn, &path_index, src_addr, dest_addr, packet->cid.dest.plaintext.path_id); + } else { + ret = QUICLY_ERROR_PACKET_IGNORED; + } + } + /* emit probe, then bail out if corresponding path is unavailable */ + QUICLY_PROBE(PACKET_RECEIVED, conn, conn->stash.now, ret == 0 ? 
path_index : SIZE_MAX, + (uint32_t)packet->cid.dest.plaintext.path_id, pn, payload.base, payload.len, get_epoch(packet->octets.base[0])); QUICLY_LOG_CONN(packet_received, conn, { + PTLS_LOG_ELEMENT_UNSIGNED(path_index, ret == 0 ? path_index : SIZE_MAX); + PTLS_LOG_ELEMENT_UNSIGNED(dcid_sequence_number, (uint32_t)packet->cid.dest.plaintext.path_id); PTLS_LOG_ELEMENT_UNSIGNED(pn, pn); PTLS_LOG_ELEMENT_UNSIGNED(decrypted_len, payload.len); PTLS_LOG_ELEMENT_UNSIGNED(packet_type, get_epoch(packet->octets.base[0])); }); - - /* determine the incoming path; if it is a new path, open a new one */ - for (path_index = 0; path_index < PTLS_ELEMENTSOF(conn->paths); ++path_index) - if (conn->paths[path_index] != NULL && compare_socket_address(src_addr, &conn->paths[path_index]->address.remote.sa) == 0) - break; - if (path_index == PTLS_ELEMENTSOF(conn->paths) && (ret = open_path(conn, &path_index, src_addr, dest_addr)) != 0) + if (ret != 0) goto Exit; /* update states */ @@ -6745,7 +7281,10 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka conn->super.state = QUICLY_STATE_CONNECTED; conn->super.stats.num_packets.received += 1; conn->paths[path_index]->packet_last_received = conn->super.stats.num_packets.received; + conn->paths[path_index]->ingress_cid = packet->cid.dest.plaintext.path_id; conn->paths[path_index]->num_packets.received += 1; + if (packet->ecn != 0) + conn->super.stats.num_packets.received_ecn_counts[get_ecn_index_from_bits(packet->ecn)] += 1; /* state updates, that are triggered by the receipt of a packet */ switch (epoch) { @@ -6760,7 +7299,7 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka if (conn->initial != NULL) { if ((ret = discard_handshake_context(conn, QUICLY_EPOCH_INITIAL)) != 0) goto Exit; - setup_next_send(conn); + setup_next_send(conn, path_index); conn->super.remote.address_validation.validated = 1; } break; @@ -6769,19 +7308,24 @@ int quicly_receive(quicly_conn_t *conn, struct 
sockaddr *dest_addr, struct socka } /* handle the payload */ - if ((ret = handle_payload(conn, epoch, path_index, payload.base, payload.len, &offending_frame_type, &is_ack_only, + if ((ret = handle_payload(conn, epoch, path_index, space, payload.base, payload.len, &offending_frame_type, &is_ack_only, &is_probe_only)) != 0) goto Exit; - if (!is_probe_only && conn->paths[path_index]->probe_only) { + if (space->delayed_free) { + assert(epoch == QUICLY_EPOCH_0RTT || epoch == QUICLY_EPOCH_1RTT); + free_pn_space(space); + space = NULL; + } + if (!is_probe_only && conn->paths[path_index]->probe_only && !quicly_is_multipath(conn)) { assert(path_index != 0); conn->paths[path_index]->probe_only = 0; ++conn->super.stats.num_paths.migration_elicited; - QUICLY_ELICIT_PATH_MIGRATION(conn, conn->stash.now, path_index); + QUICLY_PROBE(ELICIT_PATH_MIGRATION, conn, conn->stash.now, path_index); QUICLY_LOG_CONN(elicit_path_migration, conn, { PTLS_LOG_ELEMENT_UNSIGNED(path_index, path_index); }); } - if (*space != NULL && conn->super.state < QUICLY_STATE_CLOSING) { - if ((ret = record_receipt(*space, pn, is_ack_only, conn->stash.now, &conn->egress.send_ack_at, - &conn->super.stats.num_packets.received_out_of_order)) != 0) + if (conn->super.state < QUICLY_STATE_CLOSING && space != NULL) { + if ((ret = record_receipt(space, pn, packet->ecn, is_ack_only, conn->initial != NULL || conn->handshake != NULL, + conn->stash.now, &conn->super.stats.num_packets.received_out_of_order)) != 0) goto Exit; } @@ -6792,13 +7336,13 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka if (quicly_is_client(conn) && conn->handshake != NULL && conn->handshake->cipher.egress.aead != NULL) { if ((ret = discard_handshake_context(conn, QUICLY_EPOCH_INITIAL)) != 0) goto Exit; - setup_next_send(conn); + setup_next_send(conn, path_index); } break; case QUICLY_EPOCH_HANDSHAKE: if (quicly_is_client(conn)) { - /* Running as a client. 
- * Respect "disable_migration" TP sent by the remote peer at the end of the TLS handshake. */ + /* Running as a client. If "disable_active_migration" TP was sent by the peer, pin the local address to the destination + * address of the current packet at the end of the handshake, so that we do not migrate unintentionally. */ if (conn->paths[0]->address.local.sa.sa_family == AF_UNSPEC && dest_addr != NULL && dest_addr->sa_family != AF_UNSPEC && ptls_handshake_is_complete(conn->crypto.tls) && conn->super.remote.transport_params.disable_active_migration) set_address(&conn->paths[0]->address.local, dest_addr); @@ -6810,18 +7354,32 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka goto Exit; assert(conn->handshake == NULL); conn->egress.pending_flows |= QUICLY_PENDING_FLOW_HANDSHAKE_DONE_BIT; - setup_next_send(conn); + setup_next_send(conn, path_index); } } break; case QUICLY_EPOCH_1RTT: if (!is_ack_only && should_send_max_data(conn)) conn->egress.pending_flows |= QUICLY_PENDING_FLOW_OTHERS_BIT; - /* switch active path to current path, if current path is validated and not probe-only */ - if (path_index != 0 && conn->paths[path_index]->path_challenge.send_at == INT64_MAX && - !conn->paths[path_index]->probe_only) { - delete_path(conn, 1 /* promote */, path_index); - recalc_send_probe_at(conn); + /* prune / promote paths, if current path has been validated */ + if (conn->paths[path_index]->path_challenge.send_at == INT64_MAX) { + if (quicly_is_multipath(conn)) { + /* if multipath is used, paths sharing the ingress CID are removed at the moment current path is validated, as they + * are the source of NAT rebinding and therefore likely to have ceased */ + if (!conn->paths[path_index]->multipath_rebinding_check_complete) { + conn->paths[path_index]->multipath_rebinding_check_complete = 1; + if ((ret = multipath_handle_rebinding(conn, &path_index)) != 0) + goto Exit; + } + } else { + /* if multipath is not used, active path is path zero. 
Receipt of a probe only packet on an alternate path is used + * as the trigger to promote path. */ + if (path_index != 0 && !conn->paths[path_index]->probe_only) { + if ((ret = delete_path(conn, path_index, DELETE_PATH_MODE_PROMOTE)) != 0) + goto Exit; + path_index = 0; + } + } } break; default: @@ -6837,9 +7395,12 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka * suspended by the 3x limit (in which case we have loss.alarm_at set but return INT64_MAX from quicly_get_first_timeout * until we receive something from the client). */ - if (conn->egress.loss.alarm_at < conn->stash.now) - conn->egress.loss.alarm_at = conn->stash.now; - assert_consistency(conn, 0); + if (path_index < PTLS_ELEMENTSOF(conn->paths)) { + struct st_quicly_conn_path_t *path = conn->paths[path_index]; + if (path->egress->loss.alarm_at < conn->stash.now) + path->egress->loss.alarm_at = conn->stash.now; + assert_consistency(conn, path_index, 0); + } break; case PTLS_ERROR_NO_MEMORY: case QUICLY_ERROR_STATE_EXHAUSTION: @@ -6854,6 +7415,18 @@ int quicly_receive(quicly_conn_t *conn, struct sockaddr *dest_addr, struct socka return ret; } +int quicly_add_path(quicly_conn_t *conn, struct sockaddr *local) +{ + size_t path_index; + + assert(quicly_is_client(conn)); + + if (conn->paths[0]->address.remote.sa.sa_family != local->sa_family) + return QUICLY_ERROR_INVALID_PARAMETERS; + + return open_path(conn, &path_index, &conn->paths[0]->address.remote.sa, local, UINT64_MAX); +} + int quicly_open_stream(quicly_conn_t *conn, quicly_stream_t **_stream, int uni) { quicly_stream_t *stream; @@ -7285,7 +7858,7 @@ const quicly_stream_callbacks_t quicly_stream_noop_callbacks = { void quicly__debug_printf(quicly_conn_t *conn, const char *function, int line, const char *fmt, ...) 
{ - if (QUICLY_DEBUG_MESSAGE_ENABLED() || ptls_log.is_active) { + if (QUICLY_PROBE_ENABLED(DEBUG_MESSAGE) || ptls_log.is_active) { char buf[1024]; va_list args; @@ -7293,7 +7866,7 @@ void quicly__debug_printf(quicly_conn_t *conn, const char *function, int line, c vsnprintf(buf, sizeof(buf), fmt, args); va_end(args); - QUICLY_DEBUG_MESSAGE(conn, function, line, buf); + QUICLY_PROBE(DEBUG_MESSAGE, conn, function, line, buf); QUICLY_LOG_CONN(debug_message, conn, { PTLS_LOG_ELEMENT_UNSAFESTR(function, function, strlen(function)); PTLS_LOG_ELEMENT_SIGNED(line, line); diff --git a/lib/sentmap.c b/lib/sentmap.c index 64b2bfdd..1e677557 100644 --- a/lib/sentmap.c +++ b/lib/sentmap.c @@ -84,6 +84,9 @@ static void discard_entry(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter) void quicly_sentmap_dispose(quicly_sentmap_t *map) { + /* size of `quicly_sent_t` is meant to be 4 pointers */ + PTLS_BUILD_ASSERT(sizeof(void *) == 8 ? sizeof(quicly_sent_t) == sizeof(uint64_t) * 4 : 1); + struct st_quicly_sent_block_t *block; while ((block = map->head) != NULL) { @@ -129,7 +132,8 @@ void quicly_sentmap_skip(quicly_sentmap_iter_t *iter) } while (iter->p->acked != quicly_sentmap__type_packet); } -int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, quicly_sentmap_event_t event) +int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, quicly_sentmap_event_t event, + struct st_quicly_conn_t *conn) { quicly_sent_packet_t packet; int ret = 0; @@ -159,7 +163,7 @@ int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, qu --map->num_packets; } for (next_entry(iter); iter->p->acked != quicly_sentmap__type_packet; next_entry(iter)) { - if (should_notify && (ret = iter->p->acked(map, &packet, event == QUICLY_SENTMAP_EVENT_ACKED, iter->p)) != 0) + if (should_notify && (ret = iter->p->acked(map, &packet, event == QUICLY_SENTMAP_EVENT_ACKED, iter->p, conn)) != 0) goto Exit; if (should_discard) discard_entry(map, iter); @@ 
-169,7 +173,8 @@ int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, qu return ret; } -int quicly_sentmap__type_packet(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +int quicly_sentmap__type_packet(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + struct st_quicly_conn_t *conn) { assert(!"quicly_sentmap__type_packet cannot be called"); return QUICLY_TRANSPORT_ERROR_INTERNAL; diff --git a/quicly-probes.d b/quicly-probes.d index 53aea788..f2310fc8 100644 --- a/quicly-probes.d +++ b/quicly-probes.d @@ -50,7 +50,7 @@ provider quicly { probe initial_handshake_packet_exceed(struct st_quicly_conn_t *conn, int64_t at, uint64_t num_packets); probe stateless_reset_receive(struct st_quicly_conn_t *conn, int64_t at); - probe new_path(struct st_quicly_conn_t *conn, int64_t at, size_t path_index, const char *remote); + probe new_path(struct st_quicly_conn_t *conn, int64_t at, size_t path_index, const char *remote, const char *local); probe delete_path(struct st_quicly_conn_t *conn, int64_t at, size_t path_index); probe promote_path(struct st_quicly_conn_t *conn, int64_t at, size_t path_index); probe elicit_path_migration(struct st_quicly_conn_t *conn, int64_t at, size_t path_index); @@ -58,13 +58,14 @@ provider quicly { probe crypto_handshake(struct st_quicly_conn_t *conn, int64_t at, int ret); probe crypto_update_secret(struct st_quicly_conn_t *conn, int64_t at, int is_enc, uint8_t epoch, const char *label, const char *secret); probe crypto_send_key_update(struct st_quicly_conn_t *conn, int64_t at, uint64_t phase, const char *secret); - probe crypto_send_key_update_confirmed(struct st_quicly_conn_t *conn, int64_t at, uint64_t next_pn); + probe crypto_send_key_update_confirmed(struct st_quicly_conn_t *conn, int64_t at, uint64_t next); probe crypto_receive_key_update(struct st_quicly_conn_t *conn, int64_t at, uint64_t phase, const char *secret); probe 
crypto_receive_key_update_prepare(struct st_quicly_conn_t *conn, int64_t at, uint64_t phase, const char *secret); probe packet_sent(struct st_quicly_conn_t *conn, int64_t at, uint64_t pn, size_t len, uint8_t packet_type, int ack_only); - probe packet_received(struct st_quicly_conn_t *conn, int64_t at, uint64_t pn, const void *decrypted, size_t decrypted_len, uint8_t packet_type); - probe packet_prepare(struct st_quicly_conn_t *conn, int64_t at, uint8_t first_octet, const char *dcid); + probe packet_received(struct st_quicly_conn_t *conn, int64_t at, size_t path_index, uint32_t dcid_sequence_number, + uint64_t pn, const void *decrypted, size_t decrypted_len, uint8_t packet_type); + probe packet_prepare(struct st_quicly_conn_t *conn, int64_t at, size_t path_index, uint8_t first_octet, const char *dcid); probe packet_acked(struct st_quicly_conn_t *conn, int64_t at, uint64_t pn, int is_late_ack); probe packet_lost(struct st_quicly_conn_t *conn, int64_t at, uint64_t pn, uint8_t packet_type); probe packet_decryption_failed(struct st_quicly_conn_t *conn, int64_t at, uint64_t pn); @@ -74,9 +75,11 @@ provider quicly { size_t inflight); probe cc_congestion(struct st_quicly_conn_t *conn, int64_t at, uint64_t max_lost_pn, size_t inflight, uint32_t cwnd); - probe ack_block_received(struct st_quicly_conn_t *conn, int64_t at, uint64_t ack_block_begin, uint64_t ack_block_end); + probe ack_block_received(struct st_quicly_conn_t *conn, int64_t at, uint64_t dcid_sequence_number, uint64_t ack_block_begin, + uint64_t ack_block_end); probe ack_delay_received(struct st_quicly_conn_t *conn, int64_t at, uint64_t ack_delay); - probe ack_send(struct st_quicly_conn_t *conn, int64_t at, uint64_t largest_acked, uint64_t ack_delay); + probe ack_send(struct st_quicly_conn_t *conn, int64_t at, uint64_t dcid_sequence_number, uint64_t largest_acked, + uint64_t ack_delay); probe ping_send(struct st_quicly_conn_t *conn, int64_t at); probe ping_receive(struct st_quicly_conn_t *conn, int64_t at); @@ 
-137,6 +140,9 @@ provider quicly { probe path_response_send(struct st_quicly_conn_t *conn, int64_t at, const void *bytes, size_t bytes_len); probe path_response_receive(struct st_quicly_conn_t *conn, int64_t at, const void *bytes, size_t bytes_len); + probe ecn_validation(struct st_quicly_conn_t *conn, int64_t at, int ecn_state); + probe ecn_congestion(struct st_quicly_conn_t *conn, int64_t at, uint64_t ce_count); + probe datagram_send(struct st_quicly_conn_t *conn, int64_t at, const void *payload, size_t payload_len); probe datagram_receive(struct st_quicly_conn_t *conn, int64_t at, const void *payload, size_t payload_len); @@ -159,6 +165,11 @@ provider quicly { const void *src, size_t src_len); probe stream_on_receive_reset(struct st_quicly_conn_t *conn, int64_t at, struct st_quicly_stream_t *stream, int err); + probe path_abandon_receive(struct st_quicly_conn_t *conn, int64_t at, uint64_t dcid, uint64_t error_code, + const char *reason_phrase); + + probe path_status_receive(struct st_quicly_conn_t *conn, int64_t at, uint64_t dcid, uint64_t sequence, uint64_t status); + probe debug_message(struct st_quicly_conn_t *conn, const char *function, int line, const char *message); probe conn_stats(struct st_quicly_conn_t *conn, int64_t at, struct st_quicly_stats_t *stats, size_t size); diff --git a/src/cli.c b/src/cli.c index f5243ad6..bff21e1a 100644 --- a/src/cli.c +++ b/src/cli.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -121,30 +122,42 @@ struct st_stream_data_t { FILE *outfp; }; -static int new_socket(int af) -{ +struct fdinfo { int fd; + quicly_address_t localaddr; +}; + +static int new_socket(struct fdinfo *fd, sa_family_t af) +{ + int bind_to_specified; + + if (af == AF_UNSPEC) { + bind_to_specified = 1; + af = fd->localaddr.sa.sa_family; + } else { + bind_to_specified = 0; + } - if ((fd = socket(af, SOCK_DGRAM, IPPROTO_UDP)) == -1) { + if ((fd->fd = socket(af, SOCK_DGRAM, IPPROTO_UDP)) == -1) { perror("socket(2) 
failed"); return -1; } - fcntl(fd, F_SETFL, O_NONBLOCK); + fcntl(fd->fd, F_SETFL, O_NONBLOCK); { int on = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) { + if (setsockopt(fd->fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) != 0) { perror("setsockopt(SO_REUSEADDR) failed"); return -1; } } if (udpbufsize != 0) { unsigned arg = udpbufsize; - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &arg, sizeof(arg)) != 0) { + if (setsockopt(fd->fd, SOL_SOCKET, SO_RCVBUF, &arg, sizeof(arg)) != 0) { perror("setsockopt(SO_RCVBUF) failed"); return -1; } arg = udpbufsize; - if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &arg, sizeof(arg)) != 0) { + if (setsockopt(fd->fd, SOL_SOCKET, SO_SNDBUF, &arg, sizeof(arg)) != 0) { perror("setsockopt(SO_RCVBUF) failed"); return -1; } @@ -152,27 +165,34 @@ static int new_socket(int af) #if defined(IP_DONTFRAG) { int on = 1; - if (setsockopt(fd, IPPROTO_IP, IP_DONTFRAG, &on, sizeof(on)) != 0) + if (setsockopt(fd->fd, IPPROTO_IP, IP_DONTFRAG, &on, sizeof(on)) != 0) perror("Warning: setsockopt(IP_DONTFRAG) failed"); } #elif defined(IP_PMTUDISC_DO) { int opt = IP_PMTUDISC_DO; - if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER, &opt, sizeof(opt)) != 0) + if (setsockopt(fd->fd, IPPROTO_IP, IP_MTU_DISCOVER, &opt, sizeof(opt)) != 0) perror("Warning: setsockopt(IP_MTU_DISCOVER) failed"); } +#endif +#ifdef IP_RECVTOS + { + int on = 1; + if (setsockopt(fd->fd, IPPROTO_IP, IP_RECVTOS, &on, sizeof(on)) != 0) + perror("Warning: setsockopt(IP_RECVTOS) failed"); + } #endif switch (af) { case AF_INET: { #ifdef IP_PKTINFO int on = 1; - if (setsockopt(fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on)) != 0) { + if (setsockopt(fd->fd, IPPROTO_IP, IP_PKTINFO, &on, sizeof(on)) != 0) { perror("setsockopt(IP_PKTINFO) failed"); return -1; } #elif defined(IP_RECVDSTADDR) int on = 1; - if (setsockopt(fd, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on)) != 0) { + if (setsockopt(fd->fd, IPPROTO_IP, IP_RECVDSTADDR, &on, sizeof(on)) != 0) { 
perror("setsockopt(IP_RECVDSTADDR) failed"); return -1; } @@ -180,7 +200,7 @@ } break; case AF_INET6: { int on = 1; - if (setsockopt(fd, IPPROTO_IP, IPV6_RECVPKTINFO, &on, sizeof(on)) != 0) { + if (setsockopt(fd->fd, IPPROTO_IP, IPV6_RECVPKTINFO, &on, sizeof(on)) != 0) { perror("setsockopt(IPV6_RECVPKTINNFO) failed"); return -1; } @@ -189,7 +209,54 @@ break; } - return fd; + socklen_t locallen = af == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6); + if (bind_to_specified) { + if (bind(fd->fd, &fd->localaddr.sa, locallen) != 0) { + perror("bind(2) failed"); + return -1; + } + } else { + assert(af != AF_UNSPEC); + quicly_address_t local = {.sa.sa_family = af}; + if (bind(fd->fd, &local.sa, locallen) != 0) { + perror("bind(2) failed"); + return -1; + } + if (getsockname(fd->fd, &fd->localaddr.sa, &locallen) != 0) { + perror("getsockname(2) failed"); + return -1; + } + } + + return fd->fd; +} + +static int tuple_is_equal(struct sockaddr *x, struct sockaddr *y, int check_addr, int check_port) +{ + /* check address, deferring the use of port number match to type-specific checks */ + if (x->sa_family != y->sa_family) + return 0; + switch (x->sa_family) { + case AF_INET: { + struct sockaddr_in *x4 = (void *)x, *y4 = (void *)y; + if (check_addr && x4->sin_addr.s_addr != y4->sin_addr.s_addr) + return 0; + if (check_port && x4->sin_port != y4->sin_port) + return 0; + } break; + case AF_INET6: { + struct sockaddr_in6 *x6 = (void *)x, *y6 = (void *)y; + if (check_addr && memcmp(&x6->sin6_addr, &y6->sin6_addr, sizeof(x6->sin6_addr)) != 0) + return 0; + if (check_port && x6->sin6_port != y6->sin6_port) + return 0; + } break; + default: + if (check_addr || check_port) + return 0; + } + + return 1; } static void on_stop_sending(quicly_stream_t *stream, int err); @@ -216,13 +283,18 @@ static void dump_stats(FILE *fp, quicly_conn_t *conn) quicly_get_stats(conn, &stats); fprintf(fp, - "packets-received: 
%" PRIu64 ", packets-decryption-failed: %" PRIu64 ", packets-sent: %" PRIu64 - ", packets-lost: %" PRIu64 ", ack-received: %" PRIu64 ", late-acked: %" PRIu64 ", bytes-received: %" PRIu64 - ", bytes-sent: %" PRIu64 ", paths-created %" PRIu64 ", paths-validated %" PRIu64 ", paths-promoted: %" PRIu64 - ", srtt: %" PRIu32 "\n", - stats.num_packets.received, stats.num_packets.decryption_failed, stats.num_packets.sent, stats.num_packets.lost, - stats.num_packets.ack_received, stats.num_packets.late_acked, stats.num_bytes.received, stats.num_bytes.sent, - stats.num_paths.created, stats.num_paths.validated, stats.num_paths.promoted, stats.rtt.smoothed); + "packets-received: %" PRIu64 ", received-ecn-ect0: %" PRIu64 ", received-ecn-ect1: %" PRIu64 + ", received-ecn-ce: %" PRIu64 ", packets-decryption-failed: %" PRIu64 ", packets-sent: %" PRIu64 + ", packets-lost: %" PRIu64 ", ack-received: %" PRIu64 ", ack-ecn-ect0: %" PRIu64 ", ack-ecn-ect1: %" PRIu64 + ", ack-ecn-ce: %" PRIu64 ", late-acked: %" PRIu64 ", bytes-received: %" PRIu64 ", bytes-sent: %" PRIu64 + ", paths-created %" PRIu64 ", paths-validated %" PRIu64 ", paths-promoted: %" PRIu64 ", srtt: %" PRIu32 + ", num-loss-episodes: %" PRIu32 ", num-ecn-loss-episodes: %" PRIu32 "\n", + stats.num_packets.received, stats.num_packets.received_ecn_counts[0], stats.num_packets.received_ecn_counts[1], + stats.num_packets.received_ecn_counts[2], stats.num_packets.decryption_failed, stats.num_packets.sent, + stats.num_packets.lost, stats.num_packets.ack_received, stats.num_packets.acked_ecn_counts[0], + stats.num_packets.acked_ecn_counts[1], stats.num_packets.acked_ecn_counts[2], stats.num_packets.late_acked, + stats.num_bytes.received, stats.num_bytes.sent, stats.num_paths.created, stats.num_paths.validated, + stats.num_paths.promoted, stats.rtt.smoothed, stats.cc.num_loss_episodes, stats.cc.num_ecn_loss_episodes); } static int validate_path(const char *path) @@ -494,10 +566,10 @@ static int 
on_generate_resumption_token(quicly_generate_resumption_token_t *self static quicly_generate_resumption_token_t generate_resumption_token = {&on_generate_resumption_token}; /* buf should be ctx.transport_params.max_udp_payload_size bytes long */ -static ssize_t receive_datagram(int fd, void *buf, quicly_address_t *dest, quicly_address_t *src) +static ssize_t receive_datagram(int fd, void *buf, quicly_address_t *dest, quicly_address_t *src, uint8_t *ecn) { struct iovec vec = {.iov_base = buf, .iov_len = ctx.transport_params.max_udp_payload_size}; - char cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo))] = {}; + char cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo) + sizeof(int) /* == max(V4_TOS, V6_TCLASS) */)] = {}; struct msghdr mess = { .msg_name = &src->sa, .msg_namelen = sizeof(*src), @@ -518,6 +590,7 @@ static ssize_t receive_datagram(int fd, void *buf, quicly_address_t *dest, quicl if (rret >= 0) { dest->sa.sa_family = AF_UNSPEC; + *ecn = 0; for (struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mess); cmsg != NULL; cmsg = CMSG_NXTHDR(&mess, cmsg)) { #ifdef IP_PKTINFO if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == IP_PKTINFO) { @@ -540,6 +613,18 @@ static ssize_t receive_datagram(int fd, void *buf, quicly_address_t *dest, quicl sizeof(dest->sin6.sin6_addr)); dest->sin6.sin6_port = localaddr.sin6.sin6_port; } +#endif +#ifdef IP_RECVTOS + if (cmsg->cmsg_level == IPPROTO_IP && cmsg->cmsg_type == +#ifdef __APPLE__ + IP_RECVTOS +#else + IP_TOS +#endif + ) { + assert((char *)CMSG_DATA(cmsg) - (char *)cmsg + 1 == cmsg->cmsg_len); + *ecn = *(uint8_t *)CMSG_DATA(cmsg) & IPTOS_ECN_MASK; + } #endif } } @@ -548,8 +633,10 @@ static ssize_t receive_datagram(int fd, void *buf, quicly_address_t *dest, quicl } /* in6_pktinfo would be the largest structure among the ones that might be stored */ -static void set_srcaddr(struct cmsghdr *cmsg, quicly_address_t *addr, socklen_t *space) +static void set_srcaddr(struct msghdr *mess, quicly_address_t *addr) { + struct cmsghdr *cmsg = (struct 
cmsghdr *)((char *)mess->msg_control + mess->msg_controllen); + switch (addr->sa.sa_family) { case AF_INET: { #ifdef IP_PKTINFO @@ -558,13 +645,13 @@ static void set_srcaddr(struct cmsghdr *cmsg, quicly_address_t *addr, socklen_t cmsg->cmsg_type = IP_PKTINFO; cmsg->cmsg_len = CMSG_LEN(sizeof(info)); memcpy(CMSG_DATA(cmsg), &info, sizeof(info)); - *space += CMSG_SPACE(sizeof(info)); + mess->msg_controllen += CMSG_SPACE(sizeof(info)); #elif defined(IP_SENDSRCADDR) cmsg->cmsg_level = IPPROTO_IP; cmsg->cmsg_type = IP_SENDSRCADDR; cmsg->cmsg_len = CMSG_LEN(sizeof(addr->sin)); memcpy(CMSG_DATA(cmsg), &addr->sin, sizeof(addr->sin)); - *space += CMSG_SPACE(sizeof(addr->sin)); + mess->msg_controllen += CMSG_SPACE(sizeof(addr->sin)); #else assert(!"FIXME"); #endif @@ -575,7 +662,7 @@ static void set_srcaddr(struct cmsghdr *cmsg, quicly_address_t *addr, socklen_t cmsg->cmsg_type = IPV6_PKTINFO; cmsg->cmsg_len = CMSG_LEN(sizeof(info)); memcpy(CMSG_DATA(cmsg), &info, sizeof(info)); - *space += CMSG_SPACE(sizeof(info)); + mess->msg_controllen += CMSG_SPACE(sizeof(info)); } break; default: assert(!"FIXME"); @@ -583,24 +670,39 @@ static void set_srcaddr(struct cmsghdr *cmsg, quicly_address_t *addr, socklen_t } } -static void send_packets_default(int fd, quicly_address_t *dest, quicly_address_t *src, struct iovec *packets, size_t num_packets) +static void set_ecn(struct msghdr *mess, int ecn) +{ + if (ecn == 0) + return; + + struct cmsghdr *cmsg = (struct cmsghdr *)((char *)mess->msg_control + mess->msg_controllen); + + cmsg->cmsg_level = IPPROTO_IP; + cmsg->cmsg_type = IP_TOS; + cmsg->cmsg_len = CMSG_LEN(sizeof(ecn)); + memcpy(CMSG_DATA(cmsg), &ecn, sizeof(ecn)); + + mess->msg_controllen += CMSG_SPACE(sizeof(ecn)); +} + +static void send_packets_default(int fd, quicly_address_t *dest, quicly_address_t *src, struct iovec *packets, size_t num_packets, + uint8_t ecn) { for (size_t i = 0; i != num_packets; ++i) { - char cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo))]; + char 
cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(int))]; struct msghdr mess = { - .msg_name = &dest->sa, + .msg_name = dest, .msg_namelen = quicly_get_socklen(&dest->sa), .msg_iov = &packets[i], .msg_iovlen = 1, .msg_control = cmsgbuf, - .msg_controllen = sizeof(cmsgbuf), }; - socklen_t cmsglen = 0; if (src != NULL && src->sa.sa_family != AF_UNSPEC) - set_srcaddr(CMSG_FIRSTHDR(&mess), src, &cmsglen); - if (cmsglen == 0) + set_srcaddr(&mess, src); + set_ecn(&mess, ecn); + assert(mess.msg_controllen <= sizeof(cmsgbuf)); + if (mess.msg_controllen == 0) mess.msg_control = NULL; - mess.msg_controllen = cmsglen; if (verbosity >= 2) hexdump("sendmsg", packets[i].iov_base, packets[i].iov_len); int ret; @@ -617,36 +719,35 @@ static void send_packets_default(int fd, quicly_address_t *dest, quicly_address_ #define UDP_SEGMENT 103 #endif -static void send_packets_gso(int fd, quicly_address_t *dest, quicly_address_t *src, struct iovec *packets, size_t num_packets) +static void send_packets_gso(int fd, quicly_address_t *dest, quicly_address_t *src, struct iovec *packets, size_t num_packets, + uint8_t ecn) { struct iovec vec = {.iov_base = (void *)packets[0].iov_base, .iov_len = packets[num_packets - 1].iov_base + packets[num_packets - 1].iov_len - packets[0].iov_base}; - char cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(uint16_t))]; + char cmsgbuf[CMSG_SPACE(sizeof(struct in6_pktinfo)) + CMSG_SPACE(sizeof(uint16_t)) /* UDP_SEGMENT */ + + CMSG_SPACE(sizeof(int)) /* IP_TOS */]; struct msghdr mess = { .msg_name = dest, .msg_namelen = quicly_get_socklen(&dest->sa), .msg_iov = &vec, .msg_iovlen = 1, .msg_control = cmsgbuf, - .msg_controllen = sizeof(cmsgbuf), }; - struct cmsghdr *cmsg = CMSG_FIRSTHDR(&mess); - socklen_t cmsglen = 0; - if (src != NULL && src->sa.sa_family != AF_UNSPEC) { - set_srcaddr(cmsg, src, &cmsglen); - cmsg = CMSG_NXTHDR(&mess, cmsg); - } + if (src != NULL && src->sa.sa_family != AF_UNSPEC) + set_srcaddr(&mess, src); if 
(num_packets != 1) { + struct cmsghdr *cmsg = (struct cmsghdr *)((char *)mess.msg_control + mess.msg_controllen); cmsg->cmsg_level = SOL_UDP; cmsg->cmsg_type = UDP_SEGMENT; cmsg->cmsg_len = CMSG_LEN(sizeof(uint16_t)); *(uint16_t *)CMSG_DATA(cmsg) = packets[0].iov_len; - cmsglen += CMSG_SPACE(sizeof(uint16_t)); + mess.msg_controllen += CMSG_SPACE(sizeof(uint16_t)); } - if (cmsglen == 0) + set_ecn(&mess, ecn); + assert(mess.msg_controllen <= sizeof(cmsgbuf)); + if (mess.msg_controllen == 0) mess.msg_control = NULL; - mess.msg_controllen = cmsglen; int ret; while ((ret = sendmsg(fd, &mess, 0)) == -1 && errno == EINTR) @@ -657,15 +758,15 @@ static void send_packets_gso(int fd, quicly_address_t *dest, quicly_address_t *s #endif -static void (*send_packets)(int, quicly_address_t *, quicly_address_t *, struct iovec *, size_t) = send_packets_default; +static void (*send_packets)(int, quicly_address_t *, quicly_address_t *, struct iovec *, size_t, uint8_t) = send_packets_default; static void send_one_packet(int fd, quicly_address_t *dest, quicly_address_t *src, const void *payload, size_t payload_len) { struct iovec vec = {.iov_base = (void *)payload, .iov_len = payload_len}; - send_packets(fd, dest, src, &vec, 1); + send_packets(fd, dest, src, &vec, 1, 0); } -static int send_pending(int fd, quicly_conn_t *conn) +static int send_pending(struct fdinfo *fds, size_t numfds, quicly_conn_t *conn) { quicly_address_t dest, src; struct iovec packets[MAX_BURST_PACKETS]; @@ -673,8 +774,25 @@ static int send_pending(int fd, quicly_conn_t *conn) size_t num_packets = MAX_BURST_PACKETS; int ret; - if ((ret = quicly_send(conn, &dest, &src, packets, &num_packets, buf, sizeof(buf))) == 0 && num_packets != 0) - send_packets(fd, &dest, &src, packets, num_packets); + if ((ret = quicly_send(conn, &dest, &src, packets, &num_packets, buf, sizeof(buf))) == 0 && num_packets != 0) { + // find fd match by IP and port + size_t fd_index; + if (numfds > 1) { + for (fd_index = 0; fd_index < numfds; 
++fd_index) { + if (tuple_is_equal(&src.sa, &fds[fd_index].localaddr.sa, 1, 1)) + goto Found_FD; + } + for (fd_index = 0; fd_index < numfds; ++fd_index) { + if (tuple_is_equal(&src.sa, &fds[fd_index].localaddr.sa, 0, 1)) + goto Found_FD; + } + fd_index = 0; /* fd zero as last resort */ + Found_FD:; + } else { + fd_index = 0; + } + send_packets(fds[fd_index].fd, &dest, &src, packets, num_packets, quicly_send_get_ecn_bits(conn)); + } return ret; } @@ -714,43 +832,75 @@ static void enqueue_requests(quicly_conn_t *conn) enqueue_requests_at = INT64_MAX; } -static volatile int got_sigusr1 = 0; +static volatile int got_sig_rebind = 0, got_sig_addpath = 0; + +static void on_sig_rebind(int unused) +{ + got_sig_rebind = 1; +} -static void on_sigusr1(int unused) +static void on_sig_addpath(int unused) { - got_sigusr1 = 1; + got_sig_addpath = 1; } -static int run_client(int fd, struct sockaddr *sa, const char *host) +static int run_client(struct sockaddr *sa, const char *host) { - quicly_address_t local; + + struct fdinfo fds[QUICLY_LOCAL_ACTIVE_CONNECTION_ID_LIMIT]; int ret; + size_t numfds; quicly_conn_t *conn = NULL; - signal(SIGUSR1, on_sigusr1); + signal(SIGUSR1, on_sig_rebind); + signal(SIGUSR2, on_sig_addpath); - memset(&local, 0, sizeof(local)); - local.sa.sa_family = sa->sa_family; - if (bind(fd, &local.sa, local.sa.sa_family == AF_INET ? 
sizeof(local.sin) : sizeof(local.sin6)) != 0) { - perror("bind(2) failed"); + if (new_socket(&fds[0], sa->sa_family) == -1) return 1; - } - ret = quicly_connect(&conn, &ctx, host, sa, NULL, &next_cid, resumption_token, &hs_properties, &resumed_transport_params, NULL); + numfds = 1; + + ret = quicly_connect(&conn, &ctx, host, sa, &fds[0].localaddr.sa, &next_cid, resumption_token, &hs_properties, + &resumed_transport_params, NULL); assert(ret == 0); ++next_cid.master_id; enqueue_requests(conn); - send_pending(fd, conn); + send_pending(fds, 1, conn); while (1) { fd_set readfds; + int maxfd; struct timeval *tv, tvbuf; do { - if (got_sigusr1) { - got_sigusr1 = 0; - int newfd = new_socket(local.sa.sa_family); - if (newfd != -1) { - close(fd); - fd = newfd; + if (got_sig_rebind) { + got_sig_rebind = 0; + struct fdinfo newfd; + if (new_socket(&newfd, sa->sa_family) != -1) { + close(fds[0].fd); + fds[0] = newfd; + /* nasty hack that replaces the local port number retained by quicly */ + switch (sa->sa_family) { + case AF_INET: + ((struct sockaddr_in *)quicly_get_sockname(conn))->sin_port = newfd.localaddr.sin.sin_port; + break; + case AF_INET6: + ((struct sockaddr_in6 *)quicly_get_sockname(conn))->sin6_port = newfd.localaddr.sin6.sin6_port; + break; + default: + assert(!"FIXME"); + break; + } + } + } + if (got_sig_addpath && quicly_is_multipath(conn)) { + got_sig_addpath = 0; + struct fdinfo newfd; + if (new_socket(&newfd, sa->sa_family) != -1) { + if (numfds < PTLS_ELEMENTSOF(fds) && quicly_add_path(conn, &newfd.localaddr.sa) == 0) { + fds[numfds++] = newfd; + } else { + fprintf(stderr, "[multipath] failed to add path\n"); + close(newfd.fd); + } } } int64_t timeout_at = conn != NULL ? 
quicly_get_first_timeout(conn) : INT64_MAX; @@ -771,15 +921,22 @@ static int run_client(int fd, struct sockaddr *sa, const char *host) tv = NULL; } FD_ZERO(&readfds); - FD_SET(fd, &readfds); - } while (select(fd + 1, &readfds, NULL, NULL, tv) == -1 && errno == EINTR); + maxfd = 0; + for (size_t i = 0; i < numfds; ++i) { + FD_SET(fds[i].fd, &readfds); + if (maxfd < fds[i].fd) + maxfd = fds[i].fd; + } + } while (select(maxfd + 1, &readfds, NULL, NULL, tv) == -1 && errno == EINTR); if (enqueue_requests_at <= ctx.now->cb(ctx.now)) enqueue_requests(conn); - if (FD_ISSET(fd, &readfds)) { + for (size_t i = 0; i < numfds; ++i) { + if (!FD_ISSET(fds[i].fd, &readfds)) + continue; while (1) { - uint8_t buf[ctx.transport_params.max_udp_payload_size]; + uint8_t buf[ctx.transport_params.max_udp_payload_size], ecn; quicly_address_t dest, src; - ssize_t rret = receive_datagram(fd, buf, &dest, &src); + ssize_t rret = receive_datagram(fds[i].fd, buf, &dest, &src, &ecn); if (rret <= 0) break; if (verbosity >= 2) @@ -789,6 +946,7 @@ static int run_client(int fd, struct sockaddr *sa, const char *host) quicly_decoded_packet_t packet; if (quicly_decode_packet(&ctx, &packet, buf, rret, &off) == SIZE_MAX) break; + packet.ecn = ecn; quicly_receive(conn, &dest.sa, &src.sa, &packet); if (send_datagram_frame && quicly_connection_is_ready(conn)) { const char *message = "hello datagram!"; @@ -800,7 +958,7 @@ static int run_client(int fd, struct sockaddr *sa, const char *host) } } if (conn != NULL) { - ret = send_pending(fd, conn); + ret = send_pending(fds, numfds, conn); if (ret != 0) { ech_save_retry_configs(); quicly_free(conn); @@ -835,38 +993,21 @@ static int validate_token(struct sockaddr *remote, ptls_iovec_t client_cid, ptls quicly_address_token_plaintext_t *token, const char **err_desc) { int64_t age; - int port_is_equal; /* calculate and normalize age */ if ((age = ctx.now->cb(ctx.now) - token->issued_at) < 0) age = 0; /* check address, deferring the use of port number match to 
type-specific checks */ - if (remote->sa_family != token->remote.sa.sa_family) + if (!tuple_is_equal(remote, &token->remote.sa, 1, 0)) goto AddressMismatch; - switch (remote->sa_family) { - case AF_INET: { - struct sockaddr_in *sin = (struct sockaddr_in *)remote; - if (sin->sin_addr.s_addr != token->remote.sin.sin_addr.s_addr) - goto AddressMismatch; - port_is_equal = sin->sin_port == token->remote.sin.sin_port; - } break; - case AF_INET6: { - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)remote; - if (memcmp(&sin6->sin6_addr, &token->remote.sin6.sin6_addr, sizeof(sin6->sin6_addr)) != 0) - goto AddressMismatch; - port_is_equal = sin6->sin6_port == token->remote.sin6.sin6_port; - } break; - default: - goto UnknownAddressType; - } /* type-specific checks */ switch (token->type) { case QUICLY_ADDRESS_TOKEN_TYPE_RETRY: if (age > 30000) goto Expired; - if (!port_is_equal) + if (!tuple_is_equal(remote, &token->remote.sa, 1, 1)) goto AddressMismatch; if (!quicly_cid_is_equal(&token->retry.client_cid, client_cid)) goto CIDMismatch; @@ -890,9 +1031,6 @@ static int validate_token(struct sockaddr *remote, ptls_iovec_t client_cid, ptls AddressMismatch: *err_desc = "token address mismatch"; return 0; -UnknownAddressType: - *err_desc = "unknown address type"; - return 0; Expired: *err_desc = "token expired"; return 0; @@ -901,15 +1039,16 @@ static int validate_token(struct sockaddr *remote, ptls_iovec_t client_cid, ptls return 0; } -static int run_server(int fd, struct sockaddr *sa, socklen_t salen) +static int run_server(struct sockaddr *sa, socklen_t salen) { + struct fdinfo fd; + signal(SIGINT, on_signal); signal(SIGHUP, on_signal); - if (bind(fd, sa, salen) != 0) { - perror("bind(2) failed"); + memcpy(&fd.localaddr.sa, sa, salen); + if (new_socket(&fd, AF_UNSPEC) == -1) return 1; - } while (1) { fd_set readfds; @@ -936,13 +1075,13 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) tv = NULL; } FD_ZERO(&readfds); - FD_SET(fd, &readfds); - } while 
(select(fd + 1, &readfds, NULL, NULL, tv) == -1 && errno == EINTR); - if (FD_ISSET(fd, &readfds)) { + FD_SET(fd.fd, &readfds); + } while (select(fd.fd + 1, &readfds, NULL, NULL, tv) == -1 && errno == EINTR); + if (FD_ISSET(fd.fd, &readfds)) { while (1) { quicly_address_t local, remote; - uint8_t buf[ctx.transport_params.max_udp_payload_size]; - ssize_t rret = receive_datagram(fd, buf, &local, &remote); + uint8_t buf[ctx.transport_params.max_udp_payload_size], ecn; + ssize_t rret = receive_datagram(fd.fd, buf, &local, &remote, &ecn); if (rret == -1) break; if (verbosity >= 2) @@ -952,13 +1091,14 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) quicly_decoded_packet_t packet; if (quicly_decode_packet(&ctx, &packet, buf, rret, &off) == SIZE_MAX) break; + packet.ecn = ecn; if (QUICLY_PACKET_IS_LONG_HEADER(packet.octets.base[0])) { if (packet.version != 0 && !quicly_is_supported_version(packet.version)) { uint8_t payload[ctx.transport_params.max_udp_payload_size]; size_t payload_len = quicly_send_version_negotiation(&ctx, packet.cid.src, packet.cid.dest.encrypted, quicly_supported_versions, payload); assert(payload_len != SIZE_MAX); - send_one_packet(fd, &remote, &local, payload, payload_len); + send_one_packet(fd.fd, &remote, &local, payload, payload_len); break; } /* there is no way to send response to these v1 packets */ @@ -995,7 +1135,7 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) size_t payload_len = quicly_send_close_invalid_token(&ctx, packet.version, packet.cid.src, packet.cid.dest.encrypted, err_desc, payload); assert(payload_len != SIZE_MAX); - send_one_packet(fd, &remote, NULL, payload, payload_len); + send_one_packet(fd.fd, &remote, NULL, payload, payload_len); } } if (enforce_retry && token == NULL && packet.cid.dest.encrypted.len >= 8) { @@ -1010,7 +1150,7 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) ptls_iovec_init(new_server_cid, sizeof(new_server_cid)), packet.cid.dest.encrypted, 
ptls_iovec_init(NULL, 0), ptls_iovec_init(NULL, 0), NULL, payload); assert(payload_len != SIZE_MAX); - send_one_packet(fd, &remote, NULL, payload, payload_len); + send_one_packet(fd.fd, &remote, NULL, payload, payload_len); break; } else { /* new connection */ @@ -1033,7 +1173,7 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) uint8_t payload[ctx.transport_params.max_udp_payload_size]; size_t payload_len = quicly_send_stateless_reset(&ctx, packet.cid.dest.encrypted.base, payload); assert(payload_len != SIZE_MAX); - send_one_packet(fd, &remote, NULL, payload, payload_len); + send_one_packet(fd.fd, &remote, NULL, payload, payload_len); } } } @@ -1043,7 +1183,7 @@ static int run_server(int fd, struct sockaddr *sa, socklen_t salen) size_t i; for (i = 0; i != num_conns; ++i) { if (quicly_get_first_timeout(conns[i]) <= ctx.now->cb(ctx.now)) { - if (send_pending(fd, conns[i]) != 0) { + if (send_pending(&fd, 1, conns[i]) != 0) { dump_stats(stderr, conns[i]); quicly_free(conns[i]); memmove(conns + i, conns + i + 1, (num_conns - i - 1) * sizeof(*conns)); @@ -1261,7 +1401,7 @@ int main(int argc, char **argv) const char *cert_file = NULL, *raw_pubkey_file = NULL, *host, *port, *cid_key = NULL; struct sockaddr_storage sa; socklen_t salen; - int ch, opt_index, fd; + int ch, opt_index; ERR_load_crypto_strings(); OpenSSL_add_all_algorithms(); @@ -1290,8 +1430,11 @@ int main(int argc, char **argv) address_token_aead.dec = ptls_aead_new(&ptls_openssl_aes128gcm, &ptls_openssl_sha256, 0, secret, ""); } - static const struct option longopts[] = { - {"ech-key", required_argument, NULL, 0}, {"ech-configs", required_argument, NULL, 0}, {NULL}}; + static const struct option longopts[] = {{"ech-key", required_argument, NULL, 0}, + {"ech-configs", required_argument, NULL, 0}, + {"disable-ecn", no_argument, NULL, 0}, + {"multipath", no_argument, NULL, 0}, + {NULL}}; while ((ch = getopt_long(argc, argv, "a:b:B:c:C:Dd:k:Ee:f:Gi:I:K:l:M:m:NnOp:P:Rr:S:s:u:U:Vvw:W:x:X:y:h", 
longopts, &opt_index)) != -1) { switch (ch) { @@ -1300,6 +1443,10 @@ int main(int argc, char **argv) ech_setup_key(&tlsctx, optarg); } else if (strcmp(longopts[opt_index].name, "ech-configs") == 0) { ech_setup_configs(optarg); + } else if (strcmp(longopts[opt_index].name, "disable-ecn") == 0) { + ctx.enable_ecn = 0; + } else if (strcmp(longopts[opt_index].name, "multipath") == 0) { + ctx.transport_params.enable_multipath = 1; } else { assert(!"unexpected longname"); } @@ -1606,7 +1753,7 @@ int main(int argc, char **argv) load_session(); hs_properties.client.ech.configs = ech.config_list; hs_properties.client.ech.retry_configs = &ech.retry.configs; - use_cid_encryptor = cid_key != NULL; + use_cid_encryptor = cid_key != NULL || ctx.transport_params.enable_multipath; } if (use_cid_encryptor) { if (cid_key == NULL) { @@ -1627,8 +1774,5 @@ int main(int argc, char **argv) if (resolve_address((void *)&sa, &salen, host, port, AF_INET, SOCK_DGRAM, IPPROTO_UDP) != 0) exit(1); - if ((fd = new_socket(sa.ss_family)) == -1) - return 1; - - return ctx.tls->certificates.count != 0 ? run_server(fd, (void *)&sa, salen) : run_client(fd, (void *)&sa, host); + return ctx.tls->certificates.count != 0 ? run_server((void *)&sa, salen) : run_client((void *)&sa, host); } diff --git a/t/e2e.t b/t/e2e.t index eb3187e7..85c808e5 100755 --- a/t/e2e.t +++ b/t/e2e.t @@ -349,14 +349,14 @@ subtest "raw-certificates-ec" => sub { subtest "path-migration" => sub { my $doit = sub { - my @client_opts = @_; - my $guard = spawn_server("-e", "$tempdir/events"); + my ($client_opts, $server_opts) = @_; + my $guard = spawn_server(@$server_opts, "-e", "$tempdir/events"); # spawn client that sends one request every second, recording events to file my $pid = fork; die "fork failed:$!" 
unless defined $pid; if ($pid == 0) { - exec $cli, @client_opts, qw(-O -i 1000 127.0.0.1), $port; + exec $cli, @$client_opts, qw(-O -i 1000 127.0.0.1), $port; die "exec $cli failed:$!"; } # send two USR1 signals, each of them causing path migration between requests @@ -375,7 +375,7 @@ subtest "path-migration" => sub { like $log, qr{"type":"promote_path".*\n.*"type":"promote_path"}s; subtest "CID seq 1 is used for 1st path probe" => sub { plan skip_all => "zero-length CID" - unless @client_opts; + unless @$client_opts; complex $log, sub { /"type":"new_connection_id_receive",[^\n]*"sequence":1,[^\n]*"cid":"(.*?)"/s; my $cid1 = $1; @@ -385,11 +385,20 @@ subtest "path-migration" => sub { }; }; }; - subtest "without-cid" => sub { - $doit->(); + my $do_set = sub { + my @opts = @_; + subtest "without-cid" => sub { + $doit->(\@opts, \@opts); + }; + subtest "with-cid" => sub { + $doit->([@opts, qw(-B 01234567)], \@opts); + }; + }; + subtest "non-multipath" => sub { + $do_set->(); }; - subtest "with-cid" => sub { - $doit->(qw(-B 01234567)); + subtest "multipath" => sub { + $do_set->("--multipath"); }; }; diff --git a/t/frame.c b/t/frame.c index 297b9614..48b62c27 100644 --- a/t/frame.c +++ b/t/frame.c @@ -28,8 +28,9 @@ static void test_ack_decode_underflow(void) { /* ack pn=0 */ const uint8_t pat[] = {0, 0, 0, 0}, *src = pat; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) == 0); ok(src == pat + sizeof(pat)); + ok(decoded.multipath_cid == UINT64_MAX); ok(decoded.largest_acknowledged == 0); ok(decoded.num_gaps == 0); ok(decoded.ack_block_lengths[0] == 1); @@ -37,13 +38,14 @@ static void test_ack_decode_underflow(void) } { /* underflow in first block length */ const uint8_t pat[] = {0, 0, 0, 1}, *src = pat; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) != 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) 
!= 0); } { /* frame with gap going down to pn=0 */ const uint8_t pat[] = {2, 0, 1, 0, 0, 0}, *src = pat; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) == 0); ok(src == pat + sizeof(pat)); + ok(decoded.multipath_cid == UINT64_MAX); ok(decoded.largest_acknowledged == 2); ok(decoded.num_gaps == 1); ok(decoded.ack_block_lengths[0] == 1); @@ -53,11 +55,11 @@ static void test_ack_decode_underflow(void) { /* additional block length going negative */ const uint8_t pat[] = {2, 0, 1, 0, 0, 1}, *src = pat; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) != 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) != 0); } { /* gap going negative */ const uint8_t pat[] = {2, 0, 1, 0, 3, 0}, *src = pat; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) != 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) != 0); } } @@ -66,7 +68,7 @@ static void test_ack_decode(void) { const uint8_t pat[] = {0x34, 0x00, 0x00, 0x11}, *src = pat; quicly_ack_frame_t decoded; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) == 0); ok(src == pat + sizeof(pat)); ok(decoded.largest_acknowledged == 0x34); ok(decoded.num_gaps == 0); @@ -77,7 +79,7 @@ static void test_ack_decode(void) { const uint8_t pat[] = {0x34, 0x00, 0x02, 0x00, 0x01, 0x02, 0x03, 0x04}, *src = pat; quicly_ack_frame_t decoded; - ok(quicly_decode_ack_frame(&src, pat + sizeof(pat), &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, pat + sizeof(pat), &decoded) == 0); ok(src == pat + sizeof(pat)); ok(decoded.largest_acknowledged == 0x34); ok(decoded.num_gaps == 2); @@ -103,7 +105,7 @@ static void test_ack_decode(void) end = quicly_encodev(end, i % 10); // ack-range } - 
ok(quicly_decode_ack_frame(&src, end, &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, end, &decoded) == 0); ok(decoded.largest_acknowledged == 0xFA00); ok(decoded.ack_delay == 0); ok(decoded.num_gaps == QUICLY_ACK_MAX_GAPS); @@ -127,30 +129,37 @@ static void test_ack_encode(void) uint8_t buf[256], *end; const uint8_t *src; quicly_ack_frame_t decoded; + uint64_t ecn_counts[3] = {}; quicly_ranges_init(&ranges); quicly_ranges_add(&ranges, 0x12, 0x14); /* encode */ - end = quicly_encode_ack_frame(buf, buf + sizeof(buf), &ranges, 63); + end = quicly_encode_ack_frame(buf, buf + sizeof(buf), SIZE_MAX, &ranges, ecn_counts, 63); ok(end - buf == 5); /* decode */ src = buf + 1; - ok(quicly_decode_ack_frame(&src, end, &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK, &src, end, &decoded) == 0); ok(src == end); ok(decoded.ack_delay == 63); ok(decoded.num_gaps == 0); ok(decoded.largest_acknowledged == 0x13); ok(decoded.ack_block_lengths[0] == 2); + ok(decoded.ecn_counts[0] == 0); + ok(decoded.ecn_counts[1] == 0); + ok(decoded.ecn_counts[2] == 0); quicly_ranges_add(&ranges, 0x10, 0x11); + ecn_counts[0] = 12; + ecn_counts[1] = 34; + ecn_counts[2] = 56; /* encode */ - end = quicly_encode_ack_frame(buf, buf + sizeof(buf), &ranges, 63); - ok(end - buf == 7); + end = quicly_encode_ack_frame(buf, buf + sizeof(buf), SIZE_MAX, &ranges, ecn_counts, 63); + ok(end - buf == 10); /* decode */ src = buf + 1; - ok(quicly_decode_ack_frame(&src, end, &decoded, 0) == 0); + ok(quicly_decode_ack_frame(QUICLY_FRAME_TYPE_ACK_ECN, &src, end, &decoded) == 0); ok(src == end); ok(decoded.ack_delay == 63); ok(decoded.num_gaps == 1); @@ -158,6 +167,9 @@ static void test_ack_encode(void) ok(decoded.ack_block_lengths[0] == 2); ok(decoded.gaps[0] == 1); ok(decoded.ack_block_lengths[1] == 1); + ok(decoded.ecn_counts[0] == 12); + ok(decoded.ecn_counts[1] == 34); + ok(decoded.ecn_counts[2] == 56); quicly_ranges_clear(&ranges); } diff --git a/t/local_cid.c 
b/t/local_cid.c index b7132186..d1e42c76 100644 --- a/t/local_cid.c +++ b/t/local_cid.c @@ -156,13 +156,14 @@ void test_local_cid(void) /* retire everything */ int has_pending; - ok(quicly_local_cid_retire(&set, 0, &has_pending) == 0); + struct st_quicly_pn_space_t *space; + ok(quicly_local_cid_retire(&set, 0, &has_pending, &space) == 0); ok(has_pending); - ok(quicly_local_cid_retire(&set, 1, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 1, &has_pending, &space) == 0); ok(has_pending); - ok(quicly_local_cid_retire(&set, 2, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 2, &has_pending, &space) == 0); ok(has_pending); - ok(quicly_local_cid_retire(&set, 3, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 3, &has_pending, &space) == 0); ok(has_pending); ok(count_by_state(&set, QUICLY_LOCAL_CID_STATE_PENDING) == 4); /* partial send */ @@ -175,7 +176,7 @@ void test_local_cid(void) ok(exists_once(&set, 7, QUICLY_LOCAL_CID_STATE_PENDING)); /* retire one in the middle of PENDING CIDs */ - ok(quicly_local_cid_retire(&set, 6, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 6, &has_pending, &space) == 0); ok(has_pending); ok(verify_array(&set) == 0); @@ -189,9 +190,9 @@ void test_local_cid(void) ok(exists_once(&set, 8, QUICLY_LOCAL_CID_STATE_PENDING)); /* at this moment sequence=0,1,2,3,6 have been retired */ - ok(quicly_local_cid_retire(&set, 4, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 4, &has_pending, &space) == 0); ok(has_pending); - ok(quicly_local_cid_retire(&set, 5, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, 5, &has_pending, &space) == 0); ok(has_pending); /* sequence=0-6 have been retired */ @@ -201,9 +202,9 @@ void test_local_cid(void) while (num_retired < QUICLY_MAX_PATH_ID) { if (seq_to_retire == QUICLY_MAX_PATH_ID - 1) { /* this is the maximum CID we can generate -- after retiring it, there should be no CID to send */ - ok(quicly_local_cid_retire(&set, seq_to_retire, &has_pending) == 
QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION); + ok(quicly_local_cid_retire(&set, seq_to_retire, &has_pending, &space) == QUICLY_TRANSPORT_ERROR_PROTOCOL_VIOLATION); } else { - ok(quicly_local_cid_retire(&set, seq_to_retire, &has_pending) == 0); + ok(quicly_local_cid_retire(&set, seq_to_retire, &has_pending, &space) == 0); ok(has_pending); } num_retired++; @@ -229,7 +230,7 @@ void test_local_cid(void) ok(exists_once(&small_set, 1, QUICLY_LOCAL_CID_STATE_PENDING)); ok(exists_once(&small_set, 2, QUICLY_LOCAL_CID_STATE_PENDING)); ok(!exists_once(&small_set, 3, QUICLY_LOCAL_CID_STATE_PENDING)); /* seq=3 should not exist yet */ - ok(quicly_local_cid_retire(&small_set, 0, &has_pending) == 0); + ok(quicly_local_cid_retire(&small_set, 0, &has_pending, &space) == 0); ok(has_pending); ok(exists_once(&small_set, 3, QUICLY_LOCAL_CID_STATE_PENDING)); } diff --git a/t/loss.c b/t/loss.c index 20cced1d..579781e1 100644 --- a/t/loss.c +++ b/t/loss.c @@ -26,7 +26,8 @@ static int64_t now; static uint64_t num_packets_lost = 0; -static void on_loss_detected(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold) +static void on_loss_detected(quicly_loss_t *loss, const quicly_sent_packet_t *lost_packet, int is_time_threshold, + struct st_quicly_conn_t *conn) { ++num_packets_lost; } @@ -36,13 +37,13 @@ static void acked(quicly_loss_t *loss, uint64_t pn, size_t epoch) quicly_sentmap_iter_t iter; const quicly_sent_packet_t *sent; - quicly_loss_init_sentmap_iter(loss, &iter, now, quicly_spec_context.transport_params.max_ack_delay, 0); + quicly_loss_init_sentmap_iter(loss, &iter, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL); while ((sent = quicly_sentmap_get(&iter))->packet_number != pn) { assert(sent->packet_number != UINT64_MAX); quicly_sentmap_skip(&iter); } int64_t sent_at = sent->sent_at; - ok(quicly_sentmap_update(&loss->sentmap, &iter, QUICLY_SENTMAP_EVENT_ACKED) == 0); + ok(quicly_sentmap_update(&loss->sentmap, &iter, 
QUICLY_SENTMAP_EVENT_ACKED, NULL) == 0); quicly_loss_on_ack_received(loss, pn, epoch, now, sent_at, 0, 1); } @@ -60,31 +61,31 @@ static void test_time_detection(void) /* commit 3 packets (pn=0..2); check that loss timer is not active */ ok(quicly_sentmap_prepare(&loss.sentmap, 0, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 1, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 2, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); now += 10; /* receive ack for the 1st packet; check that loss timer is not active */ acked(&loss, 0, QUICLY_EPOCH_INITIAL); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); now += 10; /* receive ack for the 3rd packet; check that loss timer is active */ acked(&loss, 2, QUICLY_EPOCH_INITIAL); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time != INT64_MAX); ok(num_packets_lost == 0); now = loss.loss_time; - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, 
on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); ok(num_packets_lost == 1); @@ -104,25 +105,25 @@ static void test_pn_detection(void) /* commit 4 packets (pn=0..3); check that loss timer is not active */ ok(quicly_sentmap_prepare(&loss.sentmap, 0, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 1, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 2, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 3, now, QUICLY_EPOCH_INITIAL) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); /* receive ack for the 3rd packet; loss timer is activated but no packets are declared as lost */ acked(&loss, 2, QUICLY_EPOCH_INITIAL); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time != INT64_MAX); ok(num_packets_lost == 0); /* receive ack for the 4th packet; loss timer is active and pn=0 is declared lost */ acked(&loss, 3, QUICLY_EPOCH_INITIAL); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, 
on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time != INT64_MAX); ok(num_packets_lost == 1); @@ -145,9 +146,9 @@ static void test_slow_cert_verify(void) /* sent Handshake+1RTT packet */ ok(quicly_sentmap_prepare(&loss.sentmap, 1, now, QUICLY_EPOCH_HANDSHAKE) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 2, now, QUICLY_EPOCH_1RTT) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); last_retransmittable_sent_at = now; quicly_loss_update_alarm(&loss, now, last_retransmittable_sent_at, 1, 0, 1, 0, 1); @@ -155,29 +156,29 @@ static void test_slow_cert_verify(void) /* receive ack for the Handshake packet, but 1RTT packet remains unacknowledged */ acked(&loss, 1, QUICLY_EPOCH_HANDSHAKE); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); ok(num_packets_lost == 0); /* PTO fires */ now = loss.alarm_at; ok(quicly_loss_on_alarm(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, &min_packets_to_send, - &restrict_sending, on_loss_detected) == 0); + &restrict_sending, NULL, on_loss_detected) == 0); ok(restrict_sending); ok(min_packets_to_send == 2); ok(num_packets_lost == 0); /* therefore send probes */ ok(quicly_sentmap_prepare(&loss.sentmap, 3, now, QUICLY_EPOCH_HANDSHAKE) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); ok(quicly_sentmap_prepare(&loss.sentmap, 4, now, QUICLY_EPOCH_1RTT) == 0); - quicly_sentmap_commit(&loss.sentmap, 10, 0); + quicly_sentmap_commit(&loss.sentmap, 10, 0, 0); now += 10; /* again receives 
an ack for the Handshake packet, but 1RTT packet remains unacknowledged */ acked(&loss, 3, QUICLY_EPOCH_HANDSHAKE); - ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, on_loss_detected) == 0); + ok(quicly_loss_detect_loss(&loss, now, quicly_spec_context.transport_params.max_ack_delay, 0, NULL, on_loss_detected) == 0); ok(loss.loss_time == INT64_MAX); ok(num_packets_lost == 0); diff --git a/t/lossy.c b/t/lossy.c index 194e6106..d532c989 100644 --- a/t/lossy.c +++ b/t/lossy.c @@ -403,10 +403,10 @@ static void loss_check_stats(int64_t *time_spent, unsigned max_failures, double printf("fail: %u, times: mean: %.1f, median: %.1f, 90th: %.1f\n", num_failures_in_loss_core, time_mean, time_median, time_90th); ok(num_failures_in_loss_core <= max_failures); - ok(time_mean >= expected_time_mean * 0.6); - ok(time_mean <= expected_time_mean * 1.2); - ok(time_median >= expected_time_median * 0.6); - ok(time_median <= expected_time_median * 1.2); + ok(time_mean >= expected_time_mean / 1.6); + ok(time_mean <= expected_time_mean * 1.6); + ok(time_median >= expected_time_median / 1.6); + ok(time_median <= expected_time_median * 1.6); // ok(time_90th >= expected_time_90th * 0.9); 90th is fragile to errors, we track this as an guarantee ok(time_90th <= expected_time_90th * 1.2); @@ -426,49 +426,49 @@ static void test_downstream(void) subtest("75%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 4, 14193, 3610, 17579); + loss_check_stats(time_spent, 4, 13920.2, 3122, 16912); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 2); subtest("50%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 2220, 608, 2779); + loss_check_stats(time_spent, 0, 1152.3, 726, 1994); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 4); subtest("25%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 228.7, 230, 408); + loss_check_stats(time_spent, 0, 
222, 200, 419); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 10); subtest("10%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 140.2, 80, 298); + loss_check_stats(time_spent, 0, 136.1, 80, 298); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 20); subtest("5%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 99.9, 80, 230); + loss_check_stats(time_spent, 0, 102.1, 80, 190); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 40); subtest("2.5%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 90.8, 80, 80); + loss_check_stats(time_spent, 0, 95.5, 80, 190); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 64); subtest("1.6%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 91.1, 80, 80); + loss_check_stats(time_spent, 0, 88.8, 80, 80); } static void test_bidirectional(void) @@ -483,7 +483,7 @@ static void test_bidirectional(void) subtest("75%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 20, 240012.7, 126541, 652328); + loss_check_stats(time_spent, 26, 280354.4, 114054.5, 692796); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 2); @@ -491,7 +491,7 @@ static void test_bidirectional(void) subtest("50%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 2286.9, 1175, 6424); + loss_check_stats(time_spent, 0, 3374, 1325.5, 4649); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 4); @@ -499,7 +499,7 @@ static void test_bidirectional(void) subtest("25%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 328.7, 237, 530); + loss_check_stats(time_spent, 0, 304.8, 284, 635); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 10); @@ -507,7 +507,7 @@ static void test_bidirectional(void) subtest("10%", loss_core); time_spent[i] = quic_now - 1; } - 
loss_check_stats(time_spent, 0, 150.1, 80, 298); + loss_check_stats(time_spent, 0, 135.6, 80, 298); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 20); @@ -515,7 +515,7 @@ static void test_bidirectional(void) subtest("5%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 103.5, 80, 192); + loss_check_stats(time_spent, 0, 120.3, 80, 200); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 40); @@ -523,7 +523,7 @@ static void test_bidirectional(void) subtest("2.5%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 96.7, 80, 80); + loss_check_stats(time_spent, 0, 90, 80, 119); for (i = 0; i != 100; ++i) { init_cond_rand(&loss_cond_down, 1, 64); @@ -531,7 +531,7 @@ static void test_bidirectional(void) subtest("1.6%", loss_core); time_spent[i] = quic_now - 1; } - loss_check_stats(time_spent, 0, 96.7, 80, 190); + loss_check_stats(time_spent, 0, 85.7, 80, 80); } void test_lossy(void) diff --git a/t/sentmap.c b/t/sentmap.c index ace8ce57..f9387023 100644 --- a/t/sentmap.c +++ b/t/sentmap.c @@ -24,7 +24,8 @@ static int on_acked_callcnt, on_acked_ackcnt; -static int on_acked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent) +static int on_acked(quicly_sentmap_t *map, const quicly_sent_packet_t *packet, int acked, quicly_sent_t *sent, + struct st_quicly_conn_t *conn) { ++on_acked_callcnt; if (acked) @@ -59,7 +60,7 @@ static void test_basic(void) quicly_sentmap_prepare(&map, at * 5 + i, at, QUICLY_EPOCH_INITIAL); quicly_sentmap_allocate(&map, on_acked); quicly_sentmap_allocate(&map, on_acked); - quicly_sentmap_commit(&map, 1, 0); + quicly_sentmap_commit(&map, 1, 0, 0); } } @@ -84,7 +85,7 @@ static void test_basic(void) quicly_sentmap_skip(&iter); assert(quicly_sentmap_get(&iter)->packet_number == 11); while (quicly_sentmap_get(&iter)->packet_number <= 40) - quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_EXPIRED); + 
quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_EXPIRED, NULL); ok(on_acked_callcnt == 30 * 2); ok(on_acked_ackcnt == 0); @@ -115,17 +116,17 @@ static void test_late_ack(void) /* commit pn 1, 2 */ quicly_sentmap_prepare(&map, 1, 0, QUICLY_EPOCH_INITIAL); quicly_sentmap_allocate(&map, on_acked); - quicly_sentmap_commit(&map, 10, 0); + quicly_sentmap_commit(&map, 10, 0, 0); quicly_sentmap_prepare(&map, 2, 0, QUICLY_EPOCH_INITIAL); quicly_sentmap_allocate(&map, on_acked); - quicly_sentmap_commit(&map, 20, 0); + quicly_sentmap_commit(&map, 20, 0, 0); ok(map.bytes_in_flight == 30); /* mark pn 1 as lost */ quicly_sentmap_init_iter(&map, &iter); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 1); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_LOST) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_LOST, NULL) == 0); ok(on_acked_callcnt == 1); ok(on_acked_ackcnt == 0); ok(map.bytes_in_flight == 20); @@ -134,10 +135,10 @@ static void test_late_ack(void) quicly_sentmap_init_iter(&map, &iter); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 1); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED, NULL) == 0); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 2); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED, NULL) == 0); ok(on_acked_callcnt == 3); ok(on_acked_ackcnt == 2); ok(map.bytes_in_flight == 0); @@ -159,17 +160,17 @@ static void test_pto(void) /* commit pn 1, 2 */ quicly_sentmap_prepare(&map, 1, 0, QUICLY_EPOCH_INITIAL); quicly_sentmap_allocate(&map, on_acked); - quicly_sentmap_commit(&map, 10, 0); + quicly_sentmap_commit(&map, 10, 0, 0); quicly_sentmap_prepare(&map, 2, 0, QUICLY_EPOCH_INITIAL); quicly_sentmap_allocate(&map, on_acked); - quicly_sentmap_commit(&map, 20, 0); + quicly_sentmap_commit(&map, 
20, 0, 0); ok(map.bytes_in_flight == 30); /* mark pn 1 for PTO */ quicly_sentmap_init_iter(&map, &iter); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 1); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_PTO) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_PTO, NULL) == 0); ok(on_acked_callcnt == 1); ok(on_acked_ackcnt == 0); ok(map.bytes_in_flight == 30); @@ -178,10 +179,10 @@ static void test_pto(void) quicly_sentmap_init_iter(&map, &iter); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 1); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED, NULL) == 0); sent = quicly_sentmap_get(&iter); assert(sent->packet_number == 2); - ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED) == 0); + ok(quicly_sentmap_update(&map, &iter, QUICLY_SENTMAP_EVENT_ACKED, NULL) == 0); ok(on_acked_callcnt == 3); ok(on_acked_ackcnt == 2); ok(map.bytes_in_flight == 0); diff --git a/t/test.c b/t/test.c index 1f34b1f2..d233df36 100644 --- a/t/test.c +++ b/t/test.c @@ -516,65 +516,65 @@ static void do_test_record_receipt(size_t epoch) struct st_quicly_pn_space_t *space = alloc_pn_space(sizeof(*space), epoch == QUICLY_EPOCH_1RTT ? 
QUICLY_DEFAULT_PACKET_TOLERANCE : 1); uint64_t pn = 0, out_of_order_cnt = 0; - int64_t now = 12345, send_ack_at = INT64_MAX; + int64_t now = 12345; if (epoch == QUICLY_EPOCH_1RTT) { /* 2nd packet triggers an ack */ - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now + QUICLY_DELAYED_ACK_TIMEOUT); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now + QUICLY_DELAYED_ACK_TIMEOUT); now += 1; - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now); now += 1; } else { /* every packet triggers an ack */ - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now); now += 1; } /* reset */ space->unacked_count = 0; - send_ack_at = INT64_MAX; + space->send_ack_at = INT64_MAX; /* ack-only packets do not elicit an ack */ - ok(record_receipt(space, pn++, 1, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == INT64_MAX); + ok(record_receipt(space, pn++, 0, 1, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == INT64_MAX); now += 1; - ok(record_receipt(space, pn++, 1, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == INT64_MAX); + ok(record_receipt(space, pn++, 0, 1, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == INT64_MAX); now += 1; pn++; /* gap */ - ok(record_receipt(space, pn++, 1, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == INT64_MAX); + ok(record_receipt(space, pn++, 0, 1, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == INT64_MAX); now += 1; - ok(record_receipt(space, pn++, 1, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == INT64_MAX); + ok(record_receipt(space, pn++, 
0, 1, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == INT64_MAX); now += 1; /* gap triggers an ack */ pn += 1; /* gap */ - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now); now += 1; /* reset */ space->unacked_count = 0; - send_ack_at = INT64_MAX; + space->send_ack_at = INT64_MAX; /* if 1-RTT, test ignore-order */ if (epoch == QUICLY_EPOCH_1RTT) { space->ignore_order = 1; pn++; /* gap */ - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now + QUICLY_DELAYED_ACK_TIMEOUT); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now + QUICLY_DELAYED_ACK_TIMEOUT); now += 1; - ok(record_receipt(space, pn++, 0, now, &send_ack_at, &out_of_order_cnt) == 0); - ok(send_ack_at == now); + ok(record_receipt(space, pn++, 0, 0, 0, now, &out_of_order_cnt) == 0); + ok(space->send_ack_at == now); now += 1; } - do_free_pn_space(space); + free_pn_space(space); } static void test_record_receipt(void) @@ -716,6 +716,13 @@ static void test_set_cc(void) ok(strcmp(stats.cc.type->name, "reno") == 0); } +void test_ecn_index_from_bits(void) +{ + ok(get_ecn_index_from_bits(1) == 1); + ok(get_ecn_index_from_bits(2) == 0); + ok(get_ecn_index_from_bits(3) == 2); +} + int main(int argc, char **argv) { static ptls_iovec_t cert; @@ -791,6 +798,7 @@ int main(int argc, char **argv) subtest("lossy", test_lossy); subtest("test-nondecryptable-initial", test_nondecryptable_initial); subtest("set_cc", test_set_cc); + subtest("ecn-index-from-bits", test_ecn_index_from_bits); return done_testing(); }