From aa8d3bafb73c9e6a4207b36a06836e4514d7a1cc Mon Sep 17 00:00:00 2001 From: Kazuho Oku Date: Sat, 28 Aug 2021 10:29:11 +0900 Subject: [PATCH] [sentmap] split packet-level and frame-level structure. Doing so improves the iteration speed at the cost of memory footprint. Iteration speed becomes an issue when ACKs with many gaps are registered, as one ack range occupies one element of the frame-level structure. --- include/quicly/sentmap.h | 149 +++++++++++++++++++-------------------- lib/sentmap.c | 127 +++++++++++++++++++++++---------- t/sentmap.c | 4 +- 3 files changed, 162 insertions(+), 118 deletions(-) diff --git a/include/quicly/sentmap.h b/include/quicly/sentmap.h index 20a6bfb3b..7535d6991 100644 --- a/include/quicly/sentmap.h +++ b/include/quicly/sentmap.h @@ -34,52 +34,7 @@ extern "C" { typedef struct st_quicly_sent_t quicly_sent_t; typedef struct st_quicly_sentmap_t quicly_sentmap_t; - -typedef struct st_quicly_sent_packet_t { - /** - * - */ - uint64_t packet_number; - /** - * - */ - int64_t sent_at; - /** - * epoch to be acked in - */ - uint8_t ack_epoch; - /** - * - */ - uint8_t ack_eliciting : 1; - /** - * if the frames being contained are considered inflight (becomes zero when deemed lost or when PTO fires) - */ - uint8_t frames_in_flight : 1; - /** - * number of bytes in-flight for the packet, from the context of CC (becomes zero when deemed lost, but not when PTO fires) - */ - uint16_t cc_bytes_in_flight; -} quicly_sent_packet_t; - -typedef enum en_quicly_sentmap_event_t { - /** - * a packet has been acked - */ - QUICLY_SENTMAP_EVENT_ACKED, - /** - * PTO - the packet is still considered inflight, but the contents of the frames are scheduled for retransmission - */ - QUICLY_SENTMAP_EVENT_PTO, - /** - * a packet is deemed lost - */ - QUICLY_SENTMAP_EVENT_LOST, - /** - * a packet is being removed from the sentmap (e.g., after 3 pto, the epoch being discarded) - */ - QUICLY_SENTMAP_EVENT_EXPIRED -} quicly_sentmap_event_t; +typedef struct 
st_quicly_sent_packet_t quicly_sent_packet_t; /** * Callback called when a frame is either acknowledged or deemed lost. When there is a late ACK, an entry will get marked as acked @@ -94,7 +49,6 @@ typedef int (*quicly_sent_acked_cb)(quicly_sentmap_t *map, const quicly_sent_pac struct st_quicly_sent_t { quicly_sent_acked_cb acked; union { - quicly_sent_packet_t packet; struct { quicly_range_t range; } ack; @@ -140,6 +94,66 @@ struct st_quicly_sent_t { } data; }; +struct st_quicly_sent_packet_t { + /** + * + */ + uint64_t packet_number; + /** + * + */ + int64_t sent_at; + /** + * epoch to be acked in + */ + uint8_t ack_epoch; + /** + * + */ + uint8_t ack_eliciting : 1; + /** + * if the frames being contained are considered inflight (becomes zero when deemed lost or when PTO fires) + */ + uint8_t frames_in_flight : 1; + /** + * number of bytes in-flight for the packet, from the context of CC (becomes zero when deemed lost, but not when PTO fires) + */ + uint16_t cc_bytes_in_flight; + /** + * + */ + uint16_t num_frames; + /** + * + */ + union { + quicly_sent_t embedded[3]; + struct { + quicly_sent_t *base; + size_t capacity; + } detached; + } _frames; +}; + +typedef enum en_quicly_sentmap_event_t { + /** + * a packet has been acked + */ + QUICLY_SENTMAP_EVENT_ACKED, + /** + * PTO - the packet is still considered inflight, but the contents of the frames are scheduled for retransmission + */ + QUICLY_SENTMAP_EVENT_PTO, + /** + * a packet is deemed lost + */ + QUICLY_SENTMAP_EVENT_LOST, + /** + * a packet is being removed from the sentmap (e.g., after 3 pto, the epoch being discarded) + */ + QUICLY_SENTMAP_EVENT_EXPIRED +} quicly_sentmap_event_t; + struct st_quicly_sent_block_t { /** * next block if exists (or NULL) @@ -156,7 +170,7 @@ struct st_quicly_sent_block_t { /** * slots */ - quicly_sent_t entries[16]; + quicly_sent_packet_t entries[16]; }; /** @@ -194,16 +208,16 @@ struct st_quicly_sentmap_t { /** * is non-NULL between prepare and commit, pointing to the packet 
header that is being written to */ - quicly_sent_t *_pending_packet; + quicly_sent_packet_t *_pending_packet; }; typedef struct st_quicly_sentmap_iter_t { - quicly_sent_t *p; + quicly_sent_packet_t *p; size_t count; struct st_quicly_sent_block_t **ref; } quicly_sentmap_iter_t; -extern const quicly_sent_t quicly_sentmap__end_iter; +extern const quicly_sent_packet_t quicly_sentmap__end_iter; /** * initializes the sentmap @@ -229,7 +243,7 @@ static void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_fligh /** * Allocates a slot to contain a callback for a frame. The function MUST be called after _prepare but before _commit. */ -static quicly_sent_t *quicly_sentmap_allocate(quicly_sentmap_t *map, quicly_sent_acked_cb acked); +quicly_sent_t *quicly_sentmap_allocate(quicly_sentmap_t *map, quicly_sent_acked_cb acked); /** * initializes the iterator @@ -268,53 +282,34 @@ inline void quicly_sentmap_commit(quicly_sentmap_t *map, uint16_t bytes_in_fligh assert(quicly_sentmap_is_open(map)); if (bytes_in_flight != 0) { - map->_pending_packet->data.packet.ack_eliciting = 1; - map->_pending_packet->data.packet.cc_bytes_in_flight = bytes_in_flight; + map->_pending_packet->ack_eliciting = 1; + map->_pending_packet->cc_bytes_in_flight = bytes_in_flight; map->bytes_in_flight += bytes_in_flight; } - map->_pending_packet->data.packet.frames_in_flight = 1; + map->_pending_packet->frames_in_flight = 1; map->_pending_packet = NULL; ++map->num_packets; } -inline quicly_sent_t *quicly_sentmap_allocate(quicly_sentmap_t *map, quicly_sent_acked_cb acked) -{ - struct st_quicly_sent_block_t *block; - - if ((block = map->tail) == NULL || block->next_insert_at == PTLS_ELEMENTSOF(block->entries)) { - if ((block = quicly_sentmap__new_block(map)) == NULL) - return NULL; - } - - quicly_sent_t *sent = block->entries + block->next_insert_at++; - ++block->num_entries; - - sent->acked = acked; - - return sent; -} - inline void quicly_sentmap_init_iter(quicly_sentmap_t *map, 
quicly_sentmap_iter_t *iter) { /* set up the iterator */ iter->ref = &map->head; if (map->head != NULL) { assert(map->head->num_entries != 0); - for (iter->p = map->head->entries; iter->p->acked == NULL; ++iter->p) + for (iter->p = map->head->entries; iter->p->ack_epoch == UINT8_MAX; ++iter->p) ; - assert(iter->p->acked == quicly_sentmap__type_packet); iter->count = map->head->num_entries; } else { - iter->p = (quicly_sent_t *)&quicly_sentmap__end_iter; + iter->p = (quicly_sent_packet_t *)&quicly_sentmap__end_iter; iter->count = 0; } } inline const quicly_sent_packet_t *quicly_sentmap_get(quicly_sentmap_iter_t *iter) { - assert(iter->p->acked == quicly_sentmap__type_packet); - return &iter->p->data.packet; + return iter->p; } #ifdef __cplusplus diff --git a/lib/sentmap.c b/lib/sentmap.c index 64b2bfdd1..1a6122330 100644 --- a/lib/sentmap.c +++ b/lib/sentmap.c @@ -24,14 +24,14 @@ #include "picotls.h" #include "quicly/sentmap.h" -const quicly_sent_t quicly_sentmap__end_iter = {quicly_sentmap__type_packet, {{UINT64_MAX, INT64_MAX}}}; +const quicly_sent_packet_t quicly_sentmap__end_iter = {UINT64_MAX, INT64_MAX}; static void next_entry(quicly_sentmap_iter_t *iter) { if (--iter->count != 0) { ++iter->p; } else if (*(iter->ref = &(*iter->ref)->next) == NULL) { - iter->p = (quicly_sent_t *)&quicly_sentmap__end_iter; + iter->p = (quicly_sent_packet_t *)&quicly_sentmap__end_iter; iter->count = 0; return; } else { @@ -39,7 +39,7 @@ static void next_entry(quicly_sentmap_iter_t *iter) iter->count = (*iter->ref)->num_entries; iter->p = (*iter->ref)->entries; } - while (iter->p->acked == NULL) + while (iter->p->ack_epoch == UINT8_MAX) ++iter->p; } @@ -49,6 +49,8 @@ static struct st_quicly_sent_block_t **free_block(quicly_sentmap_t *map, struct static const struct st_quicly_sent_block_t *const dummy_ref = &dummy; struct st_quicly_sent_block_t *block = *ref; + assert(block->num_entries == 0); + if (block->next != NULL) { *ref = block->next; assert((*ref)->num_entries != 0); @@ 
-70,8 +72,10 @@ static struct st_quicly_sent_block_t **free_block(quicly_sentmap_t *map, struct static void discard_entry(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter) { - assert(iter->p->acked != NULL); - iter->p->acked = NULL; + assert(iter->p->ack_epoch != UINT8_MAX); + iter->p->ack_epoch = UINT8_MAX; + if (iter->p->num_frames > PTLS_ELEMENTSOF(iter->p->_frames.embedded)) + free(iter->p->_frames.detached.base); struct st_quicly_sent_block_t *block = *iter->ref; if (--block->num_entries == 0) { @@ -84,24 +88,70 @@ static void discard_entry(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter) void quicly_sentmap_dispose(quicly_sentmap_t *map) { - struct st_quicly_sent_block_t *block; + quicly_sentmap_iter_t iter; - while ((block = map->head) != NULL) { - map->head = block->next; - free(block); + quicly_sentmap_init_iter(map, &iter); + + while (iter.p->packet_number != UINT64_MAX) { + discard_entry(map, &iter); + --map->num_packets; + next_entry(&iter); } + + assert(map->num_packets == 0); + assert(map->head == NULL); } int quicly_sentmap_prepare(quicly_sentmap_t *map, uint64_t packet_number, int64_t now, uint8_t ack_epoch) { assert(map->_pending_packet == NULL); - if ((map->_pending_packet = quicly_sentmap_allocate(map, quicly_sentmap__type_packet)) == NULL) - return PTLS_ERROR_NO_MEMORY; - map->_pending_packet->data.packet = (quicly_sent_packet_t){packet_number, now, ack_epoch}; + struct st_quicly_sent_block_t *block; + + if ((block = map->tail) == NULL || block->next_insert_at == PTLS_ELEMENTSOF(block->entries)) { + if ((block = quicly_sentmap__new_block(map)) == NULL) + return PTLS_ERROR_NO_MEMORY; + } + + map->_pending_packet = block->entries + block->next_insert_at++; + ++block->num_entries; + + *map->_pending_packet = (quicly_sent_packet_t){packet_number, now, ack_epoch}; + return 0; } +quicly_sent_t *quicly_sentmap_allocate(quicly_sentmap_t *map, quicly_sent_acked_cb acked) +{ + quicly_sent_packet_t *packet = map->_pending_packet; + quicly_sent_t 
*sent; + + if (packet->num_frames < PTLS_ELEMENTSOF(packet->_frames.embedded)) { + sent = packet->_frames.embedded + packet->num_frames++; + } else { + if (packet->num_frames == PTLS_ELEMENTSOF(packet->_frames.embedded)) { + quicly_sent_t *frames; + size_t capacity = PTLS_ELEMENTSOF(packet->_frames.embedded) * 2; + if ((frames = malloc(sizeof(*frames) * capacity)) == NULL) + return NULL; + memcpy(frames, packet->_frames.embedded, sizeof(packet->_frames.embedded)); + packet->_frames.detached.base = frames; + packet->_frames.detached.capacity = capacity; + } else if (packet->num_frames == packet->_frames.detached.capacity) { + quicly_sent_t *frames; + size_t capacity = packet->_frames.detached.capacity * 2; + if ((frames = realloc(packet->_frames.detached.base, sizeof(*frames) * capacity)) == NULL) + return NULL; + packet->_frames.detached.base = frames; + packet->_frames.detached.capacity = capacity; + } + sent = packet->_frames.detached.base + packet->num_frames++; + } + + sent->acked = acked; + return sent; +} + struct st_quicly_sent_block_t *quicly_sentmap__new_block(quicly_sentmap_t *map) { struct st_quicly_sent_block_t *block; @@ -124,46 +174,45 @@ struct st_quicly_sent_block_t *quicly_sentmap__new_block(quicly_sentmap_t *map) void quicly_sentmap_skip(quicly_sentmap_iter_t *iter) { - do { - next_entry(iter); - } while (iter->p->acked != quicly_sentmap__type_packet); + next_entry(iter); } int quicly_sentmap_update(quicly_sentmap_t *map, quicly_sentmap_iter_t *iter, quicly_sentmap_event_t event) { - quicly_sent_packet_t packet; - int ret = 0; + quicly_sent_packet_t *packet = iter->p; + int clear_cc_bytes_in_flight = 0, ret = 0; - assert(iter->p != &quicly_sentmap__end_iter); - assert(iter->p->acked == quicly_sentmap__type_packet); - - /* copy packet info */ - packet = iter->p->data.packet; + assert(packet != &quicly_sentmap__end_iter); + assert(packet->ack_epoch != UINT8_MAX); /* update CC state unless the event is PTO */ - if (packet.cc_bytes_in_flight != 0 && 
event != QUICLY_SENTMAP_EVENT_PTO) { - assert(map->bytes_in_flight >= packet.cc_bytes_in_flight); - map->bytes_in_flight -= packet.cc_bytes_in_flight; - iter->p->data.packet.cc_bytes_in_flight = 0; + if (packet->cc_bytes_in_flight != 0 && event != QUICLY_SENTMAP_EVENT_PTO) { + assert(map->bytes_in_flight >= packet->cc_bytes_in_flight); + map->bytes_in_flight -= packet->cc_bytes_in_flight; + clear_cc_bytes_in_flight = 1; } - iter->p->data.packet.frames_in_flight = 0; - int should_notify = event == QUICLY_SENTMAP_EVENT_ACKED || packet.frames_in_flight, - should_discard = event == QUICLY_SENTMAP_EVENT_ACKED || event == QUICLY_SENTMAP_EVENT_EXPIRED; + /* invoke the frame-level callbacks when the frames are inflight or if it has been late-acked */ + if (event == QUICLY_SENTMAP_EVENT_ACKED || packet->frames_in_flight) { + quicly_sent_t *frames = packet->num_frames <= PTLS_ELEMENTSOF(packet->_frames.embedded) ? packet->_frames.embedded + : packet->_frames.detached.base; + for (size_t i = 0; i < packet->num_frames; ++i) { + quicly_sent_t *sent = frames + i; + if ((ret = sent->acked(map, packet, event == QUICLY_SENTMAP_EVENT_ACKED, sent)) != 0) + goto Exit; + } + } - /* Advance to next packet, while if necessary, doing either or both of the following: - * * discard entries (if should_discard is set) - * * invoke the frame-level callbacks (if should_notify is set) */ - if (should_discard) { + if (event == QUICLY_SENTMAP_EVENT_ACKED || event == QUICLY_SENTMAP_EVENT_EXPIRED) { discard_entry(map, iter); --map->num_packets; + } else { + if (clear_cc_bytes_in_flight) + packet->cc_bytes_in_flight = 0; + packet->frames_in_flight = 0; } - for (next_entry(iter); iter->p->acked != quicly_sentmap__type_packet; next_entry(iter)) { - if (should_notify && (ret = iter->p->acked(map, &packet, event == QUICLY_SENTMAP_EVENT_ACKED, iter->p)) != 0) - goto Exit; - if (should_discard) - discard_entry(map, iter); - } + + next_entry(iter); Exit: return ret; diff --git a/t/sentmap.c b/t/sentmap.c 
index 2aaeaedab..4d94458a4 100644 --- a/t/sentmap.c +++ b/t/sentmap.c @@ -76,7 +76,7 @@ static void test_basic(void) } } ok(quicly_sentmap_get(&iter)->packet_number == UINT64_MAX); - ok(num_blocks(&map) == 150 / 16 + 1); + ok(num_blocks(&map) == 50 / 16 + 1); /* pop acks between 11 <= packet_number <= 40 */ quicly_sentmap_init_iter(&map, &iter); @@ -96,7 +96,7 @@ static void test_basic(void) ++cnt; } ok(cnt == 20); - ok(num_blocks(&map) == 30 / 16 + 1 + 1 + 30 / 16 + 1); + ok(num_blocks(&map) == 10 / 16 + 1 + 1 + 10 / 16 + 1); quicly_sentmap_dispose(&map); }