diff --git a/xen/common/argo.c b/xen/common/argo.c
index 1537fcc89846..13f7fd541aec 100644
--- a/xen/common/argo.c
+++ b/xen/common/argo.c
@@ -30,6 +30,7 @@
 #include
 
 #define MAX_RINGS_PER_DOMAIN            128U
+#define MAX_NOTIFY_COUNT                256U
 #define MAX_PENDING_PER_RING             32U
 
 /* All messages on the ring are padded to a multiple of the slot size. */
@@ -49,6 +50,8 @@ DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
+DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_t);
+DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
 
@@ -411,6 +414,18 @@ signal_domain(struct domain *d)
     send_guest_global_virq(d, VIRQ_ARGO_MESSAGE);
 }
 
+static void
+signal_domid(domid_t domain_id)
+{
+    struct domain *d = get_domain_by_id(domain_id);
+
+    if ( !d )
+        return;
+
+    signal_domain(d);
+    put_domain(d);
+}
+
 static void
 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
 {
@@ -599,6 +614,66 @@ get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
     return 0;
 }
 
+static unsigned int
+ringbuf_payload_space(const struct domain *d, struct argo_ring_info *ring_info)
+{
+    xen_argo_ring_t ring;
+    unsigned int len;
+    int ret;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    len = ring_info->len;
+    if ( !len )
+        return 0;
+
+    if ( get_sanitized_ring(d, &ring, ring_info) )
+        return 0;
+
+    argo_dprintk("sanitized ringbuf_payload_space: tx_ptr=%u rx_ptr=%u\n",
+                 ring.tx_ptr, ring.rx_ptr);
+
+    /*
+     * rx_ptr == tx_ptr means that the ring has been emptied.
+     * See message size checking logic in the entry to ringbuf_insert which
+     * ensures that there is always one message slot of size ROUNDUP_MESSAGE(1)
+     * left available, preventing a ring from being entirely filled.
+     * This ensures that matching ring indexes always indicate an empty ring
+     * and never a full one.
+     */
+    ret = ring.rx_ptr - ring.tx_ptr;
+    if ( ret <= 0 )
+        ret += len;
+
+    /*
+     * In a sanitized ring, we can rely on:
+     *              (rx_ptr < ring_info->len)           &&
+     *              (tx_ptr < ring_info->len)           &&
+     *      (ring_info->len <= XEN_ARGO_MAX_RING_SIZE)
+     *
+     * and since: XEN_ARGO_MAX_RING_SIZE < INT32_MAX
+     * therefore right here: ret < INT32_MAX
+     * and we are safe to return it as an unsigned value from this function.
+     * The subtractions below cannot increase its value.
+     */
+
+    /*
+     * The maximum size payload for a message that will be accepted is:
+     * (the available space between the ring indexes)
+     *    minus (space for a message header)
+     *    minus (space for one message slot)
+     * since ringbuf_insert requires that one message slot be left
+     * unfilled, to avoid filling the ring to capacity and confusing a full
+     * ring with an empty one.
+     * Since the ring indexes are sanitized, the value in ret is aligned, so
+     * the simple subtraction here works to return the aligned value needed:
+     */
+    ret -= sizeof(struct xen_argo_ring_message_header);
+    ret -= ROUNDUP_MESSAGE(1);
+
+    return (ret < 0) ? 0 : ret;
+}
+
 /*
  * iov_count returns its count on success via an out variable to avoid
  * potential for a negative return value to be used incorrectly
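[Illustration, not part of the patch: the arithmetic in ringbuf_payload_space() can be checked outside the hypervisor. A minimal standalone sketch of the same calculation follows, using hypothetical sizes — a 16-byte header matching the layout of struct xen_argo_ring_message_header, and the 16-byte XEN_ARGO_MSG_SLOT_SIZE.]

    #include <stdio.h>

    #define SLOT_SIZE 16   /* one message slot: XEN_ARGO_MSG_SLOT_SIZE */
    #define HDR_SIZE  16   /* assumed size of the message header */

    static unsigned int payload_space(unsigned int len, unsigned int tx_ptr,
                                      unsigned int rx_ptr)
    {
        int ret = (int)rx_ptr - (int)tx_ptr;

        /* Equal indexes always mean empty, never full: count the whole ring. */
        if ( ret <= 0 )
            ret += (int)len;

        ret -= HDR_SIZE;    /* room for one message header */
        ret -= SLOT_SIZE;   /* the one slot that is always left unfilled */

        return (ret < 0) ? 0 : (unsigned int)ret;
    }

    int main(void)
    {
        /* Emptied 4096-byte ring (rx == tx): 4096 - 16 - 16 = 4064 bytes. */
        printf("%u\n", payload_space(4096, 128, 128));
        return 0;
    }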
@@ -933,6 +1008,61 @@ pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
     ring_info->npending = 0;
 }
 
+static void
+pending_notify(struct hlist_head *to_notify)
+{
+    struct hlist_node *node, *next;
+    struct pending_ent *ent;
+
+    ASSERT(LOCKING_Read_L1);
+
+    hlist_for_each_entry_safe(ent, node, next, to_notify, node)
+    {
+        hlist_del(&ent->node);
+        signal_domid(ent->domain_id);
+        xfree(ent);
+    }
+}
+
+static void
+pending_find(const struct domain *d, struct argo_ring_info *ring_info,
+             unsigned int payload_space, struct hlist_head *to_notify)
+{
+    struct hlist_node *node, *next;
+    struct pending_ent *ent;
+
+    ASSERT(LOCKING_Read_rings_L2(d));
+
+    /*
+     * TODO: Current policy here is to signal _all_ of the waiting domains
+     * interested in sending a message of size less than payload_space.
+     *
+     * This is likely to be suboptimal, since once one of them has added
+     * their message to the ring, there may well be insufficient room
+     * available for any of the others to transmit, meaning that they were
+     * woken in vain, which created extra work just to requeue their wait.
+     *
+     * Retain this simple policy for now since it at least avoids starving a
+     * domain of available space notifications because of a policy that only
+     * notified other domains instead. Improvement may be possible;
+     * investigation required.
+     */
+
+    spin_lock(&ring_info->L3_lock);
+    hlist_for_each_entry_safe(ent, node, next, &ring_info->pending, node)
+    {
+        if ( payload_space >= ent->len )
+        {
+            if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
+                wildcard_pending_list_remove(ent->domain_id, ent);
+            hlist_del(&ent->node);
+            ring_info->npending--;
+            hlist_add_head(&ent->node, to_notify);
+        }
+    }
+    spin_unlock(&ring_info->L3_lock);
+}
+
 static int
 pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
               domid_t src_id, unsigned int len)
@@ -993,6 +1123,28 @@ pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
     return pending_queue(d, ring_info, src_id, len);
 }
 
+static void
+pending_cancel(const struct domain *d, struct argo_ring_info *ring_info,
+               domid_t src_id)
+{
+    struct hlist_node *node, *next;
+    struct pending_ent *ent;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    hlist_for_each_entry_safe(ent, node, next, &ring_info->pending, node)
+    {
+        if ( ent->domain_id == src_id )
+        {
+            if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
+                wildcard_pending_list_remove(ent->domain_id, ent);
+            hlist_del(&ent->node);
+            xfree(ent);
+            ring_info->npending--;
+        }
+    }
+}
+
 static void
 wildcard_rings_pending_remove(struct domain *d)
 {
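[Illustration, not part of the patch: pending_find() and pending_notify() deliberately split the work into a collect phase, done while holding the ring's L3 lock, and a signal phase run afterwards without any per-ring lock, since signalling takes domain references. A generic userspace sketch of the same collect-then-signal pattern — every name here is hypothetical, not a Xen API:]

    #include <pthread.h>
    #include <stdlib.h>

    struct waiter {
        struct waiter *next;
        unsigned int len;    /* space this waiter asked for */
        int id;              /* whom to signal */
    };

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static struct waiter *pending_list;

    static void signal_waiter(int id) { (void)id; /* e.g. raise an event */ }

    static void notify_waiters(unsigned int space_avail)
    {
        struct waiter *w, **pw, *to_notify = NULL;

        pthread_mutex_lock(&lock);
        for ( pw = &pending_list; (w = *pw) != NULL; )
        {
            if ( space_avail >= w->len )
            {
                *pw = w->next;         /* unlink under the lock... */
                w->next = to_notify;   /* ...onto a private list */
                to_notify = w;
            }
            else
                pw = &w->next;
        }
        pthread_mutex_unlock(&lock);

        while ( to_notify )            /* signal with the lock dropped */
        {
            w = to_notify;
            to_notify = w->next;
            signal_waiter(w->id);
            free(w);
        }
    }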
@@ -1120,6 +1272,88 @@ partner_rings_remove(struct domain *src_d)
  * FIXME for 4.12: investigate using check_get_page_from_gfn()
  * and rewrite this function using it or with adopted logic
  */
+static int
+fill_ring_data(const struct domain *currd,
+               XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) data_ent_hnd)
+{
+    xen_argo_ring_data_ent_t ent;
+    struct domain *dst_d;
+    struct argo_ring_info *ring_info;
+
+    ASSERT(currd == current->domain);
+    ASSERT(LOCKING_Read_L1);
+
+    if ( __copy_from_guest(&ent, data_ent_hnd, 1) )
+        return -EFAULT;
+
+    argo_dprintk("fill_ring_data: ent.ring.domain=%u,ent.ring.aport=%x\n",
+                 ent.ring.domain_id, ent.ring.aport);
+
+    ent.flags = 0;
+
+    dst_d = get_domain_by_id(ent.ring.domain_id);
+    if ( dst_d )
+    {
+        if ( dst_d->argo )
+        {
+            read_lock(&dst_d->argo->rings_L2_rwlock);
+
+            ring_info = find_ring_info_by_match(dst_d, ent.ring.aport,
+                                                currd->domain_id);
+            if ( ring_info )
+            {
+                unsigned int space_avail;
+
+                ent.flags |= XEN_ARGO_RING_DATA_F_EXISTS;
+                ent.max_message_size = ring_info->len -
+                                sizeof(struct xen_argo_ring_message_header) -
+                                ROUNDUP_MESSAGE(1);
+
+                if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
+                    ent.flags |= XEN_ARGO_RING_DATA_F_SHARED;
+
+                spin_lock(&ring_info->L3_lock);
+
+                space_avail = ringbuf_payload_space(dst_d, ring_info);
+
+                argo_dprintk("fill_ring_data: aport=%x space_avail=%u"
+                             " space_wanted=%u\n",
+                             ring_info->id.aport, space_avail,
+                             ent.space_required);
+
+                /* Do not queue a notification for an unachievable size */
+                if ( ent.space_required > ent.max_message_size )
+                    ent.flags |= XEN_ARGO_RING_DATA_F_EMSGSIZE;
+                else if ( space_avail >= ent.space_required )
+                {
+                    pending_cancel(dst_d, ring_info, currd->domain_id);
+                    ent.flags |= XEN_ARGO_RING_DATA_F_SUFFICIENT;
+                }
+                else
+                {
+                    pending_requeue(dst_d, ring_info, currd->domain_id,
+                                    ent.space_required);
+                    ent.flags |= XEN_ARGO_RING_DATA_F_PENDING;
+                }
+
+                spin_unlock(&ring_info->L3_lock);
+
+                if ( space_avail == ent.max_message_size )
+                    ent.flags |= XEN_ARGO_RING_DATA_F_EMPTY;
+            }
+            read_unlock(&dst_d->argo->rings_L2_rwlock);
+        }
+        put_domain(dst_d);
+    }
+
+    if ( __copy_field_to_guest(data_ent_hnd, &ent, flags) ||
+         __copy_field_to_guest(data_ent_hnd, &ent, max_message_size) )
+        return -EFAULT;
+
+    return 0;
+}
+
 static int
 find_ring_mfn(struct domain *d, gfn_t gfn, mfn_t *mfn)
 {
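[Illustration, not part of the patch: when the ring exists, fill_ring_data() reports exactly one of three outcomes per entry — EMSGSIZE, SUFFICIENT, or PENDING. A sketch of how a guest client might fold the returned flags into a send decision; can_send_now() and the errno mapping are hypothetical, the flag names come from the public header added below, and the include path is assumed:]

    #include <errno.h>
    #include <xen/argo.h>   /* assumed include path for the public header */

    static int can_send_now(const xen_argo_ring_data_ent_t *ent)
    {
        if ( !(ent->flags & XEN_ARGO_RING_DATA_F_EXISTS) )
            return -ENOENT;    /* no matching ring is registered */
        if ( ent->flags & XEN_ARGO_RING_DATA_F_EMSGSIZE )
            return -EMSGSIZE;  /* space_required exceeds max_message_size */
        if ( ent->flags & XEN_ARGO_RING_DATA_F_SUFFICIENT )
            return 0;          /* safe to sendv() space_required bytes now */
        /* F_PENDING: Xen queued a notification; await VIRQ_ARGO_MESSAGE. */
        return -EAGAIN;
    }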
@@ -1553,6 +1787,109 @@ register_ring(struct domain *currd,
     return ret;
 }
 
+static void
+notify_ring(const struct domain *d, struct argo_ring_info *ring_info,
+            struct hlist_head *to_notify)
+{
+    unsigned int space;
+
+    ASSERT(LOCKING_Read_rings_L2(d));
+
+    spin_lock(&ring_info->L3_lock);
+
+    if ( ring_info->len )
+        space = ringbuf_payload_space(d, ring_info);
+    else
+        space = 0;
+
+    spin_unlock(&ring_info->L3_lock);
+
+    if ( space )
+        pending_find(d, ring_info, space, to_notify);
+}
+
+static void
+notify_check_pending(struct domain *d)
+{
+    unsigned int i;
+    HLIST_HEAD(to_notify);
+
+    ASSERT(LOCKING_Read_L1);
+
+    read_lock(&d->argo->rings_L2_rwlock);
+
+    for ( i = 0; i < ARGO_HTABLE_SIZE; i++ )
+    {
+        struct hlist_node *node, *next;
+        struct argo_ring_info *ring_info;
+
+        hlist_for_each_entry_safe(ring_info, node, next,
+                                  &d->argo->ring_hash[i], node)
+            notify_ring(d, ring_info, &to_notify);
+    }
+
+    read_unlock(&d->argo->rings_L2_rwlock);
+
+    if ( !hlist_empty(&to_notify) )
+        pending_notify(&to_notify);
+}
+
+static long
+notify(struct domain *currd,
+       XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd)
+{
+    XEN_GUEST_HANDLE(xen_argo_ring_data_ent_t) ent_hnd;
+    xen_argo_ring_data_t ring_data;
+    int ret = 0;
+
+    ASSERT(currd == current->domain);
+
+    read_lock(&L1_global_argo_rwlock);
+
+    if ( !currd->argo )
+    {
+        argo_dprintk("!d->argo, ENODEV\n");
+        ret = -ENODEV;
+        goto out;
+    }
+
+    notify_check_pending(currd);
+
+    if ( guest_handle_is_null(ring_data_hnd) )
+        goto out;
+
+    ret = copy_from_guest(&ring_data, ring_data_hnd, 1) ? -EFAULT : 0;
+    if ( ret )
+        goto out;
+
+    if ( ring_data.nent > MAX_NOTIFY_COUNT )
+    {
+        gprintk(XENLOG_ERR, "argo: notify entry count(%u) exceeds max(%u)\n",
+                ring_data.nent, MAX_NOTIFY_COUNT);
+        ret = -EACCES;
+        goto out;
+    }
+
+    ent_hnd = guest_handle_for_field(ring_data_hnd,
+                                     xen_argo_ring_data_ent_t, data[0]);
+    if ( unlikely(!guest_handle_okay(ent_hnd, ring_data.nent)) )
+    {
+        ret = -EFAULT;
+        goto out;
+    }
+
+    while ( !ret && ring_data.nent-- )
+    {
+        ret = fill_ring_data(currd, ent_hnd);
+        guest_handle_add_offset(ent_hnd, 1);
+    }
+
+ out:
+    read_unlock(&L1_global_argo_rwlock);
+
+    return ret;
+}
+
 static long
 sendv(struct domain *src_d, const xen_argo_addr_t *src_addr,
       const xen_argo_addr_t *dst_addr,
@@ -1752,6 +2089,21 @@ do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
         break;
     }
 
+    case XEN_ARGO_OP_notify:
+    {
+        XEN_GUEST_HANDLE_PARAM(xen_argo_ring_data_t) ring_data_hnd =
+            guest_handle_cast(arg1, xen_argo_ring_data_t);
+
+        if ( unlikely((!guest_handle_is_null(arg2)) || arg3 || arg4) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        rc = notify(currd, ring_data_hnd);
+        break;
+    }
+
     default:
         rc = -EOPNOTSUPP;
         break;
diff --git a/xen/common/compat/argo.c b/xen/common/compat/argo.c
index 6290ed61f085..4fac59751531 100644
--- a/xen/common/compat/argo.c
+++ b/xen/common/compat/argo.c
@@ -41,4 +41,22 @@ static inline int __maybe_unused name(k xen_ ## n *x, k compat_ ## n *c) \
 }
 
 CHECK_argo_send_addr;
+CHECK_argo_ring_data_ent;
 CHECK_argo_iov;
+
+/*
+ * Disable sizeof type checking for the following struct checks because
+ * these structs have fields whose types differ between the compat and
+ * non-compat structs and have variable size, which prevents the size
+ * check from validating.
+ */
+
+#undef CHECK_FIELD_COMMON_
+#define CHECK_FIELD_COMMON_(k, name, n, f)                                \
+static inline int __maybe_unused name(k xen_ ## n *x, k compat_ ## n *c) \
+{                                                                         \
+    BUILD_BUG_ON(offsetof(k xen_ ## n, f) !=                              \
+                 offsetof(k compat_ ## n, f));                            \
+    return 1;                                                             \
+}
+
+CHECK_argo_ring_data;
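[Illustration, not part of the patch: the redefined CHECK_FIELD_COMMON_ keeps the offsetof comparison but drops the usual field-size comparison. A standalone sketch of why only offsets can be validated here, with hypothetical types standing in for the generated native/compat structs:]

    #include <stddef.h>

    /* Stand-ins: the trailing flexible member's type differs between ABIs. */
    struct native_rd { unsigned int nent, pad; unsigned long long data[]; };
    struct compat_rd { unsigned int nent, pad; unsigned int data[]; };

    /* Offsets of named fields can still be compared at compile time... */
    typedef char nent_offset_ok[(offsetof(struct native_rd, nent) ==
                                 offsetof(struct compat_rd, nent)) ? 1 : -1];

    /*
     * ...but a field-size comparison cannot be: sizeof is ill-formed for a
     * flexible array member such as data[], which is why the macro above is
     * redefined to check offsets only.
     */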
diff --git a/xen/include/public/argo.h b/xen/include/public/argo.h
index c12a50f67d2b..d2cb59443cac 100644
--- a/xen/include/public/argo.h
+++ b/xen/include/public/argo.h
@@ -123,6 +123,42 @@ typedef struct xen_argo_unregister_ring
 /* Messages on the ring are padded to a multiple of this size. */
 #define XEN_ARGO_MSG_SLOT_SIZE 0x10
 
+/*
+ * Notify flags
+ */
+/* Ring is empty */
+#define XEN_ARGO_RING_DATA_F_EMPTY       (1U << 0)
+/* Ring exists */
+#define XEN_ARGO_RING_DATA_F_EXISTS      (1U << 1)
+/* Pending interrupt exists. Do not rely on this flag - for profiling only */
+#define XEN_ARGO_RING_DATA_F_PENDING     (1U << 2)
+/* Sufficient space to queue space_required bytes exists */
+#define XEN_ARGO_RING_DATA_F_SUFFICIENT  (1U << 3)
+/* Insufficient ring size for space_required bytes */
+#define XEN_ARGO_RING_DATA_F_EMSGSIZE    (1U << 4)
+/* Ring is shared, not unicast */
+#define XEN_ARGO_RING_DATA_F_SHARED      (1U << 5)
+
+typedef struct xen_argo_ring_data_ent
+{
+    xen_argo_addr_t ring;
+    uint16_t flags;
+    uint16_t pad;
+    uint32_t space_required;
+    uint32_t max_message_size;
+} xen_argo_ring_data_ent_t;
+
+typedef struct xen_argo_ring_data
+{
+    uint32_t nent;
+    uint32_t pad;
+#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+    xen_argo_ring_data_ent_t data[];
+#elif defined(__GNUC__)
+    xen_argo_ring_data_ent_t data[0];
+#endif
+} xen_argo_ring_data_t;
+
 struct xen_argo_ring_message_header
 {
     uint32_t len;
@@ -217,4 +253,35 @@ struct xen_argo_ring_message_header
  */
 #define XEN_ARGO_OP_sendv 3
 
+/*
+ * XEN_ARGO_OP_notify
+ *
+ * Asks Xen for information about other rings in the system.
+ *
+ * ent->ring is the xen_argo_addr_t of the ring you want information on.
+ * Uses the same ring matching rules as XEN_ARGO_OP_sendv.
+ *
+ * ent->space_required : if this field is non-zero then Xen will check
+ * that there is space in the destination ring for this many bytes of payload.
+ * If the ring is too small for the requested space_required, it will set the
+ * XEN_ARGO_RING_DATA_F_EMSGSIZE flag on return.
+ * If sufficient space is available, it will set XEN_ARGO_RING_DATA_F_SUFFICIENT
+ * and CANCEL any pending notification for that ent->ring; otherwise it
+ * will schedule a notification event and the flag will not be set.
+ *
+ * These flags are set by Xen when notify replies:
+ * XEN_ARGO_RING_DATA_F_EMPTY       ring is empty
+ * XEN_ARGO_RING_DATA_F_PENDING     notify event is pending *don't rely on this*
+ * XEN_ARGO_RING_DATA_F_SUFFICIENT  sufficient space for space_required exists
+ * XEN_ARGO_RING_DATA_F_EXISTS      ring exists
+ * XEN_ARGO_RING_DATA_F_EMSGSIZE    space_required too large for the ring size
+ * XEN_ARGO_RING_DATA_F_SHARED      ring is registered for wildcard partner
+ *
+ * arg1: XEN_GUEST_HANDLE(xen_argo_ring_data_t) ring_data (may be NULL)
+ * arg2: NULL
+ * arg3: 0 (ZERO)
+ * arg4: 0 (ZERO)
+ */
+#define XEN_ARGO_OP_notify 4
+
 #endif
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 3723980a877e..e45b60e3cafa 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -154,3 +154,5 @@
 ?	argo_unregister_ring		argo.h
 ?	argo_iov			argo.h
 ?	argo_send_addr			argo.h
+?	argo_ring_data_ent		argo.h
+?	argo_ring_data			argo.h
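[Illustration, not part of the patch: putting the pieces together, a guest might exercise XEN_ARGO_OP_notify as below. HYPERVISOR_argo_op() is a hypothetical hypercall wrapper matching do_argo_op's cmd/arg1..arg4 signature, and the include path is assumed; a sketch, not a reference client.]

    #include <stdlib.h>
    #include <stdint.h>
    #include <xen/argo.h>   /* assumed include path; provides domid_t etc. */

    /* Hypothetical five-argument hypercall wrapper. */
    long HYPERVISOR_argo_op(unsigned int cmd, void *arg1, void *arg2,
                            unsigned long arg3, unsigned long arg4);

    int query_ring_space(domid_t dst, uint32_t aport, uint32_t bytes_wanted)
    {
        size_t sz = sizeof(xen_argo_ring_data_t) +
                    sizeof(xen_argo_ring_data_ent_t);
        xen_argo_ring_data_t *rd = calloc(1, sz);
        long rc;

        if ( !rd )
            return -1;

        rd->nent = 1;                      /* one entry; <= MAX_NOTIFY_COUNT */
        rd->data[0].ring.domain_id = dst;
        rd->data[0].ring.aport = aport;
        rd->data[0].space_required = bytes_wanted;

        /* arg2 must be NULL and arg3/arg4 zero, or Xen returns -EINVAL. */
        rc = HYPERVISOR_argo_op(XEN_ARGO_OP_notify, rd, NULL, 0, 0);
        if ( !rc )
            rc = !!(rd->data[0].flags & XEN_ARGO_RING_DATA_F_SUFFICIENT);

        free(rd);
        /* 1: send now; 0: wait for VIRQ_ARGO_MESSAGE; negative: error. */
        return (int)rc;
    }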