diff --git a/xen/common/argo.c b/xen/common/argo.c
index 16c3a3e16549..1537fcc89846 100644
--- a/xen/common/argo.c
+++ b/xen/common/argo.c
@@ -30,10 +30,15 @@
 #include <public/argo.h>
 
 #define MAX_RINGS_PER_DOMAIN 128U
+#define MAX_PENDING_PER_RING 32U
 
 /* All messages on the ring are padded to a multiple of the slot size. */
 #define ROUNDUP_MESSAGE(a) ROUNDUP((a), XEN_ARGO_MSG_SLOT_SIZE)
 
+/* The maximum size of a message that may be sent on the largest Argo ring. */
+#define MAX_ARGO_MESSAGE_SIZE ((XEN_ARGO_MAX_RING_SIZE) - \
+        (sizeof(struct xen_argo_ring_message_header)) - ROUNDUP_MESSAGE(1))
+
 /* Number of PAGEs needed to hold a ring of a given size in bytes */
 #define NPAGES_RING(ring_len) \
     (ROUNDUP((ROUNDUP_MESSAGE(ring_len) + sizeof(xen_argo_ring_t)), PAGE_SIZE) \
@@ -41,8 +46,10 @@
 
 DEFINE_XEN_GUEST_HANDLE(xen_argo_addr_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_gfn_t);
+DEFINE_XEN_GUEST_HANDLE(xen_argo_iov_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_register_ring_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_ring_t);
+DEFINE_XEN_GUEST_HANDLE(xen_argo_send_addr_t);
 DEFINE_XEN_GUEST_HANDLE(xen_argo_unregister_ring_t);
 
 static bool __read_mostly opt_argo;
@@ -347,6 +354,28 @@ find_ring_info(const struct domain *d, const struct argo_ring_id *id)
     return NULL;
 }
 
+static struct argo_ring_info *
+find_ring_info_by_match(const struct domain *d, xen_argo_port_t aport,
+                        domid_t partner_id)
+{
+    struct argo_ring_id id;
+    struct argo_ring_info *ring_info;
+
+    ASSERT(LOCKING_Read_rings_L2(d));
+
+    id.aport = aport;
+    id.domain_id = d->domain_id;
+    id.partner_id = partner_id;
+
+    ring_info = find_ring_info(d, &id);
+    if ( ring_info )
+        return ring_info;
+
+    id.partner_id = XEN_ARGO_DOMID_ANY;
+
+    return find_ring_info(d, &id);
+}
+
 static struct argo_send_info *
 find_send_info(const struct domain *d, const struct argo_ring_id *id)
 {
@@ -374,6 +403,14 @@ find_send_info(const struct domain *d, const struct argo_ring_id *id)
     return NULL;
 }
 
+static void
+signal_domain(struct domain *d)
+{
+    argo_dprintk("signalling domid:%u\n", d->domain_id);
+
+    send_guest_global_virq(d, VIRQ_ARGO_MESSAGE);
+}
+
 static void
 ring_unmap(const struct domain *d, struct argo_ring_info *ring_info)
 {
@@ -457,6 +494,389 @@ update_tx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
     smp_wmb();
 }
 
+static int
+memcpy_to_guest_ring(const struct domain *d, struct argo_ring_info *ring_info,
+                     unsigned int offset,
+                     const void *src, XEN_GUEST_HANDLE(uint8_t) src_hnd,
+                     unsigned int len)
+{
+    unsigned int mfns_index = offset >> PAGE_SHIFT;
+    void *dst;
+    int ret;
+    unsigned int src_offset = 0;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    offset &= ~PAGE_MASK;
+
+    if ( len + offset > XEN_ARGO_MAX_RING_SIZE )
+        return -EFAULT;
+
+    while ( len )
+    {
+        unsigned int head_len = len > PAGE_SIZE ? PAGE_SIZE - offset : len;
+
+        ret = ring_map_page(d, ring_info, mfns_index, &dst);
+        if ( ret )
+            return ret;
+
+        if ( src )
+        {
+            memcpy(dst + offset, src + src_offset, head_len);
+            src_offset += head_len;
+        }
+        else
+        {
+            if ( copy_from_guest(dst + offset, src_hnd, head_len) )
+                return -EFAULT;
+
+            guest_handle_add_offset(src_hnd, head_len);
+        }
+
+        mfns_index++;
+        len -= head_len;
+        offset = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * Use this with caution: rx_ptr is under guest control and may be bogus.
+ * See get_sanitized_ring for a safer alternative.
+ */
+static int
+get_rx_ptr(const struct domain *d, struct argo_ring_info *ring_info,
+           uint32_t *rx_ptr)
+{
+    void *src;
+    xen_argo_ring_t *ringp;
+    int ret;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    if ( !ring_info->nmfns || ring_info->nmfns < NPAGES_RING(ring_info->len) )
+        return -EINVAL;
+
+    ret = ring_map_page(d, ring_info, 0, &src);
+    if ( ret )
+        return ret;
+
+    ringp = (xen_argo_ring_t *)src;
+
+    *rx_ptr = read_atomic(&ringp->rx_ptr);
+
+    return 0;
+}
+
+/*
+ * get_sanitized_ring creates a modified copy of the ring pointers where
+ * the rx_ptr is rounded up to ensure it is aligned, and then ring
+ * wrap is handled. Simplifies safe use of the rx_ptr for available
+ * space calculation.
+ */
+static int
+get_sanitized_ring(const struct domain *d, xen_argo_ring_t *ring,
+                   struct argo_ring_info *ring_info)
+{
+    uint32_t rx_ptr;
+    int ret;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    ret = get_rx_ptr(d, ring_info, &rx_ptr);
+    if ( ret )
+        return ret;
+
+    ring->tx_ptr = ring_info->tx_ptr;
+
+    rx_ptr = ROUNDUP_MESSAGE(rx_ptr);
+    if ( rx_ptr >= ring_info->len )
+        rx_ptr = 0;
+
+    ring->rx_ptr = rx_ptr;
+
+    return 0;
+}
+
+/*
+ * iov_count returns its count on success via an out variable to avoid
+ * potential for a negative return value to be used incorrectly
+ * (eg. coerced into an unsigned variable resulting in a large incorrect value)
+ */
+static int
+iov_count(const xen_argo_iov_t *piov, unsigned long niov, uint32_t *count)
+{
+    uint32_t sum_iov_lens = 0;
+
+    if ( niov > XEN_ARGO_MAXIOV )
+        return -EINVAL;
+
+    for ( ; niov--; piov++ )
+    {
+        /* valid iovs must have the padding field set to zero */
+        if ( piov->pad )
+        {
+            argo_dprintk("invalid iov: padding is not zero\n");
+            return -EINVAL;
+        }
+
+        /* check each to protect sum against integer overflow */
+        if ( piov->iov_len > MAX_ARGO_MESSAGE_SIZE )
+        {
+            argo_dprintk("invalid iov_len: too big (%u)>%llu\n",
+                         piov->iov_len, MAX_ARGO_MESSAGE_SIZE);
+            return -EINVAL;
+        }
+
+        sum_iov_lens += piov->iov_len;
+
+        /*
+         * Again protect sum from integer overflow
+         * and ensure total msg size will be within bounds.
+         */
+        if ( sum_iov_lens > MAX_ARGO_MESSAGE_SIZE )
+        {
+            argo_dprintk("invalid iov series: total message too big\n");
+            return -EMSGSIZE;
+        }
+    }
+
+    *count = sum_iov_lens;
+
+    return 0;
+}
+
+static int
+ringbuf_insert(const struct domain *d, struct argo_ring_info *ring_info,
+               const struct argo_ring_id *src_id,
+               XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd,
+               unsigned long niov, uint32_t message_type,
+               unsigned long *out_len)
+{
+    xen_argo_ring_t ring;
+    struct xen_argo_ring_message_header mh = { };
+    int32_t sp;
+    int32_t ret;
+    uint32_t len = 0;
+    xen_argo_iov_t iovs[XEN_ARGO_MAXIOV];
+    xen_argo_iov_t *piov;
+    XEN_GUEST_HANDLE(uint8_t) NULL_hnd =
+        guest_handle_from_param(guest_handle_from_ptr(NULL, uint8_t), uint8_t);
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    ret = __copy_from_guest(iovs, iovs_hnd, niov) ? -EFAULT : 0;
+    if ( ret )
+        return ret;
+
+    /*
+     * Obtain the total size of data to transmit -- sets the 'len' variable
+     * -- and sanity check that the iovs conform to size and number limits.
+     * Enforced below: no more than 'len' bytes of guest data
+     * (plus the message header) will be sent in this operation.
+     */
+    ret = iov_count(iovs, niov, &len);
+    if ( ret )
+        return ret;
+
+    /*
+     * Size bounds check against ring size and static maximum message limit.
+     * The message must not fill the ring; there must be at least one slot
+     * remaining so we can distinguish a full ring from an empty one.
+     */
+    if ( ((ROUNDUP_MESSAGE(len) +
+           sizeof(struct xen_argo_ring_message_header)) >= ring_info->len) ||
+         (len > MAX_ARGO_MESSAGE_SIZE) )
+        return -EMSGSIZE;
+
+    ret = get_sanitized_ring(d, &ring, ring_info);
+    if ( ret )
+        return ret;
+
+    argo_dprintk("ring.tx_ptr=%u ring.rx_ptr=%u ring len=%u"
+                 " ring_info->tx_ptr=%u\n",
+                 ring.tx_ptr, ring.rx_ptr, ring_info->len, ring_info->tx_ptr);
+
+    if ( ring.rx_ptr == ring.tx_ptr )
+        sp = ring_info->len;
+    else
+    {
+        sp = ring.rx_ptr - ring.tx_ptr;
+        if ( sp < 0 )
+            sp += ring_info->len;
+    }
+
+    /*
+     * Size bounds check against currently available space in the ring.
+     * Again: the message must not fill the ring leaving no space remaining.
+     */
+    if ( (ROUNDUP_MESSAGE(len) +
+          sizeof(struct xen_argo_ring_message_header)) >= sp )
+    {
+        argo_dprintk("EAGAIN\n");
+        return -EAGAIN;
+    }
+
+    mh.len = len + sizeof(struct xen_argo_ring_message_header);
+    mh.source.aport = src_id->aport;
+    mh.source.domain_id = src_id->domain_id;
+    mh.message_type = message_type;
+
+    /*
+     * For this copy to the guest ring, tx_ptr is always 16-byte aligned
+     * and the message header is 16 bytes long.
+     */
+    BUILD_BUG_ON(
+        sizeof(struct xen_argo_ring_message_header) != ROUNDUP_MESSAGE(1));
+
+    /*
+     * First data write into the destination ring: fixed size, message header.
+     * This cannot overrun because the available free space (value in 'sp')
+     * is checked above and must be at least this size.
+     */
+    ret = memcpy_to_guest_ring(d, ring_info,
+                               ring.tx_ptr + sizeof(xen_argo_ring_t),
+                               &mh, NULL_hnd, sizeof(mh));
+    if ( ret )
+    {
+        gprintk(XENLOG_ERR,
+                "argo: failed to write message header to ring (vm%u:%x vm%u)\n",
+                ring_info->id.domain_id, ring_info->id.aport,
+                ring_info->id.partner_id);
+
+        return ret;
+    }
+
+    ring.tx_ptr += sizeof(mh);
+    if ( ring.tx_ptr == ring_info->len )
+        ring.tx_ptr = 0;
+
+    for ( piov = iovs; niov--; piov++ )
+    {
+        XEN_GUEST_HANDLE_64(uint8_t) buf_hnd = piov->iov_hnd;
+        uint32_t iov_len = piov->iov_len;
+
+        /* If no data is provided in this iov, moan and skip on to the next */
+        if ( !iov_len )
+        {
+            gprintk(XENLOG_ERR,
+                    "argo: no data iov_len=0 iov_hnd=%p ring (vm%u:%x vm%u)\n",
+                    buf_hnd.p, ring_info->id.domain_id, ring_info->id.aport,
+                    ring_info->id.partner_id);
+
+            continue;
+        }
+
+        if ( unlikely(!guest_handle_okay(buf_hnd, iov_len)) )
+        {
+            gprintk(XENLOG_ERR,
+                    "argo: bad iov handle [%p, %"PRIx32"] (vm%u:%x vm%u)\n",
+                    buf_hnd.p, iov_len,
+                    ring_info->id.domain_id, ring_info->id.aport,
+                    ring_info->id.partner_id);
+
+            return -EFAULT;
+        }
+
+        sp = ring_info->len - ring.tx_ptr;
+
+        /* Check: iov data size versus free space at the tail of the ring */
+        if ( iov_len > sp )
+        {
+            /*
+             * Second possible data write: ring-tail-wrap-write.
+             * Populate the ring tail and update the internal tx_ptr to handle
+             * wrapping at the end of ring.
+             * Size of data written here: sp
+             * which is the exact full amount of free space available at the
+             * tail of the ring, so this cannot overrun.
+             */
+            ret = memcpy_to_guest_ring(d, ring_info,
+                                       ring.tx_ptr + sizeof(xen_argo_ring_t),
+                                       NULL, buf_hnd, sp);
+            if ( ret )
+            {
+                gprintk(XENLOG_ERR,
+                        "argo: failed to copy {%p, %"PRIx32"} (vm%u:%x vm%u)\n",
+                        buf_hnd.p, sp,
+                        ring_info->id.domain_id, ring_info->id.aport,
+                        ring_info->id.partner_id);
+
+                return ret;
+            }
+
+            ring.tx_ptr = 0;
+            iov_len -= sp;
+            guest_handle_add_offset(buf_hnd, sp);
+
+            ASSERT(iov_len <= ring_info->len);
+        }
+
+        /*
+         * Third possible data write: all data remaining for this iov.
+         * Size of data written here: iov_len
+         *
+         * Case 1: if the ring-tail-wrap-write above was performed, then
+         * iov_len has been decreased by 'sp' and ring.tx_ptr is zero.
+         *
+         * We know from checking the result of iov_count:
+         *      len + sizeof(message_header) <= ring_info->len
+         * We also know that len is the total of summing all iov_lens, so:
+         *      iov_len <= len
+         * so by transitivity:
+         *      iov_len <= len <= (ring_info->len - sizeof(msgheader))
+         * and therefore:
+         *      (iov_len + sizeof(msgheader) <= ring_info->len) &&
+         *      (ring.tx_ptr == 0)
+         * so this write cannot overrun here.
+         *
+         * Case 2: ring-tail-wrap-write above was not performed
+         * -> so iov_len is the guest-supplied value and: (iov_len <= sp)
+         * ie. less than available space at the tail of the ring:
+         * so this write cannot overrun.
+         */
+        ret = memcpy_to_guest_ring(d, ring_info,
+                                   ring.tx_ptr + sizeof(xen_argo_ring_t),
+                                   NULL, buf_hnd, iov_len);
+        if ( ret )
+        {
+            gprintk(XENLOG_ERR,
+                    "argo: failed to copy [%p, %"PRIx32"] (vm%u:%x vm%u)\n",
+                    buf_hnd.p, iov_len, ring_info->id.domain_id,
+                    ring_info->id.aport, ring_info->id.partner_id);
+
+            return ret;
+        }
+
+        ring.tx_ptr += iov_len;
+
+        if ( ring.tx_ptr == ring_info->len )
+            ring.tx_ptr = 0;
+    }
+
+    ring.tx_ptr = ROUNDUP_MESSAGE(ring.tx_ptr);
+
+    if ( ring.tx_ptr >= ring_info->len )
+        ring.tx_ptr -= ring_info->len;
+
+    update_tx_ptr(d, ring_info, ring.tx_ptr);
+
+    /*
+     * At this point (and also on error exit paths from this function) it is
+     * possible to unmap the ring_info, ie:
+     *   ring_unmap(d, ring_info);
+     * but performance should be improved by not doing so, and retaining
+     * the mapping.
+     * An XSM policy control over level of confidentiality required
+     * versus performance cost could be added to decide that here.
+     */
+
+    *out_len = len;
+
+    return ret;
+}
+
 static void
 wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
 {
@@ -476,6 +896,25 @@ wildcard_pending_list_remove(domid_t domain_id, struct pending_ent *ent)
     put_domain(d);
 }
 
+static void
+wildcard_pending_list_insert(domid_t domain_id, struct pending_ent *ent)
+{
+    struct domain *d = get_domain_by_id(domain_id);
+
+    if ( !d )
+        return;
+
+    ASSERT(LOCKING_Read_L1);
+
+    if ( d->argo )
+    {
+        spin_lock(&d->argo->wildcard_L2_lock);
+        hlist_add_head(&ent->wildcard_node, &d->argo->wildcard_pend_list);
+        spin_unlock(&d->argo->wildcard_L2_lock);
+    }
+    put_domain(d);
+}
+
 static void
 pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
 {
@@ -494,6 +933,66 @@ pending_remove_all(const struct domain *d, struct argo_ring_info *ring_info)
     ring_info->npending = 0;
 }
 
+static int
+pending_queue(const struct domain *d, struct argo_ring_info *ring_info,
+              domid_t src_id, unsigned int len)
+{
+    struct pending_ent *ent;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    if ( ring_info->npending >= MAX_PENDING_PER_RING )
+        return -ENOSPC;
+
+    ent = xmalloc(struct pending_ent);
+    if ( !ent )
+        return -ENOMEM;
+
+    ent->len = len;
+    ent->domain_id = src_id;
+    ent->ring_info = ring_info;
+
+    if ( ring_info->id.partner_id == XEN_ARGO_DOMID_ANY )
+        wildcard_pending_list_insert(src_id, ent);
+    hlist_add_head(&ent->node, &ring_info->pending);
+    ring_info->npending++;
+
+    return 0;
+}
+
+static int
+pending_requeue(const struct domain *d, struct argo_ring_info *ring_info,
+                domid_t src_id, unsigned int len)
+{
+    struct hlist_node *node;
+    struct pending_ent *ent;
+
+    ASSERT(LOCKING_L3(d, ring_info));
+
+    hlist_for_each_entry(ent, node, &ring_info->pending, node)
+    {
+        if ( ent->domain_id == src_id )
+        {
+            /*
+             * Reuse an existing queue entry for a notification rather than add
+             * another. If the existing entry is waiting for a smaller size than
+             * the current message then adjust the record to wait for the
+             * current (larger) size to be available before triggering a
+             * notification.
+             * This assists the waiting sender by ensuring that whenever a
+             * notification is triggered, there is sufficient space available
+             * for (at least) any one of the messages awaiting transmission.
+             */
+            if ( ent->len < len )
+                ent->len = len;
+
+            return 0;
+        }
+    }
+
+    return pending_queue(d, ring_info, src_id, len);
+}
+
 static void
 wildcard_rings_pending_remove(struct domain *d)
 {
@@ -1054,6 +1553,95 @@ register_ring(struct domain *currd,
     return ret;
 }
 
+static long
+sendv(struct domain *src_d, const xen_argo_addr_t *src_addr,
+      const xen_argo_addr_t *dst_addr,
+      XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd, unsigned long niov,
+      uint32_t message_type)
+{
+    struct domain *dst_d = NULL;
+    struct argo_ring_id src_id;
+    struct argo_ring_info *ring_info;
+    int ret = 0;
+    unsigned long len = 0;
+
+    ASSERT(src_d->domain_id == src_addr->domain_id);
+
+    argo_dprintk("sendv: (%u:%x)->(%u:%x) niov:%lu iov:%p type:%u\n",
+                 src_addr->domain_id, src_addr->aport,
+                 dst_addr->domain_id, dst_addr->aport,
+                 niov, iovs_hnd.p, message_type);
+
+    read_lock(&L1_global_argo_rwlock);
+
+    if ( !src_d->argo )
+    {
+        ret = -ENODEV;
+        goto out_unlock;
+    }
+
+    src_id.aport = src_addr->aport;
+    src_id.domain_id = src_d->domain_id;
+    src_id.partner_id = dst_addr->domain_id;
+
+    dst_d = get_domain_by_id(dst_addr->domain_id);
+    if ( !dst_d )
+    {
+        argo_dprintk("!dst_d, ESRCH\n");
+        ret = -ESRCH;
+        goto out_unlock;
+    }
+
+    if ( !dst_d->argo )
+    {
+        argo_dprintk("!dst_d->argo, ECONNREFUSED\n");
+        ret = -ECONNREFUSED;
+        goto out_unlock;
+    }
+
+    read_lock(&dst_d->argo->rings_L2_rwlock);
+
+    ring_info = find_ring_info_by_match(dst_d, dst_addr->aport,
+                                        src_addr->domain_id);
+    if ( !ring_info )
+    {
+        gprintk(XENLOG_ERR,
+                "argo: vm%u connection refused, src (vm%u:%x) dst (vm%u:%x)\n",
+                current->domain->domain_id, src_id.domain_id, src_id.aport,
+                dst_addr->domain_id, dst_addr->aport);
+
+        ret = -ECONNREFUSED;
+        read_unlock(&dst_d->argo->rings_L2_rwlock);
+        goto out_unlock;
+    }
+
+    spin_lock(&ring_info->L3_lock);
+
+    ret = ringbuf_insert(dst_d, ring_info, &src_id, iovs_hnd, niov,
+                         message_type, &len);
+    if ( ret == -EAGAIN )
+    {
+        argo_dprintk("argo_ringbuf_sendv failed, EAGAIN\n");
+        /* requeue to issue a notification when space is there */
+        ret = pending_requeue(dst_d, ring_info, src_addr->domain_id, len);
+    }
+
+    spin_unlock(&ring_info->L3_lock);
+
+    if ( ret >= 0 )
+        signal_domain(dst_d);
+
+    read_unlock(&dst_d->argo->rings_L2_rwlock);
+
+ out_unlock:
+    if ( dst_d )
+        put_domain(dst_d);
+
+    read_unlock(&L1_global_argo_rwlock);
+
+    return ( ret < 0 ) ? ret : len;
+}
+
 long
 do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
            XEN_GUEST_HANDLE_PARAM(void) arg2, unsigned long arg3,
@@ -1117,6 +1705,53 @@ do_argo_op(unsigned int cmd, XEN_GUEST_HANDLE_PARAM(void) arg1,
         break;
     }
 
+    case XEN_ARGO_OP_sendv:
+    {
+        xen_argo_send_addr_t send_addr;
+
+        XEN_GUEST_HANDLE_PARAM(xen_argo_send_addr_t) send_addr_hnd =
+            guest_handle_cast(arg1, xen_argo_send_addr_t);
+        XEN_GUEST_HANDLE_PARAM(xen_argo_iov_t) iovs_hnd =
+            guest_handle_cast(arg2, xen_argo_iov_t);
+        /* arg3 is niov */
+        /* arg4 is message_type. Must be a 32-bit value. */
+
+        rc = copy_from_guest(&send_addr, send_addr_hnd, 1) ? -EFAULT : 0;
+        if ( rc )
+            break;
+
+        /*
+         * Check padding is zeroed. Reject niov above limit or message_types
+         * that are outside 32 bit range.
+         */
+        if ( unlikely(send_addr.src.pad || send_addr.dst.pad ||
+                      (arg3 > XEN_ARGO_MAXIOV) || (arg4 & ~0xffffffffUL)) )
+        {
+            rc = -EINVAL;
+            break;
+        }
+
+        if ( send_addr.src.domain_id == XEN_ARGO_DOMID_ANY )
+            send_addr.src.domain_id = currd->domain_id;
+
+        /* No domain is currently authorized to send on behalf of another */
+        if ( unlikely(send_addr.src.domain_id != currd->domain_id) )
+        {
+            rc = -EPERM;
+            break;
+        }
+
+        /*
+         * Check access to the whole array here so we can use the faster __copy
+         * operations to read each element later.
+         */
+        if ( unlikely(!guest_handle_okay(iovs_hnd, arg3)) )
+            break;
+
+        rc = sendv(currd, &send_addr.src, &send_addr.dst, iovs_hnd, arg3, arg4);
+        break;
+    }
+
     default:
         rc = -EOPNOTSUPP;
         break;
diff --git a/xen/common/compat/argo.c b/xen/common/compat/argo.c
index 6a1671c2cf5c..6290ed61f085 100644
--- a/xen/common/compat/argo.c
+++ b/xen/common/compat/argo.c
@@ -23,3 +23,22 @@ CHECK_argo_addr;
 CHECK_argo_register_ring;
 CHECK_argo_ring;
 CHECK_argo_unregister_ring;
+
+/*
+ * Disable strict type checking in this compat validation macro for the
+ * following struct checks because it cannot handle fields within structs that
+ * have types that differ in the compat versus non-compat structs.
+ * Replace it with a field size check which is sufficient here.
+ */
+
+#undef CHECK_FIELD_COMMON_
+#define CHECK_FIELD_COMMON_(k, name, n, f) \
+static inline int __maybe_unused name(k xen_ ## n *x, k compat_ ## n *c) \
+{ \
+    BUILD_BUG_ON(offsetof(k xen_ ## n, f) != \
+                 offsetof(k compat_ ## n, f)); \
+    return sizeof(x->f) == sizeof(c->f); \
+}
+
+CHECK_argo_send_addr;
+CHECK_argo_iov;
diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c
index f34d4f039dbd..6fbe346490fb 100644
--- a/xen/common/event_channel.c
+++ b/xen/common/event_channel.c
@@ -746,7 +746,7 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq)
     spin_unlock_irqrestore(&v->virq_lock, flags);
 }
 
-static void send_guest_global_virq(struct domain *d, uint32_t virq)
+void send_guest_global_virq(struct domain *d, uint32_t virq)
 {
     unsigned long flags;
     int port;
diff --git a/xen/include/public/argo.h b/xen/include/public/argo.h
index 3eabf836d71e..c12a50f67d2b 100644
--- a/xen/include/public/argo.h
+++ b/xen/include/public/argo.h
@@ -46,6 +46,34 @@ typedef uint32_t xen_argo_port_t;
 /* gfn type: 64-bit on all architectures to aid avoiding a compat ABI */
 typedef uint64_t xen_argo_gfn_t;
 
+/*
+ * XEN_ARGO_MAXIOV : maximum number of iovs accepted in a single sendv.
+ * Caution is required if this value is increased: this determines the size of
+ * an array of xen_argo_iov_t structs on the hypervisor stack, so could cause
+ * stack overflow if the value is too large.
+ * The Linux Argo driver never passes more than two iovs.
+ *
+ * This value should also not exceed 128 to ensure that the total amount of data
+ * posted in a single Argo sendv operation cannot exceed 2^31 bytes, to reduce
+ * risk of integer overflow defects:
+ * Each argo iov can hold ~ 2^24 bytes, so XEN_ARGO_MAXIOV <= 2^(31-24),
+ * ie. keep XEN_ARGO_MAXIOV <= 128.
+*/
+#define XEN_ARGO_MAXIOV 8U
+
+DEFINE_XEN_GUEST_HANDLE(uint8_t);
+
+typedef struct xen_argo_iov
+{
+#ifdef XEN_GUEST_HANDLE_64
+    XEN_GUEST_HANDLE_64(uint8_t) iov_hnd;
+#else
+    uint64_t iov_hnd;
+#endif
+    uint32_t iov_len;
+    uint32_t pad;
+} xen_argo_iov_t;
+
 typedef struct xen_argo_addr
 {
     xen_argo_port_t aport;
@@ -53,6 +81,12 @@
     uint16_t pad;
 } xen_argo_addr_t;
 
+typedef struct xen_argo_send_addr
+{
+    xen_argo_addr_t src;
+    xen_argo_addr_t dst;
+} xen_argo_send_addr_t;
+
 typedef struct xen_argo_ring
 {
     /* Guests should use atomic operations to access rx_ptr */
@@ -157,4 +191,30 @@ struct xen_argo_ring_message_header
  */
 #define XEN_ARGO_OP_unregister_ring 2
 
+/*
+ * XEN_ARGO_OP_sendv
+ *
+ * Send a list of buffers contained in iovs.
+ *
+ * The send address struct specifies the source and destination addresses
+ * for the message being sent, which are used to find the destination ring:
+ * Xen first looks for a most-specific match with a registered ring with
+ * (id.addr == dst) and (id.partner == sending_domain) ;
+ * if that fails, it then looks for a wildcard match (aka multicast receiver)
+ * where (id.addr == dst) and (id.partner == DOMID_ANY).
+ *
+ * For each iov entry, send iov_len bytes from iov_base to the destination ring.
+ * If insufficient space exists in the destination ring, it will return -EAGAIN
+ * and Xen will notify the caller when sufficient space becomes available.
+ *
+ * The message type is a 32-bit data field available to communicate message
+ * context data (eg. kernel-to-kernel, rather than application layer).
+ *
+ * arg1: XEN_GUEST_HANDLE(xen_argo_send_addr_t) source and dest addresses
+ * arg2: XEN_GUEST_HANDLE(xen_argo_iov_t) iovs
+ * arg3: unsigned long niov
+ * arg4: unsigned long message type
+ */
+#define XEN_ARGO_OP_sendv 3
+
 #endif
diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
index b3f649176420..b650aba670f1 100644
--- a/xen/include/public/xen.h
+++ b/xen/include/public/xen.h
@@ -178,7 +178,7 @@ DEFINE_XEN_GUEST_HANDLE(xen_ulong_t);
 #define VIRQ_CON_RING 8 /* G. (DOM0) Bytes received on console */
 #define VIRQ_PCPU_STATE 9 /* G. (DOM0) PCPU state changed */
 #define VIRQ_MEM_EVENT 10 /* G. (DOM0) A memory event has occurred */
-#define VIRQ_XC_RESERVED 11 /* G. Reserved for XenClient */
+#define VIRQ_ARGO_MESSAGE 11 /* G. Argo interdomain message notification */
 #define VIRQ_ENOMEM 12 /* G. (DOM0) Low on heap memory */
 #define VIRQ_XENPMU 13 /* V. PMC interrupt */
 
diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h
index ebb879e88ddf..46508878bb37 100644
--- a/xen/include/xen/event.h
+++ b/xen/include/xen/event.h
@@ -28,6 +28,13 @@ void send_guest_vcpu_virq(struct vcpu *v, uint32_t virq);
  */
 void send_global_virq(uint32_t virq);
 
+/*
+ * send_guest_global_virq:
+ * @d: Domain to which VIRQ should be sent
+ * @virq: Virtual IRQ number (VIRQ_*), must be global
+ */
+void send_guest_global_virq(struct domain *d, uint32_t virq);
+
 /*
  * set_global_virq_handler: Set a global VIRQ handler.
  * @d: New target domain for this VIRQ
diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
index 411c66107c95..3723980a877e 100644
--- a/xen/include/xlat.lst
+++ b/xen/include/xlat.lst
@@ -152,3 +152,5 @@
 ?    argo_ring              argo.h
 ?    argo_register_ring     argo.h
 ?    argo_unregister_ring   argo.h
+?    argo_iov               argo.h
+?    argo_send_addr         argo.h
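
Usage note: the sketch below illustrates how a guest might drive the XEN_ARGO_OP_sendv ABI documented in the public/argo.h hunk above. It is a hypothetical example, not part of the patch: the HYPERVISOR_argo_op() wrapper and the <xen/...> include paths are assumed to come from the guest's own hypercall glue, and the uint64_t layout of iov_hnd (the non-XEN_GUEST_HANDLE_64 case) is used.

/*
 * Hypothetical guest-side sketch only -- not part of this patch.
 * Sends one buffer to (dst_domid:dst_aport) with XEN_ARGO_OP_sendv.
 */
#include <stdint.h>
#include <xen/xen.h>    /* assumed install path of the public headers */
#include <xen/argo.h>

/* Assumed to be provided by the guest's hypercall glue; not defined here. */
extern long HYPERVISOR_argo_op(unsigned int cmd, void *arg1, void *arg2,
                               unsigned long arg3, unsigned long arg4);

static long argo_send_one(domid_t dst_domid, xen_argo_port_t dst_aport,
                          void *buf, uint32_t len, uint32_t message_type)
{
    xen_argo_send_addr_t send_addr = {
        /* XEN_ARGO_DOMID_ANY as source is rewritten by Xen to the caller */
        .src = { .aport = 0, .domain_id = XEN_ARGO_DOMID_ANY, .pad = 0 },
        .dst = { .aport = dst_aport, .domain_id = dst_domid, .pad = 0 },
    };
    xen_argo_iov_t iov = {
        .iov_hnd = (uint64_t)(uintptr_t)buf, /* uint64_t handle layout */
        .iov_len = len,
        .pad = 0,                            /* must be zero or Xen rejects it */
    };

    /* arg3 is niov (a single iov here), arg4 is the 32-bit message type. */
    long ret = HYPERVISOR_argo_op(XEN_ARGO_OP_sendv, &send_addr, &iov, 1,
                                  message_type);

    /*
     * -EAGAIN means the destination ring lacked space; Xen queues a
     * notification and raises VIRQ_ARGO_MESSAGE when a retry may succeed.
     */
    return ret;
}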