This article describes how to configure VM-to-VM notifications using event channels.

Taxonomy

Xen: the Xen Project hypervisor

VM: Virtual Machine

OS: Operating System

Dom0: privileged VM, booted directly from Xen

DomU: regular unprivileged VM, started from Dom0 using the “xl” tool

Dom0less DomU: regular unprivileged VM started from Xen at boot (in parallel with Dom0)

Introduction

A simple, yet effective, way to set up VM-to-VM communication between Xen guests is plain shared memory and notifications. Shared memory can be used to set up a ring buffer and exchange data, while notifications can be used to avoid having to poll the ring for updates.
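
For example, the two mechanisms are typically combined as follows: a page of shared memory holds a small ring, and an event channel notification tells the other side that the ring indices have moved. The layout below is only an illustrative sketch (the struct and field names are hypothetical and not part of the example developed later in this article):

/* Illustrative sketch only: a minimal single-producer/single-consumer ring
 * living in one shared 4KB page. The producer writes into data[] and
 * advances prod, then sends an event channel notification; the consumer
 * reads up to prod and advances cons, so neither side has to poll. */
struct shared_ring {
    volatile uint32_t prod;      /* next byte index the producer will write  */
    volatile uint32_t cons;      /* next byte index the consumer will read   */
    uint8_t data[4096 - 8];      /* payload area, fills the rest of the page */
};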

Xen offers several ways to share memory between regular DomUs, between Dom0 and DomUs, and between Dom0less DomUs. See this page on how to set up shared memory.

Notifications using event channels

Xen comes with “event channels”: a simple, yet powerful and flexible way for domains to send each other notifications.

Linux already comes with complete support for event channels. You can use the functions it provides to allocate, bind, and send event channel notifications to other domains.
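
For instance, a userspace process on a Linux guest can drive event channels through the libxenevtchn library shipped with the Xen tools (backed by the kernel's evtchn driver). The snippet below is only a rough sketch of that path, separate from the baremetal example developed in the rest of this article, and omits all error handling:

/* Rough sketch: bind to a port that the remote domain allocated as
 * unbound, then send it one notification (error handling omitted). */
#include <stdint.h>
#include <xenevtchn.h>

void notify_remote(uint32_t remote_domid, uint32_t remote_port)
{
    xenevtchn_handle *xce = xenevtchn_open(NULL, 0);
    xenevtchn_port_or_error_t local_port;

    local_port = xenevtchn_bind_interdomain(xce, remote_domid, remote_port);
    xenevtchn_notify(xce, local_port);
    xenevtchn_close(xce);
}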

If you are using another OS, or would like to know how it works under the hood, this article provides information and code snippets on how to use Xen event channels in a baremetal OS or RTOS without any pre-existing support for them.

Example

The following is an easy-to-embed header file that can be included in any application to provide a very small implementation of the Xen interfaces, just enough to get Xen event channels working:

/* SPDX-License-Identifier: (BSD-3-Clause) */
/*
 * Xen definitions, hypercalls, and functions used to setup event
 * channels and send and receive event notifications.
 */

#ifndef XEN_H
#define XEN_H

#define GUEST_EVTCHN_PPI        31
#define DOMID_SELF              0x7FF0U

struct vcpu_time_info {
    uint32_t version;
    uint32_t pad0;
    uint64_t tsc_timestamp;
    uint64_t system_time;
    uint32_t tsc_to_system_mul;
    int8_t   tsc_shift;
    uint8_t  flags;
    uint8_t  pad1[2];
} __attribute__((__packed__)); /* 32 bytes */

struct pvclock_wall_clock {
    uint32_t version;
    uint32_t sec;
    uint32_t nsec;
    uint32_t sec_hi;
} __attribute__((__packed__));

struct arch_vcpu_info { };
struct arch_shared_info { };

struct vcpu_info {
    uint8_t evtchn_upcall_pending;
    uint8_t evtchn_upcall_mask;
    uint64_t evtchn_pending_sel;
    struct arch_vcpu_info arch;
    struct vcpu_time_info time;
};

struct shared_info {
    struct vcpu_info vcpu_info[1];
    uint64_t evtchn_pending[sizeof(uint64_t) * 8];
    uint64_t evtchn_mask[sizeof(uint64_t) * 8];

    struct pvclock_wall_clock wc;
    uint32_t wc_sec_hi;
    struct arch_shared_info arch;
};

#define active_evtchns(cpu,sh,idx)              \
    ((sh)->evtchn_pending[idx] &                \
     ~(sh)->evtchn_mask[idx])

#define HYPERVISOR_memory_op            12
#define HYPERVISOR_xen_version          17
#define HYPERVISOR_console_io           18
#define HYPERVISOR_grant_table_op       20
#define HYPERVISOR_vcpu_op              24
#define HYPERVISOR_xsm_op               27
#define HYPERVISOR_sched_op             29
#define HYPERVISOR_callback_op          30
#define HYPERVISOR_event_channel_op     32
#define HYPERVISOR_physdev_op           33
#define HYPERVISOR_hvm_op               34
#define HYPERVISOR_sysctl               35
#define HYPERVISOR_domctl               36
#define HYPERVISOR_argo_op              39
#define HYPERVISOR_dm_op                41
#define HYPERVISOR_hypfs_op             42


/* hypercalls */
static inline int64_t xen_hypercall(unsigned long arg0, unsigned long arg1,
                                    unsigned long arg2, unsigned long arg3,
                                    unsigned long hypercall)
{
    register uintptr_t a0 asm("x0") = arg0;
    register uintptr_t a1 asm("x1") = arg1;
    register uintptr_t a2 asm("x2") = arg2;
    register uintptr_t a3 asm("x3") = arg3;
    register uintptr_t nr asm("x16") = hypercall;
    asm volatile("hvc 0xea1\n"
                     : "=r" (a0), "=r"(a1), "=r" (a2), "=r" (a3), "=r" (nr)
                     : "0" (a0),
                       "r" (a1),
                       "r" (a2),
                       "r" (a3),
                       "r" (nr));
    return a0;
}


/* console_io */
#define CONSOLEIO_write 0


/* memory_op */
#define XENMAPSPACE_shared_info  0 /* shared info page */
#define XENMAPSPACE_grant_table  1 /* grant table page */

#define XENMEM_add_to_physmap      7

struct xen_add_to_physmap {
    /* Which domain to change the mapping for. */
    uint16_t domid;

    /* Number of pages to go through for gmfn_range */
    uint16_t    size;

    /* Source mapping space. */
    unsigned int space;

    /* Index into source mapping space. */
    uint64_t idx;

    /* GPFN where the source mapping page should appear. */
    uint64_t gpfn;
};

static inline int xen_register_shared_info(struct shared_info *shared_info)
{
    int rc;
    struct xen_add_to_physmap xatp;

    xatp.domid = DOMID_SELF;
    xatp.size = 0;
    xatp.idx = 0;
    xatp.space = XENMAPSPACE_shared_info;
    xatp.gpfn = ((unsigned long)shared_info) >> 12;
    rc = xen_hypercall(XENMEM_add_to_physmap, (unsigned long)&xatp, 0, 0,
                       HYPERVISOR_memory_op);
    return rc;
}


/* event_channel_op */
#define EVTCHNOP_bind_interdomain 0
#define EVTCHNOP_close            3
#define EVTCHNOP_send             4
#define EVTCHNOP_status           5
#define EVTCHNOP_alloc_unbound    6
#define EVTCHNOP_unmask           9

struct evtchn_bind_interdomain {
    /* IN parameters. */
    uint16_t remote_dom;
    uint32_t remote_port;
    /* OUT parameters. */
    uint32_t local_port;
};

struct evtchn_alloc_unbound {
    /* IN parameters */
    uint16_t dom, remote_dom;
    /* OUT parameters */
    uint32_t port;
};

struct evtchn_send {
    /* IN parameters. */
    uint32_t port;
};


/* printf */
static inline void xen_console_write(const char *str)
{
    ssize_t len = strlen(str);

    xen_hypercall(CONSOLEIO_write, len, (unsigned long)str, 0,
                  HYPERVISOR_console_io);
}

static inline void xen_printf(const char *fmt, ...)
{
    char buf[128];
    va_list ap;
    char *str = &buf[0];
    memset(buf, 0x0, 128);

    va_start(ap, fmt);
    vsprintf(str, fmt, ap);
    va_end(ap);

    xen_console_write(buf);
}


/* 
 * utility functions, not xen specific, but needed by the function
 * below
 */
#define xchg(ptr,v) __atomic_exchange_n(ptr, v, __ATOMIC_SEQ_CST)

static __inline__ unsigned long __ffs(unsigned long word)
{
        return __builtin_ctzl(word);
}


/* event handling */
static inline void handle_event_irq(struct shared_info *s,
                                    void (*do_event)(unsigned int event))
{
    uint64_t  l1, l2, l1i, l2i;
    unsigned int   port;
    int            cpu = 0;
    struct vcpu_info   *vcpu_info = &s->vcpu_info[cpu];

    vcpu_info->evtchn_upcall_pending = 0;
    mb();

    l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
    while ( l1 != 0 )
    {
        l1i = __ffs(l1);
        l1 &= ~(1UL << l1i);
        l2 = xchg(&s->evtchn_pending[l1i], 0);

        while ( l2 != 0 )
        {
            l2i = __ffs(l2);
            l2 &= ~(1UL << l2i);

            port = (l1i * sizeof(uint64_t) * 8) + l2i; /* 64 ports per pending word */

            do_event(port);
        }
    }
}

#endif /* XEN_H */

The following is the example application, apu.c, which uses xen.h to set up an event channel. The same apu.c is used by two domains, the sender and the receiver; the behavior changes depending on the domid.

/* SPDX-License-Identifier: (BSD-3-Clause) */
/*
 * TBM application to send and receive Xen event channels.
 *
 * Written by Stefano Stabellini
 */
#define _MINIC_SOURCE

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <stdarg.h>
#include <stdlib.h>
#include "sys.h"

#include "drivers/arm/gic.h"

#include "xen.h"

static uint16_t domid = 0;
static struct shared_info *shared_info = 0;
/* statically configured shared memory at address 0x7fe00000 */
static char* shared_mem = (char *)0x7fe00000;

static void print_event(unsigned int event)
{
    xen_printf("handle_event domid=%u event=%u\n", domid, event);
}

static void irq_handler(struct excp_frame *f)
{
    uint32_t irq;

    irq = gic_ack_irq(GIC_CPU_BASE);

    handle_event_irq(shared_info, print_event);

    gic_end_of_irq(GIC_CPU_BASE, irq);
    gic_deactivate_irq(GIC_CPU_BASE, irq);
    local_cpu_ei();
}

static void gic_init(int irq)
{
    assert(irq < 32);

    /* Disable interrupts while we configure the GIC.  */
    local_cpu_di();

    /* Setup the GIC.  */
    gicd_set_irq_group(GIC_DIST_BASE, irq, 0);
    gicd_set_irq_target(GIC_DIST_BASE, irq, 0);
    gicd_enable_irq(GIC_DIST_BASE, irq);
    gicd_set_irq_group(GIC_DIST_BASE, 5, 0);
    gicd_set_irq_target(GIC_DIST_BASE, 5, 0);
    gicd_enable_irq(GIC_DIST_BASE, 5);

    writel(GIC_DIST_BASE + GICD_CTRL, 3);
    writel(GIC_CPU_BASE + GICC_CTRL, 3);
    writel(GIC_CPU_BASE + GICC_PMR, 0xff);
    mb();
    local_cpu_ei();
}

void debug_get_domid()
{
    register uintptr_t a0 asm("x0");
    __asm__ __volatile__("hvc 0xfffd\n" 
            : "=r" (a0)
            : "0" (a0));
    domid = a0;
}

static void sleep(unsigned long long sec)
{
    unsigned long long i = 0;

    while ( i < sec*1000000000 )
    {
        i++;
        mb();
    }
}

void app_run(void)
{
    int ret = 0;

    /* Setup GIC and interrupt handler for Xen events */
    gic_init(GUEST_EVTCHN_PPI);
    aarch64_set_irq_h(irq_handler);

    /* Register shared_info page */
    shared_info = aligned_alloc(4096, 4096);
    memset(shared_info, 0x0, 4096);
    xen_register_shared_info(shared_info);

    /* Get our domid with debug hypercall */
    debug_get_domid();
    xen_printf("DEBUG domid=%d\n", domid);

    /* If domid == 1 allocate an unbound event to receive notifications */
    if (domid == 1) {
        uint16_t remote_domid = 2;
        struct evtchn_alloc_unbound alloc;

        alloc.dom = DOMID_SELF;
        alloc.remote_dom = remote_domid;
        alloc.port = 0;

        ret = xen_hypercall(EVTCHNOP_alloc_unbound, (unsigned long)&alloc,
                            0, 0, HYPERVISOR_event_channel_op);
        mb();

        xen_printf("DEBUG domid=%d alloc_unbound ret=%d port=%u\n", domid, ret, alloc.port);

        /* first message to signal readiness */
        memcpy(shared_mem, "go", sizeof("go"));
        mb();
        /* send port number to other domain */
        memcpy(shared_mem + 4, &alloc.port, sizeof(alloc.port));

    /* if domid == 2 bind to foreign event channel and send event notifications */
    } else {
        uint16_t remote_domid = 1;
        uint16_t remote_port;
        struct evtchn_bind_interdomain bind;
        struct evtchn_send send;

        /* wait for readiness signal */
        while (1) {
            if (strcmp(shared_mem, "go") == 0)
                break;
            mb();
        }
        mb();
        /* read port number of the other domain */
        memcpy(&remote_port, shared_mem + 4, sizeof(remote_port));

        xen_printf("DEBUG domid=%d remote_port=%u\n", domid, remote_port);

        bind.remote_dom = remote_domid;
        bind.remote_port = remote_port;
        bind.local_port = 0;
        ret = xen_hypercall(EVTCHNOP_bind_interdomain, (unsigned long)&bind,
                            0, 0, HYPERVISOR_event_channel_op);

        xen_printf("DEBUG domid=%d bind_interdomain ret=%d local_port=%u\n", domid, ret, bind.local_port);

        send.port = bind.local_port;
        xen_hypercall(EVTCHNOP_send, (unsigned long)&send,
                      0, 0, HYPERVISOR_event_channel_op);
        sleep(2);
        xen_hypercall(EVTCHNOP_send, (unsigned long)&send,
                      0, 0, HYPERVISOR_event_channel_op);
    }

    while (1)
        ;
}

Receiving Xen events in general

PPI

Xen events are delivered to the guest as GUEST_EVTCHN_PPI, statically defined as interrupt 31. First, register a handler in your OS for PPI 31:

    gic_init(GUEST_EVTCHN_PPI);
    aarch64_set_irq_h(irq_handler);

Shared_info page

Information about event channels is provided on a memory page called shared_info because it is shared between the VM and Xen. To access the shared_info page, the guest first has to register it with Xen:

xen_register_shared_info(shared_info);
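
In apu.c the page handed to Xen is allocated page-aligned and zeroed before being registered:

    shared_info = aligned_alloc(4096, 4096);
    memset(shared_info, 0x0, 4096);
    xen_register_shared_info(shared_info);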

Event Channel Parsing

Upon receiving an interrupt from Xen, we can check the bitmasks on the shared_info page to determine exactly which event was delivered. xen.h provides an implementation of the parsing function, which can be called from the interrupt handler.

/* event handling */
static inline void handle_event_irq(struct shared_info *s,
                                    void (*do_event)(unsigned int event))
{
    uint64_t  l1, l2, l1i, l2i;
    unsigned int   port;
    int            cpu = 0;
    struct vcpu_info   *vcpu_info = &s->vcpu_info[cpu];

    vcpu_info->evtchn_upcall_pending = 0;
    mb();

    l1 = xchg(&vcpu_info->evtchn_pending_sel, 0);
    while ( l1 != 0 )
    {
        l1i = __ffs(l1);
        l1 &= ~(1UL << l1i);
        l2 = xchg(&s->evtchn_pending[l1i], 0);

        while ( l2 != 0 )
        {
            l2i = __ffs(l2);
            l2 &= ~(1UL << l2i);

            port = (l1i * sizeof(uint64_t) * 8) + l2i; /* 64 ports per pending word */

            do_event(port);
        }
    }
}
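
In apu.c this function is called from the interrupt handler registered for the event PPI, between acknowledging and completing the interrupt at the GIC:

static void irq_handler(struct excp_frame *f)
{
    uint32_t irq;

    irq = gic_ack_irq(GIC_CPU_BASE);

    handle_event_irq(shared_info, print_event);

    gic_end_of_irq(GIC_CPU_BASE, irq);
    gic_deactivate_irq(GIC_CPU_BASE, irq);
    local_cpu_ei();
}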

Allocating and Binding Event Channels

The previous section described how to receive event channel notifications in general. This section describes how to allocate and bind one event channel so that it can be used for notifications between two domains.

In apu.c the domains detect their domids by issuing a debug hypercall.

void debug_get_domid()
{
    register uintptr_t a0 asm("x0");
    __asm__ __volatile__("hvc 0xfffd\n" 
            : "=r" (a0)
            : "0" (a0));
    domid = a0;
}

  1. The receiver domain allocates a new unbound event channel by calling EVTCHNOP_alloc_unbound. Xen writes the newly allocated local port into alloc.port.

            struct evtchn_alloc_unbound alloc;
            alloc.dom = DOMID_SELF;
            alloc.remote_dom = remote_domid;
            alloc.port = 0;
            ret = xen_hypercall(EVTCHNOP_alloc_unbound, (unsigned long)&alloc,
                                0, 0, HYPERVISOR_event_channel_op);

  2. The sender domain needs to know the remote event channel number, typically referred to as a port, in order to bind to it. As the remote port number is dynamically allocated by the EVTCHNOP_alloc_unbound hypercall, it is best to pass it over shared memory from the receiver to the sender. In apu.c the receiver copies alloc.port into the shared page right after writing the "go" readiness marker; the sender waits for the marker and then reads the port:

            /* wait for readiness signal */
            while (1) {
                if (strcmp(shared_mem, "go") == 0)
                    break;
                mb();
            }
            mb();
            /* read port number of the other domain */
            memcpy(&remote_port, shared_mem + 4, sizeof(remote_port));

  3. The sender domain issues an EVTCHNOP_bind_interdomain call to bind to the remote port.

            struct evtchn_bind_interdomain bind;
            bind.remote_dom = remote_domid;
            bind.remote_port = remote_port;
            bind.local_port = 0;
            ret = xen_hypercall(EVTCHNOP_bind_interdomain, (unsigned long)&bind,
                                0, 0, HYPERVISOR_event_channel_op);

  4. The sender domain can start sending notifications to the receiver domain by issuing EVTCHNOP_send hypercalls.

            struct evtchn_send send;
            send.port = bind.local_port;
            xen_hypercall(EVTCHNOP_send, (unsigned long)&send,
                          0, 0, HYPERVISOR_event_channel_op);
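
On the receiver side, each EVTCHNOP_send results in an upcall on GUEST_EVTCHN_PPI, and handle_event_irq invokes the registered callback with the local port number of the event. In apu.c the callback simply logs it:

static void print_event(unsigned int event)
{
    xen_printf("handle_event domid=%u event=%u\n", domid, event);
}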