lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <551B2872.2010802@oracle.com>
Date:	Wed, 01 Apr 2015 07:06:26 +0800
From:	Bob Liu <bob.liu@...cle.com>
To:	Juergen Gross <jgross@...e.com>
CC:	xen-devel@...ts.xenproject.org, paul.durrant@...rix.com,
	david.vrabel@...rix.com, roger.pau@...rix.com,
	linux-kernel@...r.kernel.org, konrad.wilk@...cle.com,
	Wei Liu <wei.liu2@...rix.com>,
	Boris Ostrovsky <boris.ostrovsky@...cle.com>
Subject: Re: [PATCH RESEND 1/2] xenbus_client: Extend interface to support
 multi-page ring

Hi Juergen,

On 03/31/2015 08:36 PM, Juergen Gross wrote:
> On 03/31/2015 02:15 PM, Bob Liu wrote:
>> From: Wei Liu <wei.liu2@...rix.com>
>>
>> Originally Xen PV drivers only use single-page ring to pass along
>> information. This might limit the throughput between frontend and
>> backend.
>>
>> The patch extends Xenbus driver to support multi-page ring, which in
>> general should improve throughput if ring is the bottleneck. Changes to
>> various frontend / backend to adapt to the new interface are also
>> included.
>>
>> Affected Xen drivers:
>> * blkfront/back
>> * netfront/back
>> * pcifront/back
> 
> What about pvscsi drivers?
> They are affected, too!
> 

Thanks for the reminder, I'll send a new version to fix it.

Regards,
-Bob

> 
> Juergen
> 
>>
>> The interface is documented, as before, in xenbus_client.c.
>>
>> Change in V2:
>> * allow the ring to have an arbitrary number of pages <= XENBUS_MAX_RING_PAGES
>>
>> Change in V3:
>> * update function prototypes
>> * carefully deal with types of different sizes
>>
>> Change in V4:
>> * use PAGE_KERNEL instead of PAGE_KERNEL_IO to avoid breakage on Arm
>>
>> Change in V5:
>> * fix off-by-one error and other minor glitches spotted by Mathew Daley
>>
>> Signed-off-by: Wei Liu <wei.liu2@...rix.com>
>> Signed-off-by: Paul Durrant <paul.durrant@...rix.com>
>> Signed-off-by: Bob Liu <bob.liu@...cle.com>
>> Cc: Konrad Wilk <konrad.wilk@...cle.com>
>> Cc: David Vrabel <david.vrabel@...rix.com>
>> Cc: Boris Ostrovsky <boris.ostrovsky@...cle.com>
>> ---
>>   drivers/block/xen-blkback/xenbus.c |   5 +-
>>   drivers/block/xen-blkfront.c       |   5 +-
>>   drivers/net/xen-netback/netback.c  |   4 +-
>>   drivers/net/xen-netfront.c         |   9 +-
>>   drivers/pci/xen-pcifront.c         |   5 +-
>>   drivers/xen/xen-pciback/xenbus.c   |   2 +-
>>   drivers/xen/xenbus/xenbus_client.c | 387
>> +++++++++++++++++++++++++++----------
>>   include/xen/xenbus.h               |  20 +-
>>   8 files changed, 317 insertions(+), 120 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/xenbus.c
>> b/drivers/block/xen-blkback/xenbus.c
>> index e3afe97..ff30259 100644
>> --- a/drivers/block/xen-blkback/xenbus.c
>> +++ b/drivers/block/xen-blkback/xenbus.c
>> @@ -193,7 +193,7 @@ fail:
>>       return ERR_PTR(-ENOMEM);
>>   }
>>
>> -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long
>> shared_page,
>> +static int xen_blkif_map(struct xen_blkif *blkif, grant_ref_t gref,
>>                unsigned int evtchn)
>>   {
>>       int err;
>> @@ -202,7 +202,8 @@ static int xen_blkif_map(struct xen_blkif *blkif,
>> unsigned long shared_page,
>>       if (blkif->irq)
>>           return 0;
>>
>> -    err = xenbus_map_ring_valloc(blkif->be->dev, shared_page,
>> &blkif->blk_ring);
>> +    err = xenbus_map_ring_valloc(blkif->be->dev, &gref, 1,
>> +                     &blkif->blk_ring);
>>       if (err < 0)
>>           return err;
>>
>> diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
>> index 37779e4..2c61cf8 100644
>> --- a/drivers/block/xen-blkfront.c
>> +++ b/drivers/block/xen-blkfront.c
>> @@ -1245,6 +1245,7 @@ static int setup_blkring(struct xenbus_device *dev,
>>                struct blkfront_info *info)
>>   {
>>       struct blkif_sring *sring;
>> +    grant_ref_t gref;
>>       int err;
>>
>>       info->ring_ref = GRANT_INVALID_REF;
>> @@ -1257,13 +1258,13 @@ static int setup_blkring(struct xenbus_device
>> *dev,
>>       SHARED_RING_INIT(sring);
>>       FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
>>
>> -    err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring));
>> +    err = xenbus_grant_ring(dev, info->ring.sring, 1, &gref);
>>       if (err < 0) {
>>           free_page((unsigned long)sring);
>>           info->ring.sring = NULL;
>>           goto fail;
>>       }
>> -    info->ring_ref = err;
>> +    info->ring_ref = gref;
>>
>>       err = xenbus_alloc_evtchn(dev, &info->evtchn);
>>       if (err)
>> diff --git a/drivers/net/xen-netback/netback.c
>> b/drivers/net/xen-netback/netback.c
>> index 997cf09..865203f 100644
>> --- a/drivers/net/xen-netback/netback.c
>> +++ b/drivers/net/xen-netback/netback.c
>> @@ -1782,7 +1782,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>       int err = -ENOMEM;
>>
>>       err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> -                     tx_ring_ref, &addr);
>> +                     &tx_ring_ref, 1, &addr);
>>       if (err)
>>           goto err;
>>
>> @@ -1790,7 +1790,7 @@ int xenvif_map_frontend_rings(struct
>> xenvif_queue *queue,
>>       BACK_RING_INIT(&queue->tx, txs, PAGE_SIZE);
>>
>>       err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(queue->vif),
>> -                     rx_ring_ref, &addr);
>> +                     &rx_ring_ref, 1, &addr);
>>       if (err)
>>           goto err;
>>
>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>> index e9b960f..13f5e7f 100644
>> --- a/drivers/net/xen-netfront.c
>> +++ b/drivers/net/xen-netfront.c
>> @@ -1486,6 +1486,7 @@ static int setup_netfront(struct xenbus_device
>> *dev,
>>   {
>>       struct xen_netif_tx_sring *txs;
>>       struct xen_netif_rx_sring *rxs;
>> +    grant_ref_t gref;
>>       int err;
>>
>>       queue->tx_ring_ref = GRANT_INVALID_REF;
>> @@ -1502,10 +1503,10 @@ static int setup_netfront(struct xenbus_device
>> *dev,
>>       SHARED_RING_INIT(txs);
>>       FRONT_RING_INIT(&queue->tx, txs, PAGE_SIZE);
>>
>> -    err = xenbus_grant_ring(dev, virt_to_mfn(txs));
>> +    err = xenbus_grant_ring(dev, txs, 1, &gref);
>>       if (err < 0)
>>           goto grant_tx_ring_fail;
>> -    queue->tx_ring_ref = err;
>> +    queue->tx_ring_ref = gref;
>>
>>       rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO |
>> __GFP_HIGH);
>>       if (!rxs) {
>> @@ -1516,10 +1517,10 @@ static int setup_netfront(struct xenbus_device
>> *dev,
>>       SHARED_RING_INIT(rxs);
>>       FRONT_RING_INIT(&queue->rx, rxs, PAGE_SIZE);
>>
>> -    err = xenbus_grant_ring(dev, virt_to_mfn(rxs));
>> +    err = xenbus_grant_ring(dev, rxs, 1, &gref);
>>       if (err < 0)
>>           goto grant_rx_ring_fail;
>> -    queue->rx_ring_ref = err;
>> +    queue->rx_ring_ref = gref;
>>
>>       if (feature_split_evtchn)
>>           err = setup_netfront_split(queue);
>> diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c
>> index b1ffebe..7cfd2db 100644
>> --- a/drivers/pci/xen-pcifront.c
>> +++ b/drivers/pci/xen-pcifront.c
>> @@ -777,12 +777,13 @@ static int pcifront_publish_info(struct
>> pcifront_device *pdev)
>>   {
>>       int err = 0;
>>       struct xenbus_transaction trans;
>> +    grant_ref_t gref;
>>
>> -    err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info));
>> +    err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &gref);
>>       if (err < 0)
>>           goto out;
>>
>> -    pdev->gnt_ref = err;
>> +    pdev->gnt_ref = gref;
>>
>>       err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn);
>>       if (err)
>> diff --git a/drivers/xen/xen-pciback/xenbus.c
>> b/drivers/xen/xen-pciback/xenbus.c
>> index fe17c80..98bc345 100644
>> --- a/drivers/xen/xen-pciback/xenbus.c
>> +++ b/drivers/xen/xen-pciback/xenbus.c
>> @@ -113,7 +113,7 @@ static int xen_pcibk_do_attach(struct
>> xen_pcibk_device *pdev, int gnt_ref,
>>           "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n",
>>           gnt_ref, remote_evtchn);
>>
>> -    err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr);
>> +    err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr);
>>       if (err < 0) {
>>           xenbus_dev_fatal(pdev->xdev, err,
>>                   "Error mapping other domain page in ours.");
>> diff --git a/drivers/xen/xenbus/xenbus_client.c
>> b/drivers/xen/xenbus/xenbus_client.c
>> index ca74410..96b2011 100644
>> --- a/drivers/xen/xenbus/xenbus_client.c
>> +++ b/drivers/xen/xenbus/xenbus_client.c
>> @@ -52,17 +52,25 @@
>>   struct xenbus_map_node {
>>       struct list_head next;
>>       union {
>> -        struct vm_struct *area; /* PV */
>> -        struct page *page;     /* HVM */
>> +        struct {
>> +            struct vm_struct *area;
>> +        } pv;
>> +        struct {
>> +            struct page *pages[XENBUS_MAX_RING_PAGES];
>> +            void *addr;
>> +        } hvm;
>>       };
>> -    grant_handle_t handle;
>> +    grant_handle_t handles[XENBUS_MAX_RING_PAGES];
>> +    unsigned int   nr_handles;
>>   };
>>
>>   static DEFINE_SPINLOCK(xenbus_valloc_lock);
>>   static LIST_HEAD(xenbus_valloc_pages);
>>
>>   struct xenbus_ring_ops {
>> -    int (*map)(struct xenbus_device *dev, int gnt, void **vaddr);
>> +    int (*map)(struct xenbus_device *dev,
>> +           grant_ref_t *gnt_refs, unsigned int nr_grefs,
>> +           void **vaddr);
>>       int (*unmap)(struct xenbus_device *dev, void *vaddr);
>>   };
>>
>> @@ -355,17 +363,39 @@ static void xenbus_switch_fatal(struct
>> xenbus_device *dev, int depth, int err,
>>   /**
>>    * xenbus_grant_ring
>>    * @dev: xenbus device
>> - * @ring_mfn: mfn of ring to grant
>> -
>> - * Grant access to the given @ring_mfn to the peer of the given
>> device.  Return
>> - * a grant reference on success, or -errno on error. On error, the
>> device will
>> - * switch to XenbusStateClosing, and the error will be saved in the
>> store.
>> + * @vaddr: starting virtual address of the ring
>> + * @nr_pages: number of pages to be granted
>> + * @grefs: grant reference array to be filled in
>> + *
>> + * Grant access to the given @vaddr to the peer of the given device.
>> + * Then fill in @grefs with grant references.  Return 0 on success, or
>> + * -errno on error.  On error, the device will switch to
>> + * XenbusStateClosing, and the error will be saved in the store.
>>    */
>> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn)
>> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
>> +              unsigned int nr_pages, grant_ref_t *grefs)
>>   {
>> -    int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn,
>> 0);
>> -    if (err < 0)
>> -        xenbus_dev_fatal(dev, err, "granting access to ring page");
>> +    int err;
>> +    int i, j;
>> +
>> +    for (i = 0; i < nr_pages; i++) {
>> +        unsigned long addr = (unsigned long)vaddr +
>> +            (PAGE_SIZE * i);
>> +        err = gnttab_grant_foreign_access(dev->otherend_id,
>> +                          virt_to_mfn(addr), 0);
>> +        if (err < 0) {
>> +            xenbus_dev_fatal(dev, err,
>> +                     "granting access to ring page");
>> +            goto fail;
>> +        }
>> +        grefs[i] = err;
>> +    }
>> +
>> +    return 0;
>> +
>> +fail:
>> +    for (j = 0; j < i; j++)
>> +        gnttab_end_foreign_access_ref(grefs[j], 0);
>>       return err;
>>   }
>>   EXPORT_SYMBOL_GPL(xenbus_grant_ring);
>> @@ -419,62 +449,130 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn);
>>   /**
>>    * xenbus_map_ring_valloc
>>    * @dev: xenbus device
>> - * @gnt_ref: grant reference
>> + * @gnt_refs: grant reference array
>> + * @nr_grefs: number of grant references
>>    * @vaddr: pointer to address to be filled out by mapping
>>    *
>> - * Based on Rusty Russell's skeleton driver's map_page.
>> - * Map a page of memory into this domain from another domain's grant
>> table.
>> - * xenbus_map_ring_valloc allocates a page of virtual address space,
>> maps the
>> - * page to that address, and sets *vaddr to that address.
>> - * Returns 0 on success, and GNTST_* (see
>> xen/include/interface/grant_table.h)
>> - * or -ENOMEM on error. If an error is returned, device will switch to
>> + * Map @nr_grefs pages of memory into this domain from another
>> + * domain's grant table.  xenbus_map_ring_valloc allocates @nr_grefs
>> + * pages of virtual address space, maps the pages to that address, and
>> + * sets *vaddr to that address.  Returns 0 on success, and GNTST_*
>> + * (see xen/include/interface/grant_table.h) or -ENOMEM / -EINVAL on
>> + * error. If an error is returned, device will switch to
>>    * XenbusStateClosing and the error message will be saved in XenStore.
>>    */
>> -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref,
>> void **vaddr)
>> +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t
>> *gnt_refs,
>> +               unsigned int nr_grefs, void **vaddr)
>>   {
>> -    return ring_ops->map(dev, gnt_ref, vaddr);
>> +    return ring_ops->map(dev, gnt_refs, nr_grefs, vaddr);
>>   }
>>   EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc);
>>
>> +/* N.B. sizeof(phys_addr_t) doesn't always equal to sizeof(unsigned
>> + * long), e.g. 32-on-64.  Caller is responsible for preparing the
>> + * right array to feed into this function */
>> +static int __xenbus_map_ring(struct xenbus_device *dev,
>> +                 grant_ref_t *gnt_refs,
>> +                 unsigned int nr_grefs,
>> +                 grant_handle_t *handles,
>> +                 phys_addr_t *addrs,
>> +                 unsigned int flags,
>> +                 bool *leaked)
>> +{
>> +    struct gnttab_map_grant_ref map[XENBUS_MAX_RING_PAGES];
>> +    struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
>> +    int i, j;
>> +    int err = GNTST_okay;
>> +
>> +    if (nr_grefs > XENBUS_MAX_RING_PAGES)
>> +        return -EINVAL;
>> +
>> +    for (i = 0; i < nr_grefs; i++) {
>> +        memset(&map[i], 0, sizeof(map[i]));
>> +        gnttab_set_map_op(&map[i], addrs[i], flags, gnt_refs[i],
>> +                  dev->otherend_id);
>> +        handles[i] = INVALID_GRANT_HANDLE;
>> +    }
>> +
>> +    gnttab_batch_map(map, i);
>> +
>> +    for (i = 0; i < nr_grefs; i++) {
>> +        if (map[i].status != GNTST_okay) {
>> +            err = map[i].status;
>> +            xenbus_dev_fatal(dev, map[i].status,
>> +                     "mapping in shared page %d from domain %d",
>> +                     gnt_refs[i], dev->otherend_id);
>> +            goto fail;
>> +        } else
>> +            handles[i] = map[i].handle;
>> +    }
>> +
>> +    return GNTST_okay;
>> +
>> + fail:
>> +    for (i = j = 0; i < nr_grefs; i++) {
>> +        if (handles[i] != INVALID_GRANT_HANDLE) {
>> +            memset(&unmap[j], 0, sizeof(unmap[j]));
>> +            gnttab_set_unmap_op(&unmap[j], (phys_addr_t)addrs[i],
>> +                        GNTMAP_host_map, handles[i]);
>> +            j++;
>> +        }
>> +    }
>> +
>> +    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, j))
>> +        BUG();
>> +
>> +    *leaked = false;
>> +    for (i = 0; i < j; i++) {
>> +        if (unmap[i].status != GNTST_okay) {
>> +            *leaked = true;
>> +            break;
>> +        }
>> +    }
>> +
>> +    return err;
>> +}
>> +
>>   static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev,
>> -                     int gnt_ref, void **vaddr)
>> +                     grant_ref_t *gnt_refs,
>> +                     unsigned int nr_grefs,
>> +                     void **vaddr)
>>   {
>> -    struct gnttab_map_grant_ref op = {
>> -        .flags = GNTMAP_host_map | GNTMAP_contains_pte,
>> -        .ref   = gnt_ref,
>> -        .dom   = dev->otherend_id,
>> -    };
>>       struct xenbus_map_node *node;
>>       struct vm_struct *area;
>> -    pte_t *pte;
>> +    pte_t *ptes[XENBUS_MAX_RING_PAGES];
>> +    phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
>> +    int err = GNTST_okay;
>> +    int i;
>> +    bool leaked;
>>
>>       *vaddr = NULL;
>>
>> +    if (nr_grefs > XENBUS_MAX_RING_PAGES)
>> +        return -EINVAL;
>> +
>>       node = kzalloc(sizeof(*node), GFP_KERNEL);
>>       if (!node)
>>           return -ENOMEM;
>>
>> -    area = alloc_vm_area(PAGE_SIZE, &pte);
>> +    area = alloc_vm_area(PAGE_SIZE * nr_grefs, ptes);
>>       if (!area) {
>>           kfree(node);
>>           return -ENOMEM;
>>       }
>>
>> -    op.host_addr = arbitrary_virt_to_machine(pte).maddr;
>> +    for (i = 0; i < nr_grefs; i++)
>> +        phys_addrs[i] = arbitrary_virt_to_machine(ptes[i]).maddr;
>>
>> -    gnttab_batch_map(&op, 1);
>> -
>> -    if (op.status != GNTST_okay) {
>> -        free_vm_area(area);
>> -        kfree(node);
>> -        xenbus_dev_fatal(dev, op.status,
>> -                 "mapping in shared page %d from domain %d",
>> -                 gnt_ref, dev->otherend_id);
>> -        return op.status;
>> -    }
>> +    err = __xenbus_map_ring(dev, gnt_refs, nr_grefs, node->handles,
>> +                phys_addrs,
>> +                GNTMAP_host_map | GNTMAP_contains_pte,
>> +                &leaked);
>> +    if (err)
>> +        goto failed;
>>
>> -    node->handle = op.handle;
>> -    node->area = area;
>> +    node->nr_handles = nr_grefs;
>> +    node->pv.area = area;
>>
>>       spin_lock(&xenbus_valloc_lock);
>>       list_add(&node->next, &xenbus_valloc_pages);
>> @@ -482,14 +580,33 @@ static int xenbus_map_ring_valloc_pv(struct
>> xenbus_device *dev,
>>
>>       *vaddr = area->addr;
>>       return 0;
>> +
>> +failed:
>> +    if (!leaked)
>> +        free_vm_area(area);
>> +    else
>> +        pr_alert("leaking VM area %p size %u page(s)", area, nr_grefs);
>> +
>> +    kfree(node);
>> +    return err;
>>   }
>>
>>   static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev,
>> -                      int gnt_ref, void **vaddr)
>> +                      grant_ref_t *gnt_ref,
>> +                      unsigned int nr_grefs,
>> +                      void **vaddr)
>>   {
>>       struct xenbus_map_node *node;
>> +    int i;
>>       int err;
>>       void *addr;
>> +    bool leaked = false;
>> +    /* Why do we need two arrays? See comment of __xenbus_map_ring */
>> +    phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
>> +    unsigned long addrs[XENBUS_MAX_RING_PAGES];
>> +
>> +    if (nr_grefs > XENBUS_MAX_RING_PAGES)
>> +        return -EINVAL;
>>
>>       *vaddr = NULL;
>>
>> @@ -497,15 +614,32 @@ static int xenbus_map_ring_valloc_hvm(struct
>> xenbus_device *dev,
>>       if (!node)
>>           return -ENOMEM;
>>
>> -    err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */);
>> +    err = alloc_xenballooned_pages(nr_grefs, node->hvm.pages,
>> +                       false /* lowmem */);
>>       if (err)
>>           goto out_err;
>>
>> -    addr = pfn_to_kaddr(page_to_pfn(node->page));
>> +    for (i = 0; i < nr_grefs; i++) {
>> +        unsigned long pfn = page_to_pfn(node->hvm.pages[i]);
>> +        phys_addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
>> +        addrs[i] = (unsigned long)pfn_to_kaddr(pfn);
>> +    }
>> +
>> +    err = __xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handles,
>> +                phys_addrs, GNTMAP_host_map, &leaked);
>> +    node->nr_handles = nr_grefs;
>>
>> -    err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr);
>>       if (err)
>> -        goto out_err_free_ballooned_pages;
>> +        goto out_free_ballooned_pages;
>> +
>> +    addr = vmap(node->hvm.pages, nr_grefs, VM_MAP | VM_IOREMAP,
>> +            PAGE_KERNEL);
>> +    if (!addr) {
>> +        err = -ENOMEM;
>> +        goto out_xenbus_unmap_ring;
>> +    }
>> +
>> +    node->hvm.addr = addr;
>>
>>       spin_lock(&xenbus_valloc_lock);
>>       list_add(&node->next, &xenbus_valloc_pages);
>> @@ -514,8 +648,16 @@ static int xenbus_map_ring_valloc_hvm(struct
>> xenbus_device *dev,
>>       *vaddr = addr;
>>       return 0;
>>
>> - out_err_free_ballooned_pages:
>> -    free_xenballooned_pages(1, &node->page);
>> + out_xenbus_unmap_ring:
>> +    if (!leaked)
>> +        xenbus_unmap_ring(dev, node->handles, node->nr_handles,
>> +                  addrs);
>> +    else
>> +        pr_alert("leaking %p size %u page(s)",
>> +             addr, nr_grefs);
>> + out_free_ballooned_pages:
>> +    if (!leaked)
>> +        free_xenballooned_pages(nr_grefs, node->hvm.pages);
>>    out_err:
>>       kfree(node);
>>       return err;
>> @@ -525,35 +667,37 @@ static int xenbus_map_ring_valloc_hvm(struct
>> xenbus_device *dev,
>>   /**
>>    * xenbus_map_ring
>>    * @dev: xenbus device
>> - * @gnt_ref: grant reference
>> - * @handle: pointer to grant handle to be filled
>> - * @vaddr: address to be mapped to
>> + * @gnt_refs: grant reference array
>> + * @nr_grefs: number of grant reference
>> + * @handles: pointer to grant handle to be filled
>> + * @vaddrs: addresses to be mapped to
>> + * @leaked: fail to clean up a failed map, caller should not free vaddr
>>    *
>> - * Map a page of memory into this domain from another domain's grant
>> table.
>> + * Map pages of memory into this domain from another domain's grant
>> table.
>>    * xenbus_map_ring does not allocate the virtual address space (you
>> must do
>> - * this yourself!). It only maps in the page to the specified address.
>> + * this yourself!). It only maps in the pages to the specified address.
>>    * Returns 0 on success, and GNTST_* (see
>> xen/include/interface/grant_table.h)
>> - * or -ENOMEM on error. If an error is returned, device will switch to
>> - * XenbusStateClosing and the error message will be saved in XenStore.
>> + * or -ENOMEM / -EINVAL on error. If an error is returned, device
>> will switch to
>> + * XenbusStateClosing and the first error message will be saved in
>> XenStore.
>> + * Further more if we fail to map the ring, caller should check @leaked.
>> + * If @leaked is not zero it means xenbus_map_ring fails to clean up,
>> caller
>> + * should not free the address space of @vaddr.
>>    */
>> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
>> -            grant_handle_t *handle, void *vaddr)
>> +int xenbus_map_ring(struct xenbus_device *dev, grant_ref_t *gnt_refs,
>> +            unsigned int nr_grefs, grant_handle_t *handles,
>> +            unsigned long *vaddrs, bool *leaked)
>>   {
>> -    struct gnttab_map_grant_ref op;
>> -
>> -    gnttab_set_map_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
>> gnt_ref,
>> -              dev->otherend_id);
>> +    phys_addr_t phys_addrs[XENBUS_MAX_RING_PAGES];
>> +    int i;
>>
>> -    gnttab_batch_map(&op, 1);
>> +    if (nr_grefs > XENBUS_MAX_RING_PAGES)
>> +        return -EINVAL;
>>
>> -    if (op.status != GNTST_okay) {
>> -        xenbus_dev_fatal(dev, op.status,
>> -                 "mapping in shared page %d from domain %d",
>> -                 gnt_ref, dev->otherend_id);
>> -    } else
>> -        *handle = op.handle;
>> +    for (i = 0; i < nr_grefs; i++)
>> +        phys_addrs[i] = (unsigned long)vaddrs[i];
>>
>> -    return op.status;
>> +    return __xenbus_map_ring(dev, gnt_refs, nr_grefs, handles,
>> +                 phys_addrs, GNTMAP_host_map, leaked);
>>   }
>>   EXPORT_SYMBOL_GPL(xenbus_map_ring);
>>
>> @@ -579,14 +723,15 @@ EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree);
>>   static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev,
>> void *vaddr)
>>   {
>>       struct xenbus_map_node *node;
>> -    struct gnttab_unmap_grant_ref op = {
>> -        .host_addr = (unsigned long)vaddr,
>> -    };
>> +    struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
>>       unsigned int level;
>> +    int i;
>> +    bool leaked = false;
>> +    int err;
>>
>>       spin_lock(&xenbus_valloc_lock);
>>       list_for_each_entry(node, &xenbus_valloc_pages, next) {
>> -        if (node->area->addr == vaddr) {
>> +        if (node->pv.area->addr == vaddr) {
>>               list_del(&node->next);
>>               goto found;
>>           }
>> @@ -601,22 +746,41 @@ static int xenbus_unmap_ring_vfree_pv(struct
>> xenbus_device *dev, void *vaddr)
>>           return GNTST_bad_virt_addr;
>>       }
>>
>> -    op.handle = node->handle;
>> -    op.host_addr = arbitrary_virt_to_machine(
>> -        lookup_address((unsigned long)vaddr, &level)).maddr;
>> +    for (i = 0; i < node->nr_handles; i++) {
>> +        unsigned long addr;
>> +
>> +        memset(&unmap[i], 0, sizeof(unmap[i]));
>> +        addr = (unsigned long)vaddr + (PAGE_SIZE * i);
>> +        unmap[i].host_addr = arbitrary_virt_to_machine(
>> +            lookup_address(addr, &level)).maddr;
>> +        unmap[i].dev_bus_addr = 0;
>> +        unmap[i].handle = node->handles[i];
>> +    }
>>
>> -    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
>> +    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
>>           BUG();
>>
>> -    if (op.status == GNTST_okay)
>> -        free_vm_area(node->area);
>> +    err = GNTST_okay;
>> +    leaked = false;
>> +    for (i = 0; i < node->nr_handles; i++) {
>> +        if (unmap[i].status != GNTST_okay) {
>> +            leaked = true;
>> +            xenbus_dev_error(dev, unmap[i].status,
>> +                     "unmapping page at handle %d error %d",
>> +                     node->handles[i], unmap[i].status);
>> +            err = unmap[i].status;
>> +            break;
>> +        }
>> +    }
>> +
>> +    if (!leaked)
>> +        free_vm_area(node->pv.area);
>>       else
>> -        xenbus_dev_error(dev, op.status,
>> -                 "unmapping page at handle %d error %d",
>> -                 node->handle, op.status);
>> +        pr_alert("leaking VM area %p size %u page(s)",
>> +             node->pv.area, node->nr_handles);
>>
>>       kfree(node);
>> -    return op.status;
>> +    return err;
>>   }
>>
>>   static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev,
>> void *vaddr)
>> @@ -624,10 +788,12 @@ static int xenbus_unmap_ring_vfree_hvm(struct
>> xenbus_device *dev, void *vaddr)
>>       int rv;
>>       struct xenbus_map_node *node;
>>       void *addr;
>> +    unsigned long addrs[XENBUS_MAX_RING_PAGES];
>> +    int i;
>>
>>       spin_lock(&xenbus_valloc_lock);
>>       list_for_each_entry(node, &xenbus_valloc_pages, next) {
>> -        addr = pfn_to_kaddr(page_to_pfn(node->page));
>> +        addr = node->hvm.addr;
>>           if (addr == vaddr) {
>>               list_del(&node->next);
>>               goto found;
>> @@ -643,12 +809,16 @@ static int xenbus_unmap_ring_vfree_hvm(struct
>> xenbus_device *dev, void *vaddr)
>>           return GNTST_bad_virt_addr;
>>       }
>>
>> -    rv = xenbus_unmap_ring(dev, node->handle, addr);
>> +    for (i = 0; i < node->nr_handles; i++)
>> +        addrs[i] = (unsigned
>> long)pfn_to_kaddr(page_to_pfn(node->hvm.pages[i]));
>>
>> +    rv = xenbus_unmap_ring(dev, node->handles, node->nr_handles,
>> +                   addrs);
>>       if (!rv)
>> -        free_xenballooned_pages(1, &node->page);
>> +        vunmap(vaddr);
>>       else
>> -        WARN(1, "Leaking %p\n", vaddr);
>> +        WARN(1, "Leaking %p, size %u page(s)\n", vaddr,
>> +             node->nr_handles);
>>
>>       kfree(node);
>>       return rv;
>> @@ -657,29 +827,44 @@ static int xenbus_unmap_ring_vfree_hvm(struct
>> xenbus_device *dev, void *vaddr)
>>   /**
>>    * xenbus_unmap_ring
>>    * @dev: xenbus device
>> - * @handle: grant handle
>> - * @vaddr: addr to unmap
>> + * @handles: grant handle array
>> + * @nr_handles: number of handles in the array
>> + * @vaddrs: addresses to unmap
>>    *
>> - * Unmap a page of memory in this domain that was imported from
>> another domain.
>> + * Unmap memory in this domain that was imported from another domain.
>>    * Returns 0 on success and returns GNTST_* on error
>>    * (see xen/include/interface/grant_table.h).
>>    */
>>   int xenbus_unmap_ring(struct xenbus_device *dev,
>> -              grant_handle_t handle, void *vaddr)
>> +              grant_handle_t *handles, unsigned int nr_handles,
>> +              unsigned long *vaddrs)
>>   {
>> -    struct gnttab_unmap_grant_ref op;
>> +    struct gnttab_unmap_grant_ref unmap[XENBUS_MAX_RING_PAGES];
>> +    int i;
>> +    int err;
>>
>> -    gnttab_set_unmap_op(&op, (unsigned long)vaddr, GNTMAP_host_map,
>> handle);
>> +    if (nr_handles > XENBUS_MAX_RING_PAGES)
>> +        return -EINVAL;
>>
>> -    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1))
>> +    for (i = 0; i < nr_handles; i++)
>> +        gnttab_set_unmap_op(&unmap[i], vaddrs[i],
>> +                    GNTMAP_host_map, handles[i]);
>> +
>> +    if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap, i))
>>           BUG();
>>
>> -    if (op.status != GNTST_okay)
>> -        xenbus_dev_error(dev, op.status,
>> -                 "unmapping page at handle %d error %d",
>> -                 handle, op.status);
>> +    err = GNTST_okay;
>> +    for (i = 0; i < nr_handles; i++) {
>> +        if (unmap[i].status != GNTST_okay) {
>> +            xenbus_dev_error(dev, unmap[i].status,
>> +                     "unmapping page at handle %d error %d",
>> +                     handles[i], unmap[i].status);
>> +            err = unmap[i].status;
>> +            break;
>> +        }
>> +    }
>>
>> -    return op.status;
>> +    return err;
>>   }
>>   EXPORT_SYMBOL_GPL(xenbus_unmap_ring);
>>
>> diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h
>> index b0f1c9e..289c0b5 100644
>> --- a/include/xen/xenbus.h
>> +++ b/include/xen/xenbus.h
>> @@ -46,6 +46,10 @@
>>   #include <xen/interface/io/xenbus.h>
>>   #include <xen/interface/io/xs_wire.h>
>>
>> +#define XENBUS_MAX_RING_PAGE_ORDER 4
>> +#define XENBUS_MAX_RING_PAGES      (1U << XENBUS_MAX_RING_PAGE_ORDER)
>> +#define INVALID_GRANT_HANDLE       (~0U)
>> +
>>   /* Register callback to watch this node. */
>>   struct xenbus_watch
>>   {
>> @@ -199,15 +203,19 @@ int xenbus_watch_pathfmt(struct xenbus_device
>> *dev, struct xenbus_watch *watch,
>>                const char *pathfmt, ...);
>>
>>   int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state
>> new_state);
>> -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long
>> ring_mfn);
>> -int xenbus_map_ring_valloc(struct xenbus_device *dev,
>> -               int gnt_ref, void **vaddr);
>> -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref,
>> -               grant_handle_t *handle, void *vaddr);
>> +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr,
>> +              unsigned int nr_pages, grant_ref_t *grefs);
>> +int xenbus_map_ring_valloc(struct xenbus_device *dev, grant_ref_t
>> *gnt_refs,
>> +               unsigned int nr_grefs, void **vaddr);
>> +int xenbus_map_ring(struct xenbus_device *dev,
>> +            grant_ref_t *gnt_refs, unsigned int nr_grefs,
>> +            grant_handle_t *handles, unsigned long *vaddrs,
>> +            bool *leaked);
>>
>>   int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr);
>>   int xenbus_unmap_ring(struct xenbus_device *dev,
>> -              grant_handle_t handle, void *vaddr);
>> +              grant_handle_t *handles, unsigned int nr_handles,
>> +              unsigned long *vaddrs);
>>
>>   int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port);
>>   int xenbus_free_evtchn(struct xenbus_device *dev, int port);
>>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ