[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <bd5adb0e-af98-528f-d6f9-9d5888ff2412@oracle.com>
Date: Tue, 12 Jun 2018 22:58:30 -0400
From: Boris Ostrovsky <boris.ostrovsky@...cle.com>
To: Oleksandr Andrushchenko <andr2000@...il.com>,
xen-devel@...ts.xenproject.org, linux-kernel@...r.kernel.org,
dri-devel@...ts.freedesktop.org, linux-media@...r.kernel.org,
jgross@...e.com, konrad.wilk@...cle.com
Cc: daniel.vetter@...el.com, dongwon.kim@...el.com,
matthew.d.roper@...el.com,
Oleksandr Andrushchenko <oleksandr_andrushchenko@...m.com>
Subject: Re: [PATCH v3 8/9] xen/gntdev: Implement dma-buf export functionality
On 06/12/2018 09:41 AM, Oleksandr Andrushchenko wrote:
> From: Oleksandr Andrushchenko <oleksandr_andrushchenko@...m.com>
>
> 1. Create a dma-buf from grant references provided by the foreign
> domain. By default dma-buf is backed by system memory pages, but
> by providing GNTDEV_DMA_FLAG_XXX flags it can also be created
> as a DMA write-combine/coherent buffer, e.g. allocated with
> corresponding dma_alloc_xxx API.
> Export the resulting buffer as a new dma-buf.
>
> 2. Implement waiting for the dma-buf to be released: block until the
> dma-buf with the file descriptor provided is released.
> If within the time-out provided the buffer is not released then
> -ETIMEDOUT error is returned. If the buffer with the file descriptor
> does not exist or has already been released, then -ENOENT is
> returned. For valid file descriptors this must not be treated as
> error.
>
> 3. Make gntdev's common code and structures available to dma-buf.
>
> Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushchenko@...m.com>
> ---
> drivers/xen/gntdev-common.h | 4 +
> drivers/xen/gntdev-dmabuf.c | 470 +++++++++++++++++++++++++++++++++++-
> drivers/xen/gntdev.c | 10 +
> 3 files changed, 482 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/xen/gntdev-common.h b/drivers/xen/gntdev-common.h
> index a3408fd39b07..72f80dbce861 100644
> --- a/drivers/xen/gntdev-common.h
> +++ b/drivers/xen/gntdev-common.h
> @@ -89,4 +89,8 @@ bool gntdev_account_mapped_pages(int count);
>
> int gntdev_map_grant_pages(struct gntdev_grant_map *map);
>
> +#ifdef CONFIG_XEN_GNTDEV_DMABUF
> +void gntdev_remove_map(struct gntdev_priv *priv, struct gntdev_grant_map *map);
> +#endif
> +
> #endif
> diff --git a/drivers/xen/gntdev-dmabuf.c b/drivers/xen/gntdev-dmabuf.c
> index dc57c6a25525..84cba67c6ad7 100644
> --- a/drivers/xen/gntdev-dmabuf.c
> +++ b/drivers/xen/gntdev-dmabuf.c
> @@ -3,13 +3,53 @@
> /*
> * Xen dma-buf functionality for gntdev.
> *
> + * DMA buffer implementation is based on drivers/gpu/drm/drm_prime.c.
> + *
> * Copyright (c) 2018 Oleksandr Andrushchenko, EPAM Systems Inc.
> */
>
> +#include <linux/dma-buf.h>
> #include <linux/slab.h>
>
> +#include <xen/grant_table.h>
> +#include <xen/gntdev.h>
> +
> +#include "gntdev-common.h"
> #include "gntdev-dmabuf.h"
>
> +struct gntdev_dmabuf {
> + struct gntdev_dmabuf_priv *priv;
> + struct dma_buf *dmabuf;
> + struct list_head next;
> + int fd;
> +
> + union {
> + struct {
> + /* Exported buffers are reference counted. */
> + struct kref refcount;
> +
> + struct gntdev_priv *priv;
> + struct gntdev_grant_map *map;
> + } exp;
> + } u;
> +
> + /* Number of pages this buffer has. */
> + int nr_pages;
> + /* Pages of this buffer. */
> + struct page **pages;
> +};
> +
> +struct gntdev_dmabuf_wait_obj {
> + struct list_head next;
> + struct gntdev_dmabuf *gntdev_dmabuf;
> + struct completion completion;
> +};
> +
> +struct gntdev_dmabuf_attachment {
> + struct sg_table *sgt;
> + enum dma_data_direction dir;
> +};
> +
> struct gntdev_dmabuf_priv {
> /* List of exported DMA buffers. */
> struct list_head exp_list;
> @@ -23,17 +63,439 @@ struct gntdev_dmabuf_priv {
>
> /* Implementation of wait for exported DMA buffer to be released. */
>
> +static void dmabuf_exp_release(struct kref *kref);
> +
> +static struct gntdev_dmabuf_wait_obj *
> +dmabuf_exp_wait_obj_new(struct gntdev_dmabuf_priv *priv,
> + struct gntdev_dmabuf *gntdev_dmabuf)
> +{
> + struct gntdev_dmabuf_wait_obj *obj;
> +
> + obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> + if (!obj)
> + return ERR_PTR(-ENOMEM);
> +
> + init_completion(&obj->completion);
> + obj->gntdev_dmabuf = gntdev_dmabuf;
> +
> + mutex_lock(&priv->lock);
> + list_add(&obj->next, &priv->exp_wait_list);
> + /* Put our reference and wait for gntdev_dmabuf's release to fire. */
> + kref_put(&gntdev_dmabuf->u.exp.refcount, dmabuf_exp_release);
> + mutex_unlock(&priv->lock);
> + return obj;
> +}
> +
> +static void dmabuf_exp_wait_obj_free(struct gntdev_dmabuf_priv *priv,
> + struct gntdev_dmabuf_wait_obj *obj)
> +{
> + struct gntdev_dmabuf_wait_obj *cur_obj, *q;
> +
> + mutex_lock(&priv->lock);
> + list_for_each_entry_safe(cur_obj, q, &priv->exp_wait_list, next)
> + if (cur_obj == obj) {
> + list_del(&obj->next);
> + kfree(obj);
> + break;
> + }
> + mutex_unlock(&priv->lock);
Do we really need to walk the list?
And if we do, do we need the safe variant of the walk? We are holding
the lock. Here and elsewhere.
> +}
> +
> +static int dmabuf_exp_wait_obj_wait(struct gntdev_dmabuf_wait_obj *obj,
> + u32 wait_to_ms)
> +{
> + if (wait_for_completion_timeout(&obj->completion,
> + msecs_to_jiffies(wait_to_ms)) <= 0)
> + return -ETIMEDOUT;
> +
> + return 0;
> +}
> +
> +static void dmabuf_exp_wait_obj_signal(struct gntdev_dmabuf_priv *priv,
> + struct gntdev_dmabuf *gntdev_dmabuf)
> +{
> + struct gntdev_dmabuf_wait_obj *obj, *q;
> +
> + list_for_each_entry_safe(obj, q, &priv->exp_wait_list, next)
> + if (obj->gntdev_dmabuf == gntdev_dmabuf) {
> + pr_debug("Found gntdev_dmabuf in the wait list, wake\n");
> + complete_all(&obj->completion);
> + break;
> + }
> +}
> +
> +static struct gntdev_dmabuf *
> +dmabuf_exp_wait_obj_get_by_fd(struct gntdev_dmabuf_priv *priv, int fd)
The name of this routine implies (to me) that we are getting a wait
object, but IIUC we are getting a gntdev_dmabuf that we will later
associate with a wait object.
> +{
> + struct gntdev_dmabuf *q, *gntdev_dmabuf, *ret = ERR_PTR(-ENOENT);
> +
> + mutex_lock(&priv->lock);
> + list_for_each_entry_safe(gntdev_dmabuf, q, &priv->exp_list, next)
> + if (gntdev_dmabuf->fd == fd) {
> + pr_debug("Found gntdev_dmabuf in the wait list\n");
> + kref_get(&gntdev_dmabuf->u.exp.refcount);
> + ret = gntdev_dmabuf;
> + break;
> + }
> + mutex_unlock(&priv->lock);
> + return ret;
> +}
> +
> int gntdev_dmabuf_exp_wait_released(struct gntdev_dmabuf_priv *priv, int fd,
> int wait_to_ms)
> {
> - return -EINVAL;
> + struct gntdev_dmabuf *gntdev_dmabuf;
> + struct gntdev_dmabuf_wait_obj *obj;
> + int ret;
> +
> + pr_debug("Will wait for dma-buf with fd %d\n", fd);
> + /*
> + * Try to find the DMA buffer: if not found means that
> + * either the buffer has already been released or file descriptor
> + * provided is wrong.
> + */
> + gntdev_dmabuf = dmabuf_exp_wait_obj_get_by_fd(priv, fd);
> + if (IS_ERR(gntdev_dmabuf))
> + return PTR_ERR(gntdev_dmabuf);
> +
> + /*
> + * gntdev_dmabuf still exists and is reference count locked by us now,
> + * so prepare to wait: allocate wait object and add it to the wait list,
> + * so we can find it on release.
> + */
> + obj = dmabuf_exp_wait_obj_new(priv, gntdev_dmabuf);
> + if (IS_ERR(obj))
> + return PTR_ERR(obj);
> +
> + ret = dmabuf_exp_wait_obj_wait(obj, wait_to_ms);
> + dmabuf_exp_wait_obj_free(priv, obj);
> + return ret;
> +}
> +
> +/* DMA buffer export support. */
> +
> +static struct sg_table *
> +dmabuf_pages_to_sgt(struct page **pages, unsigned int nr_pages)
> +{
> + struct sg_table *sgt;
> + int ret;
> +
> + sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
> + if (!sgt) {
> + ret = -ENOMEM;
> + goto out;
> + }
> +
> + ret = sg_alloc_table_from_pages(sgt, pages, nr_pages, 0,
> + nr_pages << PAGE_SHIFT,
> + GFP_KERNEL);
> + if (ret)
> + goto out;
> +
> + return sgt;
> +
> +out:
> + kfree(sgt);
> + return ERR_PTR(ret);
> +}
> +
> +static int dmabuf_exp_ops_attach(struct dma_buf *dma_buf,
> + struct device *target_dev,
> + struct dma_buf_attachment *attach)
> +{
> + struct gntdev_dmabuf_attachment *gntdev_dmabuf_attach;
> +
> + gntdev_dmabuf_attach = kzalloc(sizeof(*gntdev_dmabuf_attach),
> + GFP_KERNEL);
> + if (!gntdev_dmabuf_attach)
> + return -ENOMEM;
> +
> + gntdev_dmabuf_attach->dir = DMA_NONE;
> + attach->priv = gntdev_dmabuf_attach;
> + return 0;
> +}
> +
> +static void dmabuf_exp_ops_detach(struct dma_buf *dma_buf,
> + struct dma_buf_attachment *attach)
> +{
> + struct gntdev_dmabuf_attachment *gntdev_dmabuf_attach = attach->priv;
> +
> + if (gntdev_dmabuf_attach) {
> + struct sg_table *sgt = gntdev_dmabuf_attach->sgt;
> +
> + if (sgt) {
> + if (gntdev_dmabuf_attach->dir != DMA_NONE)
> + dma_unmap_sg_attrs(attach->dev, sgt->sgl,
> + sgt->nents,
> + gntdev_dmabuf_attach->dir,
> + DMA_ATTR_SKIP_CPU_SYNC);
> + sg_free_table(sgt);
> + }
> +
> + kfree(sgt);
> + kfree(gntdev_dmabuf_attach);
> + attach->priv = NULL;
> + }
> +}
> +
> +static struct sg_table *
> +dmabuf_exp_ops_map_dma_buf(struct dma_buf_attachment *attach,
> + enum dma_data_direction dir)
> +{
> + struct gntdev_dmabuf_attachment *gntdev_dmabuf_attach = attach->priv;
> + struct gntdev_dmabuf *gntdev_dmabuf = attach->dmabuf->priv;
> + struct sg_table *sgt;
> +
> + pr_debug("Mapping %d pages for dev %p\n", gntdev_dmabuf->nr_pages,
> + attach->dev);
> +
> + if (dir == DMA_NONE || !gntdev_dmabuf_attach)
> + return ERR_PTR(-EINVAL);
> +
> + /* Return the cached mapping when possible. */
> + if (gntdev_dmabuf_attach->dir == dir)
> + return gntdev_dmabuf_attach->sgt;
> +
> + /*
> + * Two mappings with different directions for the same attachment are
> + * not allowed.
> + */
> + if (gntdev_dmabuf_attach->dir != DMA_NONE)
> + return ERR_PTR(-EBUSY);
> +
> + sgt = dmabuf_pages_to_sgt(gntdev_dmabuf->pages,
> + gntdev_dmabuf->nr_pages);
> + if (!IS_ERR(sgt)) {
> + if (!dma_map_sg_attrs(attach->dev, sgt->sgl, sgt->nents, dir,
> + DMA_ATTR_SKIP_CPU_SYNC)) {
> + sg_free_table(sgt);
> + kfree(sgt);
> + sgt = ERR_PTR(-ENOMEM);
> + } else {
> + gntdev_dmabuf_attach->sgt = sgt;
> + gntdev_dmabuf_attach->dir = dir;
> + }
> + }
> + if (IS_ERR(sgt))
> + pr_debug("Failed to map sg table for dev %p\n", attach->dev);
> + return sgt;
> +}
> +
> +static void dmabuf_exp_ops_unmap_dma_buf(struct dma_buf_attachment *attach,
> + struct sg_table *sgt,
> + enum dma_data_direction dir)
> +{
> + /* Not implemented. The unmap is done at dmabuf_exp_ops_detach(). */
> +}
> +
> +static void dmabuf_exp_release(struct kref *kref)
> +{
> + struct gntdev_dmabuf *gntdev_dmabuf =
> + container_of(kref, struct gntdev_dmabuf, u.exp.refcount);
> +
> + dmabuf_exp_wait_obj_signal(gntdev_dmabuf->priv, gntdev_dmabuf);
> + list_del(&gntdev_dmabuf->next);
> + kfree(gntdev_dmabuf);
> +}
> +
> +static void dmabuf_exp_ops_release(struct dma_buf *dma_buf)
> +{
> + struct gntdev_dmabuf *gntdev_dmabuf = dma_buf->priv;
> + struct gntdev_dmabuf_priv *priv = gntdev_dmabuf->priv;
> +
> + gntdev_remove_map(gntdev_dmabuf->u.exp.priv, gntdev_dmabuf->u.exp.map);
> + mutex_lock(&priv->lock);
> + kref_put(&gntdev_dmabuf->u.exp.refcount, dmabuf_exp_release);
> + mutex_unlock(&priv->lock);
> +}
> +
> +static void *dmabuf_exp_ops_kmap_atomic(struct dma_buf *dma_buf,
> + unsigned long page_num)
> +{
> + /* Not implemented. */
> + return NULL;
> +}
> +
> +static void dmabuf_exp_ops_kunmap_atomic(struct dma_buf *dma_buf,
> + unsigned long page_num, void *addr)
> +{
> + /* Not implemented. */
> +}
> +
> +static void *dmabuf_exp_ops_kmap(struct dma_buf *dma_buf,
> + unsigned long page_num)
> +{
> + /* Not implemented. */
> + return NULL;
> +}
> +
> +static void dmabuf_exp_ops_kunmap(struct dma_buf *dma_buf,
> + unsigned long page_num, void *addr)
> +{
> + /* Not implemented. */
> +}
> +
> +static int dmabuf_exp_ops_mmap(struct dma_buf *dma_buf,
> + struct vm_area_struct *vma)
> +{
> + /* Not implemented. */
> + return 0;
> +}
> +
> +static const struct dma_buf_ops dmabuf_exp_ops = {
> + .attach = dmabuf_exp_ops_attach,
> + .detach = dmabuf_exp_ops_detach,
> + .map_dma_buf = dmabuf_exp_ops_map_dma_buf,
> + .unmap_dma_buf = dmabuf_exp_ops_unmap_dma_buf,
> + .release = dmabuf_exp_ops_release,
> + .map = dmabuf_exp_ops_kmap,
> + .map_atomic = dmabuf_exp_ops_kmap_atomic,
> + .unmap = dmabuf_exp_ops_kunmap,
> + .unmap_atomic = dmabuf_exp_ops_kunmap_atomic,
> + .mmap = dmabuf_exp_ops_mmap,
> +};
> +
> +struct gntdev_dmabuf_export_args {
> + struct gntdev_priv *priv;
> + struct gntdev_grant_map *map;
> + struct gntdev_dmabuf_priv *dmabuf_priv;
> + struct device *dev;
> + int count;
> + struct page **pages;
> + u32 fd;
> +};
> +
> +static int dmabuf_exp_from_pages(struct gntdev_dmabuf_export_args *args)
> +{
> + DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
> + struct gntdev_dmabuf *gntdev_dmabuf;
> + int ret = 0;
The initialization to 0 is not necessary.
> +
> + gntdev_dmabuf = kzalloc(sizeof(*gntdev_dmabuf), GFP_KERNEL);
> + if (!gntdev_dmabuf)
> + return -ENOMEM;
> +
> + kref_init(&gntdev_dmabuf->u.exp.refcount);
> +
> + gntdev_dmabuf->priv = args->dmabuf_priv;
> + gntdev_dmabuf->nr_pages = args->count;
> + gntdev_dmabuf->pages = args->pages;
> + gntdev_dmabuf->u.exp.priv = args->priv;
> + gntdev_dmabuf->u.exp.map = args->map;
> +
> + exp_info.exp_name = KBUILD_MODNAME;
> + if (args->dev->driver && args->dev->driver->owner)
> + exp_info.owner = args->dev->driver->owner;
> + else
> + exp_info.owner = THIS_MODULE;
> + exp_info.ops = &dmabuf_exp_ops;
> + exp_info.size = args->count << PAGE_SHIFT;
> + exp_info.flags = O_RDWR;
> + exp_info.priv = gntdev_dmabuf;
> +
> + gntdev_dmabuf->dmabuf = dma_buf_export(&exp_info);
> + if (IS_ERR(gntdev_dmabuf->dmabuf)) {
> + ret = PTR_ERR(gntdev_dmabuf->dmabuf);
> + gntdev_dmabuf->dmabuf = NULL;
> + goto fail;
> + }
> +
> + ret = dma_buf_fd(gntdev_dmabuf->dmabuf, O_CLOEXEC);
> + if (ret < 0)
> + goto fail;
> +
> + gntdev_dmabuf->fd = ret;
> + args->fd = ret;
> +
> + pr_debug("Exporting DMA buffer with fd %d\n", ret);
> +
> + mutex_lock(&args->dmabuf_priv->lock);
> + list_add(&gntdev_dmabuf->next, &args->dmabuf_priv->exp_list);
> + mutex_unlock(&args->dmabuf_priv->lock);
> + return 0;
> +
> +fail:
> + if (gntdev_dmabuf->dmabuf)
> + dma_buf_put(gntdev_dmabuf->dmabuf);
> + kfree(gntdev_dmabuf);
> + return ret;
> +}
> +
> +static struct gntdev_grant_map *
> +dmabuf_exp_alloc_backing_storage(struct gntdev_priv *priv, int dmabuf_flags,
> + int count)
> +{
> + struct gntdev_grant_map *map;
> +
> + if (unlikely(count <= 0))
> + return ERR_PTR(-EINVAL);
> +
> + if ((dmabuf_flags & GNTDEV_DMA_FLAG_WC) &&
> + (dmabuf_flags & GNTDEV_DMA_FLAG_COHERENT)) {
> + pr_debug("Wrong dma-buf flags: either WC or coherent, not both\n");
Why not just print the value of the flags?
> + return ERR_PTR(-EINVAL);
> + }
> +
> + map = gntdev_alloc_map(priv, count, dmabuf_flags);
> + if (!map)
> + return ERR_PTR(-ENOMEM);
> +
> + if (unlikely(gntdev_account_mapped_pages(count))) {
> + pr_debug("can't map: over limit\n");
I think printing @count value here would be useful.
> + gntdev_put_map(NULL, map);
> + return ERR_PTR(-ENOMEM);
> + }
> + return map;
> }
>
> int gntdev_dmabuf_exp_from_refs(struct gntdev_priv *priv, int flags,
> int count, u32 domid, u32 *refs, u32 *fd)
> {
> + struct gntdev_grant_map *map;
> + struct gntdev_dmabuf_export_args args;
> + int i, ret;
> +
> *fd = -1;
Is this still needed?
> - return -EINVAL;
> +
> + map = dmabuf_exp_alloc_backing_storage(priv, flags, count);
> + if (IS_ERR(map))
> + return PTR_ERR(map);
> +
> + for (i = 0; i < count; i++) {
> + map->grants[i].domid = domid;
> + map->grants[i].ref = refs[i];
> + }
> +
> + mutex_lock(&priv->lock);
> + gntdev_add_map(priv, map);
> + mutex_unlock(&priv->lock);
> +
> + map->flags |= GNTMAP_host_map;
> +#if defined(CONFIG_X86)
> + map->flags |= GNTMAP_device_map;
> +#endif
> +
> + ret = gntdev_map_grant_pages(map);
> + if (ret < 0)
> + goto out;
> +
> + args.priv = priv;
> + args.map = map;
> + args.dev = priv->dma_dev;
> + args.dmabuf_priv = priv->dmabuf_priv;
> + args.count = map->count;
> + args.pages = map->pages;
> +
> + ret = dmabuf_exp_from_pages(&args);
> + if (ret < 0)
> + goto out;
> +
> + *fd = args.fd;
> + return 0;
> +
> +out:
> + gntdev_remove_map(priv, map);
> + return ret;
> }
>
> /* DMA buffer import support. */
> @@ -63,6 +525,10 @@ struct gntdev_dmabuf_priv *gntdev_dmabuf_init(void)
> if (!priv)
> return ERR_PTR(-ENOMEM);
>
> + mutex_init(&priv->lock);
> + INIT_LIST_HEAD(&priv->exp_list);
> + INIT_LIST_HEAD(&priv->exp_wait_list);
> +
> return priv;
> }
>
> diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c
> index e82660d81d7e..5f93cd534840 100644
> --- a/drivers/xen/gntdev.c
> +++ b/drivers/xen/gntdev.c
> @@ -262,6 +262,16 @@ void gntdev_put_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
> gntdev_free_map(map);
> }
>
> +#ifdef CONFIG_XEN_GNTDEV_DMABUF
> +void gntdev_remove_map(struct gntdev_priv *priv, struct gntdev_grant_map *map)
> +{
> + mutex_lock(&priv->lock);
> + list_del(&map->next);
> + gntdev_put_map(NULL /* already removed */, map);
Why not just call gntdev_put_map(priv, map) and then not have this
routine at all?
I really dislike the fact that we are taking a lock here that
gntdev_put_map() takes as well, although not with NULL argument. (And
yes, I see that gntdev_release() does it too.)
-boris
> + mutex_unlock(&priv->lock);
> +}
> +#endif
> +
> /* ------------------------------------------------------------------ */
>
> static int find_grant_ptes(pte_t *pte, pgtable_t token,
>
Powered by blists - more mailing lists