[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <1309189510.21764.1.camel@localhost.localdomain>
Date: Mon, 27 Jun 2011 08:45:10 -0700
From: Shirley Ma <mashirle@...ibm.com>
To: David Miller <davem@...emloft.net>
Cc: mst@...hat.com, Eric Dumazet <eric.dumazet@...il.com>,
Avi Kivity <avi@...hat.com>, Arnd Bergmann <arnd@...db.de>,
netdev@...r.kernel.org, kvm@...r.kernel.org,
linux-kernel@...r.kernel.org
Subject: Re: [PATCH V7 2/4 net-next] skbuff: Add userspace zero-copy
buffers in skb
Hello Dave,
To support skb zero-copy, a pointer needs to be added to the skb shared info.
Do you agree with this approach? If not, do you have any other
suggestions?
Thanks
Shirley
On Sat, 2011-05-28 at 12:23 -0700, Shirley Ma wrote:
> From:
> Shirley Ma <mashirle@...ibm.com>
> To:
> David Miller <davem@...emloft.net>,
> mst@...hat.com, Eric Dumazet
> <eric.dumazet@...il.com>, Avi
> Kivity <avi@...hat.com>, Arnd
> Bergmann <arnd@...db.de>
> Cc:
> netdev@...r.kernel.org,
> kvm@...r.kernel.org,
> linux-kernel@...r.kernel.org
> Subject:
> [PATCH V7 2/4 net-next] skbuff: Add
> userspace zero-copy buffers in skb
> Date:
> 05/28/2011 12:23:08 PM
>
>
> This patch adds userspace buffer support to the skb shared info. A new
> struct ubuf_info is needed to maintain the userspace buffers'
> argument and index; a callback is used to notify userspace to release
> the buffers once the lower device has completed DMA (i.e. the last
> reference to that skb is gone).
>
> If any userspace apps reference these userspace buffers, then these
> userspace buffers will be copied into the kernel. This way we can
> prevent userspace apps from holding these userspace buffers too long.
>
> One userspace buffer info pointer is added to skb_shared_info. Is it
> safe to use destructor_arg? From the comments, destructor_arg is
> used by the destructor, and destructor calls don't wait until the
> last reference to that skb is gone.
>
> Signed-off-by: Shirley Ma <xma@...ibm.com>
> ---
> include/linux/skbuff.h | 24 ++++++++++++++++
> net/core/skbuff.c | 73
> ++++++++++++++++++++++++++++++++++++++++++++++++
> 2 files changed, 97 insertions(+), 0 deletions(-)
>
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index e8b78ce..37a2cb4 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -189,6 +189,17 @@ enum {
> SKBTX_DRV_NEEDS_SK_REF = 1 << 3,
> };
>
> +/*
> + * The callback notifies userspace to release buffers when skb DMA is done
> + * in the lower device; the last reference to the skb should be gone when
> + * this is called.
> + * The desc is used to track the userspace buffer index.
> + */
> +struct ubuf_info {
> + void (*callback)(void *);
> + void *arg;
> + unsigned long desc;
> +};
> +
> /* This data is invariant across clones and lives at
> * the end of the header data, ie. at skb->end.
> */
> @@ -203,6 +214,9 @@ struct skb_shared_info {
> struct sk_buff *frag_list;
> struct skb_shared_hwtstamps hwtstamps;
>
> + /* DMA mapping from/to userspace buffers info */
> + void *ubuf_arg;
> +
> /*
> * Warning : all fields before dataref are cleared in
> __alloc_skb()
> */
> @@ -2261,5 +2275,15 @@ static inline void
> skb_checksum_none_assert(struct sk_buff *skb)
> }
>
> bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
> +
> +/*
> + * skb_ubuf - is the buffer from userspace
> + * @skb: buffer to check
> + */
> +static inline int skb_ubuf(const struct sk_buff *skb)
> +{
> + return skb_shinfo(skb)->ubuf_arg != NULL;
> +}
> +
> #endif /* __KERNEL__ */
> #endif /* _LINUX_SKBUFF_H */
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 46cbd28..115c5ca 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -329,6 +329,19 @@ static void skb_release_data(struct sk_buff *skb)
> put_page(skb_shinfo(skb)->frags[i].page);
> }
>
> + /*
> + * If the skb buffer is from userspace, we need to notify the
> + * caller that the lower device's DMA is done.
> + */
> + if (skb_ubuf(skb)) {
> + struct ubuf_info *uarg;
> +
> + uarg = (struct ubuf_info
> *)skb_shinfo(skb)->ubuf_arg;
> +
> + if (uarg->callback)
> + uarg->callback(uarg);
> + }
> +
> if (skb_has_frag_list(skb))
> skb_drop_fraglist(skb);
>
> @@ -481,6 +494,9 @@ bool skb_recycle_check(struct sk_buff *skb, int
> skb_size)
> if (irqs_disabled())
> return false;
>
> + if (skb_ubuf(skb))
> + return false;
> +
> if (skb_is_nonlinear(skb) || skb->fclone !=
> SKB_FCLONE_UNAVAILABLE)
> return false;
>
> @@ -573,6 +589,7 @@ static struct sk_buff *__skb_clone(struct sk_buff
> *n, struct sk_buff *skb)
> atomic_set(&n->users, 1);
>
> atomic_inc(&(skb_shinfo(skb)->dataref));
> + skb_shinfo(skb)->ubuf_arg = NULL;
> skb->cloned = 1;
>
> return n;
> @@ -596,6 +613,51 @@ struct sk_buff *skb_morph(struct sk_buff *dst,
> struct sk_buff *src)
> }
> EXPORT_SYMBOL_GPL(skb_morph);
>
> +/* skb frags copy userspace buffers to kernel */
> +static int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
> +{
> + int i;
> + int num_frags = skb_shinfo(skb)->nr_frags;
> + struct page *page, *head = NULL;
> + struct ubuf_info *uarg = skb_shinfo(skb)->ubuf_arg;
> +
> + for (i = 0; i < num_frags; i++) {
> + u8 *vaddr;
> + skb_frag_t *f = &skb_shinfo(skb)->frags[i];
> +
> + page = alloc_page(GFP_ATOMIC);
> + if (!page) {
> + while (head) {
> + put_page(head);
> + head = (struct page *)head->private;
> + }
> + return -ENOMEM;
> + }
> + vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]);
> + memcpy(page_address(page),
> + vaddr + f->page_offset, f->size);
> + kunmap_skb_frag(vaddr);
> + page->private = (unsigned long)head;
> + head = page;
> + }
> +
> + /* skb frags release userspace buffers */
> + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
> + put_page(skb_shinfo(skb)->frags[i].page);
> +
> + uarg->callback(uarg);
> + skb_shinfo(skb)->ubuf_arg = NULL;
> +
> + /* skb frags point to kernel buffers */
> + for (i = skb_shinfo(skb)->nr_frags; i > 0; i--) {
> + skb_shinfo(skb)->frags[i - 1].page_offset = 0;
> + skb_shinfo(skb)->frags[i - 1].page = head;
> + head = (struct page *)head->private;
> + }
> + return 0;
> +}
> +
> +
> /**
> * skb_clone - duplicate an sk_buff
> * @skb: buffer to clone
> @@ -614,6 +676,11 @@ struct sk_buff *skb_clone(struct sk_buff *skb,
> gfp_t gfp_mask)
> {
> struct sk_buff *n;
>
> + if (skb_ubuf(skb)) {
> + if (skb_copy_ubufs(skb, gfp_mask))
> + return NULL;
> + }
> +
> n = skb + 1;
> if (skb->fclone == SKB_FCLONE_ORIG &&
> n->fclone == SKB_FCLONE_UNAVAILABLE) {
> @@ -731,6 +798,12 @@ struct sk_buff *pskb_copy(struct sk_buff *skb,
> gfp_t gfp_mask)
> if (skb_shinfo(skb)->nr_frags) {
> int i;
>
> + if (skb_ubuf(skb)) {
> + if (skb_copy_ubufs(skb, gfp_mask)) {
> + kfree(n);
> + goto out;
> + }
> + }
> for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
> skb_shinfo(n)->frags[i] =
> skb_shinfo(skb)->frags[i];
> get_page(skb_shinfo(n)->frags[i].page);
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists