Message-ID: <50A4BC7D.6060707@citrix.com>
Date: Thu, 15 Nov 2012 10:57:17 +0100
From: Roger Pau Monné <roger.pau@...rix.com>
To: Annie Li <annie.li@...cle.com>
CC: "xen-devel@...ts.xensource.com" <xen-devel@...ts.xensource.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"konrad.wilk@...cle.com" <konrad.wilk@...cle.com>,
Ian Campbell <Ian.Campbell@...rix.com>
Subject: Re: [Xen-devel] [PATCH 1/4] xen/netback: implements persistent grant with one page pool.
On 15/11/12 08:04, Annie Li wrote:
> This patch implements persistent grant in netback driver. Tx and rx
> share the same page pool, this pool will be split into two parts
> in next patch.
>
> Signed-off-by: Annie Li <annie.li@...cle.com>
> ---
> drivers/net/xen-netback/common.h | 18 +++-
> drivers/net/xen-netback/interface.c | 22 ++++
> drivers/net/xen-netback/netback.c | 212 +++++++++++++++++++++++++++++++----
> drivers/net/xen-netback/xenbus.c | 14 ++-
> 4 files changed, 239 insertions(+), 27 deletions(-)
>
> diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
> index 94b79c3..a85cac6 100644
> --- a/drivers/net/xen-netback/common.h
> +++ b/drivers/net/xen-netback/common.h
> @@ -45,8 +45,19 @@
> #include <xen/grant_table.h>
> #include <xen/xenbus.h>
>
> +#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> +#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> +#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
> + (XEN_NETIF_TX_RING_SIZE + XEN_NETIF_RX_RING_SIZE)
> +
> struct xen_netbk;
>
> +struct persistent_entry {
> + grant_ref_t forgranted;
> + struct page *fpage;
> + struct gnttab_map_grant_ref map;
> +};
This should be common with the blkback implementation; I think we should
move some structures/functions from blkback to a common place. When I
implemented those functions in blkback I thought they could be reused by
other backends that wanted to use persistent grants, so I kept them free
of blkback-specific structures.
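As a rough sketch of what I mean (the placement and prototypes are only
illustrative, written from memory of what blkback currently carries, so
the exact signatures may differ slightly):

struct persistent_gnt {
	struct page *page;
	grant_ref_t gnt;
	grant_handle_t handle;
	struct rb_node node;
};

void add_persistent_gnt(struct rb_root *root, struct persistent_gnt *gnt);
struct persistent_gnt *get_persistent_gnt(struct rb_root *root,
					  grant_ref_t gref);
void free_persistent_gnts(struct rb_root *root, unsigned int num);

Moving something like this (plus the foreach_grant helper) to a shared
header would let netback reuse it directly.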
> struct xenvif {
> /* Unique identifier for this interface. */
> domid_t domid;
> @@ -75,6 +86,7 @@ struct xenvif {
>
> /* Internal feature information. */
> u8 can_queue:1; /* can queue packets for receiver? */
> + u8 persistent_grant:1;
>
> /*
> * Allow xenvif_start_xmit() to peek ahead in the rx request
> @@ -98,6 +110,9 @@ struct xenvif {
> struct net_device *dev;
>
> wait_queue_head_t waiting_to_free;
> +
> + struct persistent_entry *persistent_gnt[MAXIMUM_OUTSTANDING_BLOCK_REQS];
> + unsigned int persistent_gntcnt;
This should be a red-black tree, which gives a search time of O(log n).
Using an array is more expensive in terms of memory and has a worse
search time, O(n). This is especially relevant for netback, which can
have twice as many persistent grants as blkback (because two rings are
used).
Take a look at the following functions from blkback: foreach_grant,
add_persistent_gnt and get_persistent_gnt. They are generic functions to
deal with persistent grants.
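Just to illustrate (untested, and assuming struct persistent_entry grows
a struct rb_node member and the array above is replaced by a struct
rb_root in struct xenvif), lookup and insertion would look something
like:

static struct persistent_entry *get_per_gnt(struct rb_root *root,
					    grant_ref_t gref)
{
	struct rb_node *node = root->rb_node;

	while (node) {
		struct persistent_entry *this =
			container_of(node, struct persistent_entry, node);

		if (gref < this->forgranted)
			node = node->rb_left;
		else if (gref > this->forgranted)
			node = node->rb_right;
		else
			return this;
	}

	return NULL;
}

static void add_per_gnt(struct rb_root *root, struct persistent_entry *new)
{
	struct rb_node **p = &root->rb_node, *parent = NULL;

	while (*p) {
		struct persistent_entry *this =
			container_of(*p, struct persistent_entry, node);

		parent = *p;
		if (new->forgranted < this->forgranted)
			p = &(*p)->rb_left;
		else
			p = &(*p)->rb_right;
	}

	/* Duplicate grefs are ignored here for brevity. */
	rb_link_node(&new->node, parent, p);
	rb_insert_color(&new->node, root);
}

This is essentially what blkback already does, so ideally both drivers
would call into the same shared helpers instead of each keeping a copy.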
> };
>
> static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
> @@ -105,9 +120,6 @@ static inline struct xenbus_device *xenvif_to_xenbus_device(struct xenvif *vif)
> return to_xenbus_device(vif->dev->dev.parent);
> }
>
> -#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
> -#define XEN_NETIF_RX_RING_SIZE __CONST_RING_SIZE(xen_netif_rx, PAGE_SIZE)
> -
> struct xenvif *xenvif_alloc(struct device *parent,
> domid_t domid,
> unsigned int handle);
> diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
> index b7d41f8..226d159 100644
> --- a/drivers/net/xen-netback/interface.c
> +++ b/drivers/net/xen-netback/interface.c
> @@ -300,6 +300,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
> return ERR_PTR(err);
> }
>
> + vif->persistent_gntcnt = 0;
> +
> netdev_dbg(dev, "Successfully created xenvif\n");
> return vif;
> }
> @@ -343,6 +345,23 @@ err:
> return err;
> }
>
> +void xenvif_free_grants(struct persistent_entry **pers_entry,
> + unsigned int count)
> +{
> + int i, ret;
> + struct gnttab_unmap_grant_ref unmap;
> +
> + for (i = 0; i < count; i++) {
> + gnttab_set_unmap_op(&unmap,
> + (unsigned long)page_to_pfn(pers_entry[i]->fpage),
> + GNTMAP_host_map,
> + pers_entry[i]->map.handle);
> + ret = gnttab_unmap_refs(&unmap, &pers_entry[i]->fpage,
> + 1, false);
This is not correct: you should call gnttab_set_unmap_op on a batch of
grants (up to BLKIF_MAX_SEGMENTS_PER_REQUEST) and then call
gnttab_unmap_refs on all of them at once. Here is a simple example (take
a look at the xen_blkif_schedule function in blkback.c to see an example
with a red-black tree; I think this part of the code should also be made
common):
for (i = 0, segs_to_unmap = 0; i < count; i++) {
	gnttab_set_unmap_op(&unmap[segs_to_unmap],
		(unsigned long)pfn_to_kaddr(page_to_pfn(pers_entry[i]->fpage)),
		GNTMAP_host_map,
		pers_entry[i]->map.handle);
	pages[segs_to_unmap] = pers_entry[i]->fpage;
	if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST ||
	    (i + 1) == count) {
		ret = gnttab_unmap_refs(unmap, NULL, pages,
			segs_to_unmap);
		BUG_ON(ret);
		segs_to_unmap = 0;
	}
}
> + BUG_ON(ret);
> + }
> +}
> +
> void xenvif_disconnect(struct xenvif *vif)
> {
> struct net_device *dev = vif->dev;
> @@ -366,6 +385,9 @@ void xenvif_disconnect(struct xenvif *vif)
> unregister_netdev(vif->dev);
>
> xen_netbk_unmap_frontend_rings(vif);
> + if (vif->persistent_grant)
> + xenvif_free_grants(vif->persistent_gnt,
> + vif->persistent_gntcnt);
>
> free_netdev(vif->dev);
> }
> diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
> index 2596401..a26d3fc 100644
> --- a/drivers/net/xen-netback/netback.c
> +++ b/drivers/net/xen-netback/netback.c
> @@ -80,6 +80,8 @@ union page_ext {
> void *mapping;
> };
>
> +struct xenvif;
> +
> struct xen_netbk {
> wait_queue_head_t wq;
> struct task_struct *task;
> @@ -102,6 +104,7 @@ struct xen_netbk {
>
> struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
> struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS];
> + struct xenvif *gnttab_tx_vif[MAX_PENDING_REQS];
>
> u16 pending_ring[MAX_PENDING_REQS];
>
> @@ -111,12 +114,139 @@ struct xen_netbk {
> * straddles two buffers in the frontend.
> */
> struct gnttab_copy grant_copy_op[2*XEN_NETIF_RX_RING_SIZE];
> + struct xenvif *gnttab_rx_vif[2*XEN_NETIF_RX_RING_SIZE];
> struct netbk_rx_meta meta[2*XEN_NETIF_RX_RING_SIZE];
> };
>
> static struct xen_netbk *xen_netbk;
> static int xen_netbk_group_nr;
>
> +static struct persistent_entry*
> +get_per_gnt(struct persistent_entry **pers_entry,
> + unsigned int count, grant_ref_t gref)
> +{
> + int i;
> +
> + for (i = 0; i < count; i++)
> + if (gref == pers_entry[i]->forgranted)
> + return pers_entry[i];
> +
> + return NULL;
> +}
This should be replaced with common code shared by all backends that
implement persistent grants.
> +
> +static void*
> +map_new_gnt(struct persistent_entry **pers_entry, unsigned int count,
> + grant_ref_t ref, domid_t domid)
> +{
> + struct gnttab_map_grant_ref *map;
> + struct page *page;
> + unsigned long vaddr;
> + unsigned long pfn;
> + uint32_t flags;
> + int ret = 0;
> +
> + pers_entry[count] = (struct persistent_entry *)
> + kmalloc(sizeof(struct persistent_entry),
> + GFP_KERNEL);
> + if (!pers_entry[count])
> + return ERR_PTR(-ENOMEM);
> +
> + map = &pers_entry[count]->map;
> + page = alloc_page(GFP_KERNEL);
> + if (!page) {
> + kfree(pers_entry[count]);
> + pers_entry[count] = NULL;
> + return ERR_PTR(-ENOMEM);
> + }
> +
> + pers_entry[count]->fpage = page;
> + pfn = page_to_pfn(page);
> + vaddr = (unsigned long)pfn_to_kaddr(pfn);
> + flags = GNTMAP_host_map;
> +
> + gnttab_set_map_op(map, vaddr, flags, ref, domid);
> + ret = gnttab_map_refs(map, NULL, &page, 1);
> + BUG_ON(ret);
This is highly inefficient. One of the points of using gnttab_set_map_op
is that you can queue a batch of grants and then map them all at once
with a single gnttab_map_refs call, but here you are using it to map a
single grant at a time. You should instead work out how many grants you
need to map to complete the request and map them all at the same time.
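As a rough, untested sketch of the idea (the helper and its arguments
are made up for illustration, error handling is mostly omitted, and the
scratch arrays would have to live in struct xen_netbk rather than be
static):

static int map_new_gnts(struct persistent_entry **pers_entry,
			unsigned int *count, grant_ref_t *refs,
			domid_t domid, unsigned int nr)
{
	/* Illustrative only: in real code this would be per-netbk scratch
	 * space, not function-local statics. */
	static struct gnttab_map_grant_ref map[MAX_PENDING_REQS];
	static struct page *pages[MAX_PENDING_REQS];
	unsigned int i;
	int ret;

	for (i = 0; i < nr; i++) {
		struct persistent_entry *ent;

		ent = kmalloc(sizeof(*ent), GFP_KERNEL);
		if (!ent)
			return -ENOMEM;
		ent->fpage = alloc_page(GFP_KERNEL);
		if (!ent->fpage) {
			kfree(ent);
			return -ENOMEM;
		}
		ent->forgranted = refs[i];
		pages[i] = ent->fpage;
		gnttab_set_map_op(&map[i],
			(unsigned long)pfn_to_kaddr(page_to_pfn(ent->fpage)),
			GNTMAP_host_map, refs[i], domid);
		pers_entry[*count + i] = ent;
	}

	/* One hypercall for the whole batch instead of one per grant. */
	ret = gnttab_map_refs(map, NULL, pages, nr);
	BUG_ON(ret);

	for (i = 0; i < nr; i++)
		pers_entry[*count + i]->map = map[i];
	*count += nr;

	return 0;
}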
> +
> + pers_entry[count]->forgranted = ref;
> +
> + return page_address(page);
> +}
> +
> +static void*
> +get_ref_page(struct persistent_entry **pers_entry, unsigned int *count,
> + grant_ref_t ref, domid_t domid, unsigned int total)
> +{
> + struct persistent_entry *per_gnt;
> + void *vaddr;
> +
> + per_gnt = get_per_gnt(pers_entry, *count, ref);
> +
> + if (per_gnt != NULL)
> + return page_address(per_gnt->fpage);
> + else {
> + BUG_ON(*count >= total);
> + vaddr = map_new_gnt(pers_entry, *count, ref, domid);
> + if (IS_ERR_OR_NULL(vaddr))
> + return vaddr;
> + *count += 1;
> + return vaddr;
> + }
> +}
> +
> +static int
> +grant_memory_copy_op(unsigned int cmd, void *vuop, unsigned int count,
> + struct xen_netbk *netbk, bool tx_pool)
> +{
> + int i;
> + struct xenvif *vif;
> + struct gnttab_copy *uop = vuop;
> + unsigned int *gnt_count;
> + unsigned int gnt_total;
> + struct persistent_entry **pers_entry;
> + int ret = 0;
> +
> + BUG_ON(cmd != GNTTABOP_copy);
> + for (i = 0; i < count; i++) {
> + if (tx_pool)
> + vif = netbk->gnttab_tx_vif[i];
> + else
> + vif = netbk->gnttab_rx_vif[i];
> +
> + pers_entry = vif->persistent_gnt;
> + gnt_count = &vif->persistent_gntcnt;
> + gnt_total = MAXIMUM_OUTSTANDING_BLOCK_REQS;
> +
> + if (vif->persistent_grant) {
> + void *saddr, *daddr;
> +
> + saddr = uop[i].source.domid == DOMID_SELF ?
> + (void *) uop[i].source.u.gmfn :
> + get_ref_page(pers_entry, gnt_count,
> + uop[i].source.u.ref,
> + uop[i].source.domid,
> + gnt_total);
> + if (IS_ERR_OR_NULL(saddr))
> + return -ENOMEM;
> +
> + daddr = uop[i].dest.domid == DOMID_SELF ?
> + (void *) uop[i].dest.u.gmfn :
> + get_ref_page(pers_entry, gnt_count,
> + uop[i].dest.u.ref,
> + uop[i].dest.domid,
> + gnt_total);
> + if (IS_ERR_OR_NULL(daddr))
> + return -ENOMEM;
> +
> + memcpy(daddr+uop[i].dest.offset,
> + saddr+uop[i].source.offset, uop[i].len);
> + } else
> + ret = HYPERVISOR_grant_table_op(cmd, &uop[i], 1);
> + }
> +
> + return ret;
> +}
> +
> void xen_netbk_add_xenvif(struct xenvif *vif)
> {
> int i;
> @@ -387,13 +517,15 @@ static struct netbk_rx_meta *get_next_rx_buffer(struct xenvif *vif,
> * Set up the grant operations for this fragment. If it's a flipping
> * interface, we also set up the unmap request from here.
> */
> -static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> - struct netrx_pending_operations *npo,
> - struct page *page, unsigned long size,
> - unsigned long offset, int *head)
> +static int netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> + struct netrx_pending_operations *npo,
> + struct page *page, unsigned long size,
> + unsigned long offset, int *head,
> + struct xenvif **grxvif)
> {
> struct gnttab_copy *copy_gop;
> struct netbk_rx_meta *meta;
> + int count = 0;
> /*
> * These variables are used iff get_page_ext returns true,
> * in which case they are guaranteed to be initialized.
> @@ -425,6 +557,10 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> bytes = MAX_BUFFER_OFFSET - npo->copy_off;
>
> copy_gop = npo->copy + npo->copy_prod++;
> + *grxvif = vif;
> + grxvif++;
> + count++;
> +
> copy_gop->flags = GNTCOPY_dest_gref;
> if (foreign) {
> struct xen_netbk *netbk = &xen_netbk[group];
> @@ -438,7 +574,10 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> } else {
> void *vaddr = page_address(page);
> copy_gop->source.domid = DOMID_SELF;
> - copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
> + if (!vif->persistent_grant)
> + copy_gop->source.u.gmfn = virt_to_mfn(vaddr);
> + else
> + copy_gop->source.u.gmfn = (unsigned long)vaddr;
> }
> copy_gop->source.offset = offset;
> copy_gop->dest.domid = vif->domid;
> @@ -460,6 +599,7 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> *head = 0; /* There must be something in this buffer now. */
>
> }
> + return count;
> }
>
> /*
> @@ -474,8 +614,10 @@ static void netbk_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
> * zero GSO descriptors (for non-GSO packets) or one descriptor (for
> * frontend-side LRO).
> */
> -static int netbk_gop_skb(struct sk_buff *skb,
> - struct netrx_pending_operations *npo)
> +static int netbk_gop_skb(struct xen_netbk *netbk,
> + struct sk_buff *skb,
> + struct netrx_pending_operations *npo,
> + struct xenvif **grxvif)
> {
> struct xenvif *vif = netdev_priv(skb->dev);
> int nr_frags = skb_shinfo(skb)->nr_frags;
> @@ -518,17 +660,19 @@ static int netbk_gop_skb(struct sk_buff *skb,
> if (data + len > skb_tail_pointer(skb))
> len = skb_tail_pointer(skb) - data;
>
> - netbk_gop_frag_copy(vif, skb, npo,
> - virt_to_page(data), len, offset, &head);
> + grxvif += netbk_gop_frag_copy(vif, skb, npo,
> + virt_to_page(data), len,
> + offset, &head, grxvif);
> +
> data += len;
> }
>
> for (i = 0; i < nr_frags; i++) {
> - netbk_gop_frag_copy(vif, skb, npo,
> - skb_frag_page(&skb_shinfo(skb)->frags[i]),
> - skb_frag_size(&skb_shinfo(skb)->frags[i]),
> - skb_shinfo(skb)->frags[i].page_offset,
> - &head);
> + grxvif += netbk_gop_frag_copy(vif, skb, npo,
> + skb_frag_page(&skb_shinfo(skb)->frags[i]),
> + skb_frag_size(&skb_shinfo(skb)->frags[i]),
> + skb_shinfo(skb)->frags[i].page_offset,
> + &head, grxvif);
> }
>
> return npo->meta_prod - old_meta_prod;
> @@ -593,6 +737,8 @@ struct skb_cb_overlay {
> static void xen_netbk_rx_action(struct xen_netbk *netbk)
> {
> struct xenvif *vif = NULL, *tmp;
> + struct xenvif **grxvif = netbk->gnttab_rx_vif;
> + int old_copy_prod = 0;
> s8 status;
> u16 irq, flags;
> struct xen_netif_rx_response *resp;
> @@ -619,7 +765,9 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> nr_frags = skb_shinfo(skb)->nr_frags;
>
> sco = (struct skb_cb_overlay *)skb->cb;
> - sco->meta_slots_used = netbk_gop_skb(skb, &npo);
> + sco->meta_slots_used = netbk_gop_skb(netbk, skb, &npo, grxvif);
> + grxvif += npo.copy_prod - old_copy_prod;
> + old_copy_prod = npo.copy_prod;
>
> count += nr_frags + 1;
>
> @@ -636,8 +784,10 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk)
> return;
>
> BUG_ON(npo.copy_prod > ARRAY_SIZE(netbk->grant_copy_op));
> - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy, &netbk->grant_copy_op,
> - npo.copy_prod);
> + ret = grant_memory_copy_op(GNTTABOP_copy,
> + &netbk->grant_copy_op,
> + npo.copy_prod, netbk,
> + false);
> BUG_ON(ret != 0);
>
> while ((skb = __skb_dequeue(&rxq)) != NULL) {
> @@ -918,7 +1068,8 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
> struct xenvif *vif,
> struct sk_buff *skb,
> struct xen_netif_tx_request *txp,
> - struct gnttab_copy *gop)
> + struct gnttab_copy *gop,
> + struct xenvif **gtxvif)
> {
> struct skb_shared_info *shinfo = skb_shinfo(skb);
> skb_frag_t *frags = shinfo->frags;
> @@ -944,7 +1095,11 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
> gop->source.domid = vif->domid;
> gop->source.offset = txp->offset;
>
> - gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + if (!vif->persistent_grant)
> + gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + else
> + gop->dest.u.gmfn = (unsigned long)page_address(page);
> +
> gop->dest.domid = DOMID_SELF;
> gop->dest.offset = txp->offset;
>
> @@ -952,6 +1107,9 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk,
> gop->flags = GNTCOPY_source_gref;
>
> gop++;
> + *gtxvif = vif;
> + gtxvif++;
> +
>
> memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp));
> xenvif_get(vif);
> @@ -1218,6 +1376,7 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
> static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> {
> struct gnttab_copy *gop = netbk->tx_copy_ops, *request_gop;
> + struct xenvif **gtxvif = netbk->gnttab_tx_vif;
> struct sk_buff *skb;
> int ret;
>
> @@ -1338,7 +1497,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> gop->source.domid = vif->domid;
> gop->source.offset = txreq.offset;
>
> - gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + if (!vif->persistent_grant)
> + gop->dest.u.gmfn = virt_to_mfn(page_address(page));
> + else
> + gop->dest.u.gmfn = (unsigned long)page_address(page);
> +
> gop->dest.domid = DOMID_SELF;
> gop->dest.offset = txreq.offset;
>
> @@ -1346,6 +1509,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> gop->flags = GNTCOPY_source_gref;
>
> gop++;
> + *gtxvif++ = vif;
>
> memcpy(&netbk->pending_tx_info[pending_idx].req,
> &txreq, sizeof(txreq));
> @@ -1369,12 +1533,13 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk)
> netbk->pending_cons++;
>
> request_gop = xen_netbk_get_requests(netbk, vif,
> - skb, txfrags, gop);
> + skb, txfrags, gop, gtxvif);
> if (request_gop == NULL) {
> kfree_skb(skb);
> netbk_tx_err(vif, &txreq, idx);
> continue;
> }
> + gtxvif += request_gop - gop;
> gop = request_gop;
>
> vif->tx.req_cons = idx;
> @@ -1467,8 +1632,9 @@ static void xen_netbk_tx_action(struct xen_netbk *netbk)
>
> if (nr_gops == 0)
> return;
> - ret = HYPERVISOR_grant_table_op(GNTTABOP_copy,
> - netbk->tx_copy_ops, nr_gops);
> + ret = grant_memory_copy_op(GNTTABOP_copy,
> + netbk->tx_copy_ops, nr_gops,
> + netbk, true);
> BUG_ON(ret);
>
> xen_netbk_tx_submit(netbk);
> diff --git a/drivers/net/xen-netback/xenbus.c b/drivers/net/xen-netback/xenbus.c
> index 410018c..938e908 100644
> --- a/drivers/net/xen-netback/xenbus.c
> +++ b/drivers/net/xen-netback/xenbus.c
> @@ -114,6 +114,13 @@ static int netback_probe(struct xenbus_device *dev,
> goto abort_transaction;
> }
>
> + err = xenbus_printf(xbt, dev->nodename,
> + "feature-persistent-grants", "%u", 1);
> + if (err) {
> + message = "writing feature-persistent-grants";
> + goto abort_transaction;
> + }
> +
> err = xenbus_transaction_end(xbt, 0);
> } while (err == -EAGAIN);
>
> @@ -453,7 +460,12 @@ static int connect_rings(struct backend_info *be)
> val = 0;
> vif->csum = !val;
>
> - /* Map the shared frame, irq etc. */
> + if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-persistent-grants",
> + "%u", &val) < 0)
Block devices use "feature-persistent", so for consistency I think it
should be announced the same way in net.
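I.e. just the key name changed on both ends (illustrative, adapted from
the hunks above):

	err = xenbus_printf(xbt, dev->nodename,
			    "feature-persistent", "%u", 1);
	...
	if (xenbus_scanf(XBT_NIL, dev->otherend, "feature-persistent",
			 "%u", &val) < 0)
		val = 0;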
> + val = 0;
> + vif->persistent_grant = !!val;
> +
> +/* Map the shared frame, irq etc. */
> err = xenvif_connect(vif, tx_ring_ref, rx_ring_ref, evtchn);
> if (err) {
> xenbus_dev_fatal(dev, err,
> --
> 1.7.3.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@...ts.xen.org
> http://lists.xen.org/xen-devel
>