lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <50A4C987.3020308@citrix.com>
Date:	Thu, 15 Nov 2012 11:52:55 +0100
From:	Roger Pau Monné <roger.pau@...rix.com>
To:	Annie Li <annie.li@...cle.com>
CC:	"xen-devel@...ts.xensource.com" <xen-devel@...ts.xensource.com>,
	"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
	"konrad.wilk@...cle.com" <konrad.wilk@...cle.com>,
	Ian Campbell <Ian.Campbell@...rix.com>
Subject: Re: [Xen-devel] [PATCH 3/4] Xen/netfront: Implement persistent grant
 in netfront.

On 15/11/12 08:05, Annie Li wrote:
> Tx/rx page pool are maintained. New grant is mapped and put into
> pool, unmap only happens when releasing/removing device.
> 
> Signed-off-by: Annie Li <annie.li@...cle.com>
> ---
>  drivers/net/xen-netfront.c |  372 +++++++++++++++++++++++++++++++++++++-------
>  1 files changed, 315 insertions(+), 57 deletions(-)
> 
> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
> index 0ebbb19..17b81c0 100644
> --- a/drivers/net/xen-netfront.c
> +++ b/drivers/net/xen-netfront.c
> @@ -79,6 +79,13 @@ struct netfront_stats {
>         struct u64_stats_sync   syncp;
>  };
> 
> +struct gnt_list {
> +       grant_ref_t             gref;
> +       struct                  page *gnt_pages;
> +       void                    *gnt_target;
> +       struct                  gnt_list *tail;
> +};

This could also be shared with blkfront.

> +
>  struct netfront_info {
>         struct list_head list;
>         struct net_device *netdev;
> @@ -109,6 +116,10 @@ struct netfront_info {
>         grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
>         unsigned tx_skb_freelist;
> 
> +       struct gnt_list *tx_grant[NET_TX_RING_SIZE];
> +       struct gnt_list *tx_gnt_list;
> +       unsigned int tx_gnt_cnt;

I don't understand this: why do you need both an array and a list? I'm
not familiar with the net code, so I don't know if this is some kind of
special netfront thing.

Anyway, if you have to use a list I would recommend using one of the list
constructs that are already in the kernel; it simplifies the code and
makes it easier to understand than creating your own list structure.

> +
>         spinlock_t   rx_lock ____cacheline_aligned_in_smp;
>         struct xen_netif_rx_front_ring rx;
>         int rx_ring_ref;
> @@ -126,6 +137,10 @@ struct netfront_info {
>         grant_ref_t gref_rx_head;
>         grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
> 
> +       struct gnt_list *rx_grant[NET_RX_RING_SIZE];
> +       struct gnt_list *rx_gnt_list;
> +       unsigned int rx_gnt_cnt;

The same comment as above applies here.

> +
>         unsigned long rx_pfn_array[NET_RX_RING_SIZE];
>         struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
>         struct mmu_update rx_mmu[NET_RX_RING_SIZE];
> @@ -134,6 +149,7 @@ struct netfront_info {
>         struct netfront_stats __percpu *stats;
> 
>         unsigned long rx_gso_checksum_fixup;
> +       u8 persistent_gnt:1;
>  };
> 
>  struct netfront_rx_info {
> @@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
>         return ref;
>  }
> 
> +static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np,
> +                                           RING_IDX ri)
> +{
> +       int i = xennet_rxidx(ri);
> +       struct gnt_list *gntlist = np->rx_grant[i];
> +       np->rx_grant[i] = NULL;

OK, I think I get why you need both an array and a list: is it
because netfront doesn't have some kind of shadow ring to keep track of
issued requests?

So each issued request has an associated gnt_list with the list of used
grants? If so it would be good to add a comment about it.

> +       return gntlist;
> +}
> +
>  #ifdef CONFIG_SYSFS
>  static int xennet_sysfs_addif(struct net_device *netdev);
>  static void xennet_sysfs_delif(struct net_device *netdev);
> @@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev)
>                 netif_wake_queue(dev);
>  }
> 
> +static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev,
> +                                      unsigned long mfn, void *vaddr,
> +                                      unsigned int id,
> +                                      grant_ref_t ref)
> +{
> +       struct netfront_info *np = netdev_priv(dev);
> +       grant_ref_t gnt_ref;
> +       struct gnt_list *gnt_list_entry;
> +
> +       if (np->persistent_gnt && np->rx_gnt_cnt) {
> +               gnt_list_entry = np->rx_gnt_list;
> +               np->rx_gnt_list = np->rx_gnt_list->tail;
> +               np->rx_gnt_cnt--;
> +
> +               gnt_list_entry->gnt_target = vaddr;
> +               gnt_ref = gnt_list_entry->gref;
> +               np->rx_grant[id] = gnt_list_entry;
> +       } else {
> +               struct page *page;
> +
> +               BUG_ON(!np->persistent_gnt && np->rx_gnt_cnt);
> +               if (!ref)
> +                       gnt_ref =
> +                               gnttab_claim_grant_reference(&np->gref_rx_head);
> +               else
> +                       gnt_ref = ref;
> +               BUG_ON((signed short)gnt_ref < 0);
> +
> +               if (np->persistent_gnt) {

So you are only using persistent grants if the backend also supports
them. Have you benchmarked the performance of a persistent frontend with
a non-persistent backend? In the block case, using a persistent frontend
with a non-persistent backend led to an overall performance improvement,
so blkfront uses persistent grants even if blkback doesn't support them.
Take a look at the following graph:

http://xenbits.xen.org/people/royger/persistent_grants/nonpers_read.png

> +                       page = alloc_page(GFP_KERNEL);
> +                       if (!page) {
> +                               if (!ref)
> +                                       gnttab_release_grant_reference(
> +                                                       &np->gref_rx_head, ref);
> +                               return -ENOMEM;
> +                       }
> +                       mfn = pfn_to_mfn(page_to_pfn(page));
> +
> +                       gnt_list_entry = kmalloc(sizeof(struct gnt_list),
> +                                                GFP_KERNEL);
> +                       if (!gnt_list_entry) {
> +                               __free_page(page);
> +                               if (!ref)
> +                                       gnttab_release_grant_reference(
> +                                                       &np->gref_rx_head, ref);
> +                               return -ENOMEM;
> +                       }
> +                       gnt_list_entry->gref = gnt_ref;
> +                       gnt_list_entry->gnt_pages = page;
> +                       gnt_list_entry->gnt_target = vaddr;
> +
> +                       np->rx_grant[id] = gnt_list_entry;
> +               }
> +
> +               gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id,
> +                                               mfn, 0);
> +       }
> +       np->grant_rx_ref[id] = gnt_ref;
> +
> +       return gnt_ref;
> +}
> +
>  static void xennet_alloc_rx_buffers(struct net_device *dev)
>  {
>         unsigned short id;
> @@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev)
>         int i, batch_target, notify;
>         RING_IDX req_prod = np->rx.req_prod_pvt;
>         grant_ref_t ref;
> -       unsigned long pfn;
> -       void *vaddr;
>         struct xen_netif_rx_request *req;
> 
>         if (unlikely(!netif_carrier_ok(dev)))
> @@ -306,19 +392,16 @@ no_skb:
>                 BUG_ON(np->rx_skbs[id]);
>                 np->rx_skbs[id] = skb;
> 
> -               ref = gnttab_claim_grant_reference(&np->gref_rx_head);
> -               BUG_ON((signed short)ref < 0);
> -               np->grant_rx_ref[id] = ref;
> +               page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
> 
> -               pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
> -               vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
> +               ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
> +                                         page_address(page), id, 0);
> +               if ((signed short)ref < 0) {
> +                       __skb_queue_tail(&np->rx_batch, skb);
> +                       break;
> +               }
> 
>                 req = RING_GET_REQUEST(&np->rx, req_prod + i);
> -               gnttab_grant_foreign_access_ref(ref,
> -                                               np->xbdev->otherend_id,
> -                                               pfn_to_mfn(pfn),
> -                                               0);
> -
>                 req->id = id;
>                 req->gref = ref;
>         }
> @@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev)
> 
>                         id  = txrsp->id;
>                         skb = np->tx_skbs[id].skb;
> -                       if (unlikely(gnttab_query_foreign_access(
> -                               np->grant_tx_ref[id]) != 0)) {
> -                               printk(KERN_ALERT "xennet_tx_buf_gc: warning "
> -                                      "-- grant still in use by backend "
> -                                      "domain.\n");
> -                               BUG();
> +
> +                       if (np->persistent_gnt) {
> +                               struct gnt_list *gnt_list_entry;
> +
> +                               gnt_list_entry = np->tx_grant[id];
> +                               BUG_ON(!gnt_list_entry);
> +
> +                               gnt_list_entry->tail = np->tx_gnt_list;
> +                               np->tx_gnt_list = gnt_list_entry;
> +                               np->tx_gnt_cnt++;
> +                       } else {
> +                               if (unlikely(gnttab_query_foreign_access(
> +                                       np->grant_tx_ref[id]) != 0)) {
> +                                       printk(KERN_ALERT "xennet_tx_buf_gc: warning "
> +                                              "-- grant still in use by backend "
> +                                              "domain.\n");
> +                                       BUG();
> +                               }
> +
> +                               gnttab_end_foreign_access_ref(
> +                                       np->grant_tx_ref[id], GNTMAP_readonly);

If I've read the code correctly, you are giving the other end both
read and write permissions to this frame in xennet_alloc_tx_ref, but then
you are only removing the read permissions? (See the comment below on the
xennet_alloc_tx_ref function.)

> +                               gnttab_release_grant_reference(
> +                                     &np->gref_tx_head, np->grant_tx_ref[id]);
>                         }
> -                       gnttab_end_foreign_access_ref(
> -                               np->grant_tx_ref[id], GNTMAP_readonly);
> -                       gnttab_release_grant_reference(
> -                               &np->gref_tx_head, np->grant_tx_ref[id]);
>                         np->grant_tx_ref[id] = GRANT_INVALID_REF;
>                         add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
>                         dev_kfree_skb_irq(skb);
> @@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev)
>         xennet_maybe_wake_tx(dev);
>  }
> 
> +static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev,
> +                                      unsigned long mfn,
> +                                      unsigned int id)
> +{
> +       struct netfront_info *np = netdev_priv(dev);
> +       grant_ref_t ref;
> +       struct page *granted_page;
> +
> +       if (np->persistent_gnt && np->tx_gnt_cnt) {
> +               struct gnt_list *gnt_list_entry;
> +
> +               gnt_list_entry = np->tx_gnt_list;
> +               np->tx_gnt_list = np->tx_gnt_list->tail;
> +               np->tx_gnt_cnt--;
> +
> +               ref = gnt_list_entry->gref;
> +               np->tx_grant[id] = gnt_list_entry;
> +       } else {
> +               struct gnt_list *gnt_list_entry;
> +
> +               BUG_ON(!np->persistent_gnt && np->tx_gnt_cnt);
> +               ref = gnttab_claim_grant_reference(&np->gref_tx_head);
> +               BUG_ON((signed short)ref < 0);
> +
> +               if (np->persistent_gnt) {
> +                       granted_page = alloc_page(GFP_KERNEL);
> +                       if (!granted_page) {
> +                               gnttab_release_grant_reference(
> +                                                       &np->gref_tx_head, ref);
> +                               return -ENOMEM;
> +                       }
> +
> +                       mfn = pfn_to_mfn(page_to_pfn(granted_page));
> +                       gnt_list_entry = kmalloc(sizeof(struct gnt_list),
> +                                                GFP_KERNEL);
> +                       if (!gnt_list_entry) {
> +                               __free_page(granted_page);
> +                               gnttab_release_grant_reference(
> +                                                       &np->gref_tx_head, ref);
> +                               return -ENOMEM;
> +                       }
> +
> +                       gnt_list_entry->gref = ref;
> +                       gnt_list_entry->gnt_pages = granted_page;
> +                       np->tx_grant[id] = gnt_list_entry;
> +               }
> +               gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
> +                                               mfn, 0);

If you are not always using persistent grants, I guess you need to give
read-only permissions to this frame (GNTMAP_readonly). Also, to keep
things in logical order, wouldn't it be better for this function to come
before xennet_tx_buf_gc?

> +       }
> +
> +       return ref;
> +}
> +
> @@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
>                 }
> 
>                 skb = np->rx_skbs[id];
> -               mfn = gnttab_end_foreign_transfer_ref(ref);
> -               gnttab_release_grant_reference(&np->gref_rx_head, ref);
> +               if (!np->persistent_gnt) {
> +                       mfn = gnttab_end_foreign_transfer_ref(ref);
> +                       gnttab_release_grant_reference(&np->gref_rx_head, ref);
> +               }
>                 np->grant_rx_ref[id] = GRANT_INVALID_REF;
> 
>                 if (0 == mfn) {
> @@ -1607,6 +1834,13 @@ again:
>                 goto abort_transaction;
>         }
> 
> +       err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants",
> +                           "%u", info->persistent_gnt);

As in netback, I think "feature-persistent" should be used.

> +       if (err) {
> +               message = "writing feature-persistent-grants";
> +               xenbus_dev_fatal(dev, err, "%s", message);
> +       }
> +
>         err = xenbus_transaction_end(xbt, 0);
>         if (err) {
>                 if (err == -EAGAIN)
> @@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev)
>         grant_ref_t ref;
>         struct xen_netif_rx_request *req;
>         unsigned int feature_rx_copy;
> +       int ret, val;
> 
>         err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
>                            "feature-rx-copy", "%u", &feature_rx_copy);
> @@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev)
>                 return -ENODEV;
>         }
> 
> +       err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
> +                          "feature-persistent-grants", "%u", &val);
> +       if (err != 1)
> +               val = 0;
> +
> +       np->persistent_gnt = !!val;
> +
>         err = talk_to_netback(np->xbdev, np);
>         if (err)
>                 return err;
> @@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev)
>         spin_lock_bh(&np->rx_lock);
>         spin_lock_irq(&np->tx_lock);
> 
> +       np->tx_gnt_cnt = 0;
> +       np->rx_gnt_cnt = 0;
> +
>         /* Step 1: Discard all pending TX packet fragments. */
>         xennet_release_tx_bufs(np);
> 
> +       if (np->persistent_gnt) {
> +               struct gnt_list *gnt_list_entry;
> +
> +               while (np->rx_gnt_list) {
> +                       gnt_list_entry = np->rx_gnt_list;
> +                       np->rx_gnt_list = np->rx_gnt_list->tail;
> +                       gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
> +                       __free_page(gnt_list_entry->gnt_pages);
> +                       kfree(gnt_list_entry);
> +               }
> +       }
> +
>         /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
>         for (requeue_idx = 0, i = 0; i < NET_RX_RING_SIZE; i++) {
>                 skb_frag_t *frag;
> @@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev)
> 
>                 frag = &skb_shinfo(skb)->frags[0];
>                 page = skb_frag_page(frag);
> -               gnttab_grant_foreign_access_ref(
> -                       ref, np->xbdev->otherend_id,
> -                       pfn_to_mfn(page_to_pfn(page)),
> -                       0);
> +               ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
> +                                         page_address(page), requeue_idx, ref);
> +               if ((signed short)ret < 0)
> +                       break;
> +
>                 req->gref = ref;
>                 req->id   = requeue_idx;
> 
> --
> 1.7.3.4
> 
> 
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@...ts.xen.org
> http://lists.xen.org/xen-devel
> 

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ