[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <50A5CD7F.2010609@oracle.com>
Date: Fri, 16 Nov 2012 13:22:07 +0800
From: ANNIE LI <annie.li@...cle.com>
To: Roger Pau Monné <roger.pau@...rix.com>
CC: "xen-devel@...ts.xensource.com" <xen-devel@...ts.xensource.com>,
"netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"konrad.wilk@...cle.com" <konrad.wilk@...cle.com>,
Ian Campbell <Ian.Campbell@...rix.com>
Subject: Re: [Xen-devel] [PATCH 3/4] Xen/netfront: Implement persistent grant
in netfront.
On 2012-11-15 18:52, Roger Pau Monné wrote:
> On 15/11/12 08:05, Annie Li wrote:
>> Tx/rx page pool are maintained. New grant is mapped and put into
>> pool, unmap only happens when releasing/removing device.
>>
>> Signed-off-by: Annie Li<annie.li@...cle.com>
>> ---
>> drivers/net/xen-netfront.c | 372 +++++++++++++++++++++++++++++++++++++-------
>> 1 files changed, 315 insertions(+), 57 deletions(-)
>>
>> diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
>> index 0ebbb19..17b81c0 100644
>> --- a/drivers/net/xen-netfront.c
>> +++ b/drivers/net/xen-netfront.c
>> @@ -79,6 +79,13 @@ struct netfront_stats {
>> struct u64_stats_sync syncp;
>> };
>>
>> +struct gnt_list {
>> + grant_ref_t gref;
>> + struct page *gnt_pages;
>> + void *gnt_target;
>> + struct gnt_list *tail;
>> +};
> This could also be shared with blkfront.
Netfront does not have the shadow like blkfront, and it needs the
gnt_target to save skb address of rx path. So we can share this too?
blkfront would not use it actually.
>
>> +
>> struct netfront_info {
>> struct list_head list;
>> struct net_device *netdev;
>> @@ -109,6 +116,10 @@ struct netfront_info {
>> grant_ref_t grant_tx_ref[NET_TX_RING_SIZE];
>> unsigned tx_skb_freelist;
>>
>> + struct gnt_list *tx_grant[NET_TX_RING_SIZE];
>> + struct gnt_list *tx_gnt_list;
>> + unsigned int tx_gnt_cnt;
> I don't understand this, why do you need both an array and a list?
The array tx_grant is just like other tx_skbs, grant_tx_ref. It saves
grant entries corresponding every request in the ring. This is what
netfront different from blkfront, netfront does not have shadow ring,
and it only uses a ring size array to track every request in the ring.
The list is like a pool to save all available persistent grants.
> I'm
> not familiar with net code, so I don't know if this is some kind of
> special netfront thing?
Yes, this is different from blkfront. netfront uses ring size arrays to
track every request in the ring.
>
> Anyway if you have to use a list I would recommend using one of the list
> constructions that's already in the kernel, it simplifies the code and
> makes it more easy to understand than creating your own list structure.
Ok, thanks.
>
>> +
>> spinlock_t rx_lock ____cacheline_aligned_in_smp;
>> struct xen_netif_rx_front_ring rx;
>> int rx_ring_ref;
>> @@ -126,6 +137,10 @@ struct netfront_info {
>> grant_ref_t gref_rx_head;
>> grant_ref_t grant_rx_ref[NET_RX_RING_SIZE];
>>
>> + struct gnt_list *rx_grant[NET_RX_RING_SIZE];
>> + struct gnt_list *rx_gnt_list;
>> + unsigned int rx_gnt_cnt;
> Same comment above here.
Same as above.
>
>> +
>> unsigned long rx_pfn_array[NET_RX_RING_SIZE];
>> struct multicall_entry rx_mcl[NET_RX_RING_SIZE+1];
>> struct mmu_update rx_mmu[NET_RX_RING_SIZE];
>> @@ -134,6 +149,7 @@ struct netfront_info {
>> struct netfront_stats __percpu *stats;
>>
>> unsigned long rx_gso_checksum_fixup;
>> + u8 persistent_gnt:1;
>> };
>>
>> struct netfront_rx_info {
>> @@ -194,6 +210,16 @@ static grant_ref_t xennet_get_rx_ref(struct netfront_info *np,
>> return ref;
>> }
>>
>> +static struct gnt_list *xennet_get_rx_grant(struct netfront_info *np,
>> + RING_IDX ri)
>> +{
>> + int i = xennet_rxidx(ri);
>> + struct gnt_list *gntlist = np->rx_grant[i];
>> + np->rx_grant[i] = NULL;
> Ok, I think I get why do you need both an array and a list, is that
> because netfront doesn't have some kind of shadow ring to keep track of
> issued requests?
Yes.
>
> So each issued request has an associated gnt_list with the list of used
> grants?
gnt_list is kind of free grants. It is like a pool of free grants. If
free grants exist in this list, free grant will be gotten from this
list. If no, new grant will be allocated. In xennet_tx_buf_gc, free
grants will be put into the list again if response status is OK.
> If so it would be good to add a comment about it.
>
>> + return gntlist;
>> +}
>> +
>> #ifdef CONFIG_SYSFS
>> static int xennet_sysfs_addif(struct net_device *netdev);
>> static void xennet_sysfs_delif(struct net_device *netdev);
>> @@ -231,6 +257,68 @@ static void xennet_maybe_wake_tx(struct net_device *dev)
>> netif_wake_queue(dev);
>> }
>>
>> +static grant_ref_t xennet_alloc_rx_ref(struct net_device *dev,
>> + unsigned long mfn, void *vaddr,
>> + unsigned int id,
>> + grant_ref_t ref)
>> +{
>> + struct netfront_info *np = netdev_priv(dev);
>> + grant_ref_t gnt_ref;
>> + struct gnt_list *gnt_list_entry;
>> +
>> + if (np->persistent_gnt&& np->rx_gnt_cnt) {
>> + gnt_list_entry = np->rx_gnt_list;
>> + np->rx_gnt_list = np->rx_gnt_list->tail;
>> + np->rx_gnt_cnt--;
>> +
>> + gnt_list_entry->gnt_target = vaddr;
>> + gnt_ref = gnt_list_entry->gref;
>> + np->rx_grant[id] = gnt_list_entry;
>> + } else {
>> + struct page *page;
>> +
>> + BUG_ON(!np->persistent_gnt&& np->rx_gnt_cnt);
>> + if (!ref)
>> + gnt_ref =
>> + gnttab_claim_grant_reference(&np->gref_rx_head);
>> + else
>> + gnt_ref = ref;
>> + BUG_ON((signed short)gnt_ref< 0);
>> +
>> + if (np->persistent_gnt) {
> So you are only using persistent grants if the backend also supports
> them.
Current implementation is:
If netback supports persistent grant, the frontend will work with
persistent grant feature too.
If netback does not support persistent grant, the frontend will work
without persistent grant feature.
> Have you benchmarked the performance of a persistent frontend with
> a non-persistent backend.
I remember I did some test before, not so sure. Will check it.
> In the block case, usign a persistent frontend
> with a non-persistent backend let to an overall performance improvement,
> so blkfront uses persistent grants even if blkback doesn't support them.
> Take a look at the following graph:
>
> http://xenbits.xen.org/people/royger/persistent_grants/nonpers_read.png
Good idea, that makes sense. I will change netfront too, thanks.
>
>> + page = alloc_page(GFP_KERNEL);
>> + if (!page) {
>> + if (!ref)
>> + gnttab_release_grant_reference(
>> +&np->gref_rx_head, ref);
>> + return -ENOMEM;
>> + }
>> + mfn = pfn_to_mfn(page_to_pfn(page));
>> +
>> + gnt_list_entry = kmalloc(sizeof(struct gnt_list),
>> + GFP_KERNEL);
>> + if (!gnt_list_entry) {
>> + __free_page(page);
>> + if (!ref)
>> + gnttab_release_grant_reference(
>> +&np->gref_rx_head, ref);
>> + return -ENOMEM;
>> + }
>> + gnt_list_entry->gref = gnt_ref;
>> + gnt_list_entry->gnt_pages = page;
>> + gnt_list_entry->gnt_target = vaddr;
>> +
>> + np->rx_grant[id] = gnt_list_entry;
>> + }
>> +
>> + gnttab_grant_foreign_access_ref(gnt_ref, np->xbdev->otherend_id,
>> + mfn, 0);
>> + }
>> + np->grant_rx_ref[id] = gnt_ref;
>> +
>> + return gnt_ref;
>> +}
>> +
>> static void xennet_alloc_rx_buffers(struct net_device *dev)
>> {
>> unsigned short id;
>> @@ -240,8 +328,6 @@ static void xennet_alloc_rx_buffers(struct net_device *dev)
>> int i, batch_target, notify;
>> RING_IDX req_prod = np->rx.req_prod_pvt;
>> grant_ref_t ref;
>> - unsigned long pfn;
>> - void *vaddr;
>> struct xen_netif_rx_request *req;
>>
>> if (unlikely(!netif_carrier_ok(dev)))
>> @@ -306,19 +392,16 @@ no_skb:
>> BUG_ON(np->rx_skbs[id]);
>> np->rx_skbs[id] = skb;
>>
>> - ref = gnttab_claim_grant_reference(&np->gref_rx_head);
>> - BUG_ON((signed short)ref< 0);
>> - np->grant_rx_ref[id] = ref;
>> + page = skb_frag_page(&skb_shinfo(skb)->frags[0]);
>>
>> - pfn = page_to_pfn(skb_frag_page(&skb_shinfo(skb)->frags[0]));
>> - vaddr = page_address(skb_frag_page(&skb_shinfo(skb)->frags[0]));
>> + ref = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
>> + page_address(page), id, 0);
>> + if ((signed short)ref< 0) {
>> + __skb_queue_tail(&np->rx_batch, skb);
>> + break;
>> + }
>>
>> req = RING_GET_REQUEST(&np->rx, req_prod + i);
>> - gnttab_grant_foreign_access_ref(ref,
>> - np->xbdev->otherend_id,
>> - pfn_to_mfn(pfn),
>> - 0);
>> -
>> req->id = id;
>> req->gref = ref;
>> }
>> @@ -375,17 +458,30 @@ static void xennet_tx_buf_gc(struct net_device *dev)
>>
>> id = txrsp->id;
>> skb = np->tx_skbs[id].skb;
>> - if (unlikely(gnttab_query_foreign_access(
>> - np->grant_tx_ref[id]) != 0)) {
>> - printk(KERN_ALERT "xennet_tx_buf_gc: warning "
>> - "-- grant still in use by backend "
>> - "domain.\n");
>> - BUG();
>> +
>> + if (np->persistent_gnt) {
>> + struct gnt_list *gnt_list_entry;
>> +
>> + gnt_list_entry = np->tx_grant[id];
>> + BUG_ON(!gnt_list_entry);
>> +
>> + gnt_list_entry->tail = np->tx_gnt_list;
>> + np->tx_gnt_list = gnt_list_entry;
>> + np->tx_gnt_cnt++;
>> + } else {
>> + if (unlikely(gnttab_query_foreign_access(
>> + np->grant_tx_ref[id]) != 0)) {
>> + printk(KERN_ALERT "xennet_tx_buf_gc: warning "
>> + "-- grant still in use by backend "
>> + "domain.\n");
>> + BUG();
>> + }
>> +
>> + gnttab_end_foreign_access_ref(
>> + np->grant_tx_ref[id], GNTMAP_readonly);
> If I've read the code correctly, you are giving this frame both
> read/write permissions to the other end on xennet_alloc_tx_ref, but then
> you are only removing the read permissions? (see comment below on the
> xennet_alloc_tx_ref function).
Yes, this is a bug.
For non persistent grant, it should remove the read permissions. For
persistent grant, it should remove both.
As mentioned above, it is better to enable persistent grant, I will
change code and not consider non persistent grant.
See comments below about why needing both permissions in
xennet_alloc_tx_ref.
>
>> + gnttab_release_grant_reference(
>> +&np->gref_tx_head, np->grant_tx_ref[id]);
>> }
>> - gnttab_end_foreign_access_ref(
>> - np->grant_tx_ref[id], GNTMAP_readonly);
>> - gnttab_release_grant_reference(
>> -&np->gref_tx_head, np->grant_tx_ref[id]);
>> np->grant_tx_ref[id] = GRANT_INVALID_REF;
>> add_id_to_freelist(&np->tx_skb_freelist, np->tx_skbs, id);
>> dev_kfree_skb_irq(skb);
>> @@ -409,6 +505,59 @@ static void xennet_tx_buf_gc(struct net_device *dev)
>> xennet_maybe_wake_tx(dev);
>> }
>>
>> +static grant_ref_t xennet_alloc_tx_ref(struct net_device *dev,
>> + unsigned long mfn,
>> + unsigned int id)
>> +{
>> + struct netfront_info *np = netdev_priv(dev);
>> + grant_ref_t ref;
>> + struct page *granted_page;
>> +
>> + if (np->persistent_gnt&& np->tx_gnt_cnt) {
>> + struct gnt_list *gnt_list_entry;
>> +
>> + gnt_list_entry = np->tx_gnt_list;
>> + np->tx_gnt_list = np->tx_gnt_list->tail;
>> + np->tx_gnt_cnt--;
>> +
>> + ref = gnt_list_entry->gref;
>> + np->tx_grant[id] = gnt_list_entry;
>> + } else {
>> + struct gnt_list *gnt_list_entry;
>> +
>> + BUG_ON(!np->persistent_gnt&& np->tx_gnt_cnt);
>> + ref = gnttab_claim_grant_reference(&np->gref_tx_head);
>> + BUG_ON((signed short)ref< 0);
>> +
>> + if (np->persistent_gnt) {
>> + granted_page = alloc_page(GFP_KERNEL);
>> + if (!granted_page) {
>> + gnttab_release_grant_reference(
>> +&np->gref_tx_head, ref);
>> + return -ENOMEM;
>> + }
>> +
>> + mfn = pfn_to_mfn(page_to_pfn(granted_page));
>> + gnt_list_entry = kmalloc(sizeof(struct gnt_list),
>> + GFP_KERNEL);
>> + if (!gnt_list_entry) {
>> + __free_page(granted_page);
>> + gnttab_release_grant_reference(
>> +&np->gref_tx_head, ref);
>> + return -ENOMEM;
>> + }
>> +
>> + gnt_list_entry->gref = ref;
>> + gnt_list_entry->gnt_pages = granted_page;
>> + np->tx_grant[id] = gnt_list_entry;
>> + }
>> + gnttab_grant_foreign_access_ref(ref, np->xbdev->otherend_id,
>> + mfn, 0);
> If you are not always using persistent grants I guess you need to give
> read only permissions to this frame (GNTMAP_readonly).
We can not use GNTMAP_readonly here because tx path packet data will be
copied into these persistent grant pages. Mabbe it is better to use
GNTMAP_readonly for nonpersistent and 0 for persistent grant.
As mentioned above, it is better to enable persistent grant, I will
change code and not consider non persistent grant.
> Also, for keeping
> things in logical order, isn't it best that this function comes before
> xennet_tx_buf_gc?
xennet_alloc_tx_ref is called by following function xennet_make_frags,
so I assume xennet_alloc_tx_ref is better to be close to
xennet_make_frags. Xennet_tx_buf_gc does not have any connection with
xennet_alloc_tx_ref, did I miss something?
>
>> + }
>> +
>> + return ref;
>> +}
>> +
>> @@ -1132,8 +1357,10 @@ static void xennet_release_rx_bufs(struct netfront_info *np)
>> }
>>
>> skb = np->rx_skbs[id];
>> - mfn = gnttab_end_foreign_transfer_ref(ref);
>> - gnttab_release_grant_reference(&np->gref_rx_head, ref);
>> + if (!np->persistent_gnt) {
>> + mfn = gnttab_end_foreign_transfer_ref(ref);
>> + gnttab_release_grant_reference(&np->gref_rx_head, ref);
>> + }
>> np->grant_rx_ref[id] = GRANT_INVALID_REF;
>>
>> if (0 == mfn) {
>> @@ -1607,6 +1834,13 @@ again:
>> goto abort_transaction;
>> }
>>
>> + err = xenbus_printf(xbt, dev->nodename, "feature-persistent-grants",
>> + "%u", info->persistent_gnt);
> As in netback, I think "feature-persistent" should be used.
Same in blkback, I assume it is "feature-persistent-grants", right?
I referred your RFC patch, did you change it later? Or I missed something?
Thanks
Annie
>
>> + if (err) {
>> + message = "writing feature-persistent-grants";
>> + xenbus_dev_fatal(dev, err, "%s", message);
>> + }
>> +
>> err = xenbus_transaction_end(xbt, 0);
>> if (err) {
>> if (err == -EAGAIN)
>> @@ -1634,6 +1868,7 @@ static int xennet_connect(struct net_device *dev)
>> grant_ref_t ref;
>> struct xen_netif_rx_request *req;
>> unsigned int feature_rx_copy;
>> + int ret, val;
>>
>> err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
>> "feature-rx-copy", "%u",&feature_rx_copy);
>> @@ -1646,6 +1881,13 @@ static int xennet_connect(struct net_device *dev)
>> return -ENODEV;
>> }
>>
>> + err = xenbus_scanf(XBT_NIL, np->xbdev->otherend,
>> + "feature-persistent-grants", "%u",&val);
>> + if (err != 1)
>> + val = 0;
>> +
>> + np->persistent_gnt = !!val;
>> +
>> err = talk_to_netback(np->xbdev, np);
>> if (err)
>> return err;
>> @@ -1657,9 +1899,24 @@ static int xennet_connect(struct net_device *dev)
>> spin_lock_bh(&np->rx_lock);
>> spin_lock_irq(&np->tx_lock);
>>
>> + np->tx_gnt_cnt = 0;
>> + np->rx_gnt_cnt = 0;
>> +
>> /* Step 1: Discard all pending TX packet fragments. */
>> xennet_release_tx_bufs(np);
>>
>> + if (np->persistent_gnt) {
>> + struct gnt_list *gnt_list_entry;
>> +
>> + while (np->rx_gnt_list) {
>> + gnt_list_entry = np->rx_gnt_list;
>> + np->rx_gnt_list = np->rx_gnt_list->tail;
>> + gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL);
>> + __free_page(gnt_list_entry->gnt_pages);
>> + kfree(gnt_list_entry);
>> + }
>> + }
>> +
>> /* Step 2: Rebuild the RX buffer freelist and the RX ring itself. */
>> for (requeue_idx = 0, i = 0; i< NET_RX_RING_SIZE; i++) {
>> skb_frag_t *frag;
>> @@ -1673,10 +1930,11 @@ static int xennet_connect(struct net_device *dev)
>>
>> frag =&skb_shinfo(skb)->frags[0];
>> page = skb_frag_page(frag);
>> - gnttab_grant_foreign_access_ref(
>> - ref, np->xbdev->otherend_id,
>> - pfn_to_mfn(page_to_pfn(page)),
>> - 0);
>> + ret = xennet_alloc_rx_ref(dev, pfn_to_mfn(page_to_pfn(page)),
>> + page_address(page), requeue_idx, ref);
>> + if ((signed short)ret< 0)
>> + break;
>> +
>> req->gref = ref;
>> req->id = requeue_idx;
>>
>> --
>> 1.7.3.4
>>
>>
>> _______________________________________________
>> Xen-devel mailing list
>> Xen-devel@...ts.xen.org
>> http://lists.xen.org/xen-devel
>>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists