linux-kernel - Re: [PATCH v3 7/9] xen/blkback: separate ring information out of struct xen

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <56188F4A.3060802@oracle.com>
Date:	Sat, 10 Oct 2015 12:08:42 +0800
From:	Bob Liu <bob.liu@...cle.com>
To:	Roger Pau Monné <roger.pau@...rix.com>
CC:	xen-devel@...ts.xen.org, david.vrabel@...rix.com,
	linux-kernel@...r.kernel.org, konrad.wilk@...cle.com,
	felipe.franciosi@...rix.com, axboe@...com, hch@...radead.org,
	avanzini.arianna@...il.com, rafal.mielniczuk@...rix.com,
	boris.ostrovsky@...cle.com, jonathan.davies@...rix.com
Subject: Re: [PATCH v3 7/9] xen/blkback: separate ring information out of
 struct xen_blkif


On 10/05/2015 10:55 PM, Roger Pau Monné wrote:
> El 05/09/15 a les 14.39, Bob Liu ha escrit:
>> Split per-ring information into a new structure, xen_blkif_ring, so that one vbd
>> device can be associated with one or more rings/hardware queues.
>>
>> This patch is a preparation for supporting multi hardware queues/rings.
>>
>> Signed-off-by: Arianna Avanzini <avanzini.arianna@...il.com>
>> Signed-off-by: Bob Liu <bob.liu@...cle.com>
>> ---
>>  drivers/block/xen-blkback/blkback.c |  365 ++++++++++++++++++-----------------
>>  drivers/block/xen-blkback/common.h  |   52 +++--
>>  drivers/block/xen-blkback/xenbus.c  |  130 +++++++------
>>  3 files changed, 295 insertions(+), 252 deletions(-)
>>
>> diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
>> index 954c002..fd02240 100644
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -113,71 +113,71 @@ module_param(log_stats, int, 0644);
>>  /* Number of free pages to remove on each call to gnttab_free_pages */
>>  #define NUM_BATCH_FREE_PAGES 10
>>  
>> -static inline int get_free_page(struct xen_blkif *blkif, struct page **page)
>> +static inline int get_free_page(struct xen_blkif_ring *ring, struct page **page)
>>  {
>>  	unsigned long flags;
>>  
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> -	if (list_empty(&blkif->free_pages)) {
>> -		BUG_ON(blkif->free_pages_num != 0);
>> -		spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>> +	if (list_empty(&ring->free_pages)) {
> 
> I'm afraid the pool of free pages should be per-device, not per-ring.
> 
>> +		BUG_ON(ring->free_pages_num != 0);
>> +		spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  		return gnttab_alloc_pages(1, page);
>>  	}
>> -	BUG_ON(blkif->free_pages_num == 0);
>> -	page[0] = list_first_entry(&blkif->free_pages, struct page, lru);
>> +	BUG_ON(ring->free_pages_num == 0);
>> +	page[0] = list_first_entry(&ring->free_pages, struct page, lru);
>>  	list_del(&page[0]->lru);
>> -	blkif->free_pages_num--;
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	ring->free_pages_num--;
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  
>>  	return 0;
>>  }
>>  
>> -static inline void put_free_pages(struct xen_blkif *blkif, struct page **page,
>> +static inline void put_free_pages(struct xen_blkif_ring *ring, struct page **page,
>>                                    int num)
>>  {
>>  	unsigned long flags;
>>  	int i;
>>  
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>>  	for (i = 0; i < num; i++)
>> -		list_add(&page[i]->lru, &blkif->free_pages);
>> -	blkif->free_pages_num += num;
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +		list_add(&page[i]->lru, &ring->free_pages);
>> +	ring->free_pages_num += num;
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  }
>>  
>> -static inline void shrink_free_pagepool(struct xen_blkif *blkif, int num)
>> +static inline void shrink_free_pagepool(struct xen_blkif_ring *ring, int num)
>>  {
>>  	/* Remove requested pages in batches of NUM_BATCH_FREE_PAGES */
>>  	struct page *page[NUM_BATCH_FREE_PAGES];
>>  	unsigned int num_pages = 0;
>>  	unsigned long flags;
>>  
>> -	spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> -	while (blkif->free_pages_num > num) {
>> -		BUG_ON(list_empty(&blkif->free_pages));
>> -		page[num_pages] = list_first_entry(&blkif->free_pages,
>> +	spin_lock_irqsave(&ring->free_pages_lock, flags);
>> +	while (ring->free_pages_num > num) {
>> +		BUG_ON(list_empty(&ring->free_pages));
>> +		page[num_pages] = list_first_entry(&ring->free_pages,
>>  		                                   struct page, lru);
>>  		list_del(&page[num_pages]->lru);
>> -		blkif->free_pages_num--;
>> +		ring->free_pages_num--;
>>  		if (++num_pages == NUM_BATCH_FREE_PAGES) {
>> -			spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +			spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  			gnttab_free_pages(num_pages, page);
>> -			spin_lock_irqsave(&blkif->free_pages_lock, flags);
>> +			spin_lock_irqsave(&ring->free_pages_lock, flags);
>>  			num_pages = 0;
>>  		}
>>  	}
>> -	spin_unlock_irqrestore(&blkif->free_pages_lock, flags);
>> +	spin_unlock_irqrestore(&ring->free_pages_lock, flags);
>>  	if (num_pages != 0)
>>  		gnttab_free_pages(num_pages, page);
>>  }
>>  
>>  #define vaddr(page) ((unsigned long)pfn_to_kaddr(page_to_pfn(page)))
>>  
>> -static int do_block_io_op(struct xen_blkif *blkif);
>> -static int dispatch_rw_block_io(struct xen_blkif *blkif,
>> +static int do_block_io_op(struct xen_blkif_ring *ring);
>> +static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
>>  				struct blkif_request *req,
>>  				struct pending_req *pending_req);
>> -static void make_response(struct xen_blkif *blkif, u64 id,
>> +static void make_response(struct xen_blkif_ring *ring, u64 id,
>>  			  unsigned short op, int st);
>>  
>>  #define foreach_grant_safe(pos, n, rbtree, node) \
>> @@ -198,19 +198,19 @@ static void make_response(struct xen_blkif *blkif, u64 id,
>>   * bit operations to modify the flags of a persistent grant and to count
>>   * the number of used grants.
>>   */
>> -static int add_persistent_gnt(struct xen_blkif *blkif,
>> +static int add_persistent_gnt(struct xen_blkif_ring *ring,
>>  			       struct persistent_gnt *persistent_gnt)
>>  {
>>  	struct rb_node **new = NULL, *parent = NULL;
>>  	struct persistent_gnt *this;
>>  
>> -	if (blkif->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> -		if (!blkif->vbd.overflow_max_grants)
>> -			blkif->vbd.overflow_max_grants = 1;
>> +	if (ring->persistent_gnt_c >= xen_blkif_max_pgrants) {
>> +		if (!ring->blkif->vbd.overflow_max_grants)
>> +			ring->blkif->vbd.overflow_max_grants = 1;
> 
> The same for the pool of persistent grants, it should be per-device and
> not per-ring.
> 
> And I think this issue is far worse than the others, because a frontend
> might use a persistent grant on different queues, forcing the backend to
> map the grant several times for each queue; this is not acceptable IMO.
> 

Hi Roger,

I realize that making persistent grants per-device instead of per-queue would complicate things.
Extra locks would be required to protect the per-device pool in both blkfront and blkback.

AFAIR, there was a discussion before about dropping persistent grant mapping altogether.
The only reason we kept this feature was backward compatibility.
So I think we should not complicate the xen-block code any further for the sake of a feature that is going to be dropped.

How about disabling feature-persistent when multi-queue is used?

-- 
Regards,
-Bob
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/