linux-kernel - Re: [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware queues/rings

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ED99C670-9E99-4721-845F-16E3AFA2CC21@oracle.com>
Date:	Wed, 04 Nov 2015 22:24:41 -0500
From:	Konrad Rzeszutek Wilk <konrad.wilk@...cle.com>
To:	Bob Liu <bob.liu@...cle.com>
CC:	xen-devel@...ts.xen.org, linux-kernel@...r.kernel.org,
	roger.pau@...rix.com, felipe.franciosi@...rix.com, axboe@...com,
	avanzini.arianna@...il.com, rafal.mielniczuk@...rix.com,
	jonathan.davies@...rix.com, david.vrabel@...rix.com
Subject: Re: [PATCH v4 07/10] xen/blkback: pseudo support for multi hardware queues/rings

On November 4, 2015 10:02:26 PM EST, Bob Liu <bob.liu@...cle.com> wrote:
>
>On 11/05/2015 10:30 AM, Konrad Rzeszutek Wilk wrote:
>> On Mon, Nov 02, 2015 at 12:21:43PM +0800, Bob Liu wrote:
>>> Preparatory patch for multiple hardware queues (rings). The number
>of
>>> rings is unconditionally set to 1, larger number will be enabled in
>next
>>> patch so as to make every single patch small and readable.
>> 
>> Instead of saying 'next patch' - spell out the title of the patch.
>> 
>> 
>>>
>>> Signed-off-by: Arianna Avanzini <avanzini.arianna@...il.com>
>>> Signed-off-by: Bob Liu <bob.liu@...cle.com>
>>> ---
>>>  drivers/block/xen-blkback/common.h |   3 +-
>>>  drivers/block/xen-blkback/xenbus.c | 292
>+++++++++++++++++++++++--------------
>>>  2 files changed, 185 insertions(+), 110 deletions(-)
>>>
>>> diff --git a/drivers/block/xen-blkback/common.h
>b/drivers/block/xen-blkback/common.h
>>> index f0dd69a..4de1326 100644
>>> --- a/drivers/block/xen-blkback/common.h
>>> +++ b/drivers/block/xen-blkback/common.h
>>> @@ -341,7 +341,8 @@ struct xen_blkif {
>>>  	struct work_struct	free_work;
>>>  	unsigned int nr_ring_pages;
>>>  	/* All rings for this device */
>>> -	struct xen_blkif_ring ring;
>>> +	struct xen_blkif_ring *rings;
>>> +	unsigned int nr_rings;
>>>  };
>>>  
>>>  struct seg_buf {
>>> diff --git a/drivers/block/xen-blkback/xenbus.c
>b/drivers/block/xen-blkback/xenbus.c
>>> index 7bdd5fd..ac4b458 100644
>>> --- a/drivers/block/xen-blkback/xenbus.c
>>> +++ b/drivers/block/xen-blkback/xenbus.c
>>> @@ -84,11 +84,12 @@ static int blkback_name(struct xen_blkif *blkif,
>char *buf)
>>>  
>>>  static void xen_update_blkif_status(struct xen_blkif *blkif)
>>>  {
>>> -	int err;
>>> +	int err, i;
>> 
>> unsigned int.
>>>  	char name[BLKBACK_NAME_LEN];
>>> +	struct xen_blkif_ring *ring;
>>>  
>>>  	/* Not ready to connect? */
>>> -	if (!blkif->ring.irq || !blkif->vbd.bdev)
>>> +	if (!blkif->rings || !blkif->rings[0].irq || !blkif->vbd.bdev)
>>>  		return;
>>>  
>>>  	/* Already connected? */
>>> @@ -113,19 +114,57 @@ static void xen_update_blkif_status(struct
>xen_blkif *blkif)
>>>  	}
>>>  	invalidate_inode_pages2(blkif->vbd.bdev->bd_inode->i_mapping);
>>>  
>>> -	blkif->ring.xenblkd = kthread_run(xen_blkif_schedule,
>&blkif->ring, "%s", name);
>>> -	if (IS_ERR(blkif->ring.xenblkd)) {
>>> -		err = PTR_ERR(blkif->ring.xenblkd);
>>> -		blkif->ring.xenblkd = NULL;
>>> -		xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> -		return;
>>> +	if (blkif->nr_rings == 1) {
>>> +		blkif->rings[0].xenblkd = kthread_run(xen_blkif_schedule,
>&blkif->rings[0], "%s", name);
>>> +		if (IS_ERR(blkif->rings[0].xenblkd)) {
>>> +			err = PTR_ERR(blkif->rings[0].xenblkd);
>>> +			blkif->rings[0].xenblkd = NULL;
>>> +			xenbus_dev_error(blkif->be->dev, err, "start xenblkd");
>>> +			return;
>>> +		}
>>> +	} else {
>>> +		for (i = 0; i < blkif->nr_rings; i++) {
>>> +			ring = &blkif->rings[i];
>>> +			ring->xenblkd = kthread_run(xen_blkif_schedule, ring, "%s-%d",
>name, i);
>>> +			if (IS_ERR(ring->xenblkd)) {
>>> +				err = PTR_ERR(ring->xenblkd);
>>> +				ring->xenblkd = NULL;
>>> +				xenbus_dev_error(blkif->be->dev, err,
>>> +						"start %s-%d xenblkd", name, i);
>>> +				return;
>>> +			}
>>> +		}
>> 
>> Please collapse this together and just have one loop.
>> 
>> And just use the same name throughout ('%s-%d')
>> 
>> Also this does not take care of actually freeing the allocated
>> ring->xenblkd if one of them fails to start.
>> 
>> Hmm, looking at this function .. we seem to forget to change the
>> state if something fails.
>> 
>> As in, connect switches the state to Connected.. And if anything
>blows
>> up after the connect() we don't switch the state. We do report an
>error
>> in the XenBus, but the state is the same.
>> 
>> We should be using xenbus_dev_fatal I believe - so at least the state
>> changes to Closing (and then the frontend can switch itself to
>> Closing->Closed) - and we will call 'xen_blkif_disconnect' on Closed.
>
>> 
>
>Will update..
>
>>> +	}
>>> +}
>>> +
>>> +static int xen_blkif_alloc_rings(struct xen_blkif *blkif)
>>> +{
>>> +	int r;
>> 
>> unsigned int i;
>> 
>>> +
>>> +	blkif->rings = kzalloc(blkif->nr_rings * sizeof(struct
>xen_blkif_ring), GFP_KERNEL);
>>> +	if (!blkif->rings)
>>> +		return -ENOMEM;
>>> +
>>> +	for (r = 0; r < blkif->nr_rings; r++) {
>>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +
>>> +		spin_lock_init(&ring->blk_ring_lock);
>>> +		init_waitqueue_head(&ring->wq);
>>> +		INIT_LIST_HEAD(&ring->pending_free);
>>> +
>>> +		spin_lock_init(&ring->pending_free_lock);
>>> +		init_waitqueue_head(&ring->pending_free_wq);
>>> +		init_waitqueue_head(&ring->shutdown_wq);
>>> +		ring->blkif = blkif;
>>> +		xen_blkif_get(blkif);
>>>  	}
>>> +
>>> +	return 0;
>>>  }
>>>  
>>>  static struct xen_blkif *xen_blkif_alloc(domid_t domid)
>>>  {
>>>  	struct xen_blkif *blkif;
>>> -	struct xen_blkif_ring *ring;
>>>  
>>>  	BUILD_BUG_ON(MAX_INDIRECT_PAGES >
>BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST);
>>>  
>>> @@ -136,27 +175,17 @@ static struct xen_blkif
>*xen_blkif_alloc(domid_t domid)
>>>  	blkif->domid = domid;
>>>  	atomic_set(&blkif->refcnt, 1);
>>>  	init_completion(&blkif->drain_complete);
>>> -	atomic_set(&blkif->drain, 0);
>>>  	INIT_WORK(&blkif->free_work, xen_blkif_deferred_free);
>>>  	spin_lock_init(&blkif->free_pages_lock);
>>>  	INIT_LIST_HEAD(&blkif->free_pages);
>>> -	blkif->free_pages_num = 0;
>>> -	blkif->persistent_gnts.rb_node = NULL;
>>>  	INIT_LIST_HEAD(&blkif->persistent_purge_list);
>>> -	atomic_set(&blkif->persistent_gnt_in_use, 0);
>>>  	INIT_WORK(&blkif->persistent_purge_work,
>xen_blkbk_unmap_purged_grants);
>>>  
>>> -	ring = &blkif->ring;
>>> -	ring->blkif = blkif;
>>> -	spin_lock_init(&ring->blk_ring_lock);
>>> -	init_waitqueue_head(&ring->wq);
>>> -	ring->st_print = jiffies;
>>> -	atomic_set(&ring->inflight, 0);
>>> -
>>> -	INIT_LIST_HEAD(&ring->pending_free);
>>> -	spin_lock_init(&ring->pending_free_lock);
>>> -	init_waitqueue_head(&ring->pending_free_wq);
>>> -	init_waitqueue_head(&ring->shutdown_wq);
>>> +	blkif->nr_rings = 1;
>>> +	if (xen_blkif_alloc_rings(blkif)) {
>>> +		kmem_cache_free(xen_blkif_cachep, blkif);
>>> +		return ERR_PTR(-ENOMEM);
>>> +	}
>>>  
>>>  	return blkif;
>>>  }
>>> @@ -218,50 +247,54 @@ static int xen_blkif_map(struct xen_blkif_ring
>*ring, grant_ref_t *gref,
>>>  static int xen_blkif_disconnect(struct xen_blkif *blkif)
>>>  {
>>>  	struct pending_req *req, *n;
>>> -	int i = 0, j;
>>> -	struct xen_blkif_ring *ring = &blkif->ring;
>>> +	int j, r;
>>>  
>> 
>> unsigned int i;
>>> -	if (ring->xenblkd) {
>>> -		kthread_stop(ring->xenblkd);
>>> -		wake_up(&ring->shutdown_wq);
>>> -		ring->xenblkd = NULL;
>>> -	}
>>> +	for (r = 0; r < blkif->nr_rings; r++) {
>>> +		struct xen_blkif_ring *ring = &blkif->rings[r];
>>> +		int i = 0;
>> 
>> unsigned int
>>>  
>>> -	/* The above kthread_stop() guarantees that at this point we
>>> -	 * don't have any discard_io or other_io requests. So, checking
>>> -	 * for inflight IO is enough.
>>> -	 */
>>> -	if (atomic_read(&ring->inflight) > 0)
>>> -		return -EBUSY;
>>> +		if (ring->xenblkd) {
>>> +			kthread_stop(ring->xenblkd);
>>> +			wake_up(&ring->shutdown_wq);
>>> +			ring->xenblkd = NULL;
>>> +		}
>>>  
>>> -	if (ring->irq) {
>>> -		unbind_from_irqhandler(ring->irq, ring);
>>> -		ring->irq = 0;
>>> -	}
>>> +		/* The above kthread_stop() guarantees that at this point we
>>> +		 * don't have any discard_io or other_io requests. So, checking
>>> +		 * for inflight IO is enough.
>>> +		 */
>>> +		if (atomic_read(&ring->inflight) > 0)
>>> +			return -EBUSY;
>>>  
>>> -	if (ring->blk_rings.common.sring) {
>>> -		xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> -		ring->blk_rings.common.sring = NULL;
>>> -	}
>>> +		if (ring->irq) {
>>> +			unbind_from_irqhandler(ring->irq, ring);
>>> +			ring->irq = 0;
>>> +		}
>>>  
>>> -	/* Remove all persistent grants and the cache of ballooned pages.
>*/
>>> -	xen_blkbk_free_caches(ring);
>>> +		if (ring->blk_rings.common.sring) {
>>> +			xenbus_unmap_ring_vfree(blkif->be->dev, ring->blk_ring);
>>> +			ring->blk_rings.common.sring = NULL;
>>> +		}
>>>  
>>> -	/* Check that there is no request in use */
>>> -	list_for_each_entry_safe(req, n, &ring->pending_free, free_list) {
>>> -		list_del(&req->free_list);
>>> +		/* Remove all persistent grants and the cache of ballooned pages.
>*/
>>> +		xen_blkbk_free_caches(ring);
>>>  
>>> -		for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> -			kfree(req->segments[j]);
>>> +		/* Check that there is no request in use */
>>> +		list_for_each_entry_safe(req, n, &ring->pending_free, free_list)
>{
>>> +			list_del(&req->free_list);
>>>  
>>> -		for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> -			kfree(req->indirect_pages[j]);
>>> +			for (j = 0; j < MAX_INDIRECT_SEGMENTS; j++)
>>> +				kfree(req->segments[j]);
>>>  
>>> -		kfree(req);
>>> -		i++;
>>> -	}
>>> +			for (j = 0; j < MAX_INDIRECT_PAGES; j++)
>>> +				kfree(req->indirect_pages[j]);
>>>  
>>> -	WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +			kfree(req);
>>> +			i++;
>>> +		}
>>> +
>>> +		WARN_ON(i != (XEN_BLKIF_REQS_PER_PAGE * blkif->nr_ring_pages));
>>> +	}
>>>  	blkif->nr_ring_pages = 0;
>>>  
>>>  	return 0;
>>> @@ -281,6 +314,7 @@ static void xen_blkif_free(struct xen_blkif
>*blkif)
>>>  	BUG_ON(!list_empty(&blkif->free_pages));
>>>  	BUG_ON(!RB_EMPTY_ROOT(&blkif->persistent_gnts));
>>>  
>>> +	kfree(blkif->rings);
>>>  	kmem_cache_free(xen_blkif_cachep, blkif);
>>>  }
>>>  
>>> @@ -307,15 +341,19 @@ int __init xen_blkif_interface_init(void)
>>>  		struct xenbus_device *dev = to_xenbus_device(_dev);	\
>>>  		struct backend_info *be = dev_get_drvdata(&dev->dev);	\
>>>  		struct xen_blkif *blkif = be->blkif;			\
>>> -		struct xen_blkif_ring *ring = &blkif->ring;		\
>>> +		int i;							\
>>> +									\
>>> +		for (i = 0; i < blkif->nr_rings; i++) {			\
>>> +			struct xen_blkif_ring *ring = &blkif->rings[i];	\
>>>  									\
>>> -		blkif->st_oo_req = ring->st_oo_req;			\
>>> -		blkif->st_rd_req = ring->st_rd_req;			\
>>> -		blkif->st_wr_req = ring->st_wr_req;			\
>>> -		blkif->st_f_req = ring->st_f_req;			\
>>> -		blkif->st_ds_req = ring->st_ds_req;			\
>>> -		blkif->st_rd_sect = ring->st_rd_sect;			\
>>> -		blkif->st_wr_sect = ring->st_wr_sect;			\
>>> +			blkif->st_oo_req += ring->st_oo_req;		\
>>> +			blkif->st_rd_req += ring->st_rd_req;		\
>>> +			blkif->st_wr_req += ring->st_wr_req;		\
>>> +			blkif->st_f_req += ring->st_f_req;		\
>>> +			blkif->st_ds_req += ring->st_ds_req;		\
>>> +			blkif->st_rd_sect += ring->st_rd_sect;		\
>>> +			blkif->st_wr_sect += ring->st_wr_sect;		\
>>> +		}							\
>> 
>> That needs fixing. You could alter the macro to only read the values
>> from the proper member.
>
>Do you think we still need these debug values for per-ring?
>I'm thinking of just leave them for per-device(blkif), but that
>requires extra lock to protect?

It should be per device. However I think you are OK with locking - as long as the sysfs is torn down before we deallocate the rings.



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/