linux-kernel - Re: [Xen-devel] [PATCH 3/3] xen-blkback: fix shutdown race

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <52EFCACF.6080604@citrix.com>
Date:	Mon, 3 Feb 2014 17:58:55 +0100
From:	Roger Pau Monné <roger.pau@...rix.com>
To:	Jan Beulich <JBeulich@...e.com>
CC:	Matt Rushton <mrushton@...zon.com>, Matt Wilson <msw@...zon.com>,
	David Vrabel <david.vrabel@...rix.com>,
	Ian Campbell <Ian.Campbell@...rix.com>,
	<xen-devel@...ts.xenproject.org>,
	Boris Ostrovsky <boris.ostrovsky@...cle.com>,
	<linux-kernel@...r.kernel.org>
Subject: Re: [Xen-devel] [PATCH 3/3] xen-blkback: fix shutdown race

On 29/01/14 09:52, Jan Beulich wrote:
>>>> On 28.01.14 at 18:43, Roger Pau Monne <roger.pau@...rix.com> wrote:
>> --- a/drivers/block/xen-blkback/blkback.c
>> +++ b/drivers/block/xen-blkback/blkback.c
>> @@ -985,17 +985,31 @@ static void __end_block_io_op(struct pending_req 
>> *pending_req, int error)
>>  	 * the proper response on the ring.
>>  	 */
>>  	if (atomic_dec_and_test(&pending_req->pendcnt)) {
>> -		xen_blkbk_unmap(pending_req->blkif,
>> +		struct xen_blkif *blkif = pending_req->blkif;
>> +
>> +		xen_blkbk_unmap(blkif,
>>  		                pending_req->segments,
>>  		                pending_req->nr_pages);
>> -		make_response(pending_req->blkif, pending_req->id,
>> +		make_response(blkif, pending_req->id,
>>  			      pending_req->operation, pending_req->status);
>> -		xen_blkif_put(pending_req->blkif);
>> -		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
>> -			if (atomic_read(&pending_req->blkif->drain))
>> -				complete(&pending_req->blkif->drain_complete);
>> +		free_req(blkif, pending_req);
>> +		/*
>> +		 * Make sure the request is freed before releasing blkif,
>> +		 * or there could be a race between free_req and the
>> +		 * cleanup done in xen_blkif_free during shutdown.
>> +		 *
>> +		 * NB: The fact that we might try to wake up pending_free_wq
>> +		 * before drain_complete (in case there's a drain going on)
>> +		 * it's not a problem with our current implementation
>> +		 * because we can assure there's no thread waiting on
>> +		 * pending_free_wq if there's a drain going on, but it has
>> +		 * to be taken into account if the current model is changed.
>> +		 */
>> +		xen_blkif_put(blkif);
>> +		if (atomic_read(&blkif->refcnt) <= 2) {
>> +			if (atomic_read(&blkif->drain))
>> +				complete(&blkif->drain_complete);
>>  		}
>> -		free_req(pending_req->blkif, pending_req);
>>  	}
>>  }
> 
> The put is still too early imo - you're explicitly accessing field in the
> structure immediately afterwards. This may not be an issue at
> present, but I think it's at least a latent one.
> 
> Apart from that, the two if()s would - at least to me - be more
> clear if combined into one.

In order to get rid of the race I had to introduce yet another atomic_t 
in xen_blkif struct, which is something I don't really like, but I 
could not see any other way to solve this. If that's fine I will resend 
the series, here is the reworked patch:

---
diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c
index dcfe49f..d9b8cd3 100644
--- a/drivers/block/xen-blkback/blkback.c
+++ b/drivers/block/xen-blkback/blkback.c
@@ -945,7 +945,7 @@ static void xen_blk_drain_io(struct xen_blkif *blkif)
 	do {
 		/* The initial value is one, and one refcnt taken at the
 		 * start of the xen_blkif_schedule thread. */
-		if (atomic_read(&blkif->refcnt) <= 2)
+		if (atomic_read(&blkif->inflight) == 0)
 			break;
 		wait_for_completion_interruptible_timeout(
 				&blkif->drain_complete, HZ);
@@ -985,17 +985,30 @@ static void __end_block_io_op(struct pending_req *pending_req, int error)
 	 * the proper response on the ring.
 	 */
 	if (atomic_dec_and_test(&pending_req->pendcnt)) {
-		xen_blkbk_unmap(pending_req->blkif,
+		struct xen_blkif *blkif = pending_req->blkif;
+
+		xen_blkbk_unmap(blkif,
 		                pending_req->segments,
 		                pending_req->nr_pages);
-		make_response(pending_req->blkif, pending_req->id,
+		make_response(blkif, pending_req->id,
 			      pending_req->operation, pending_req->status);
-		xen_blkif_put(pending_req->blkif);
-		if (atomic_read(&pending_req->blkif->refcnt) <= 2) {
-			if (atomic_read(&pending_req->blkif->drain))
-				complete(&pending_req->blkif->drain_complete);
+		free_req(blkif, pending_req);
+		/*
+		 * Make sure the request is freed before releasing blkif,
+		 * or there could be a race between free_req and the
+		 * cleanup done in xen_blkif_free during shutdown.
+		 *
+		 * NB: The fact that we might try to wake up pending_free_wq
+		 * before drain_complete (in case there's a drain going on)
+		 * it's not a problem with our current implementation
+		 * because we can assure there's no thread waiting on
+		 * pending_free_wq if there's a drain going on, but it has
+		 * to be taken into account if the current model is changed.
+		 */
+		if (atomic_dec_and_test(&blkif->inflight) && atomic_read(&blkif->drain)) {
+			complete(&blkif->drain_complete);
 		}
-		free_req(pending_req->blkif, pending_req);
+		xen_blkif_put(blkif);
 	}
 }
 
@@ -1249,6 +1262,7 @@ static int dispatch_rw_block_io(struct xen_blkif *blkif,
 	 * below (in "!bio") if we are handling a BLKIF_OP_DISCARD.
 	 */
 	xen_blkif_get(blkif);
+	atomic_inc(&blkif->inflight);
 
 	for (i = 0; i < nseg; i++) {
 		while ((bio == NULL) ||
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index f733d76..e40326a 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -278,6 +278,7 @@ struct xen_blkif {
 	/* for barrier (drain) requests */
 	struct completion	drain_complete;
 	atomic_t		drain;
+	atomic_t		inflight;
 	/* One thread per one blkif. */
 	struct task_struct	*xenblkd;
 	unsigned int		waiting_reqs;
diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c
index 8afef67..84973c6 100644
--- a/drivers/block/xen-blkback/xenbus.c
+++ b/drivers/block/xen-blkback/xenbus.c
@@ -128,6 +128,7 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid)
 	INIT_LIST_HEAD(&blkif->persistent_purge_list);
 	blkif->free_pages_num = 0;
 	atomic_set(&blkif->persistent_gnt_in_use, 0);
+	atomic_set(&blkif->inflight, 0);
 
 	INIT_LIST_HEAD(&blkif->pending_free);
 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/