[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1194398700.2970.18.camel@dwillia2-linux.ch.intel.com>
Date: Tue, 06 Nov 2007 18:25:00 -0700
From: Dan Williams <dan.j.williams@...el.com>
To: BERTRAND Joël <joel.bertrand@...tella.fr>
Cc: Justin Piszcz <jpiszcz@...idpixels.com>,
Neil Brown <neilb@...e.de>, linux-kernel@...r.kernel.org,
linux-raid@...r.kernel.org, Jeff@...sem.org
Subject: Re: 2.6.23.1: mdadm/raid5 hung/d-state
On Tue, 2007-11-06 at 03:19 -0700, BERTRAND Joël wrote:
> Done. Here is obtained ouput :
Much appreciated.
>
> [ 1260.969314] handling stripe 7629696, state=0x14 cnt=1, pd_idx=2 ops=0:0:0
> [ 1260.980606] check 5: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ffcffcc0 written 0000000000000000
> [ 1260.994808] check 4: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fdd4e360 written 0000000000000000
> [ 1261.009325] check 3: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000
> [ 1261.244478] check 2: state 0x1 toread 0000000000000000 read 0000000000000000 write 0000000000000000 written 0000000000000000
> [ 1261.270821] check 1: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800ff517e40 written 0000000000000000
> [ 1261.312320] check 0: state 0x6 toread 0000000000000000 read 0000000000000000 write fffff800fd4cae60 written 0000000000000000
> [ 1261.361030] locked=4 uptodate=2 to_read=0 to_write=4 failed=0 failed_num=0
> [ 1261.443120] for sector 7629696, rmw=0 rcw=0
[..]
This looks as if the blocks were prepared to be written out, but were
never handled in ops_run_biodrain(), so they remain locked forever. The
operations flags are all clear which means handle_stripe thinks nothing
else needs to be done.
The following patch, also attached, cleans up cases where the code looks
at sh->ops.pending when it should be looking at the consistent
stack-based snapshot of the operations flags.
---
drivers/md/raid5.c | 16 +++++++++-------
1 files changed, 9 insertions(+), 7 deletions(-)
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 496b9a3..e1a3942 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -693,7 +693,8 @@ ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
}
static struct dma_async_tx_descriptor *
-ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+ unsigned long pending)
{
int disks = sh->disks;
int pd_idx = sh->pd_idx, i;
@@ -701,7 +702,7 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
/* check if prexor is active which means only process blocks
* that are part of a read-modify-write (Wantprexor)
*/
- int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
pr_debug("%s: stripe %llu\n", __FUNCTION__,
(unsigned long long)sh->sector);
@@ -778,7 +779,8 @@ static void ops_complete_write(void *stripe_head_ref)
}
static void
-ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
+ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx,
+ unsigned long pending)
{
/* kernel stack size limits the total number of disks */
int disks = sh->disks;
@@ -786,7 +788,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
int count = 0, pd_idx = sh->pd_idx, i;
struct page *xor_dest;
- int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
+ int prexor = test_bit(STRIPE_OP_PREXOR, &pending);
unsigned long flags;
dma_async_tx_callback callback;
@@ -813,7 +815,7 @@ ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx)
}
/* check whether this postxor is part of a write */
- callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ?
+ callback = test_bit(STRIPE_OP_BIODRAIN, &pending) ?
ops_complete_write : ops_complete_postxor;
/* 1/ if we prexor'd then the dest is reused as a source
@@ -901,12 +903,12 @@ static void raid5_run_ops(struct stripe_head *sh, unsigned long pending)
tx = ops_run_prexor(sh, tx);
if (test_bit(STRIPE_OP_BIODRAIN, &pending)) {
- tx = ops_run_biodrain(sh, tx);
+ tx = ops_run_biodrain(sh, tx, pending);
overlap_clear++;
}
if (test_bit(STRIPE_OP_POSTXOR, &pending))
- ops_run_postxor(sh, tx);
+ ops_run_postxor(sh, tx, pending);
if (test_bit(STRIPE_OP_CHECK, &pending))
ops_run_check(sh);
View attachment "raid5-fix-unending-write-sequence.patch" of type "text/x-patch" (2651 bytes)
Powered by blists - more mailing lists