[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+Da2qxNU9cuYLm3-RcTeqpSszeCtHw4bzQ2xe_=76mwaKiKSQ@mail.gmail.com>
Date: Tue, 27 Sep 2022 13:45:21 +0800
From: ιζθΆ
<wenchao.chen666@...il.com>
To: Adrian Hunter <adrian.hunter@...el.com>
Cc: Ulf Hansson <ulf.hansson@...aro.org>,
baolin.wang@...ux.alibaba.com, zhang.lyra@...il.com,
linux-mmc@...r.kernel.org, linux-kernel@...r.kernel.org,
megoo.tang@...il.com, lzx.stg@...il.com
Subject: Re: [PATCH] mmc: host: Fix data stomping during mmc recovery
On Tue, Sep 20, 2022 at 6:19 PM Adrian Hunter <adrian.hunter@...el.com> wrote:
>
> On 20/09/22 12:32, Ulf Hansson wrote:
> > + Adrian
> >
> > On Fri, 16 Sept 2022 at 11:05, Wenchao Chen <wenchao.chen666@...il.com> wrote:
> >>
> >> From: Wenchao Chen <wenchao.chen@...soc.com>
> >>
> >> The block device uses multiple queues to access emmc. There will be up to 3
> >> requests in the hsq of the host. The current code will check whether there
> >> is a request doing recovery before entering the queue, but it will not check
> >> whether there is a request when the lock is issued. The request is in recovery
> >> mode. If there is a request in recovery, then a read and write request is
> >> initiated at this time, and the conflict between the request and the recovery
> >> request will cause the data to be trampled.
> >>
> >> Signed-off-by: Wenchao Chen <wenchao.chen@...soc.com>
> >
> > Looks like we should consider tagging this for stable kernels too, right?
Yes,
Kernel crash log:
[ 242.987575] process reclaim queue work at vmpressure 79
[ 243.041611] CPU: 0 PID: 5 Comm: kworker/0:0 Tainted: G WC O
5.4.147-ab07227 #1
[ 243.041615] Hardware name: Generic DT based system
[ 243.041628] Workqueue: events mmc_mq_recovery_handler
[ 243.041638] PC is at mmc_blk_mq_recovery+0x2c/0x120
[ 243.041643] LR is at mmc_mq_recovery_handler+0x54/0xb8
[ 243.041648] pc : [<c0b9511c>] lr : [<c06e331c>] psr: 20030013
[ 243.041651] sp : ee941f00 ip : eed191a0 fp : ee941f08
[ 243.041655] r10: 00000000 r9 : e00aca0c r8 : e0243fc0
[ 243.041659] r7 : e0096000 r6 : eed1b280 r5 : 00000000 r4 : e00aca08
[ 243.041667] r3 : c0cb7fd0 r2 : 00000000 r1 : a68e26a3 r0 : e0096000
[ 243.059012] process reclaim queue work at vmpressure 72
dis -lx mmc_blk_mq_recovery
0xc0b950f0 <mmc_blk_mq_recovery>: push {r4, r5, r11, lr}
0xc0b950f4 <mmc_blk_mq_recovery+0x4>: add r11, sp, #8
0xc0b950f8 <mmc_blk_mq_recovery+0x8>: mov r4, r0
0xc0b950fc <mmc_blk_mq_recovery+0xc>: stmfd sp!, {lr}
0xc0b95100 <mmc_blk_mq_recovery+0x10>: ldmfd sp!, {lr}
0xc0b95104 <mmc_blk_mq_recovery+0x14>: ldr r0, [r4]
0xc0b95108 <mmc_blk_mq_recovery+0x18>: mov r2, #0
0xc0b9510c <mmc_blk_mq_recovery+0x1c>: ldr r5, [r4, #196] ; 0xc4
0xc0b95110 <mmc_blk_mq_recovery+0x20>: ldr r0, [r0]
0xc0b95114 <mmc_blk_mq_recovery+0x24>: str r2, [r4, #196] ; 0xc4
0xc0b95118 <mmc_blk_mq_recovery+0x28>: strb r2, [r4, #148] ; 0x94
0xc0b9511c <mmc_blk_mq_recovery+0x2c>: ldr r1, [r5, #404] ; 0x194
Analyze:
0xc0b9510c <mmc_blk_mq_recovery+0x1c>: ldr r5, [r4, #196] ; 0xc4
r4 = e00aca08
r4 + 0xc4 = E00ACACC
crash_arm> rd E00ACACC
e00acacc: ec00cc00
But r5 is 0, the correct value should be ec00cc00
Code:
void mmc_blk_mq_recovery(struct mmc_queue *mq)
{
struct request *req = mq->recovery_req;
struct mmc_host *host = mq->card->host;
struct mmc_queue_req *mqrq = req_to_mmc_queue_req(req);
mq->recovery_req = NULL;//<2>
mq->rw_wait = false;
if (mmc_blk_rq_error(&mqrq->brq)) {
mmc_retune_hold_now(host);
mmc_blk_mq_rw_recovery(mq, req);
}
mmc_blk_urgent_bkops(mq, mqrq);
mmc_blk_mq_post_req(mq, req, true);
}
static void mmc_blk_hsq_req_done(struct mmc_request *mrq)
{
struct mmc_queue_req *mqrq =
container_of(mrq, struct mmc_queue_req, brq.mrq);
struct request *req = mmc_queue_req_to_req(mqrq);
struct request_queue *q = req->q;
struct mmc_queue *mq = q->queuedata;
struct mmc_host *host = mq->card->host;
unsigned long flags;
if (mmc_blk_rq_error(&mqrq->brq) ||
mmc_blk_urgent_bkops_needed(mq, mqrq)) {
spin_lock_irqsave(&mq->lock, flags);
mq->recovery_needed = true;
mq->recovery_req = req; //<1>
spin_unlock_irqrestore(&mq->lock, flags);
host->cqe_ops->cqe_recovery_start(host);
schedule_work(&mq->recovery_work);
return;
}
mmc_blk_rw_reset_success(mq, req);
/*
* Block layer timeouts race with completions which means the normal
* completion path cannot be used during recovery.
*/
if (mq->in_recovery)
mmc_blk_cqe_complete_rq(mq, req);
else if (likely(!blk_should_fake_timeout(req->q)))
blk_mq_complete_request(req);
}
When <1> is executed, just after the previous work is executed <2>, at
this time, mq->recovery_req will be reassigned to NULL, causing the
kernel to crash.
So we need to wait for the recovery to complete before continuing to issue req.
> >
> >> ---
> >> drivers/mmc/host/mmc_hsq.c | 2 +-
> >> 1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/drivers/mmc/host/mmc_hsq.c b/drivers/mmc/host/mmc_hsq.c
> >> index a5e05ed0fda3..9d35453e7371 100644
> >> --- a/drivers/mmc/host/mmc_hsq.c
> >> +++ b/drivers/mmc/host/mmc_hsq.c
> >> @@ -34,7 +34,7 @@ static void mmc_hsq_pump_requests(struct mmc_hsq *hsq)
> >> spin_lock_irqsave(&hsq->lock, flags);
> >>
> >> /* Make sure we are not already running a request now */
> >> - if (hsq->mrq) {
> >> + if (hsq->mrq || hsq->recovery_halt) {
> >
> > This still looks a bit odd to me, but I may not fully understand the
> > code, as it's been a while since I looked at this.
> >
> > In particular, I wonder why the callers of mmc_hsq_pump_requests()
> > need to release the spin_lock before they call
> > mmc_hsq_pump_requests()? Is it because we want to allow some other
> > code that may be waiting for the spin_lock to be released, to run too?
>
> FWIW, I am not aware of any reason.
>
> >
> > If that isn't the case, it seems better to let the callers of
> > mmc_hsq_pump_requests() to keep holding the lock - and thus we can
> > avoid the additional check(s). In that case, it means the
> > "recovery_halt" flag has already been checked, for example.
> >
> >> spin_unlock_irqrestore(&hsq->lock, flags);
> >> return;
> >> }
> >> --
> >> 2.17.1
> >>
> >
> > Kind regards
> > Uffe
>
Powered by blists - more mailing lists