lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <9a11394d-f7df-e549-8afb-0834f7d30202@quicinc.com>
Date:   Fri, 10 Jun 2022 08:00:37 -0600
From:   Jeffrey Hugo <quic_jhugo@...cinc.com>
To:     Qiang Yu <quic_qianyu@...cinc.com>, <mani@...nel.org>,
        <quic_hemantk@...cinc.com>, <loic.poulain@...aro.org>
CC:     <mhi@...ts.linux.dev>, <linux-arm-msm@...r.kernel.org>,
        <linux-kernel@...r.kernel.org>, <quic_cang@...cinc.com>
Subject: Re: [PATCH] bus: mhi: Disable IRQs instead of freeing them during
 power down

On 6/9/2022 9:21 PM, Qiang Yu wrote:
> On 6/9/2022 9:54 PM, Jeffrey Hugo wrote:
> 
>> On 6/9/2022 7:43 AM, Qiang Yu wrote:
>>> EP tends to read MSI address/data once and cache them after BME is set.
>>> So host should avoid changing MSI address/data after BME is set.
>>>
>>> In pci reset function, host invokes free_irq(), which also clears MSI
>>> address/data in EP's PCIe config space. If the invalid address/data
>>> are cached and used by EP, MSI triggered by EP wouldn't be received by
>>> host, because an invalid MSI data is sent to an invalid MSI address.
>>>
>>> To fix this issue, after host runs request_irq() successfully during
>>> mhi driver probe, let's invoke enable_irq()/disable_irq() instead of
>>> request_irq()/free_irq() when we want to power on and power down MHI.
>>> Meanwhile, Host should invoke free_irq() when mhi host driver is
>>> removed.
>>
>> I don't think this works for hotplug, nor cases where there are 
>> multiple MHI devices on the system.
>>
>> The EP shouldn't be caching this information for multiple reasons. 
>> Masking the MSIs, disabling the MSIs, changing the address when the 
>> affinity changes, etc.
>>
>> It really feels like we are solving the problem in the wrong place.
>>
>> Right now, this gets a NACK from me.
>>
> After free_irq(), MSI is still enabled but the MSI address and data are 
> cleared. So there is a chance that the device initiates an MSI using a 
> zero address. How can this race condition be fixed?

On what system is MSI still enabled?  I just removed the AIC100 
controller on a random x86 system, and lspci is indicating MSIs are 
disabled -

Capabilities: [50] MSI: Enable- Count=32/32 Maskable+ 64bit+

> Maybe EP should not cache MSI data and address. But I think this patch 
> is necessary and we will talk with EP POC.
> 
>>>
>>> Signed-off-by: Qiang Yu <quic_qianyu@...cinc.com>
>>> ---
>>>   drivers/bus/mhi/host/init.c        | 31 +++++++++++++++++++++++++++++++
>>>   drivers/bus/mhi/host/pci_generic.c |  2 ++
>>>   drivers/bus/mhi/host/pm.c          |  4 ++--
>>>   3 files changed, 35 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/drivers/bus/mhi/host/init.c b/drivers/bus/mhi/host/init.c
>>> index cbb86b2..48cb093 100644
>>> --- a/drivers/bus/mhi/host/init.c
>>> +++ b/drivers/bus/mhi/host/init.c
>>> @@ -18,6 +18,7 @@
>>>   #include <linux/slab.h>
>>>   #include <linux/vmalloc.h>
>>>   #include <linux/wait.h>
>>> +#include <linux/irq.h>
>>
>> Should be in alphabetical order
>>
>>>   #include "internal.h"
>>>     static DEFINE_IDA(mhi_controller_ida);
>>> @@ -168,6 +169,22 @@ int mhi_init_irq_setup(struct mhi_controller 
>>> *mhi_cntrl)
>>>       unsigned long irq_flags = IRQF_SHARED | IRQF_NO_SUSPEND;
>>>       int i, ret;
>>>   +    /*
>>> +     * if irq[0] has action, it represents all MSI IRQs have been
>>> +     * requested, so we just need to enable them.
>>> +     */
>>
>> This seems like an assumption about how the interrupts are allocated 
>> and assigned that may not hold true for all devices.
> 
> All interrupts are allocated and assigned together in mhi_pci_get_irqs() 
> and mhi_init_irq_setup().
> 
> So I think that if irq[0] has an action, the other IRQs must have been 
> requested successfully. If any other MSI request fails, irq[0] would have been freed.
> 
>>> +    if (irq_has_action(mhi_cntrl->irq[0])) {
>>> +        enable_irq(mhi_cntrl->irq[0]);
>>> +
>>> +        for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
>>> +            if (mhi_event->offload_ev)
>>> +                continue;
>>> +
>>> +            enable_irq(mhi_cntrl->irq[mhi_event->irq]);
>>> +        }
>>> +        return 0;
>>> +    }
>>> +
>>>       /* if controller driver has set irq_flags, use it */
>>>       if (mhi_cntrl->irq_flags)
>>>           irq_flags = mhi_cntrl->irq_flags;
>>> @@ -179,6 +196,11 @@ int mhi_init_irq_setup(struct mhi_controller 
>>> *mhi_cntrl)
>>>                      "bhi", mhi_cntrl);
>>>       if (ret)
>>>           return ret;
>>> +    /*
>>> +     * IRQ marked IRQF_SHARED isn't recommended to use IRQ_NOAUTOEN,
>>> +     * so disable it explicitly.
>>> +     */
>>> +    disable_irq(mhi_cntrl->irq[0]);
>>>         for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
>>>           if (mhi_event->offload_ev)
>>> @@ -200,6 +222,8 @@ int mhi_init_irq_setup(struct mhi_controller 
>>> *mhi_cntrl)
>>>                   mhi_cntrl->irq[mhi_event->irq], i);
>>>               goto error_request;
>>>           }
>>> +
>>> +        disable_irq(mhi_cntrl->irq[mhi_event->irq]);
>>>       }
>>>         return 0;
>>> @@ -1003,8 +1027,14 @@ int mhi_register_controller(struct 
>>> mhi_controller *mhi_cntrl,
>>>         mhi_create_debugfs(mhi_cntrl);
>>>   +    ret = mhi_init_irq_setup(mhi_cntrl);
>>> +    if (ret)
>>> +        goto error_setup_irq;
>>> +
>>>       return 0;
>>>   +error_setup_irq:
>>> +    mhi_destroy_debugfs(mhi_cntrl);
>>>   err_release_dev:
>>>       put_device(&mhi_dev->dev);
>>>   err_ida_free:
>>> @@ -1027,6 +1057,7 @@ void mhi_unregister_controller(struct 
>>> mhi_controller *mhi_cntrl)
>>>       struct mhi_chan *mhi_chan = mhi_cntrl->mhi_chan;
>>>       unsigned int i;
>>>   +    mhi_deinit_free_irq(mhi_cntrl);
>>>       mhi_destroy_debugfs(mhi_cntrl);
>>>         destroy_workqueue(mhi_cntrl->hiprio_wq);
>>> diff --git a/drivers/bus/mhi/host/pci_generic.c 
>>> b/drivers/bus/mhi/host/pci_generic.c
>>> index 6fbc591..60020d0 100644
>>> --- a/drivers/bus/mhi/host/pci_generic.c
>>> +++ b/drivers/bus/mhi/host/pci_generic.c
>>> @@ -945,6 +945,8 @@ static void mhi_pci_remove(struct pci_dev *pdev)
>>>         mhi_unregister_controller(mhi_cntrl);
>>>       pci_disable_pcie_error_reporting(pdev);
>>> +
>>> +    pci_free_irq_vectors(pdev);
>>>   }
>>>     static void mhi_pci_shutdown(struct pci_dev *pdev)
>>> diff --git a/drivers/bus/mhi/host/pm.c b/drivers/bus/mhi/host/pm.c
>>> index dc2e8ff..190231c 100644
>>> --- a/drivers/bus/mhi/host/pm.c
>>> +++ b/drivers/bus/mhi/host/pm.c
>>> @@ -500,7 +500,7 @@ static void mhi_pm_disable_transition(struct 
>>> mhi_controller *mhi_cntrl)
>>>       for (i = 0; i < mhi_cntrl->total_ev_rings; i++, mhi_event++) {
>>>           if (mhi_event->offload_ev)
>>>               continue;
>>> -        free_irq(mhi_cntrl->irq[mhi_event->irq], mhi_event);
>>> +        disable_irq(mhi_cntrl->irq[mhi_event->irq]);
>>>           tasklet_kill(&mhi_event->task);
>>>       }
>>>   @@ -1182,7 +1182,7 @@ void mhi_power_down(struct mhi_controller 
>>> *mhi_cntrl, bool graceful)
>>>       /* Wait for shutdown to complete */
>>>       flush_work(&mhi_cntrl->st_worker);
>>>   -    free_irq(mhi_cntrl->irq[0], mhi_cntrl);
>>> +    disable_irq(mhi_cntrl->irq[0]);
>>>   }
>>>   EXPORT_SYMBOL_GPL(mhi_power_down);
>>

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ