lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 26 Mar 2020 15:54:42 -0700
From:   Chris Lew <clew@...eaurora.org>
To:     Manivannan Sadhasivam <manivannan.sadhasivam@...aro.org>,
        Bjorn Andersson <bjorn.andersson@...aro.org>
Cc:     gregkh@...uxfoundation.org, davem@...emloft.net,
        smohanad@...eaurora.org, jhugo@...eaurora.org,
        kvalo@...eaurora.org, hemantk@...eaurora.org,
        linux-arm-msm@...r.kernel.org, linux-kernel@...r.kernel.org,
        netdev@...r.kernel.org
Subject: Re: [PATCH v3 6/7] net: qrtr: Add MHI transport layer



On 3/25/2020 3:37 AM, Manivannan Sadhasivam wrote:
> Hi Bjorn,
> 
> + Chris Lew
> 
> On Tue, Mar 24, 2020 at 01:39:52PM -0700, Bjorn Andersson wrote:
>> On Mon 23 Mar 23:10 PDT 2020, Manivannan Sadhasivam wrote:
>>
>>> MHI is the transport layer used for communicating to the external modems.
>>> Hence, this commit adds MHI transport layer support to QRTR for
>>> transferring the QMI messages over IPC Router.
>>>
>>> Cc: "David S. Miller" <davem@...emloft.net>
>>> Cc: netdev@...r.kernel.org
>>> Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@...aro.org>
>>> ---
>>>   net/qrtr/Kconfig  |   7 ++
>>>   net/qrtr/Makefile |   2 +
>>>   net/qrtr/mhi.c    | 208 ++++++++++++++++++++++++++++++++++++++++++++++
>>>   3 files changed, 217 insertions(+)
>>>   create mode 100644 net/qrtr/mhi.c
>>>
>>> diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
>>> index 63f89cc6e82c..8eb876471564 100644
>>> --- a/net/qrtr/Kconfig
>>> +++ b/net/qrtr/Kconfig
>>> @@ -29,4 +29,11 @@ config QRTR_TUN
>>>   	  implement endpoints of QRTR, for purpose of tunneling data to other
>>>   	  hosts or testing purposes.
>>>   
>>> +config QRTR_MHI
>>> +	tristate "MHI IPC Router channels"
>>> +	depends on MHI_BUS
>>> +	help
>>> +	  Say Y here to support MHI based ipcrouter channels. MHI is the
>>> +	  transport used for communicating to external modems.
>>> +
>>>   endif # QRTR
>>> diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
>>> index 1c6d6c120fb7..3dc0a7c9d455 100644
>>> --- a/net/qrtr/Makefile
>>> +++ b/net/qrtr/Makefile
>>> @@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
>>>   qrtr-smd-y	:= smd.o
>>>   obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
>>>   qrtr-tun-y	:= tun.o
>>> +obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
>>> +qrtr-mhi-y	:= mhi.o
>>> diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
>>> new file mode 100644
>>> index 000000000000..90af208f34c1
>>> --- /dev/null
>>> +++ b/net/qrtr/mhi.c
>>> @@ -0,0 +1,208 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +/*
>>> + * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
>>> + */
>>> +
>>> +#include <linux/mhi.h>
>>> +#include <linux/mod_devicetable.h>
>>> +#include <linux/module.h>
>>> +#include <linux/skbuff.h>
>>> +#include <net/sock.h>
>>> +
>>> +#include "qrtr.h"
>>> +
>>> +struct qrtr_mhi_dev {
>>> +	struct qrtr_endpoint ep;
>>> +	struct mhi_device *mhi_dev;
>>> +	struct device *dev;
>>> +	spinlock_t ul_lock;		/* lock to protect ul_pkts */
>>> +	struct list_head ul_pkts;
>>> +	atomic_t in_reset;
>>> +};
>>> +
>>> +struct qrtr_mhi_pkt {
>>> +	struct list_head node;
>>> +	struct sk_buff *skb;
>>> +	struct kref refcount;
>>> +	struct completion done;
>>> +};
>>> +
>>> +static void qrtr_mhi_pkt_release(struct kref *ref)
>>> +{
>>> +	struct qrtr_mhi_pkt *pkt = container_of(ref, struct qrtr_mhi_pkt,
>>> +						refcount);
>>> +	struct sock *sk = pkt->skb->sk;
>>> +
>>> +	consume_skb(pkt->skb);
>>> +	if (sk)
>>> +		sock_put(sk);
>>> +
>>> +	kfree(pkt);
>>> +}
>>> +
>>> +/* From MHI to QRTR */
>>> +static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
>>> +				      struct mhi_result *mhi_res)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	int rc;
>>> +
>>> +	if (!qdev || mhi_res->transaction_status)
>>> +		return;
>>> +
>>> +	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
>>> +				mhi_res->bytes_xferd);
>>> +	if (rc == -EINVAL)
>>> +		dev_err(qdev->dev, "invalid ipcrouter packet\n");
>>
>> Perhaps this should be a debug print, perhaps rate limited. But either
>> way it's relevant for any transport, so I think you should skip it here
>> - and potentially move it into qrtr_endpoint_post() in some form.
>>
> 
> I agree with moving this to qrtr_endpoint_post() but I don't think it is a
> good idea to make it as a debug print. It is an error log and should stay
> as it is. Only in this MHI transport driver, the return value is not getting
> used but in others it is.
> 

This print has been useful for catching transport errors. Only issue I 
see with moving this to qrtr_endpoint_post() is that there are multiple 
points of failure in qrtr_endpoint_post() and logging it here makes it 
easier.

>>> +}
>>> +
>>> +/* From QRTR to MHI */
>>> +static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
>>> +				      struct mhi_result *mhi_res)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	unsigned long flags;
>>> +
>>> +	spin_lock_irqsave(&qdev->ul_lock, flags);
>>> +	pkt = list_first_entry(&qdev->ul_pkts, struct qrtr_mhi_pkt, node);
>>> +	list_del(&pkt->node);
>>> +	complete_all(&pkt->done);
>>
>> You should be able to release the lock after popping the item off the
>> list, then complete and refcount it.
>>
> 
> Okay.
> 
>>> +
>>> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
>>> +}
>>> +
>>> +static void qcom_mhi_qrtr_status_callback(struct mhi_device *mhi_dev,
>>> +					  enum mhi_callback mhi_cb)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	unsigned long flags;
>>> +
>>> +	if (mhi_cb != MHI_CB_FATAL_ERROR)
>>> +		return;
>>> +
>>> +	atomic_inc(&qdev->in_reset);
>>
>> You have ul_lock close at hand in both places where you access in_reset,
>> so I think it would be better to just use that, instead of an atomic.
>>
> 
> Okay.
> 

Does this version of MHI give the MHI_CB_FATAL_ERROR as a status 
callback? We added this as an early notifier workaround. The in_reset 
code probably isn't required with the current feature set.

>>> +	spin_lock_irqsave(&qdev->ul_lock, flags);
>>> +	list_for_each_entry(pkt, &qdev->ul_pkts, node)
>>> +		complete_all(&pkt->done);
> 
> Chris, shouldn't we require list_del(&pkt->node) here?
>

No this isn't a full cleanup, with the "early notifier" we just 
unblocked any threads waiting for the ul_callback. Those threads will 
wake, check in_reset, return an error back to the caller. Any list 
cleanup will be done in the ul_callbacks that the mhi bus will do for 
each queued packet right before device remove.

Again to simplify the code, we can probable remove the in_reset handling 
since it's not required with the current feature set.

>>> +	spin_unlock_irqrestore(&qdev->ul_lock, flags);
>>> +}
>>> +
>>> +/* Send data over MHI */
>>> +static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
>>> +	struct qrtr_mhi_pkt *pkt;
>>> +	int rc;
>>> +
>>> +	rc = skb_linearize(skb);
>>> +	if (rc) {
>>> +		kfree_skb(skb);
>>> +		return rc;
>>> +	}
>>> +
>>> +	pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
>>> +	if (!pkt) {
>>> +		kfree_skb(skb);
>>> +		return -ENOMEM;
>>> +	}
>>> +
>>> +	init_completion(&pkt->done);
>>> +	kref_init(&pkt->refcount);
>>> +	kref_get(&pkt->refcount);
>>> +	pkt->skb = skb;
>>> +
>>> +	spin_lock_bh(&qdev->ul_lock);
>>> +	list_add_tail(&pkt->node, &qdev->ul_pkts);
>>> +	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
>>> +			   MHI_EOT);
>>
>> Do you want to continue doing this when qdev->in_reset? Wouldn't it be
>> better to bail early if the remote end is dying?
>>
> 
> Now I'm thinking why we are not decrementing in_reset anywhere! Incase of
> SYS_ERR, the status_cb will get processed and in_reset will be set but
> it will stay so even when after MHI gets reset.
> 
> Chris, can you please clarify?
> 

Early notify of reset, if we do get SYS_ERR, we expect remove to be 
called *soon*.

>>> +	if (rc) {
>>> +		list_del(&pkt->node);
>>> +		/* Reference count needs to be dropped 2 times */
>>> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +		kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +		kfree_skb(skb);
>>> +		spin_unlock_bh(&qdev->ul_lock);
>>> +		return rc;
>>> +	}
>>> +
>>> +	spin_unlock_bh(&qdev->ul_lock);
>>> +	if (skb->sk)
>>> +		sock_hold(skb->sk);
>>> +
>>> +	rc = wait_for_completion_interruptible_timeout(&pkt->done, HZ * 5);
>>> +	if (atomic_read(&qdev->in_reset))
>>> +		rc = -ECONNRESET;
>>> +	else if (rc == 0)
>>> +		rc = -ETIMEDOUT;
>>
>> Is this recoverable? The message will remain on the list and may be
>> delivered at a later point(?), but qrtr and the app will learn that the
>> message was lost - which is presumably considered fatal.
>>
>> Is it guaranteed that qcom_mhi_qrtr_ul_callback() will happen later and
>> find the head of the list?
>>
> 
> There are 2 scenarios:
> 
> 1. If the completion interrupt happens after timeout, ul_callback()
> will be called. But it will only try to fetch the current head of ul_pkts.
> In most cases, we can hope that the completion interrupt will happen before
> next queue_skb().
> 
> 2. If we don't get completion interrupt, timeout will happen and at the final
> stage (during mhi_driver_remove()), MHI stack will go over the pending TREs
> for all channels in queue and call ul_callback() with -ENOTCONN. But in the
> callback, we don't have any idea of the pkt which was not successfully
> transferred to the device and currently just fetching first entry.
> 
> Now I'm seeing some issue here which I missed earlier. If the completion
> interrupt never happens then the corresponding pkt will never get freed and
> therefore we have a leak. Eventhough the ul_callback() will get called during
> mhi_driver_remove() for pending TREs, we don't exactly fetch the right pkt.
> 
> Chris, our assumption of the ul_callback() gets called irrespective of
> transfer status is wrong. I think this code needs a bit of rework.
> 
>>
>> The reason for my question is that without this you have one of two
>> scenarios;
>> 1) the message is put on the list, decremented in
>> qcom_mhi_qrtr_ul_callback() then we get back here and decrement it
>> again.
>> 2) the message is put on the list, then qcom_mhi_qrtr_status_callback()
>> happens and all messages are released - presumably then
>> qcom_mhi_qrtr_ul_callback() won't happen.
>>
>>
>> So if the third case (where we return here and then later
>> qcom_mhi_qrtr_ul_callback() must find this particular packet at the
>> front of the queue) can't happen, then you can just skip the entire
>> refcounting.
>>
>> Further more, you could carry qrtr_mhi_pkt on the stack.
>>
>>
>> ...or to flip this around, is there a reason to wait here at all? What
>> would happen if you just return immediately after calling
>> mhi_queue_skb()? Wouldn't that provide you better throughput?
>>
> 
> Chris would be best person to answer this question.
> 

Yea - after working with MHI for a bit now, I think we can definitely 
return after queueing the skb.

The IPC-Router Protocol, wasn't really designed with the ability to 
recover from dropped packets since SMD/GLINK are "Lossless". I figured 
it would be better for the client to definitely know that the packet 
reached the other side which is why it blocks until ul callback.

I thought having the client get an error on timeout and resend the 
packet would be better than silently dropping it. In practice, we've 
really only seen the timeout or ul_callback errors on unrecoverable 
errors so I think the timeout handling can definitely be redone.

Thanks,
Chris

>>> +	else if (rc > 0)
>>> +		rc = 0;
>>> +
>>> +	kref_put(&pkt->refcount, qrtr_mhi_pkt_release);
>>> +
>>> +	return rc;
>>> +}
>>> +
>>> +static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
>>> +			       const struct mhi_device_id *id)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev;
>>> +	u32 net_id;
>>> +	int rc;
>>> +
>>> +	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
>>> +	if (!qdev)
>>> +		return -ENOMEM;
>>> +
>>> +	qdev->mhi_dev = mhi_dev;
>>> +	qdev->dev = &mhi_dev->dev;
>>> +	qdev->ep.xmit = qcom_mhi_qrtr_send;
>>> +	atomic_set(&qdev->in_reset, 0);
>>> +
>>> +	net_id = QRTR_EP_NID_AUTO;
>>
>> Just pass QRTR_EP_NID_AUTO directly in the function call below.
>>
> 
> Okay.
> 
> Thanks,
> Mani
> 
>> Regards,
>> Bjorn
>>
>>> +
>>> +	INIT_LIST_HEAD(&qdev->ul_pkts);
>>> +	spin_lock_init(&qdev->ul_lock);
>>> +
>>> +	dev_set_drvdata(&mhi_dev->dev, qdev);
>>> +	rc = qrtr_endpoint_register(&qdev->ep, net_id);
>>> +	if (rc)
>>> +		return rc;
>>> +
>>> +	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
>>> +
>>> +	return 0;
>>> +}
>>> +
>>> +static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
>>> +{
>>> +	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
>>> +
>>> +	qrtr_endpoint_unregister(&qdev->ep);
>>> +	dev_set_drvdata(&mhi_dev->dev, NULL);
>>> +}
>>> +
>>> +static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
>>> +	{ .chan = "IPCR" },
>>> +	{}
>>> +};
>>> +MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
>>> +
>>> +static struct mhi_driver qcom_mhi_qrtr_driver = {
>>> +	.probe = qcom_mhi_qrtr_probe,
>>> +	.remove = qcom_mhi_qrtr_remove,
>>> +	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
>>> +	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
>>> +	.status_cb = qcom_mhi_qrtr_status_callback,
>>> +	.id_table = qcom_mhi_qrtr_id_table,
>>> +	.driver = {
>>> +		.name = "qcom_mhi_qrtr",
>>> +	},
>>> +};
>>> +
>>> +module_mhi_driver(qcom_mhi_qrtr_driver);
>>> +
>>> +MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
>>> +MODULE_LICENSE("GPL v2");
>>> -- 
>>> 2.17.1
>>>

-- 
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a 
Linux Foundation Collaborative Project

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ