lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <BY5PR07MB8119BDCC597D72B64FCD3A3EB0929@BY5PR07MB8119.namprd07.prod.outlook.com>
Date:   Tue, 9 Mar 2021 05:39:20 +0000
From:   Sanket Parmar <sparmar@...ence.com>
To:     Peter Chen <peter.chen@...nel.org>
CC:     Pawel Laszczak <pawell@...ence.com>,
        "a-govindraju@...com" <a-govindraju@...com>,
        "linux-usb@...r.kernel.org" <linux-usb@...r.kernel.org>,
        "linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>,
        Rahul Kumar <kurahul@...ence.com>,
        "gregkh@...uxfoundation.org" <gregkh@...uxfoundation.org>,
        "frank.li@....com" <frank.li@....com>
Subject: RE: [PATCH] usb: cdns3: Coherent memory allocation optimization

Hi Peter,

> On 21-03-05 17:01:11, Sanket Parmar wrote:
> > Allocation of DMA coherent memory in atomic context using
> > dma_alloc_coherent() might fail on some platform. To fix it,
> > Replaced dma_alloc_coherent() with dma_pool API to allocate a
> > smaller chunk of DMA coherent memory for TRB rings.
> >
> > Also in cdns3_prepare_aligned_request_buf(), replaced
> > dma_alloc_coherent() with kmalloc and dma_map API to allocate
> > aligned request buffer of dynamic length.
> >
> 
> You do two changes in one commit, would you please split this one as
> two patches?
> 
> > Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
> 
> "commit" is not needed
> 
> > Reported by: Aswath Govindraju <a-govindraju@...com>
> 
> Reported-by:
>
I have split the change into two patches.
New patch series has been posted already.
 
> > Signed-off-by: Sanket Parmar <sparmar@...ence.com>
> > ---
> >  drivers/usb/cdns3/cdns3-gadget.c |  115 ++++++++++++++++++++++------------
> ---
> >  drivers/usb/cdns3/cdns3-gadget.h |    3 +
> >  2 files changed, 71 insertions(+), 47 deletions(-)
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
> gadget.c
> > index 582bfec..5fd6993 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.c
> > +++ b/drivers/usb/cdns3/cdns3-gadget.c
> > @@ -59,6 +59,7 @@
> >  #include <linux/dma-mapping.h>
> >  #include <linux/usb/gadget.h>
> >  #include <linux/module.h>
> > +#include <linux/dmapool.h>
> >  #include <linux/iopoll.h>
> >
> >  #include "core.h"
> > @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct
> cdns3_endpoint *priv_ep,
> >  	return priv_ep->trb_pool_dma + offset;
> >  }
> >
> > -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
> > -{
> > -	switch (priv_ep->type) {
> > -	case USB_ENDPOINT_XFER_ISOC:
> > -		return TRB_ISO_RING_SIZE;
> > -	case USB_ENDPOINT_XFER_CONTROL:
> > -		return TRB_CTRL_RING_SIZE;
> > -	default:
> > -		if (priv_ep->use_streams)
> > -			return TRB_STREAM_RING_SIZE;
> > -		else
> > -			return TRB_RING_SIZE;
> > -	}
> > -}
> > -
> >  static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
> >  {
> >  	struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> >
> >  	if (priv_ep->trb_pool) {
> > -		dma_free_coherent(priv_dev->sysdev,
> > -				  cdns3_ring_size(priv_ep),
> > -				  priv_ep->trb_pool, priv_ep->trb_pool_dma);
> > +		dma_pool_free(priv_dev->eps_dma_pool,
> > +				priv_ep->trb_pool, priv_ep->trb_pool_dma);
> >  		priv_ep->trb_pool = NULL;
> >  	}
> >  }
> > @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct
> cdns3_endpoint *priv_ep)
> >  int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
> >  {
> >  	struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> > -	int ring_size = cdns3_ring_size(priv_ep);
> > +	int ring_size = TRB_RING_SIZE;
> 
> You will use the same size for TRB ring region for control/bulk/isoc
> endpoint.
> 
As single DMA pool is used to allocate the ring buffer, different TRB ring
size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which fits
for all type of EPs is used.

> >  	int num_trbs = ring_size / TRB_SIZE;
> >  	struct cdns3_trb *link_trb;
> >
> > @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct
> cdns3_endpoint *priv_ep)
> >  		cdns3_free_trb_pool(priv_ep);
> >
> >  	if (!priv_ep->trb_pool) {
> > -		priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
> > -						       ring_size,
> > -						       &priv_ep->trb_pool_dma,
> > -						       GFP_DMA32 |
> GFP_ATOMIC);
> > +		priv_ep->trb_pool = dma_pool_alloc(priv_dev-
> >eps_dma_pool,
> > +						GFP_DMA32 | GFP_ATOMIC,
> > +						&priv_ep->trb_pool_dma);
> 
> dma_pool_alloc also allocates the whole thunk of TRB region. See the
> kernel-doc for dma_pool_create.
> 
>  * Given one of these pools, dma_pool_alloc()
>  * may be used to allocate memory.  Such memory will all have "consistent"

Yes,  dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs from the 
pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc allocates at least
7.2KB for TRB region per enabled EP.

> > +
> >  		if (!priv_ep->trb_pool)
> >  			return -ENOMEM;
> >
> > @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct
> cdns3_endpoint *priv_ep,
> >  	usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> >  					priv_ep->dir);
> >
> > -	if ((priv_req->flags & REQUEST_UNALIGNED) &&
> > -	    priv_ep->dir == USB_DIR_OUT && !request->status)
> > -		memcpy(request->buf, priv_req->aligned_buf->buf,
> > -		       request->length);
> > +	if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req-
> >aligned_buf) {
> > +		struct cdns3_aligned_buf *buf;
> > +
> > +		buf = priv_req->aligned_buf;
> > +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > +			buf->dir);
> > +		priv_req->flags &= ~REQUEST_UNALIGNED;
> > +
> > +		if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> > +			memcpy(request->buf, priv_req->aligned_buf->buf,
> > +			       request->length);
> > +		}
> > +
> > +		trace_cdns3_free_aligned_request(priv_req);
> > +		priv_req->aligned_buf->in_use = 0;
> > +		queue_work(system_freezable_wq,
> > +			   &priv_dev->aligned_buf_wq);
> > +		priv_req->aligned_buf = NULL;
> > +
> > +	}
> >
> >  	priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
> >  	/* All TRBs have finished, clear the counter */
> > @@ -898,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct
> work_struct *work)
> >  			 * interrupts.
> >  			 */
> >  			spin_unlock_irqrestore(&priv_dev->lock, flags);
> > -			dma_free_coherent(priv_dev->sysdev, buf->size,
> > -					  buf->buf, buf->dma);
> > +			kfree(buf->buf);
> >  			kfree(buf);
> >  			spin_lock_irqsave(&priv_dev->lock, flags);
> >  		}
> > @@ -925,27 +925,16 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> >  		if (!buf)
> >  			return -ENOMEM;
> >
> > -		buf->size = priv_req->request.length;
> > +		buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
> > +				usb_ep_align(&(priv_ep->endpoint), priv_req-
> >request.length)
> > +				: priv_req->request.length;
> >
> > -		buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> > -					      buf->size,
> > -					      &buf->dma,
> > -					      GFP_ATOMIC);
> > +		buf->buf = kmalloc(buf->size, GFP_ATOMIC);
> >  		if (!buf->buf) {
> >  			kfree(buf);
> >  			return -ENOMEM;
> >  		}
> >
> > -		if (priv_req->aligned_buf) {
> > -			trace_cdns3_free_aligned_request(priv_req);
> > -			priv_req->aligned_buf->in_use = 0;
> > -			queue_work(system_freezable_wq,
> > -				   &priv_dev->aligned_buf_wq);
> > -		}
> > -
> > -		buf->in_use = 1;
> > -		priv_req->aligned_buf = buf;
> > -
> >  		list_add_tail(&buf->list,
> >  			      &priv_dev->aligned_buf_list);
> >  	}
> > @@ -955,6 +944,27 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> >  		       priv_req->request.length);
> >  	}
> >
> > +	if (priv_req->aligned_buf) {
> > +		trace_cdns3_free_aligned_request(priv_req);
> > +		priv_req->aligned_buf->in_use = 0;
> > +		queue_work(system_freezable_wq,
> > +			   &priv_dev->aligned_buf_wq);
> > +	}
> > +
> > +	buf->dir =  priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > +	buf->in_use = 1;
> > +	priv_req->aligned_buf = buf;
> > +
> > +	buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
> > +				buf->dir);
> > +
> > +	if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
> > +		dev_err(priv_dev->dev, "Failed to map buffer\n");
> > +		kfree(buf->buf);
> > +		kfree(buf);
> > +		return -EFAULT;
> > +	}
> > +
> >  	priv_req->flags |= REQUEST_UNALIGNED;
> >  	trace_cdns3_prepare_aligned_request(priv_req);
> >
> > @@ -3103,16 +3113,17 @@ static void cdns3_gadget_exit(struct cdns
> *cdns)
> >  		struct cdns3_aligned_buf *buf;
> >
> >  		buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> > -		dma_free_coherent(priv_dev->sysdev, buf->size,
> > -				  buf->buf,
> > -				  buf->dma);
> > +		dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > +			buf->dir);
> >
> >  		list_del(&buf->list);
> > +		kfree(buf->buf);
> >  		kfree(buf);
> >  	}
> >
> >  	dma_free_coherent(priv_dev->sysdev, 8, priv_dev->setup_buf,
> >  			  priv_dev->setup_dma);
> > +	dma_pool_destroy(priv_dev->eps_dma_pool);
> >
> >  	kfree(priv_dev->zlp_buf);
> >  	usb_put_gadget(&priv_dev->gadget);
> > @@ -3185,6 +3196,14 @@ static int cdns3_gadget_start(struct cdns *cdns)
> >  	/* initialize endpoint container */
> >  	INIT_LIST_HEAD(&priv_dev->gadget.ep_list);
> >  	INIT_LIST_HEAD(&priv_dev->aligned_buf_list);
> > +	priv_dev->eps_dma_pool = dma_pool_create("cdns3_eps_dma_pool",
> > +						priv_dev->sysdev,
> > +						TRB_RING_SIZE, 8, 0);
> > +	if (!priv_dev->eps_dma_pool) {
> > +		dev_err(priv_dev->dev, "Failed to create TRB dma pool\n");
> > +		ret = -ENOMEM;
> > +		goto err1;
> > +	}
> >
> >  	ret = cdns3_init_eps(priv_dev);
> >  	if (ret) {
> > @@ -3235,6 +3254,8 @@ static int cdns3_gadget_start(struct cdns *cdns)
> >  err2:
> >  	cdns3_free_all_eps(priv_dev);
> >  err1:
> > +	dma_pool_destroy(priv_dev->eps_dma_pool);
> > +
> >  	usb_put_gadget(&priv_dev->gadget);
> >  	cdns->gadget_dev = NULL;
> >  	return ret;
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-
> gadget.h
> > index 21fa461..c5660f2 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.h
> > +++ b/drivers/usb/cdns3/cdns3-gadget.h
> > @@ -12,6 +12,7 @@
> >  #ifndef __LINUX_CDNS3_GADGET
> >  #define __LINUX_CDNS3_GADGET
> >  #include <linux/usb/gadget.h>
> > +#include <linux/dma-direction.h>
> >
> >  /*
> >   * USBSS-DEV register interface.
> > @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf {
> >  	void			*buf;
> >  	dma_addr_t		dma;
> >  	u32			size;
> > +	enum dma_data_direction dir;
> >  	unsigned		in_use:1;
> >  	struct list_head	list;
> >  };
> > @@ -1298,6 +1300,7 @@ struct cdns3_device {
> >
> >  	struct cdns3_usb_regs		__iomem *regs;
> >
> > +	struct dma_pool			*eps_dma_pool;
> >  	struct usb_ctrlrequest		*setup_buf;
> >  	dma_addr_t			setup_dma;
> >  	void				*zlp_buf;
> > --
> > 1.7.1
> >
> 
> I guess this issue may due to the size for DMA region is too small,
> try to enlarge the it (eg, CMA size).
> 
> --
> 
> Thanks,
> Peter Chen

--
Thanks,
Sanket Parmar

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ