linux-kernel - Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc usb requests

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite for Android: free password hash cracker in your pocket
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <915ef27a-11c8-49ba-8f8a-b4524b85c75a@ideasonboard.com>
Date:   Thu, 2 Nov 2023 16:07:28 +0000
From:   Dan Scally <dan.scally@...asonboard.com>
To:     Jayant Chowdhary <jchowdhary@...gle.com>,
        stern@...land.harvard.edu, laurent.pinchart@...asonboard.com,
        m.grzeschik@...gutronix.de, gregkh@...uxfoundation.org
Cc:     Thinh.Nguyen@...opsys.com, arakesh@...gle.com, etalvala@...gle.com,
        linux-kernel@...r.kernel.org, linux-usb@...r.kernel.org
Subject: Re: [PATCH v3] usb:gadget:uvc Do not use worker thread to queue isoc
 usb requests

Hi Jayant - thanks for the patch

On 02/11/2023 06:01, Jayant Chowdhary wrote:
> When we use an async work queue to perform the function of pumping
> usb requests to the usb controller, it is possible that amongst other
> factors, thread scheduling affects at what cadence we're able to pump
> requests. This could mean isoc usb requests miss their uframes - resulting
> in video stream flickers on the host device.
>
> To avoid this, we make the async_wq thread only produce isoc usb_requests
> with uvc buffers encoded into them. The process of queueing to the
> endpoint is done by the uvc_video_complete() handler. In case no
> usb_requests are ready with encoded information, we just queue a zero
> length request to the endpoint from the complete handler.
>
> For bulk endpoints the async_wq thread still queues usb requests to the
> endpoint.
>
> Signed-off-by: Michael Grzeschik <m.grzeschik@...gutronix.de>
> Signed-off-by: Jayant Chowdhary <jchowdhary@...gle.com>
> Suggested-by: Avichal Rakesh <arakesh@...gle.com>
> Suggested-by: Alan Stern <stern@...land.harvard.edu>
> ---
>   Based on top of
>   https://lore.kernel.org/linux-usb/20230930184821.310143-1-arakesh@google.com/T/#t:
>   v1->v2: Added self Signed-Off-by and addressed review comments
>   v2->v3: Encode to usb requests in async_wq; queue to ep in complete handler
> 	 for isoc transfers.
>
>   drivers/usb/gadget/function/uvc.h       |   8 +
>   drivers/usb/gadget/function/uvc_video.c | 187 +++++++++++++++++++-----
>   2 files changed, 156 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/usb/gadget/function/uvc.h b/drivers/usb/gadget/function/uvc.h
> index e8d4c87f1e09..82c783410554 100644
> --- a/drivers/usb/gadget/function/uvc.h
> +++ b/drivers/usb/gadget/function/uvc.h
> @@ -105,7 +105,15 @@ struct uvc_video {
>   	bool is_enabled; /* tracks whether video stream is enabled */
>   	unsigned int req_size;
>   	struct list_head ureqs; /* all uvc_requests allocated by uvc_video */
> +
> +	/* USB requests video pump thread can encode into*/

"USB requests that the video pump thread can encode into", and a space before the closing */ please 
(and the same a few more times below).

>   	struct list_head req_free;
> +
> +	/*
> +	 * USB requests video pump thread has already encoded into. These are
> +	 * ready to be queued to the endpoint.
> +	 */
> +	struct list_head req_ready;
>   	spinlock_t req_lock;
>   
>   	unsigned int req_int_count;
> diff --git a/drivers/usb/gadget/function/uvc_video.c b/drivers/usb/gadget/function/uvc_video.c
> index 53feb790a4c3..c84183e9afcc 100644
> --- a/drivers/usb/gadget/function/uvc_video.c
> +++ b/drivers/usb/gadget/function/uvc_video.c
> @@ -268,6 +268,98 @@ static int uvcg_video_ep_queue(struct uvc_video *video, struct usb_request *req)
>   	return ret;
>   }
>   
> +/* This function must be called with video->req_lock held*/
> +static int uvcg_video_usb_req_queue(struct uvc_video *video,
> +	struct usb_request *req, bool queue_to_ep) {
Brace on a new line please - same a few more times below
> +	bool is_bulk = video->max_payload_size;
empty line here
> +	if (!video->is_enabled) {
> +		uvc_video_free_request(req->context, video->ep);
> +		return -ENODEV;
> +	}
> +	if (queue_to_ep) {
> +		struct uvc_request *ureq = req->context;
> +		/*
> +		 * With USB3 handling more requests at a higher speed, we can't
> +		 * afford to generate an interrupt for every request. Decide to
> +		 * interrupt:
> +		 *
> +		 * - When no more requests are available in the free queue, as
> +		 *   this may be our last chance to refill the endpoint's
> +		 *   request queue.
> +		 *
> +		 * - When this is request is the last request for the video
> +		 *   buffer, as we want to start sending the next video buffer
> +		 *   ASAP in case it doesn't get started already in the next
> +		 *   iteration of this loop.
> +		 *
> +		 * - Four times over the length of the requests queue (as
> +		 *   indicated by video->uvc_num_requests), as a trade-off
> +		 *   between latency and interrupt load.
> +		*/
> +		if (list_empty(&video->req_free) || ureq->last_buf ||
> +			!(video->req_int_count %
> +			DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> +			video->req_int_count = 0;
> +			req->no_interrupt = 0;
> +		} else {
> +			req->no_interrupt = 1;
> +		}
> +		video->req_int_count++;
> +		return uvcg_video_ep_queue(video, req);
> +	} else {
> +		/*
> +		* If we're not queing to the ep, for isoc we're queing
> +		* to the req_ready list, otherwise req_free.
> +		*/
> +		struct list_head *list =
> +			is_bulk ? &video->req_free : &video->req_ready;
> +		list_add_tail(&req->list, list);
> +	}
> +	return 0;
> +}
> +
> +static int uvcg_video_ep_queue_zero_length(struct usb_request *req,
> +	struct uvc_video *video) {
> +	req->length = 0;
> +	return uvcg_video_ep_queue(video, req);
> +}
Not sure this is worth its own function
> +
> +/* Must only be called from uvcg_video_enable - since after that we only want to
> + * queue requests to the endpoint from the uvc_video_complete complete handler.
> + * This function is needed in order to 'kick start' the flow of requests from
> + * gadget driver to the usb controller.
> + */
> +static void uvc_video_ep_queue_initial_requests(struct uvc_video *video) {
> +	struct usb_request *req = NULL;
> +	unsigned long flags = 0;
> +	unsigned int count = 0;
> +	int ret = 0;
> +	/* We only queue half of the free list since we still want to have
> +	 * some free usb_requests in the free list for the video_pump async_wq
> +	 * thread to encode uvc buffers into. Otherwise we could get into a
> +	 * situation where the free list does not have any usb requests to
> +	 * encode into - we always end up queueing 0 length requests to the
> +	 * end point.
> +	 */
> +	unsigned half_list_size = video->uvc_num_requests / 2;
> +	spin_lock_irqsave(&video->req_lock, flags);
> +	/* Take these requests off the free list and queue them all to the
> +	 * endpoint. Since we queue the requests with the req_lock held,
> +	 */

This comment seems to be incomplete? You also want an opening /* on its own line:


/*
  * Multi line comments
  * look like this
  */

> +	while (count < half_list_size) {
> +		req = list_first_entry(&video->req_free, struct usb_request,
> +					list);
> +		list_del(&req->list);
> +		ret = uvcg_video_ep_queue_zero_length(req, video);
> +		if (ret < 0) {
> +			uvcg_queue_cancel(&video->queue, /*disconnect*/0);
> +			break;
> +		}
> +		count++;
> +	}
> +	spin_unlock_irqrestore(&video->req_lock, flags);
> +}
> +

So if I'm understanding the new starting sequence right for an isoc endpoint there's an initial 
flight of half the requests (between 2 and 32) that are queued as zero length - the very first one 
to .complete() being re-queued as a zero length request before the workqueue is started and encodes 
data into the _other_ half of the requests which were left in req_free and putting them into 
req_ready. At that point the .complete()s being run start to pick requests off req_ready instead and 
they get sent out with data...does that sound right?


What are the implications of those initial 3-33 zero length requests? What kind of latency can that 
introduce to the start of the video stream?

>   static void
>   uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   {
> @@ -276,6 +368,8 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	struct uvc_video_queue *queue = &video->queue;
>   	struct uvc_buffer *last_buf = NULL;
>   	unsigned long flags;
> +	bool is_bulk = video->max_payload_size;
> +	int ret = 0;
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	if (!video->is_enabled) {
> @@ -329,7 +423,38 @@ uvc_video_complete(struct usb_ep *ep, struct usb_request *req)
>   	 * back to req_free
>   	 */
>   	if (video->is_enabled) {
> -		list_add_tail(&req->list, &video->req_free);
> +		/*
> +		 * Here we check whether any request is available in the ready
> +		 * list. If it is, queue it to the ep and add the current
> +		 * usb_request to the req_free list - for video_pump to fill in.
> +		 * Otherwise, just use the current usb_request to queue a 0
> +		 * length request to the ep. Since we always add to the req_free
> +		 * list if we dequeue from the ready list, there will never
> +		 * be a situation where the req_free list is completely out of
> +		 * requests and cannot recover.
> +		 */
> +		struct usb_request *to_queue = req;
> +		to_queue->length = 0;
> +		if (!list_empty(&video->req_ready)) {
> +			to_queue = list_first_entry(&video->req_ready,
> +				struct usb_request, list);
> +			list_del(&to_queue->list);
> +			/* Add it to the free list. */
> +			list_add_tail(&req->list, &video->req_free);
> +		}
> +		/*
> +		 * Queue to the endpoint. The actual queueing to ep will
> +		 * only happen on one thread - the async_wq for bulk endpoints
> +		 * and this thread for isoc endpoints.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, to_queue,
> +					       /*queue_to_ep*/!is_bulk);


In principle in-line comments are fine, but I don't think the parameter name is worth a comment

> +		if(ret < 0) {
> +			uvcg_queue_cancel(queue, 0);
> +		}
> +		/* Queue work to the wq as well since its possible that a buffer
> +		 * may not have been completed.
> +		 */


The phrasing of this implies this is a bit of defensive programming, but if we don't queue to the wq 
here then doesn't that mean it'll never run?

>   		queue_work(video->async_wq, &video->pump);
>   	} else {
>   		uvc_video_free_request(ureq, ep);
> @@ -347,6 +472,7 @@ uvc_video_free_requests(struct uvc_video *video)
>   
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	return 0;
>   }
> @@ -424,8 +550,7 @@ static void uvcg_video_pump(struct work_struct *work)
>   	struct usb_request *req = NULL;
>   	struct uvc_buffer *buf;
>   	unsigned long flags;
> -	bool buf_done;
> -	int ret;
> +	int ret = 0;
>   
>   	while (true) {
>   		if (!video->ep->enabled)
> @@ -454,7 +579,6 @@ static void uvcg_video_pump(struct work_struct *work)
>   
>   		if (buf != NULL) {
>   			video->encode(req, video, buf);
> -			buf_done = buf->state == UVC_BUF_STATE_DONE;
>   		} else if (!(queue->flags & UVC_QUEUE_DISCONNECTED) && !is_bulk) {
>   			/*
>   			 * No video buffer available; the queue is still connected and
> @@ -462,7 +586,6 @@ static void uvcg_video_pump(struct work_struct *work)
>   			 * prevent missed ISOC transfers.
>   			 */
>   			req->length = 0;
> -			buf_done = false;
>   		} else {
>   			/*
>   			 * Either the queue has been disconnected or no video buffer
> @@ -473,45 +596,26 @@ static void uvcg_video_pump(struct work_struct *work)
>   			break;
>   		}
>   
> -		/*
> -		 * With USB3 handling more requests at a higher speed, we can't
> -		 * afford to generate an interrupt for every request. Decide to
> -		 * interrupt:
> -		 *
> -		 * - When no more requests are available in the free queue, as
> -		 *   this may be our last chance to refill the endpoint's
> -		 *   request queue.
> -		 *
> -		 * - When this is request is the last request for the video
> -		 *   buffer, as we want to start sending the next video buffer
> -		 *   ASAP in case it doesn't get started already in the next
> -		 *   iteration of this loop.
> -		 *
> -		 * - Four times over the length of the requests queue (as
> -		 *   indicated by video->uvc_num_requests), as a trade-off
> -		 *   between latency and interrupt load.
> -		 */
> -		if (list_empty(&video->req_free) || buf_done ||
> -		    !(video->req_int_count %
> -		       DIV_ROUND_UP(video->uvc_num_requests, 4))) {
> -			video->req_int_count = 0;
> -			req->no_interrupt = 0;
> -		} else {
> -			req->no_interrupt = 1;
> -		}
> -
> -		/* Queue the USB request */
> -		ret = uvcg_video_ep_queue(video, req);
>   		spin_unlock_irqrestore(&queue->irqlock, flags);
>   
> +		/* Queue the USB request.*/
I think just drop this - it was always superfluous.
> +		spin_lock_irqsave(&video->req_lock, flags);
> +		/* For bulk end points we queue from the worker thread
> +		 * since we would preferably not want to wait on requests
> +		 * to be ready, in the uvcg_video_complete() handler.
> +		 * For isoc endpoints we add the request to the ready list
> +		 * and only queue it to the endpoint from the complete handler.
> +		 */
> +		ret = uvcg_video_usb_req_queue(video, req, is_bulk);
> +		spin_unlock_irqrestore(&video->req_lock, flags);
> +
>   		if (ret < 0) {
>   			uvcg_queue_cancel(queue, 0);
>   			break;
>   		}
>   
> -		/* Endpoint now owns the request */
> +		/* The request is owned by  the endpoint / ready list*/
>   		req = NULL;
> -		video->req_int_count++;
>   	}
>   
>   	if (!req)
> @@ -567,7 +671,7 @@ uvcg_video_disable(struct uvc_video *video)
>   
>   	spin_lock_irqsave(&video->req_lock, flags);
>   	/*
> -	 * Remove all uvc_reqeusts from ureqs with list_del_init
> +	 * Remove all uvc_requests from ureqs with list_del_init
This should get fixed in the earlier series.
>   	 * This lets uvc_video_free_request correctly identify
>   	 * if the uvc_request is attached to a list or not when freeing
>   	 * memory.
> @@ -579,9 +683,13 @@ uvcg_video_disable(struct uvc_video *video)
>   		list_del(&req->list);
>   		uvc_video_free_request(req->context, video->ep);
>   	}
> -
keep the empty line please
> +	list_for_each_entry_safe(req, temp, &video->req_ready, list) {
> +		list_del(&req->list);
> +		uvc_video_free_request(req->context, video->ep);
> +	}
and one here too.
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	video->req_size = 0;
>   	spin_unlock_irqrestore(&video->req_lock, flags);
>   
> @@ -635,7 +743,7 @@ int uvcg_video_enable(struct uvc_video *video)
>   
>   	video->req_int_count = 0;
>   
> -	queue_work(video->async_wq, &video->pump);
> +	uvc_video_ep_queue_initial_requests(video);
>   
>   	return ret;
>   }
> @@ -648,6 +756,7 @@ int uvcg_video_init(struct uvc_video *video, struct uvc_device *uvc)
>   	video->is_enabled = false;
>   	INIT_LIST_HEAD(&video->ureqs);
>   	INIT_LIST_HEAD(&video->req_free);
> +	INIT_LIST_HEAD(&video->req_ready);
>   	spin_lock_init(&video->req_lock);
>   	INIT_WORK(&video->pump, uvcg_video_pump);
>