lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-ID:
 <SLXP216MB1148F3ED4920B12EECF84C03EDC9A@SLXP216MB1148.KORP216.PROD.OUTLOOK.COM>
Date: Mon, 17 Nov 2025 02:07:14 +0000
From: jackson.lee <jackson.lee@...psnmedia.com>
To: Nicolas Dufresne <nicolas.dufresne@...labora.com>, "mchehab@...nel.org"
	<mchehab@...nel.org>, "hverkuil-cisco@...all.nl" <hverkuil-cisco@...all.nl>,
	"bob.beckett@...labora.com" <bob.beckett@...labora.com>
CC: "linux-media@...r.kernel.org" <linux-media@...r.kernel.org>,
	"linux-kernel@...r.kernel.org" <linux-kernel@...r.kernel.org>, lafley.kim
	<lafley.kim@...psnmedia.com>, "b-brnich@...com" <b-brnich@...com>,
	"hverkuil@...all.nl" <hverkuil@...all.nl>, Nas Chung
	<nas.chung@...psnmedia.com>
Subject: RE: [PATCH v6 4/4] media: chips-media: wave5: Improve performance of
 decoder

Hi Nicolas,

> >  }
> >
> >  static int wave5_vpu_dec_querycap(struct file *file, void *fh, struct
> > v4l2_capability *cap) @@ -1142,11 +1141,31 @@ static int
> write_to_ringbuffer(struct vpu_instance *inst, void *buffer, size_t b
> >  	return 0;
> >  }
> >
> > +static struct vpu_src_buffer *inst_src_buf_remove(struct vpu_instance
> > +*inst) {
> > +	struct vpu_src_buffer *b;
> > +
> > +	if (list_empty(&inst->avail_src_bufs))
> > +		return NULL;
> > +	inst->queued_count--;
> 
> The only use of this counter can be replaced with list_empty().
> 
> > +	b = list_first_entry(&inst->avail_src_bufs, struct vpu_src_buffer,
> list);
> > +	list_del(&b->list);
> > +	b->list.prev = NULL;
> > +	b->list.next = NULL;
> > +	INIT_LIST_HEAD(&b->list);
> 
> These 3 lines above are simply:
> 
> 	list_del_init(&b->list);
> 
> > +	if (inst->queued_count == 0) {
> > +		inst->avail_src_bufs.prev = NULL;
> > +		inst->avail_src_bufs.next = NULL;
> > +		INIT_LIST_HEAD(&inst->avail_src_bufs);
> > +	}
> 
> I don't think this is needed, once empty that list_empty() condition
> should be met again.



I will do that.

> 
> > +	return b;
> > +}
> > +
> >  static int fill_ringbuffer(struct vpu_instance *inst)
> >  {
> >  	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
> > -	struct v4l2_m2m_buffer *buf, *n;
> > -	int ret;
> > +	struct vpu_src_buffer *vpu_buf;
> > +	int ret = 0;
> >
> >  	if (m2m_ctx->last_src_buf)  {
> >  		struct vpu_src_buffer *vpu_buf =
> > wave5_to_vpu_src_buf(m2m_ctx->last_src_buf);
> > @@ -1157,9 +1176,8 @@ static int fill_ringbuffer(struct vpu_instance
> *inst)
> >  		}
> >  	}
> >
> > -	v4l2_m2m_for_each_src_buf_safe(m2m_ctx, buf, n) {
> > -		struct vb2_v4l2_buffer *vbuf = &buf->vb;
> > -		struct vpu_src_buffer *vpu_buf = wave5_to_vpu_src_buf(vbuf);
> > +	while ((vpu_buf = inst_src_buf_remove(inst)) != NULL) {
> > +		struct vb2_v4l2_buffer *vbuf = &vpu_buf->v4l2_m2m_buf.vb;
> >  		struct vpu_buf *ring_buffer = &inst->bitstream_vbuf;
> >  		size_t src_size = vb2_get_plane_payload(&vbuf->vb2_buf, 0);
> >  		void *src_buf = vb2_plane_vaddr(&vbuf->vb2_buf, 0); @@ -
> 1219,9
> > +1237,12 @@ static int fill_ringbuffer(struct vpu_instance *inst)
> >  			dev_dbg(inst->dev->dev, "last src buffer written to
> the ring buffer\n");
> >  			break;
> >  		}
> > +
> > +		inst->queuing_num++;
> > +		break;
> >  	}
> >
> > -	return 0;
> > +	return ret;
> >  }
> >
> >  static void wave5_vpu_dec_buf_queue_src(struct vb2_buffer *vb) @@
> > -1233,7 +1254,8 @@ static void wave5_vpu_dec_buf_queue_src(struct
> > vb2_buffer *vb)
> >
> >  	vpu_buf->consumed = false;
> >  	vbuf->sequence = inst->queued_src_buf_num++;
> > -
> > +	list_add_tail(&vpu_buf->list, &inst->avail_src_bufs);
> > +	inst->queued_count++;
> >  	v4l2_m2m_buf_queue(m2m_ctx, vbuf);
> >  }
> >
> > @@ -1288,10 +1310,16 @@ static void wave5_vpu_dec_buf_queue(struct
> vb2_buffer *vb)
> >  		__func__, vb->type, vb->index, vb2_plane_size(&vbuf->vb2_buf,
> 0),
> >  		vb2_plane_size(&vbuf->vb2_buf, 1), vb2_plane_size(&vbuf-
> >vb2_buf,
> > 2));
> >
> > -	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE)
> > +	if (vb->type == V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE) {
> > +		mutex_lock(&inst->feed_lock);
> >  		wave5_vpu_dec_buf_queue_src(vb);
> > -	else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE)
> > +
> > +		if (inst->empty_queue)
> > +			inst->empty_queue = false;
> > +		mutex_unlock(&inst->feed_lock);
> > +	} else if (vb->type == V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE) {
> >  		wave5_vpu_dec_buf_queue_dst(vb);
> > +	}
> >  }
> >
> >  static int wave5_vpu_dec_allocate_ring_buffer(struct vpu_instance
> > *inst) @@ -1385,6 +1413,17 @@ static int streamoff_output(struct
> vb2_queue *q)
> >  	dma_addr_t new_rd_ptr;
> >  	struct dec_output_info dec_info;
> >  	unsigned int i;
> > +	struct vpu_src_buffer *vpu_buf, *tmp;
> > +
> > +	inst->retry = false;
> > +	inst->queuing_num = 0;
> > +	inst->queued_count = 0;
> > +	mutex_lock(&inst->feed_lock);
> > +	list_for_each_entry_safe(vpu_buf, tmp, &inst->avail_src_bufs, list)
> {
> > +		vpu_buf->consumed = false;
> > +		list_del(&vpu_buf->list);
> > +	}
> > +	mutex_unlock(&inst->feed_lock);
> >
> >  	for (i = 0; i < v4l2_m2m_num_dst_bufs_ready(m2m_ctx); i++) {
> >  		ret = wave5_vpu_dec_set_disp_flag(inst, i); @@ -1470,6
> +1509,7 @@
> > static void wave5_vpu_dec_stop_streaming(struct vb2_queue *q)
> >  {
> >  	struct vpu_instance *inst = vb2_get_drv_priv(q);
> >  	struct v4l2_m2m_ctx *m2m_ctx = inst->v4l2_fh.m2m_ctx;
> > +
> >  	bool check_cmd = TRUE;
> >
> >  	dev_dbg(inst->dev->dev, "%s: type: %u\n", __func__, q->type); @@
> > -1480,11 +1520,10 @@ static void wave5_vpu_dec_stop_streaming(struct
> vb2_queue *q)
> >  		struct dec_output_info dec_output_info;
> >
> >  		wave5_vpu_dec_give_command(inst, DEC_GET_QUEUE_STATUS,
> &q_status);
> > -
> > -		if (q_status.report_queue_count == 0)
> > -			break;
> > -
> > -		if (wave5_vpu_wait_interrupt(inst, VPU_DEC_TIMEOUT) < 0)
> > +		if ((inst->state == VPU_INST_STATE_STOP ||
> > +		     inst->state == VPU_INST_STATE_INIT_SEQ ||
> > +		     q_status.instance_queue_count == 0) &&
> > +			q_status.report_queue_count == 0)
> >  			break;
> >
> >  		if (wave5_vpu_dec_get_output_info(inst, &dec_output_info))
> @@
> > -1498,6 +1537,8 @@ static void wave5_vpu_dec_stop_streaming(struct
> vb2_queue *q)
> >  	else
> >  		streamoff_capture(q);
> >
> > +	inst->empty_queue = false;
> > +	inst->sent_eos = false;
> >  	pm_runtime_put_autosuspend(inst->dev->dev);
> >  }
> >
> > @@ -1576,13 +1617,24 @@ static void wave5_vpu_dec_device_run(void *priv)
> >  	struct queue_status_info q_status;
> >  	u32 fail_res = 0;
> >  	int ret = 0;
> > +	unsigned long flags;
> >
> >  	dev_dbg(inst->dev->dev, "%s: Fill the ring buffer with new
> bitstream data", __func__);
> >  	pm_runtime_resume_and_get(inst->dev->dev);
> > -	ret = fill_ringbuffer(inst);
> > -	if (ret) {
> > -		dev_warn(inst->dev->dev, "Filling ring buffer failed\n");
> > -		goto finish_job_and_return;
> > +	if (!inst->retry) {
> > +		mutex_lock(&inst->feed_lock);
> > +		ret = fill_ringbuffer(inst);
> > +		mutex_unlock(&inst->feed_lock);
> > +		if (ret < 0) {
> > +			dev_warn(inst->dev->dev, "Filling ring buffer
> failed\n");
> > +			goto finish_job_and_return;
> > +		} else if (!inst->eos &&
> > +				inst->queuing_num == 0 &&
> > +				inst->state == VPU_INST_STATE_PIC_RUN) {
> > +			dev_dbg(inst->dev->dev, "%s: no bitstream for feeding,
> so skip ", __func__);
> > +			inst->empty_queue = true;
> > +			goto finish_job_and_return;
> > +		}
> >  	}
> >
> >  	switch (inst->state) {
> > @@ -1607,7 +1659,9 @@ static void wave5_vpu_dec_device_run(void *priv)
> >  			}
> >  			spin_unlock_irqrestore(&inst->state_spinlock, flags);
> >  		} else {
> > +			spin_lock_irqsave(&inst->state_spinlock, flags);
> 
> Move the locking inside the set_state function.
> 
> cheers,
> Nicolas

I think the locking should not be moved into the set_state function (switch_state),
because send_eos_event, handle_dynamic_resolution_change and flag_last_buffer_done already have lockdep_assert_held(&inst->state_spinlock) inside them.
So even if the locking were moved into switch_state to make the above code more concise, the lock would still have to be taken again outside for those functions.


Thanks
Jackson


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ