netdev - Re: [PATCH v4 05/14] net-next/yunsilicon: Add eq and alloc

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250218171036.GB1615191@kernel.org>
Date: Tue, 18 Feb 2025 17:10:36 +0000
From: Simon Horman <horms@...nel.org>
To: Xin Tian <tianx@...silicon.com>
Cc: netdev@...r.kernel.org, leon@...nel.org, andrew+netdev@...n.ch,
	kuba@...nel.org, pabeni@...hat.com, edumazet@...gle.com,
	davem@...emloft.net, jeff.johnson@....qualcomm.com,
	przemyslaw.kitszel@...el.com, weihg@...silicon.com,
	wanry@...silicon.com, parthiban.veerasooran@...rochip.com,
	masahiroy@...nel.org
Subject: Re: [PATCH v4 05/14] net-next/yunsilicon: Add eq and alloc

On Thu, Feb 13, 2025 at 05:14:14PM +0800, Xin Tian wrote:
> Add eq management and buffer alloc apis
> 
> Signed-off-by: Xin Tian <tianx@...silicon.com>
> Signed-off-by: Honggang Wei <weihg@...silicon.com>

...

> diff --git a/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h b/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h

...

> +struct xsc_eq_table {
> +	void __iomem	       *update_ci;
> +	void __iomem	       *update_arm_ci;
> +	struct list_head       comp_eqs_list;

nit: The indentation of the member names above seems inconsistent
     with what is below.

> +	struct xsc_eq		pages_eq;
> +	struct xsc_eq		async_eq;
> +	struct xsc_eq		cmd_eq;
> +	int			num_comp_vectors;
> +	int			eq_vec_comp_base;
> +	/* protect EQs list
> +	 */
> +	spinlock_t		lock;
> +};

...

> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c

...

> +/* Handling for queue buffers -- we allocate a bunch of memory and
> + * register it in a memory region at HCA virtual address 0.  If the
> + * requested size is > max_direct, we split the allocation into
> + * multiple pages, so we don't require too much contiguous memory.
> + */

I can't help but think there is an existing API to handle this.

> +int xsc_buf_alloc(struct xsc_core_device *xdev, int size, int max_direct,

I think unsigned long would be a slightly better type for size and max_direct.

> +		  struct xsc_buf *buf)
> +{
> +	dma_addr_t t;
> +
> +	buf->size = size;
> +	if (size <= max_direct) {
> +		buf->nbufs        = 1;
> +		buf->npages       = 1;
> +		buf->page_shift   = get_order(size) + PAGE_SHIFT;
> +		buf->direct.buf   = dma_alloc_coherent(&xdev->pdev->dev,
> +						       size,
> +						       &t,
> +						       GFP_KERNEL | __GFP_ZERO);
> +		if (!buf->direct.buf)
> +			return -ENOMEM;
> +
> +		buf->direct.map = t;
> +
> +		while (t & ((1 << buf->page_shift) - 1)) {

I think GENMASK() can be used here.

> +			--buf->page_shift;
> +			buf->npages *= 2;
> +		}
> +	} else {
> +		int i;
> +
> +		buf->direct.buf  = NULL;
> +		buf->nbufs       = (size + PAGE_SIZE - 1) / PAGE_SIZE;

I think this is open-coding DIV_ROUND_UP

> +		buf->npages      = buf->nbufs;
> +		buf->page_shift  = PAGE_SHIFT;
> +		buf->page_list   = kcalloc(buf->nbufs, sizeof(*buf->page_list),
> +					   GFP_KERNEL);
> +		if (!buf->page_list)
> +			return -ENOMEM;
> +
> +		for (i = 0; i < buf->nbufs; i++) {
> +			buf->page_list[i].buf =
> +				dma_alloc_coherent(&xdev->pdev->dev, PAGE_SIZE,
> +						   &t, GFP_KERNEL | __GFP_ZERO);
> +			if (!buf->page_list[i].buf)
> +				goto err_free;
> +
> +			buf->page_list[i].map = t;
> +		}
> +
> +		if (BITS_PER_LONG == 64) {
> +			struct page **pages;
> +
> +			pages = kmalloc_array(buf->nbufs, sizeof(*pages),
> +					      GFP_KERNEL);
> +			if (!pages)
> +				goto err_free;
> +			for (i = 0; i < buf->nbufs; i++) {
> +				void *addr = buf->page_list[i].buf;
> +
> +				if (is_vmalloc_addr(addr))
> +					pages[i] = vmalloc_to_page(addr);
> +				else
> +					pages[i] = virt_to_page(addr);
> +			}
> +			buf->direct.buf = vmap(pages, buf->nbufs,
> +					       VM_MAP, PAGE_KERNEL);
> +			kfree(pages);
> +			if (!buf->direct.buf)
> +				goto err_free;
> +		}

I think some explanation is warranted of why the above is relevant
only when BITS_PER_LONG == 64.

> +	}
> +
> +	return 0;
> +
> +err_free:
> +	xsc_buf_free(xdev, buf);
> +
> +	return -ENOMEM;
> +}

...

> +void xsc_fill_page_array(struct xsc_buf *buf, __be64 *pas, int npages)

As per my comment on unsigned long in my response to another patch,
I think npages can be unsigned long.

> +{
> +	int shift = PAGE_SHIFT - PAGE_SHIFT_4K;
> +	int mask = (1 << shift) - 1;

Likewise, I think that mask should be an unsigned long.
Or, both shift and mask could be #defines, as they are compile-time
constants.

Also, mask can be generated using GENMASK, e.g.

#define XSC_PAGE_ARRAY_MASK GENMASK(PAGE_SHIFT, PAGE_SHIFT_4K)
#define XSC_PAGE_ARRAY_SHIFT (PAGE_SHIFT - PAGE_SHIFT_4K)

And I note, in the (common) case of 4k pages, that both shift and mask are 0.

> +	u64 addr;
> +	int i;
> +
> +	for (i = 0; i < npages; i++) {
> +		if (buf->nbufs == 1)
> +			addr = buf->direct.map + (i << PAGE_SHIFT_4K);
> +		else
> +			addr = buf->page_list[i >> shift].map
> +			       + ((i & mask) << PAGE_SHIFT_4K);

The line above is open-coding FIELD_PREP().
However, I don't think it can be used here as
the compiler complains very loudly because the mask is 0.

> +
> +		pas[i] = cpu_to_be64(addr);
> +	}
> +}
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h

...

> +static void eq_update_ci(struct xsc_eq *eq, int arm)
> +{
> +	struct xsc_eq_doorbell db = {0};
> +
> +	db.data0 = XSC_SET_FIELD(cpu_to_le32(eq->cons_index),
> +				 XSC_EQ_DB_NEXT_CID) |
> +		   XSC_SET_FIELD(cpu_to_le32(eq->eqn), XSC_EQ_DB_EQ_ID);

Each of the two uses of XSC_SET_FIELD() is passed a little-endian value
and a host byte-order mask. This does not seem correct, as it seems
the byte order should be consistent.

> +	if (arm)
> +		db.data0 |= XSC_EQ_DB_ARM;

Likewise, here data0 is little-endian while XSC_EQ_DB_ARM is host
byte-order.

> +	writel(db.data0, XSC_REG_ADDR(eq->dev, eq->doorbell));

And here, db.data0 is little-endian, but writel expects a host byte-order
value (which it converts to little-endian).

I didn't dig deeper but it seems to me that it would be easier to change
the type of data0 to host byte-order and drop the use of cpu_to_le32()
above.

Issues flagged by Sparse.

> +	/* We still want ordering, just not swabbing, so add a barrier */
> +	mb();
> +}

...

> +static int xsc_eq_int(struct xsc_core_device *xdev, struct xsc_eq *eq)
> +{
> +	u32 cqn, qpn, queue_id;
> +	struct xsc_eqe *eqe;
> +	int eqes_found = 0;
> +	int set_ci = 0;
> +
> +	while ((eqe = next_eqe_sw(eq))) {
> +		/* Make sure we read EQ entry contents after we've
> +		 * checked the ownership bit.
> +		 */
> +		rmb();
> +		switch (eqe->type) {
> +		case XSC_EVENT_TYPE_COMP:
> +		case XSC_EVENT_TYPE_INTERNAL_ERROR:
> +			/* eqe is changing */
> +			queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> +							     XSC_EQE_QUEUE_ID));

Similarly, here XSC_GET_FIELD() is passed a little-endian value and a host
byte-order mask, which is inconsistent.

Perhaps this should be (completely untested!):

			queue_id = XSC_GET_FIELD(le16_to_cpu(eqe->queue_id_data),
						 XSC_EQE_QUEUE_ID);

Likewise for the two uses of XSC_GET_FIELD below.

And perhaps queue_id could be renamed, say to q_id, to make things a bit
more succinct.


> +			cqn = queue_id;

I'm unsure why both cqn and queue_id are needed.

> +			xsc_cq_completion(xdev, cqn);
> +			break;
> +
> +		case XSC_EVENT_TYPE_CQ_ERROR:
> +			queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> +							     XSC_EQE_QUEUE_ID));
> +			cqn = queue_id;
> +			xsc_eq_cq_event(xdev, cqn, eqe->type);
> +			break;
> +		case XSC_EVENT_TYPE_WQ_CATAS_ERROR:
> +		case XSC_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
> +		case XSC_EVENT_TYPE_WQ_ACCESS_ERROR:
> +			queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> +							     XSC_EQE_QUEUE_ID));
> +			qpn = queue_id;
> +			xsc_qp_event(xdev, qpn, eqe->type);
> +			break;
> +		default:
> +			break;
> +		}
> +
> +		++eq->cons_index;
> +		eqes_found = 1;
> +		++set_ci;
> +
> +		/* The HCA will think the queue has overflowed if we
> +		 * don't tell it we've been processing events.  We
> +		 * create our EQs with XSC_NUM_SPARE_EQE extra
> +		 * entries, so we must update our consumer index at
> +		 * least that often.
> +		 */
> +		if (unlikely(set_ci >= XSC_NUM_SPARE_EQE)) {
> +			eq_update_ci(eq, 0);
> +			set_ci = 0;
> +		}
> +	}
> +
> +	eq_update_ci(eq, 1);
> +
> +	return eqes_found;
> +}

...