[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20250218171036.GB1615191@kernel.org>
Date: Tue, 18 Feb 2025 17:10:36 +0000
From: Simon Horman <horms@...nel.org>
To: Xin Tian <tianx@...silicon.com>
Cc: netdev@...r.kernel.org, leon@...nel.org, andrew+netdev@...n.ch,
kuba@...nel.org, pabeni@...hat.com, edumazet@...gle.com,
davem@...emloft.net, jeff.johnson@....qualcomm.com,
przemyslaw.kitszel@...el.com, weihg@...silicon.com,
wanry@...silicon.com, parthiban.veerasooran@...rochip.com,
masahiroy@...nel.org
Subject: Re: [PATCH v4 05/14] net-next/yunsilicon: Add eq and alloc
On Thu, Feb 13, 2025 at 05:14:14PM +0800, Xin Tian wrote:
> Add eq management and buffer alloc apis
>
> Signed-off-by: Xin Tian <tianx@...silicon.com>
> Signed-off-by: Honggang Wei <weihg@...silicon.com>
...
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h b/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h
...
> +struct xsc_eq_table {
> + void __iomem *update_ci;
> + void __iomem *update_arm_ci;
> + struct list_head comp_eqs_list;
nit: The indentation of the member names above seems inconsistent
with what is below.
> + struct xsc_eq pages_eq;
> + struct xsc_eq async_eq;
> + struct xsc_eq cmd_eq;
> + int num_comp_vectors;
> + int eq_vec_comp_base;
> + /* protect EQs list
> + */
> + spinlock_t lock;
> +};
...
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c
...
> +/* Handling for queue buffers -- we allocate a bunch of memory and
> + * register it in a memory region at HCA virtual address 0. If the
> + * requested size is > max_direct, we split the allocation into
> + * multiple pages, so we don't require too much contiguous memory.
> + */
I can't help but think there is an existing API to handle this.
> +int xsc_buf_alloc(struct xsc_core_device *xdev, int size, int max_direct,
I think unsigned long would be slightly better types for size and max_direct.
> + struct xsc_buf *buf)
> +{
> + dma_addr_t t;
> +
> + buf->size = size;
> + if (size <= max_direct) {
> + buf->nbufs = 1;
> + buf->npages = 1;
> + buf->page_shift = get_order(size) + PAGE_SHIFT;
> + buf->direct.buf = dma_alloc_coherent(&xdev->pdev->dev,
> + size,
> + &t,
> + GFP_KERNEL | __GFP_ZERO);
> + if (!buf->direct.buf)
> + return -ENOMEM;
> +
> + buf->direct.map = t;
> +
> + while (t & ((1 << buf->page_shift) - 1)) {
I think GENMASK() can be used here.
> + --buf->page_shift;
> + buf->npages *= 2;
> + }
> + } else {
> + int i;
> +
> + buf->direct.buf = NULL;
> + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
I think this is open-coding DIV_ROUND_UP
> + buf->npages = buf->nbufs;
> + buf->page_shift = PAGE_SHIFT;
> + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
> + GFP_KERNEL);
> + if (!buf->page_list)
> + return -ENOMEM;
> +
> + for (i = 0; i < buf->nbufs; i++) {
> + buf->page_list[i].buf =
> + dma_alloc_coherent(&xdev->pdev->dev, PAGE_SIZE,
> + &t, GFP_KERNEL | __GFP_ZERO);
> + if (!buf->page_list[i].buf)
> + goto err_free;
> +
> + buf->page_list[i].map = t;
> + }
> +
> + if (BITS_PER_LONG == 64) {
> + struct page **pages;
> +
> + pages = kmalloc_array(buf->nbufs, sizeof(*pages),
> + GFP_KERNEL);
> + if (!pages)
> + goto err_free;
> + for (i = 0; i < buf->nbufs; i++) {
> + void *addr = buf->page_list[i].buf;
> +
> + if (is_vmalloc_addr(addr))
> + pages[i] = vmalloc_to_page(addr);
> + else
> + pages[i] = virt_to_page(addr);
> + }
> + buf->direct.buf = vmap(pages, buf->nbufs,
> + VM_MAP, PAGE_KERNEL);
> + kfree(pages);
> + if (!buf->direct.buf)
> + goto err_free;
> + }
I think some explanation is warranted of why the above is relevant
only when BITS_PER_LONG == 64.
> + }
> +
> + return 0;
> +
> +err_free:
> + xsc_buf_free(xdev, buf);
> +
> + return -ENOMEM;
> +}
...
> +void xsc_fill_page_array(struct xsc_buf *buf, __be64 *pas, int npages)
As per my comment on unsigned long in my response to another patch,
I think npages can be unsigned long.
> +{
> + int shift = PAGE_SHIFT - PAGE_SHIFT_4K;
> + int mask = (1 << shift) - 1;
Likewise, I think that mask should be an unsigned long.
Or, both shift and mask could be #defines, as they are compile-time
constants.
Also, mask can be generated using GENMASK, e.g.
#define XSC_PAGE_ARRAY_MASK GENMASK(PAGE_SHIFT, PAGE_SHIFT_4K)
#define XSC_PAGE_ARRAY_SHIFT (PAGE_SHIFT - PAGE_SHIFT_4K)
And I note, in the (common) case of 4k pages, that both shift and mask are 0.
> + u64 addr;
> + int i;
> +
> + for (i = 0; i < npages; i++) {
> + if (buf->nbufs == 1)
> + addr = buf->direct.map + (i << PAGE_SHIFT_4K);
> + else
> + addr = buf->page_list[i >> shift].map
> + + ((i & mask) << PAGE_SHIFT_4K);
The like above is open-coding FIELD_PREP().
However, I don't think it can be used here as
the compiler complains very loudly because the mask is 0.
> +
> + pas[i] = cpu_to_be64(addr);
> + }
> +}
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h
...
> +static void eq_update_ci(struct xsc_eq *eq, int arm)
> +{
> + struct xsc_eq_doorbell db = {0};
> +
> + db.data0 = XSC_SET_FIELD(cpu_to_le32(eq->cons_index),
> + XSC_EQ_DB_NEXT_CID) |
> + XSC_SET_FIELD(cpu_to_le32(eq->eqn), XSC_EQ_DB_EQ_ID);
Each of the two uses of XSC_SET_FIELD() are passed a little-endian value
and a host-byte order mask. This does not seem correct as it seems
they byte order should be consistent.
> + if (arm)
> + db.data0 |= XSC_EQ_DB_ARM;
Likewise, here data0 is little-endian while XSC_EQ_DB_ARM is host
byte-order.
> + writel(db.data0, XSC_REG_ADDR(eq->dev, eq->doorbell));
And here, db.data0 is little-endian, but writel expects a host-byte order
value (which it converts to little-endian).
I didn't dig deeper but it seems to me that it would be easier to change
the type of data0 to host byte-order and drop the use of cpu_to_le32()
above.
Issues flagged by Sparse.
> + /* We still want ordering, just not swabbing, so add a barrier */
> + mb();
> +}
...
> +static int xsc_eq_int(struct xsc_core_device *xdev, struct xsc_eq *eq)
> +{
> + u32 cqn, qpn, queue_id;
> + struct xsc_eqe *eqe;
> + int eqes_found = 0;
> + int set_ci = 0;
> +
> + while ((eqe = next_eqe_sw(eq))) {
> + /* Make sure we read EQ entry contents after we've
> + * checked the ownership bit.
> + */
> + rmb();
> + switch (eqe->type) {
> + case XSC_EVENT_TYPE_COMP:
> + case XSC_EVENT_TYPE_INTERNAL_ERROR:
> + /* eqe is changing */
> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> + XSC_EQE_QUEUE_ID));
Similarly, here XSC_GET_FIELD() is passed a little-endian value and a host
byte-order mask, which is inconsistent.
Perhaps this should be (completely untested!):
queue_id = XSC_GET_FIELD(le16_to_cpu(eqe->queue_id_data),
XSC_EQE_QUEUE_ID);
Likewise for the two uses of XSC_GET_FIELD below.
And perhaps queue_id could be renamed, say to q_id, to make things a bit
more succinct.
> + cqn = queue_id;
I'm unsure why both cqn and queue_id are needed.
> + xsc_cq_completion(xdev, cqn);
> + break;
> +
> + case XSC_EVENT_TYPE_CQ_ERROR:
> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> + XSC_EQE_QUEUE_ID));
> + cqn = queue_id;
> + xsc_eq_cq_event(xdev, cqn, eqe->type);
> + break;
> + case XSC_EVENT_TYPE_WQ_CATAS_ERROR:
> + case XSC_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
> + case XSC_EVENT_TYPE_WQ_ACCESS_ERROR:
> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
> + XSC_EQE_QUEUE_ID));
> + qpn = queue_id;
> + xsc_qp_event(xdev, qpn, eqe->type);
> + break;
> + default:
> + break;
> + }
> +
> + ++eq->cons_index;
> + eqes_found = 1;
> + ++set_ci;
> +
> + /* The HCA will think the queue has overflowed if we
> + * don't tell it we've been processing events. We
> + * create our EQs with XSC_NUM_SPARE_EQE extra
> + * entries, so we must update our consumer index at
> + * least that often.
> + */
> + if (unlikely(set_ci >= XSC_NUM_SPARE_EQE)) {
> + eq_update_ci(eq, 0);
> + set_ci = 0;
> + }
> + }
> +
> + eq_update_ci(eq, 1);
> +
> + return eqes_found;
> +}
...
Powered by blists - more mailing lists