[<prev] [next>] [<thread-prev] [day] [month] [year] [list]
Message-Id: <b0adf539-8104-452d-ba34-14a120602bd5@yunsilicon.com>
Date: Thu, 20 Feb 2025 23:35:26 +0800
From: "tianx" <tianx@...silicon.com>
To: "Simon Horman" <horms@...nel.org>
Cc: <netdev@...r.kernel.org>, <leon@...nel.org>, <andrew+netdev@...n.ch>,
<kuba@...nel.org>, <pabeni@...hat.com>, <edumazet@...gle.com>,
<davem@...emloft.net>, <jeff.johnson@....qualcomm.com>,
<przemyslaw.kitszel@...el.com>, <weihg@...silicon.com>,
<wanry@...silicon.com>, <parthiban.veerasooran@...rochip.com>,
<masahiroy@...nel.org>
Subject: Re: [PATCH v4 05/14] net-next/yunsilicon: Add eq and alloc
On 2025/2/19 1:10, Simon Horman wrote:
> On Thu, Feb 13, 2025 at 05:14:14PM +0800, Xin Tian wrote:
>> Add eq management and buffer alloc apis
>>
>> Signed-off-by: Xin Tian<tianx@...silicon.com>
>> Signed-off-by: Honggang Wei<weihg@...silicon.com>
> ...
>
>> diff --git a/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h b/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h
> ...
>
>> +struct xsc_eq_table {
>> + void __iomem *update_ci;
>> + void __iomem *update_arm_ci;
>> + struct list_head comp_eqs_list;
> nit: The indentation of the member names above seems inconsistent
> with what is below.
got it
>> + struct xsc_eq pages_eq;
>> + struct xsc_eq async_eq;
>> + struct xsc_eq cmd_eq;
>> + int num_comp_vectors;
>> + int eq_vec_comp_base;
>> + /* protect EQs list
>> + */
>> + spinlock_t lock;
>> +};
> ...
>
>> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.c
> ...
>
>> +/* Handling for queue buffers -- we allocate a bunch of memory and
>> + * register it in a memory region at HCA virtual address 0. If the
>> + * requested size is > max_direct, we split the allocation into
>> + * multiple pages, so we don't require too much contiguous memory.
>> + */
> I can't help but think there is an existing API to handle this.
failed to find one
>> +int xsc_buf_alloc(struct xsc_core_device *xdev, int size, int max_direct,
> I think unsigned long would be a slightly better type for size and max_direct.
yes, will modify
>> + struct xsc_buf *buf)
>> +{
>> + dma_addr_t t;
>> +
>> + buf->size = size;
>> + if (size <= max_direct) {
>> + buf->nbufs = 1;
>> + buf->npages = 1;
>> + buf->page_shift = get_order(size) + PAGE_SHIFT;
>> + buf->direct.buf = dma_alloc_coherent(&xdev->pdev->dev,
>> + size,
>> + &t,
>> + GFP_KERNEL | __GFP_ZERO);
>> + if (!buf->direct.buf)
>> + return -ENOMEM;
>> +
>> + buf->direct.map = t;
>> +
>> + while (t & ((1 << buf->page_shift) - 1)) {
> I think GENMASK() can be used here.
ok
>> + --buf->page_shift;
>> + buf->npages *= 2;
>> + }
>> + } else {
>> + int i;
>> +
>> + buf->direct.buf = NULL;
>> + buf->nbufs = (size + PAGE_SIZE - 1) / PAGE_SIZE;
> I think this is open-coding DIV_ROUND_UP
right, I'll change
>> + buf->npages = buf->nbufs;
>> + buf->page_shift = PAGE_SHIFT;
>> + buf->page_list = kcalloc(buf->nbufs, sizeof(*buf->page_list),
>> + GFP_KERNEL);
>> + if (!buf->page_list)
>> + return -ENOMEM;
>> +
>> + for (i = 0; i < buf->nbufs; i++) {
>> + buf->page_list[i].buf =
>> + dma_alloc_coherent(&xdev->pdev->dev, PAGE_SIZE,
>> + &t, GFP_KERNEL | __GFP_ZERO);
>> + if (!buf->page_list[i].buf)
>> + goto err_free;
>> +
>> + buf->page_list[i].map = t;
>> + }
>> +
>> + if (BITS_PER_LONG == 64) {
>> + struct page **pages;
>> +
>> + pages = kmalloc_array(buf->nbufs, sizeof(*pages),
>> + GFP_KERNEL);
>> + if (!pages)
>> + goto err_free;
>> + for (i = 0; i < buf->nbufs; i++) {
>> + void *addr = buf->page_list[i].buf;
>> +
>> + if (is_vmalloc_addr(addr))
>> + pages[i] = vmalloc_to_page(addr);
>> + else
>> + pages[i] = virt_to_page(addr);
>> + }
>> + buf->direct.buf = vmap(pages, buf->nbufs,
>> + VM_MAP, PAGE_KERNEL);
>> + kfree(pages);
>> + if (!buf->direct.buf)
>> + goto err_free;
>> + }
> I think some explanation is warranted of why the above is relevant
> only when BITS_PER_LONG == 64.
The check is there for historical reasons and is no longer needed. I'll
clean this up.
>> + }
>> +
>> + return 0;
>> +
>> +err_free:
>> + xsc_buf_free(xdev, buf);
>> +
>> + return -ENOMEM;
>> +}
> ...
>
>> +void xsc_fill_page_array(struct xsc_buf *buf, __be64 *pas, int npages)
> As per my comment on unsigned long in my response to another patch,
> I think npages can be unsigned long.
ok
>> +{
>> + int shift = PAGE_SHIFT - PAGE_SHIFT_4K;
>> + int mask = (1 << shift) - 1;
> Likewise, I think that mask should be an unsigned long.
> Or, both shift and mask could be #defines, as they are compile-time
> constants.
>
> Also, mask can be generated using GENMASK, e.g.
>
> #define XSC_PAGE_ARRAY_MASK GENMASK(PAGE_SHIFT, PAGE_SHIFT_4K)
> #define XSC_PAGE_ARRAY_SHIFT (PAGE_SHIFT - PAGE_SHIFT_4K)
>
> And I note, in the (common) case of 4k pages, that both shift and mask are 0.
Thank you for the suggestion, but that's not quite the case here. The
|shift| and |mask| are not used to extract fields from data. Instead,
they are part of a calculation. In |xsc_buf_alloc|, we allocate the
buffer based on the system's page size. However, in this function, we
need to break each page in the |buflist| into 4KB chunks, populate the
|pas| array with the corresponding DMA addresses, and then map them to
hardware.
The |shift| is calculated as |PAGE_SHIFT - PAGE_SHIFT_4K|, allowing us
to convert the 4KB chunk index (|i|) to the corresponding page index in
|buflist| with |i >> shift|. The |i & mask| gives us the index of the
current 4KB chunk within that page, and shifting it with |((i & mask) <<
PAGE_SHIFT_4K)| turns that index into the chunk's byte offset within the page.
I hope this makes things clearer!
>> + u64 addr;
>> + int i;
>> +
>> + for (i = 0; i < npages; i++) {
>> + if (buf->nbufs == 1)
>> + addr = buf->direct.map + (i << PAGE_SHIFT_4K);
>> + else
>> + addr = buf->page_list[i >> shift].map
>> + + ((i & mask) << PAGE_SHIFT_4K);
> The line above is open-coding FIELD_PREP().
> However, I don't think it can be used here as
> the compiler complains very loudly because the mask is 0.
>> +
>> + pas[i] = cpu_to_be64(addr);
>> + }
>> +}
>> diff --git a/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h b/drivers/net/ethernet/yunsilicon/xsc/pci/alloc.h
> ...
>
>> +static void eq_update_ci(struct xsc_eq *eq, int arm)
>> +{
>> + struct xsc_eq_doorbell db = {0};
>> +
>> + db.data0 = XSC_SET_FIELD(cpu_to_le32(eq->cons_index),
>> + XSC_EQ_DB_NEXT_CID) |
>> + XSC_SET_FIELD(cpu_to_le32(eq->eqn), XSC_EQ_DB_EQ_ID);
> Each of the two uses of XSC_SET_FIELD() is passed a little-endian value
> and a host byte-order mask. This does not seem correct, as it seems
> the byte order should be consistent.
>> + if (arm)
>> + db.data0 |= XSC_EQ_DB_ARM;
> Likewise, here data0 is little-endian while XSC_EQ_DB_ARM is host
> byte-order.
>
>> + writel(db.data0, XSC_REG_ADDR(eq->dev, eq->doorbell));
> And here, db.data0 is little-endian, but writel expects a host-byte order
> value (which it converts to little-endian).
>
> I didn't dig deeper but it seems to me that it would be easier to change
> the type of data0 to host byte-order and drop the use of cpu_to_le32()
> above.
>
> Issues flagged by Sparse.
>
>> + /* We still want ordering, just not swabbing, so add a barrier */
>> + mb();
>> +}
> ...
>
>> +static int xsc_eq_int(struct xsc_core_device *xdev, struct xsc_eq *eq)
>> +{
>> + u32 cqn, qpn, queue_id;
>> + struct xsc_eqe *eqe;
>> + int eqes_found = 0;
>> + int set_ci = 0;
>> +
>> + while ((eqe = next_eqe_sw(eq))) {
>> + /* Make sure we read EQ entry contents after we've
>> + * checked the ownership bit.
>> + */
>> + rmb();
>> + switch (eqe->type) {
>> + case XSC_EVENT_TYPE_COMP:
>> + case XSC_EVENT_TYPE_INTERNAL_ERROR:
>> + /* eqe is changing */
>> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
>> + XSC_EQE_QUEUE_ID));
> Similarly, here XSC_GET_FIELD() is passed a little-endian value and a host
> byte-order mask, which is inconsistent.
>
> Perhaps this should be (completely untested!):
>
> queue_id = XSC_GET_FIELD(le16_to_cpu(eqe->queue_id_data),
> XSC_EQE_QUEUE_ID);
>
> Likewise for the two uses of XSC_GET_FIELD below.
I have noticed the sparse check warnings on Patchwork, and I will
address all the related issues in the next version.
> And perhaps queue_id could be renamed, say to q_id, to make things a bit
> more succinct.
>
>> + cqn = queue_id;
> I'm unsure why both cqn and queue_id are needed.
The |queue_id| is indeed a bit redundant, and I will remove it.
>> + xsc_cq_completion(xdev, cqn);
>> + break;
>> +
>> + case XSC_EVENT_TYPE_CQ_ERROR:
>> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
>> + XSC_EQE_QUEUE_ID));
>> + cqn = queue_id;
>> + xsc_eq_cq_event(xdev, cqn, eqe->type);
>> + break;
>> + case XSC_EVENT_TYPE_WQ_CATAS_ERROR:
>> + case XSC_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
>> + case XSC_EVENT_TYPE_WQ_ACCESS_ERROR:
>> + queue_id = le16_to_cpu(XSC_GET_FIELD(eqe->queue_id_data,
>> + XSC_EQE_QUEUE_ID));
>> + qpn = queue_id;
>> + xsc_qp_event(xdev, qpn, eqe->type);
>> + break;
>> + default:
>> + break;
>> + }
>> +
>> + ++eq->cons_index;
>> + eqes_found = 1;
>> + ++set_ci;
>> +
>> + /* The HCA will think the queue has overflowed if we
>> + * don't tell it we've been processing events. We
>> + * create our EQs with XSC_NUM_SPARE_EQE extra
>> + * entries, so we must update our consumer index at
>> + * least that often.
>> + */
>> + if (unlikely(set_ci >= XSC_NUM_SPARE_EQE)) {
>> + eq_update_ci(eq, 0);
>> + set_ci = 0;
>> + }
>> + }
>> +
>> + eq_update_ci(eq, 1);
>> +
>> + return eqes_found;
>> +}
> ...
Powered by blists - more mailing lists