Message-ID: <ca6e3e52-dbcd-4613-8802-4952c7125c48@linux.dev>
Date: Tue, 8 Jul 2025 14:15:13 +0100
From: Vadim Fedorenko <vadim.fedorenko@...ux.dev>
To: Xin Tian <tianx@...silicon.com>, netdev@...r.kernel.org
Cc: leon@...nel.org, andrew+netdev@...n.ch, kuba@...nel.org,
 pabeni@...hat.com, edumazet@...gle.com, davem@...emloft.net,
 jeff.johnson@....qualcomm.com, przemyslaw.kitszel@...el.com,
 weihg@...silicon.com, wanry@...silicon.com, jacky@...silicon.com,
 horms@...nel.org, parthiban.veerasooran@...rochip.com, masahiroy@...nel.org,
 kalesh-anakkur.purayil@...adcom.com, geert+renesas@...der.be,
 geert@...ux-m68k.org
Subject: Re: [PATCH net-next v12 12/14] xsc: Add ndo_start_xmit

On 03/07/2025 08:54, Xin Tian wrote:
> This patch adds core data transmission functionality, focusing
> on the ndo_start_xmit interface. The main steps are:
> 
> 1. Transmission Entry
> The entry point selects the appropriate transmit queue (SQ) and
> verifies hardware readiness before calling xsc_eth_xmit_frame
> for packet transmission.
> 2. Packet Processing
> Supports TCP/UDP GSO, calculates MSS and IHS.
> If necessary, performs SKB linearization and handles checksum
> offload. Maps data for DMA using dma_map_single and
> skb_frag_dma_map.
> 3. Descriptor Generation
> Constructs control (cseg) and data (dseg) segments, including
> setting operation codes, segment counts, and DMA addresses.
> 4. Hardware Notification & Queue Management
> Notifies hardware using a doorbell register and manages
> queue flow to avoid overloading.
> 5. Batches doorbell writes across back-to-back packets using
> netdev_xmit_more and supports zero-copy transmission for efficiency.
> 
> Co-developed-by: Honggang Wei <weihg@...silicon.com>
> Signed-off-by: Honggang Wei <weihg@...silicon.com>
> Co-developed-by: Lei Yan <jacky@...silicon.com>
> Signed-off-by: Lei Yan <jacky@...silicon.com>
> Signed-off-by: Xin Tian <tianx@...silicon.com>
> ---
>   .../ethernet/yunsilicon/xsc/common/xsc_core.h |   5 +
>   .../net/ethernet/yunsilicon/xsc/net/Makefile  |   2 +-
>   .../net/ethernet/yunsilicon/xsc/net/main.c    |   1 +
>   .../net/ethernet/yunsilicon/xsc/net/xsc_eth.h |   2 +
>   .../yunsilicon/xsc/net/xsc_eth_common.h       |   8 +
>   .../ethernet/yunsilicon/xsc/net/xsc_eth_tx.c  | 315 ++++++++++++++++++
>   .../yunsilicon/xsc/net/xsc_eth_txrx.c         | 150 ++++++++-
>   .../yunsilicon/xsc/net/xsc_eth_txrx.h         |  66 ++++
>   .../ethernet/yunsilicon/xsc/net/xsc_queue.h   |   7 +
>   9 files changed, 552 insertions(+), 4 deletions(-)
>   create mode 100644 drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_tx.c
> 
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h b/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h
> index 0b54b2d8a..bc938292a 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h
> +++ b/drivers/net/ethernet/yunsilicon/xsc/common/xsc_core.h
> @@ -490,4 +490,9 @@ static inline u8 xsc_get_user_mode(struct xsc_core_device *xdev)
>   	return xdev->user_mode;
>   }
>   
> +static inline u8 get_cqe_opcode(struct xsc_cqe *cqe)
> +{
> +	return FIELD_GET(XSC_CQE_MSG_OPCD_MASK, le32_to_cpu(cqe->data0));
> +}
> +
>   #endif
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/Makefile b/drivers/net/ethernet/yunsilicon/xsc/net/Makefile
> index 104ef5330..7cfc2aaa2 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/Makefile
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/Makefile
> @@ -6,4 +6,4 @@ ccflags-y += -I$(srctree)/drivers/net/ethernet/yunsilicon/xsc
>   
>   obj-$(CONFIG_YUNSILICON_XSC_ETH) += xsc_eth.o
>   
> -xsc_eth-y := main.o xsc_eth_wq.o xsc_eth_txrx.o xsc_eth_rx.o
> +xsc_eth-y := main.o xsc_eth_wq.o xsc_eth_txrx.o xsc_eth_tx.o xsc_eth_rx.o
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/main.c b/drivers/net/ethernet/yunsilicon/xsc/net/main.c
> index 3ed995b10..3341d3cf1 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/main.c
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/main.c
> @@ -1683,6 +1683,7 @@ static int xsc_eth_set_hw_mtu(struct xsc_core_device *xdev,
>   static const struct net_device_ops xsc_netdev_ops = {
>   	.ndo_open		= xsc_eth_open,
>   	.ndo_stop		= xsc_eth_close,
> +	.ndo_start_xmit		= xsc_eth_xmit_start,
>   };
>   
>   static void xsc_eth_build_nic_netdev(struct xsc_adapter *adapter)
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth.h b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth.h
> index 1b652c192..61354b892 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth.h
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth.h
> @@ -6,6 +6,8 @@
>   #ifndef __XSC_ETH_H
>   #define __XSC_ETH_H
>   
> +#include <linux/udp.h>
> +
>   #include "common/xsc_device.h"
>   #include "xsc_eth_common.h"
>   
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_common.h b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_common.h
> index 7b6e180be..4f47dac5c 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_common.h
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_common.h
> @@ -178,4 +178,12 @@ struct xsc_eth_channels {
>   	u32			rqn_base;
>   };
>   
> +union xsc_send_doorbell {
> +	struct {
> +		s32 next_pid : 16;
> +		u32 qp_num : 15;
> +	};
> +	u32 send_data;
> +};
> +
>   #endif
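
Side note: xsc_cq_notify_hw() in this series builds its doorbell word
with FIELD_PREP() rather than a bitfield union. The send doorbell could
be written the same way; a sketch, with hypothetical mask names and
assuming the usual little-endian bitfield layout (untested):

	#define XSC_SEND_DB_NEXT_PID_MASK	GENMASK(15, 0)
	#define XSC_SEND_DB_QP_NUM_MASK		GENMASK(30, 16)

	u32 db_val = FIELD_PREP(XSC_SEND_DB_NEXT_PID_MASK,
				pc << send_ds_num_log) |
		     FIELD_PREP(XSC_SEND_DB_QP_NUM_MASK, sq->sqn);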
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_tx.c b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_tx.c
> new file mode 100644
> index 000000000..1237433b1
> --- /dev/null
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_tx.c
> @@ -0,0 +1,315 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (C) 2021-2025, Shanghai Yunsilicon Technology Co., Ltd.
> + * All rights reserved.
> + */
> +
> +#include <linux/tcp.h>
> +
> +#include "xsc_eth.h"
> +#include "xsc_eth_txrx.h"
> +
> +#define XSC_OPCODE_RAW 7
> +
> +static void xsc_dma_push(struct xsc_sq *sq, dma_addr_t addr, u32 size,
> +			 enum xsc_dma_map_type map_type)
> +{
> +	struct xsc_sq_dma *dma = xsc_dma_get(sq, sq->dma_fifo_pc++);
> +
> +	dma->addr = addr;
> +	dma->size = size;
> +	dma->type = map_type;
> +}
> +
> +static void xsc_dma_unmap_wqe(struct xsc_sq *sq, u8 num_dma)
> +{
> +	struct xsc_adapter *adapter = sq->channel->adapter;
> +	struct device *dev  = adapter->dev;
> +	int i;
> +
> +	for (i = 0; i < num_dma; i++) {
> +		struct xsc_sq_dma *last_pushed_dma;
> +
> +		last_pushed_dma = xsc_dma_get(sq, --sq->dma_fifo_pc);
> +		xsc_tx_dma_unmap(dev, last_pushed_dma);
> +	}
> +}
> +
> +static void *xsc_sq_fetch_wqe(struct xsc_sq *sq, size_t size, u16 *pi)
> +{
> +	struct xsc_wq_cyc *wq = &sq->wq;
> +	void *wqe;
> +
> +	/* caution: sq->pc defaults to zero */
> +	*pi  = xsc_wq_cyc_ctr2ix(wq, sq->pc);
> +	wqe = xsc_wq_cyc_get_wqe(wq, *pi);
> +	memset(wqe, 0, size);
> +
> +	return wqe;
> +}
> +
> +static u16 xsc_tx_get_gso_ihs(struct xsc_sq *sq, struct sk_buff *skb)
> +{
> +	u16 ihs;
> +
> +	if (skb->encapsulation) {
> +		ihs = skb_inner_transport_offset(skb) + inner_tcp_hdrlen(skb);
> +	} else {
> +		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
> +			ihs = skb_transport_offset(skb) + sizeof(struct udphdr);
> +		else
> +			ihs = skb_transport_offset(skb) + tcp_hdrlen(skb);
> +	}
> +
> +	return ihs;
> +}
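
For reference, for a plain (non-encapsulated) Ethernet/IPv4/TCP GSO skb
with no VLAN tag and no IP/TCP options this works out to:

	ihs = skb_transport_offset(skb) + tcp_hdrlen(skb)
	    = (14 + 20) + 20 = 54 bytes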
> +
> +static void xsc_txwqe_build_cseg_csum(struct xsc_sq *sq,
> +				      struct sk_buff *skb,
> +				      struct xsc_send_wqe_ctrl_seg *cseg)
> +{
> +	u32 val = le32_to_cpu(cseg->data0);
> +
> +	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
> +		if (skb->encapsulation)
> +			val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
> +					  XSC_ETH_WQE_INNER_AND_OUTER_CSUM);
> +		else
> +			val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
> +					  XSC_ETH_WQE_OUTER_CSUM);
> +	} else {
> +		val |= FIELD_PREP(XSC_WQE_CTRL_SEG_CSUM_EN_MASK,
> +				  XSC_ETH_WQE_NONE_CSUM);
> +	}
> +	cseg->data0 = cpu_to_le32(val);
> +}
> +
> +static void xsc_txwqe_build_csegs(struct xsc_sq *sq, struct sk_buff *skb,
> +				  u16 mss, u16 ihs, u16 headlen,
> +				  u8 opcode, u16 ds_cnt, u32 msglen,
> +				  struct xsc_send_wqe_ctrl_seg *cseg)
> +{
> +	struct xsc_core_device *xdev = sq->cq.xdev;
> +	int send_wqe_ds_num_log;
> +	u32 val = 0;
> +
> +	send_wqe_ds_num_log = ilog2(xdev->caps.send_ds_num);
> +	xsc_txwqe_build_cseg_csum(sq, skb, cseg);
> +
> +	if (mss != 0) {
> +		val |= XSC_WQE_CTRL_SEG_HAS_PPH |
> +		       XSC_WQE_CTRL_SEG_SO_TYPE |
> +		       FIELD_PREP(XSC_WQE_CTRL_SEG_SO_HDR_LEN_MASK, ihs) |
> +		       FIELD_PREP(XSC_WQE_CTRL_SEG_SO_DATA_SIZE_MASK, mss);
> +		cseg->data2 = cpu_to_le32(val);
> +	}
> +
> +	val = le32_to_cpu(cseg->data0);
> +	val |= FIELD_PREP(XSC_WQE_CTRL_SEG_MSG_OPCODE_MASK, opcode) |
> +	       FIELD_PREP(XSC_WQE_CTRL_SEG_WQE_ID_MASK,
> +			  sq->pc << send_wqe_ds_num_log) |
> +	       FIELD_PREP(XSC_WQE_CTRL_SEG_DS_DATA_NUM_MASK,
> +			  ds_cnt - XSC_SEND_WQEBB_CTRL_NUM_DS);
> +	cseg->data0 = cpu_to_le32(val);
> +	cseg->msg_len = cpu_to_le32(msglen);
> +	cseg->data3 = cpu_to_le32(XSC_WQE_CTRL_SEG_CE);
> +}
> +
> +static int xsc_txwqe_build_dsegs(struct xsc_sq *sq, struct sk_buff *skb,
> +				 u16 ihs, u16 headlen,
> +				 struct xsc_wqe_data_seg *dseg)
> +{
> +	struct xsc_adapter *adapter = sq->channel->adapter;
> +	struct device *dev = adapter->dev;
> +	dma_addr_t dma_addr = 0;
> +	u8 num_dma = 0;
> +	int i;
> +
> +	if (headlen) {
> +		dma_addr = dma_map_single(dev,
> +					  skb->data,
> +					  headlen,
> +					  DMA_TO_DEVICE);
> +		if (unlikely(dma_mapping_error(dev, dma_addr)))
> +			goto err_dma_unmap_wqe;
> +
> +		dseg->va = cpu_to_le64(dma_addr);
> +		dseg->mkey  = cpu_to_le32(be32_to_cpu(sq->mkey_be));
> +		dseg->data0 |=
> +			cpu_to_le32(FIELD_PREP(XSC_WQE_DATA_SEG_SEG_LEN_MASK,
> +					       headlen));
> +
> +		xsc_dma_push(sq, dma_addr, headlen, XSC_DMA_MAP_SINGLE);
> +		num_dma++;
> +		dseg++;
> +	}
> +
> +	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
> +		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
> +		int fsz = skb_frag_size(frag);
> +
> +		dma_addr = skb_frag_dma_map(dev, frag, 0, fsz, DMA_TO_DEVICE);
> +		if (unlikely(dma_mapping_error(dev, dma_addr)))
> +			goto err_dma_unmap_wqe;
> +
> +		dseg->va = cpu_to_le64(dma_addr);
> +		dseg->mkey = cpu_to_le32(be32_to_cpu(sq->mkey_be));
> +		dseg->data0 |=
> +			cpu_to_le32(FIELD_PREP(XSC_WQE_DATA_SEG_SEG_LEN_MASK,
> +					       fsz));
> +
> +		xsc_dma_push(sq, dma_addr, fsz, XSC_DMA_MAP_PAGE);
> +		num_dma++;
> +		dseg++;
> +	}
> +
> +	return num_dma;
> +
> +err_dma_unmap_wqe:
> +	xsc_dma_unmap_wqe(sq, num_dma);
> +	return -ENOMEM;
> +}
> +
> +static void xsc_sq_notify_hw(struct xsc_wq_cyc *wq, u16 pc,
> +			     struct xsc_sq *sq)
> +{
> +	struct xsc_adapter *adapter = sq->channel->adapter;
> +	struct xsc_core_device *xdev  = adapter->xdev;
> +	union xsc_send_doorbell doorbell_value;
> +	int send_ds_num_log;
> +
> +	send_ds_num_log = ilog2(xdev->caps.send_ds_num);
> +	/* convert wqe index to ds index */
> +	doorbell_value.next_pid = pc << send_ds_num_log;
> +	doorbell_value.qp_num = sq->sqn;
> +
> +	/* Make sure that descriptors are written before
> +	 * updating doorbell record and ringing the doorbell
> +	 */
> +	wmb();
> +	writel(doorbell_value.send_data, XSC_REG_ADDR(xdev, xdev->regs.tx_db));
> +}
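
(For example, with send_ds_num = 4 the shift is 2, so a wqe producer
counter of 10 is advertised to hardware as ds index 40.)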
> +
> +static void xsc_txwqe_complete(struct xsc_sq *sq, struct sk_buff *skb,
> +			       u8 opcode, u16 ds_cnt,
> +			       u8 num_wqebbs, u32 num_bytes, u8 num_dma,
> +			       struct xsc_tx_wqe_info *wi)
> +{
> +	struct xsc_wq_cyc *wq = &sq->wq;
> +
> +	wi->num_bytes = num_bytes;
> +	wi->num_dma = num_dma;
> +	wi->num_wqebbs = num_wqebbs;
> +	wi->skb = skb;
> +
> +	netdev_tx_sent_queue(sq->txq, num_bytes);
> +
> +	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
> +		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
> +
> +	sq->pc += wi->num_wqebbs;
> +
> +	if (unlikely(!xsc_wqc_has_room_for(wq, sq->cc, sq->pc, sq->stop_room)))
> +		netif_tx_stop_queue(sq->txq);
> +
> +	if (!netdev_xmit_more() || netif_xmit_stopped(sq->txq))
> +		xsc_sq_notify_hw(wq, sq->pc, sq);
> +}
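
This is broadly the usual xmit_more pattern: the doorbell is rung only
for the last skb of a batch, or when the queue is stopping, so a burst
of small packets costs a single MMIO write instead of one per packet.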
> +
> +static uint32_t xsc_eth_xmit_frame(struct sk_buff *skb,
> +				   struct xsc_sq *sq,
> +				   struct xsc_tx_wqe *wqe,
> +				   u16 pi)
> +{
> +	struct xsc_core_device *xdev = sq->cq.xdev;
> +	struct xsc_send_wqe_ctrl_seg *cseg;
> +	struct xsc_wqe_data_seg *dseg;
> +	struct xsc_tx_wqe_info *wi;
> +	u16 mss, ihs, headlen;
> +	u32 num_bytes;
> +	u8 num_wqebbs;
> +	int num_dma;
> +	u16 ds_cnt;
> +	u8 opcode;
> +
> +retry_send:
> +	/* Calculate ihs and ds_cnt; no writes to the wqe yet.
> +	 * The ctrl ds counted here is subtracted again via ds_data_num.
> +	 */
> +	ds_cnt = XSC_SEND_WQEBB_CTRL_NUM_DS;
> +
> +	if (skb_is_gso(skb)) {
> +		opcode		= XSC_OPCODE_RAW;
> +		mss		= skb_shinfo(skb)->gso_size;
> +		ihs		= xsc_tx_get_gso_ihs(sq, skb);
> +		num_bytes	= skb->len +
> +				  (skb_shinfo(skb)->gso_segs - 1) * ihs;
> +	} else {
> +		opcode		= XSC_OPCODE_RAW;
> +		mss		= 0;
> +		ihs		= 0;
> +		num_bytes	= skb->len;
> +	}

opcode is identical for GSO and non-GSO packets. It could be moved
outside of the branch, or XSC_OPCODE_RAW could be used directly later
in the xsc_txwqe_complete() call.
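
Something like (untested):

	opcode = XSC_OPCODE_RAW;
	if (skb_is_gso(skb)) {
		mss	  = skb_shinfo(skb)->gso_size;
		ihs	  = xsc_tx_get_gso_ihs(sq, skb);
		num_bytes = skb->len +
			    (skb_shinfo(skb)->gso_segs - 1) * ihs;
	} else {
		mss	  = 0;
		ihs	  = 0;
		num_bytes = skb->len;
	}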

> +
> +	/* linear data in the skb */
> +	headlen = skb->len - skb->data_len;
> +	ds_cnt += !!headlen;
> +	ds_cnt += skb_shinfo(skb)->nr_frags;
> +
> +	/* Check packet size. */
> +	if (unlikely(mss == 0 && skb->len > sq->hw_mtu))
> +		goto err_drop;
> +
> +	num_wqebbs = DIV_ROUND_UP(ds_cnt, xdev->caps.send_ds_num);
> +	/* if ds_cnt exceeds one wqe, linearize the skb and retry */
> +	if (num_wqebbs != 1) {
> +		if (skb_linearize(skb))
> +			goto err_drop;
> +		goto retry_send;
> +	}
> +
> +	/* fill wqe */
> +	wi   = (struct xsc_tx_wqe_info *)&sq->db.wqe_info[pi];
> +	cseg = &wqe->ctrl;
> +	dseg = &wqe->data[0];
> +
> +	if (unlikely(skb->len == 0))
> +		goto err_drop;
> +
> +	xsc_txwqe_build_csegs(sq, skb, mss, ihs, headlen,
> +			      opcode, ds_cnt, skb->len, cseg);
> +
> +	/* the inline header is also transferred via DMA */
> +	num_dma = xsc_txwqe_build_dsegs(sq, skb, ihs, headlen, dseg);
> +	if (unlikely(num_dma < 0))
> +		goto err_drop;
> +
> +	xsc_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
> +			   num_dma, wi);
> +
> +	return NETDEV_TX_OK;
> +
> +err_drop:
> +	dev_kfree_skb_any(skb);
> +
> +	return NETDEV_TX_OK;
> +}
> +
> +netdev_tx_t xsc_eth_xmit_start(struct sk_buff *skb, struct net_device *netdev)
> +{
> +	struct xsc_adapter *adapter = netdev_priv(netdev);
> +	struct xsc_tx_wqe *wqe;
> +	struct xsc_sq *sq;
> +	u32 ds_num;
> +	u16 pi;
> +
> +	if (adapter->status != XSCALE_ETH_DRIVER_OK)
> +		return NETDEV_TX_BUSY;
> +
> +	sq = adapter->txq2sq[skb_get_queue_mapping(skb)];
> +	if (unlikely(!sq))
> +		return NETDEV_TX_BUSY;
> +
> +	ds_num = adapter->xdev->caps.send_ds_num;
> +	wqe = xsc_sq_fetch_wqe(sq, ds_num * XSC_SEND_WQE_DS, &pi);
> +
> +	return xsc_eth_xmit_frame(skb, sq, wqe, pi);
> +}
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.c b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.c
> index 2ed08ee44..9784816c3 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.c
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.c
> @@ -20,9 +20,153 @@ void xsc_cq_notify_hw_rearm(struct xsc_cq *cq)
>   	writel(db_val, XSC_REG_ADDR(cq->xdev, cq->xdev->regs.complete_db));
>   }
>   
> -int xsc_eth_napi_poll(struct napi_struct *napi, int budget)
> +void xsc_cq_notify_hw(struct xsc_cq *cq)
>   {
> -	/* TBD */
> -	return true;
> +	struct xsc_core_device *xdev  = cq->xdev;
> +	u32 db_val = 0;
> +
> +	db_val |= FIELD_PREP(XSC_CQ_DB_NEXT_CID_MASK, cq->wq.cc) |
> +		  FIELD_PREP(XSC_CQ_DB_CQ_ID_MASK, cq->xcq.cqn);
> +
> +	/* ensure doorbell record is visible to device
> +	 * before ringing the doorbell
> +	 */
> +	wmb();
> +	writel(db_val, XSC_REG_ADDR(xdev, xdev->regs.complete_reg));
>   }
>   
> +static bool xsc_channel_no_affinity_change(struct xsc_channel *c)
> +{
> +	int current_cpu = smp_processor_id();
> +
> +	return cpumask_test_cpu(current_cpu, c->aff_mask);
> +}
> +
> +static void xsc_dump_error_sqcqe(struct xsc_sq *sq,
> +				 struct xsc_cqe *cqe)
> +{
> +	struct net_device *netdev = sq->channel->netdev;
> +	u32 ci = xsc_cqwq_get_ci(&sq->cq.wq);
> +
> +	net_err_ratelimited("Err cqe on dev %s cqn=0x%x ci=0x%x sqn=0x%x err_code=0x%x qpid=0x%lx\n",
> +			    netdev->name, sq->cq.xcq.cqn, ci,
> +			    sq->sqn, get_cqe_opcode(cqe),
> +			    FIELD_GET(XSC_CQE_QP_ID_MASK,
> +				      le32_to_cpu(cqe->data0)));
> +}
> +
> +static bool xsc_poll_tx_cq(struct xsc_cq *cq, int napi_budget)
> +{
> +	struct xsc_adapter *adapter;
> +	struct xsc_cqe *cqe;
> +	struct device *dev;
> +	struct xsc_sq *sq;
> +	u32 dma_fifo_cc;
> +	u32 nbytes = 0;
> +	u16 npkts = 0;
> +	int i = 0;
> +	u16 sqcc;
> +
> +	sq = container_of(cq, struct xsc_sq, cq);
> +	if (!test_bit(XSC_ETH_SQ_STATE_ENABLED, &sq->state))
> +		return false;
> +
> +	adapter = sq->channel->adapter;
> +	dev = adapter->dev;
> +
> +	cqe = xsc_cqwq_get_cqe(&cq->wq);
> +	if (!cqe)
> +		goto out;
> +
> +	if (unlikely(get_cqe_opcode(cqe) & BIT(7))) {
> +		xsc_dump_error_sqcqe(sq, cqe);
> +		return false;
> +	}
> +
> +	sqcc = sq->cc;
> +
> +	/* avoid dirtying sq cache line every cqe */
> +	dma_fifo_cc = sq->dma_fifo_cc;
> +	i = 0;
> +	do {
> +		struct xsc_tx_wqe_info *wi;
> +		struct sk_buff *skb;
> +		int j;
> +		u16 ci;
> +
> +		xsc_cqwq_pop(&cq->wq);
> +
> +		ci = xsc_wq_cyc_ctr2ix(&sq->wq, sqcc);
> +		wi = &sq->db.wqe_info[ci];
> +		skb = wi->skb;
> +
> +		/* a cqe may be outstanding with no skb attached (e.g. for a nop) */
> +		if (unlikely(!skb))
> +			continue;
> +
> +		for (j = 0; j < wi->num_dma; j++) {
> +			struct xsc_sq_dma *dma = xsc_dma_get(sq, dma_fifo_cc++);
> +
> +			xsc_tx_dma_unmap(dev, dma);
> +		}
> +
> +		npkts++;
> +		nbytes += wi->num_bytes;
> +		sqcc += wi->num_wqebbs;
> +		napi_consume_skb(skb, 0);
> +
> +	} while ((++i <= napi_budget) && (cqe = xsc_cqwq_get_cqe(&cq->wq)));
> +
> +	xsc_cq_notify_hw(cq);
> +
> +	/* ensure cq space is freed before enabling more cqes */
> +	wmb();
> +
> +	sq->dma_fifo_cc = dma_fifo_cc;
> +	sq->cc = sqcc;
> +
> +	netdev_tx_completed_queue(sq->txq, npkts, nbytes);
> +
> +	if (netif_tx_queue_stopped(sq->txq) &&
> +	    xsc_wqc_has_room_for(&sq->wq, sq->cc, sq->pc, sq->stop_room)) {
> +		netif_tx_wake_queue(sq->txq);
> +	}
> +
> +out:
> +	return (i == napi_budget);
> +}
> +
> +int xsc_eth_napi_poll(struct napi_struct *napi, int budget)
> +{
> +	struct xsc_channel *c = container_of(napi, struct xsc_channel, napi);
> +	struct xsc_eth_params *params = &c->adapter->nic_param;
> +	struct xsc_sq *sq = NULL;
> +	bool busy = false;
> +	int work_done = 0;
> +	int tx_budget = 0;
> +	int i;
> +
> +	rcu_read_lock();
> +
> +	tx_budget = params->sq_size >> 2;
> +	for (i = 0; i < c->num_tc; i++)
> +		busy |= xsc_poll_tx_cq(&c->qp.sq[i].cq, tx_budget);
> +
> +	if (busy) {
> +		if (likely(xsc_channel_no_affinity_change(c))) {
> +			rcu_read_unlock();
> +			return budget;
> +		}
> +	}
> +
> +	if (unlikely(!napi_complete_done(napi, work_done)))
> +		goto err_out;
> +
> +	for (i = 0; i < c->num_tc; i++) {
> +		sq = &c->qp.sq[i];
> +		xsc_cq_notify_hw_rearm(&sq->cq);
> +	}
> +err_out:
> +	rcu_read_unlock();
> +	return work_done;
> +}
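
This is broadly the standard NAPI shape: if any tx cq exhausted its tx
budget, report the full napi budget so the poller gets rescheduled;
otherwise complete NAPI and re-arm the completion interrupts.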
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.h b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.h
> index 1f6187093..f8acc6bbb 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.h
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_eth_txrx.h
> @@ -6,9 +6,20 @@
>   #ifndef __XSC_RXTX_H
>   #define __XSC_RXTX_H
>   
> +#include <linux/netdevice.h>
> +#include <linux/skbuff.h>
> +
>   #include "xsc_eth.h"
>   
> +enum {
> +	XSC_ETH_WQE_NONE_CSUM,
> +	XSC_ETH_WQE_INNER_CSUM,
> +	XSC_ETH_WQE_OUTER_CSUM,
> +	XSC_ETH_WQE_INNER_AND_OUTER_CSUM,
> +};
> +
>   void xsc_cq_notify_hw_rearm(struct xsc_cq *cq);
> +void xsc_cq_notify_hw(struct xsc_cq *cq);
>   int xsc_eth_napi_poll(struct napi_struct *napi, int budget);
>   bool xsc_eth_post_rx_wqes(struct xsc_rq *rq);
>   void xsc_eth_handle_rx_cqe(struct xsc_cqwq *cqwq,
> @@ -21,4 +32,59 @@ struct sk_buff *xsc_skb_from_cqe_nonlinear(struct xsc_rq *rq,
>   					   struct xsc_wqe_frag_info *wi,
>   					   u32 cqe_bcnt, u8 has_pph);
>   
> +netdev_tx_t xsc_eth_xmit_start(struct sk_buff *skb, struct net_device *netdev);
> +
> +static inline void xsc_tx_dma_unmap(struct device *dev, struct xsc_sq_dma *dma)
> +{
> +	switch (dma->type) {
> +	case XSC_DMA_MAP_SINGLE:
> +		dma_unmap_single(dev, dma->addr, dma->size, DMA_TO_DEVICE);
> +		break;
> +	case XSC_DMA_MAP_PAGE:
> +		dma_unmap_page(dev, dma->addr, dma->size, DMA_TO_DEVICE);
> +		break;
> +	default:
> +		break;
> +	}
> +}
> +
> +static inline struct xsc_sq_dma *xsc_dma_get(struct xsc_sq *sq, u32 i)
> +{
> +	return &sq->db.dma_fifo[i & sq->dma_fifo_mask];
> +}
> +
> +static inline bool xsc_wqc_has_room_for(struct xsc_wq_cyc *wq,
> +					u16 cc, u16 pc, u16 n)
> +{
> +	return (xsc_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
> +}
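
For reference, this is the usual cyclic-counter free-space check. A
worked example with a 256-entry ring (mask 255): pc = 270 and cc = 260
means 10 wqebbs are in flight and (cc - pc) & 255 = 246 slots are free;
the cc == pc special case covers the completely empty ring, where the
masked subtraction would otherwise yield 0.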
> +
> +static inline struct xsc_cqe *xsc_cqwq_get_cqe_buff(struct xsc_cqwq *wq, u32 ix)
> +{
> +	struct xsc_cqe *cqe = xsc_frag_buf_get_wqe(&wq->fbc, ix);
> +
> +	return cqe;
> +}

why not:

	return xsc_frag_buf_get_wqe(&wq->fbc, ix);

> +
> +static inline struct xsc_cqe *xsc_cqwq_get_cqe(struct xsc_cqwq *wq)
> +{
> +	u32 ci = xsc_cqwq_get_ci(wq);
> +	u8 cqe_ownership_bit;
> +	struct xsc_cqe *cqe;
> +	u8 sw_ownership_val;
> +
> +	cqe = xsc_cqwq_get_cqe_buff(wq, ci);
> +
> +	cqe_ownership_bit = !!(le32_to_cpu(cqe->data1) & XSC_CQE_OWNER);
> +	sw_ownership_val = xsc_cqwq_get_wrap_cnt(wq) & 1;
> +
> +	if (cqe_ownership_bit != sw_ownership_val)
> +		return NULL;
> +
> +	/* ensure cqe content is read after cqe ownership bit */
> +	dma_rmb();
> +
> +	return cqe;
> +}
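
The owner-bit scheme here is the usual one: hardware flips the polarity
of the bit it writes on each pass around the CQ ring, and software
compares it against the low bit of its own wrap count (assuming
xsc_cqwq_get_wrap_cnt() returns the consumer counter divided by the
ring size), which distinguishes a fresh cqe from a stale one left over
from the previous lap.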
> +
>   #endif /* XSC_RXTX_H */
> diff --git a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_queue.h b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_queue.h
> index 643b2c759..4c9f183fc 100644
> --- a/drivers/net/ethernet/yunsilicon/xsc/net/xsc_queue.h
> +++ b/drivers/net/ethernet/yunsilicon/xsc/net/xsc_queue.h
> @@ -36,6 +36,8 @@ enum {
>   #define XSC_RECV_WQEBB_NUM_DS	        (XSC_RECV_WQE_BB / XSC_RECV_WQE_DS)
>   #define XSC_LOG_RECV_WQEBB_NUM_DS	ilog2(XSC_RECV_WQEBB_NUM_DS)
>   
> +#define XSC_SEND_WQEBB_CTRL_NUM_DS	1
> +
>   /* each ds holds one fragment in skb */
>   #define XSC_MAX_RX_FRAGS        4
>   #define XSC_RX_FRAG_SZ_ORDER    0
> @@ -158,6 +160,11 @@ struct xsc_tx_wqe_info {
>   	u8  num_dma;
>   };
>   
> +struct xsc_tx_wqe {
> +	struct xsc_send_wqe_ctrl_seg ctrl;
> +	struct xsc_wqe_data_seg data[];
> +};
> +
>   struct xsc_sq {
>   	struct xsc_core_qp		cqp;
>   	/* dirtied @completion */

