[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1509124098-12234-1-git-send-email-atul.gupta@chelsio.com>
Date: Fri, 27 Oct 2017 22:38:17 +0530
From: Atul Gupta <atul.gupta@...lsio.com>
To: netdev@...r.kernel.org, davem@...emloft.net,
steffen.klassert@...unet.com, herbert@...dor.apana.org.au,
linux-crypto@...r.kernel.org
Cc: Ganesh Goudar <ganeshgr@...lsio.com>
Subject: [PATCH net-next 1/2] cxgb4: Add support for Inline IPSec Tx
Added Tx routine for ULD
- define interface for ULD Tx.
Export routines used for Tx data
- Routines common for data transmit are used by cxgb4 and chcr
drivers.
- EXPORT routines enable transmit from chcr driver.
Signed-off-by: Atul Gupta <atul.gupta@...lsio.com>
Signed-off-by: Ganesh Goudar <ganeshgr@...lsio.com>
---
drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 23 +++++
drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 2 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 2 +-
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 1 +
drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h | 3 +
drivers/net/ethernet/chelsio/cxgb4/sge.c | 101 ++++++++++-----------
6 files changed, 80 insertions(+), 52 deletions(-)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 92a0b02..ee3832a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -58,6 +58,13 @@
extern struct list_head adapter_list;
extern struct mutex uld_mutex;
+/* Suspend an Ethernet Tx queue with fewer available descriptors than this.
+ * This is the same as calc_tx_descs() for a TSO packet with
+ * nr_frags == MAX_SKB_FRAGS.
+ */
+#define ETHTXQ_STOP_THRES \
+ (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
+
enum {
MAX_NPORTS = 4, /* max # of ports */
SERNUM_LEN = 24, /* Serial # length */
@@ -555,6 +562,7 @@ enum { /* adapter flags */
enum {
ULP_CRYPTO_LOOKASIDE = 1 << 0,
+ ULP_CRYPTO_IPSEC_INLINE = 1 << 1,
};
struct rx_sw_desc;
@@ -957,6 +965,11 @@ enum {
SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */
};
+struct tx_sw_desc { /* SW state per Tx descriptor */
+ struct sk_buff *skb;
+ struct ulptx_sgl *sgl;
+};
+
/* Support for "sched_queue" command to allow one or more NIC TX Queues
* to be bound to a TX Scheduling Class.
*/
@@ -1662,4 +1675,14 @@ void t4_tp_mib_read(struct adapter *adap, u32 *buff, u32 nregs,
void free_tx_desc(struct adapter *adap, struct sge_txq *q,
unsigned int n, bool unmap);
void free_txq(struct adapter *adap, struct sge_txq *q);
+inline void cxgb4_reclaim_completed_tx(struct adapter *adap,
+ struct sge_txq *q, bool unmap);
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+ dma_addr_t *addr);
+void cxgb4_inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
+ void *pos);
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+ struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+ const dma_addr_t *addr);
+inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n);
#endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 917663b..cf47183 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3096,6 +3096,8 @@ static int chcr_show(struct seq_file *seq, void *v)
atomic_read(&adap->chcr_stats.error));
seq_printf(seq, "Fallback: %10u \n",
atomic_read(&adap->chcr_stats.fallback));
+ seq_printf(seq, "IPSec PDU: %10u\n",
+ atomic_read(&adap->chcr_stats.ipsec_cnt));
return 0;
}
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index e16078d..538a8a7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -4079,7 +4079,7 @@ static int adap_init0(struct adapter *adap)
} else {
adap->vres.ncrypto_fc = val[0];
}
- adap->params.crypto |= ULP_CRYPTO_LOOKASIDE;
+ adap->params.crypto = ntohs(caps_cmd.cryptocaps);
adap->num_uld += 1;
}
#undef FW_PARAM_PFVF
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 71a315b..6b5fea4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -637,6 +637,7 @@ static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld)
lld->nchan = adap->params.nports;
lld->nports = adap->params.nports;
lld->wr_cred = adap->params.ofldq_wr_cred;
+ lld->crypto = adap->params.crypto;
lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A));
lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A);
lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 52324c7..879b4c6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -295,6 +295,7 @@ struct chcr_stats_debug {
atomic_t complete;
atomic_t error;
atomic_t fallback;
+ atomic_t ipsec_cnt;
};
#define OCQ_WIN_OFFSET(pdev, vres) \
@@ -320,6 +321,7 @@ struct cxgb4_lld_info {
unsigned char wr_cred; /* WR 16-byte credits */
unsigned char adapter_type; /* type of adapter */
unsigned char fw_api_ver; /* FW API version */
+ unsigned char crypto; /* crypto support */
unsigned int fw_vers; /* FW version */
unsigned int iscsi_iolen; /* iSCSI max I/O length */
unsigned int cclk_ps; /* Core clock period in psec */
@@ -368,6 +370,7 @@ struct cxgb4_uld_info {
struct t4_lro_mgr *lro_mgr,
struct napi_struct *napi);
void (*lro_flush)(struct t4_lro_mgr *);
+ int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
};
int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 4ef68f6..833d498 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -41,6 +41,7 @@
#include <linux/jiffies.h>
#include <linux/prefetch.h>
#include <linux/export.h>
+#include <net/xfrm.h>
#include <net/ipv6.h>
#include <net/tcp.h>
#include <net/busy_poll.h>
@@ -53,6 +54,7 @@
#include "t4_msg.h"
#include "t4fw_api.h"
#include "cxgb4_ptp.h"
+#include "cxgb4_uld.h"
/*
* Rx buffer size. We use largish buffers if possible but settle for single
@@ -110,14 +112,6 @@
#define NOMEM_TMR_IDX (SGE_NTIMERS - 1)
/*
- * Suspend an Ethernet Tx queue with fewer available descriptors than this.
- * This is the same as calc_tx_descs() for a TSO packet with
- * nr_frags == MAX_SKB_FRAGS.
- */
-#define ETHTXQ_STOP_THRES \
- (1 + DIV_ROUND_UP((3 * MAX_SKB_FRAGS) / 2 + (MAX_SKB_FRAGS & 1), 8))
-
-/*
* Suspension threshold for non-Ethernet Tx queues. We require enough room
* for a full sized WR.
*/
@@ -134,11 +128,6 @@
*/
#define MAX_CTRL_WR_LEN SGE_MAX_WR_LEN
-struct tx_sw_desc { /* SW state per Tx descriptor */
- struct sk_buff *skb;
- struct ulptx_sgl *sgl;
-};
-
struct rx_sw_desc { /* SW state per Rx descriptor */
struct page *page;
dma_addr_t dma_addr;
@@ -248,8 +237,8 @@ static inline bool fl_starving(const struct adapter *adapter,
return fl->avail - fl->pend_cred <= s->fl_starve_thres;
}
-static int map_skb(struct device *dev, const struct sk_buff *skb,
- dma_addr_t *addr)
+int cxgb4_map_skb(struct device *dev, const struct sk_buff *skb,
+ dma_addr_t *addr)
{
const skb_frag_t *fp, *end;
const struct skb_shared_info *si;
@@ -277,6 +266,7 @@ static int map_skb(struct device *dev, const struct sk_buff *skb,
out_err:
return -ENOMEM;
}
+EXPORT_SYMBOL(cxgb4_map_skb);
#ifdef CONFIG_NEED_DMA_MAP_STATE
static void unmap_skb(struct device *dev, const struct sk_buff *skb,
@@ -411,7 +401,7 @@ static inline int reclaimable(const struct sge_txq *q)
}
/**
- * reclaim_completed_tx - reclaims completed Tx descriptors
+ * cxgb4_reclaim_completed_tx - reclaims completed Tx descriptors
* @adap: the adapter
* @q: the Tx queue to reclaim completed descriptors from
* @unmap: whether the buffers should be unmapped for DMA
@@ -420,7 +410,7 @@ static inline int reclaimable(const struct sge_txq *q)
* and frees the associated buffers if possible. Called with the Tx
* queue locked.
*/
-static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
+inline void cxgb4_reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
bool unmap)
{
int avail = reclaimable(q);
@@ -437,6 +427,7 @@ static inline void reclaim_completed_tx(struct adapter *adap, struct sge_txq *q,
q->in_use -= avail;
}
}
+EXPORT_SYMBOL(cxgb4_reclaim_completed_tx);
static inline int get_buf_size(struct adapter *adapter,
const struct rx_sw_desc *d)
@@ -833,7 +824,7 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
}
/**
- * write_sgl - populate a scatter/gather list for a packet
+ * cxgb4_write_sgl - populate a scatter/gather list for a packet
* @skb: the packet
* @q: the Tx queue we are writing into
* @sgl: starting location for writing the SGL
@@ -849,9 +840,9 @@ static inline unsigned int calc_tx_descs(const struct sk_buff *skb)
* right after the end of the SGL but does not account for any potential
* wrap around, i.e., @end > @sgl.
*/
-static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
- struct ulptx_sgl *sgl, u64 *end, unsigned int start,
- const dma_addr_t *addr)
+void cxgb4_write_sgl(const struct sk_buff *skb, struct sge_txq *q,
+ struct ulptx_sgl *sgl, u64 *end, unsigned int start,
+ const dma_addr_t *addr)
{
unsigned int i, len;
struct ulptx_sge_pair *to;
@@ -903,6 +894,7 @@ static void write_sgl(const struct sk_buff *skb, struct sge_txq *q,
if ((uintptr_t)end & 8) /* 0-pad to multiple of 16 */
*end = 0;
}
+EXPORT_SYMBOL(cxgb4_write_sgl);
/* This function copies 64 byte coalesced work request to
* memory mapped BAR2 space. For coalesced WR SGE fetches
@@ -921,14 +913,14 @@ static void cxgb_pio_copy(u64 __iomem *dst, u64 *src)
}
/**
- * ring_tx_db - check and potentially ring a Tx queue's doorbell
+ * cxgb4_ring_tx_db - check and potentially ring a Tx queue's doorbell
* @adap: the adapter
* @q: the Tx queue
* @n: number of new descriptors to give to HW
*
* Ring the doorbel for a Tx queue.
*/
-static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
+inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
{
/* Make sure that all writes to the TX Descriptors are committed
* before we tell the hardware about them.
@@ -995,9 +987,10 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
wmb();
}
}
+EXPORT_SYMBOL(cxgb4_ring_tx_db);
/**
- * inline_tx_skb - inline a packet's data into Tx descriptors
+ * cxgb4_inline_tx_skb - inline a packet's data into Tx descriptors
* @skb: the packet
* @q: the Tx queue where the packet will be inlined
* @pos: starting position in the Tx queue where to inline the packet
@@ -1007,8 +1000,8 @@ static inline void ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
* Most of the complexity of this operation is dealing with wrap arounds
* in the middle of the packet we want to inline.
*/
-static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
- void *pos)
+void cxgb4_inline_tx_skb(const struct sk_buff *skb,
+ const struct sge_txq *q, void *pos)
{
u64 *p;
int left = (void *)q->stat - pos;
@@ -1030,6 +1023,7 @@ static void inline_tx_skb(const struct sk_buff *skb, const struct sge_txq *q,
if ((uintptr_t)p & 8)
*p = 0;
}
+EXPORT_SYMBOL(cxgb4_inline_tx_skb);
static void *inline_tx_skb_header(const struct sk_buff *skb,
const struct sge_txq *q, void *pos,
@@ -1199,6 +1193,11 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
pi = netdev_priv(dev);
adap = pi->adapter;
+ ssi = skb_shinfo(skb);
+
+ if (xfrm_offload(skb) && !ssi->gso_size)
+ return adap->uld[CXGB4_ULD_CRYPTO].tx_handler(skb, dev);
+
qidx = skb_get_queue_mapping(skb);
if (ptp_enabled) {
spin_lock(&adap->ptp_lock);
@@ -1215,7 +1214,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
}
skb_tx_timestamp(skb);
- reclaim_completed_tx(adap, &q->q, true);
+ cxgb4_reclaim_completed_tx(adap, &q->q, true);
cntrl = TXPKT_L4CSUM_DIS_F | TXPKT_IPCSUM_DIS_F;
#ifdef CONFIG_CHELSIO_T4_FCOE
@@ -1245,7 +1244,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
immediate = true;
if (!immediate &&
- unlikely(map_skb(adap->pdev_dev, skb, addr) < 0)) {
+ unlikely(cxgb4_map_skb(adap->pdev_dev, skb, addr) < 0)) {
q->mapping_err++;
if (ptp_enabled)
spin_unlock(&adap->ptp_lock);
@@ -1264,7 +1263,6 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
end = (u64 *)wr + flits;
len = immediate ? skb->len : 0;
- ssi = skb_shinfo(skb);
if (ssi->gso_size) {
struct cpl_tx_pkt_lso *lso = (void *)wr;
bool v6 = (ssi->gso_type & SKB_GSO_TCPV6) != 0;
@@ -1341,13 +1339,13 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
cpl->ctrl1 = cpu_to_be64(cntrl);
if (immediate) {
- inline_tx_skb(skb, &q->q, cpl + 1);
+ cxgb4_inline_tx_skb(skb, &q->q, cpl + 1);
dev_consume_skb_any(skb);
} else {
int last_desc;
- write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1), end, 0,
- addr);
+ cxgb4_write_sgl(skb, &q->q, (struct ulptx_sgl *)(cpl + 1),
+ end, 0, addr);
skb_orphan(skb);
last_desc = q->q.pidx + ndesc - 1;
@@ -1359,7 +1357,7 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
txq_advance(&q->q, ndesc);
- ring_tx_db(adap, &q->q, ndesc);
+ cxgb4_ring_tx_db(adap, &q->q, ndesc);
if (ptp_enabled)
spin_unlock(&adap->ptp_lock);
return NETDEV_TX_OK;
@@ -1369,9 +1367,9 @@ netdev_tx_t t4_eth_xmit(struct sk_buff *skb, struct net_device *dev)
* reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
* @q: the SGE control Tx queue
*
- * This is a variant of reclaim_completed_tx() that is used for Tx queues
- * that send only immediate data (presently just the control queues) and
- * thus do not have any sk_buffs to release.
+ * This is a variant of cxgb4_reclaim_completed_tx() that is used
+ * for Tx queues that send only immediate data (presently just
+ * the control queues) and thus do not have any sk_buffs to release.
*/
static inline void reclaim_completed_tx_imm(struct sge_txq *q)
{
@@ -1446,13 +1444,13 @@ static int ctrl_xmit(struct sge_ctrl_txq *q, struct sk_buff *skb)
}
wr = (struct fw_wr_hdr *)&q->q.desc[q->q.pidx];
- inline_tx_skb(skb, &q->q, wr);
+ cxgb4_inline_tx_skb(skb, &q->q, wr);
txq_advance(&q->q, ndesc);
if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES))
ctrlq_check_stop(q, wr);
- ring_tx_db(q->adap, &q->q, ndesc);
+ cxgb4_ring_tx_db(q->adap, &q->q, ndesc);
spin_unlock(&q->sendq.lock);
kfree_skb(skb);
@@ -1487,7 +1485,7 @@ static void restart_ctrlq(unsigned long data)
txq_advance(&q->q, ndesc);
spin_unlock(&q->sendq.lock);
- inline_tx_skb(skb, &q->q, wr);
+ cxgb4_inline_tx_skb(skb, &q->q, wr);
kfree_skb(skb);
if (unlikely(txq_avail(&q->q) < TXQ_STOP_THRES)) {
@@ -1500,14 +1498,15 @@ static void restart_ctrlq(unsigned long data)
}
}
if (written > 16) {
- ring_tx_db(q->adap, &q->q, written);
+ cxgb4_ring_tx_db(q->adap, &q->q, written);
written = 0;
}
spin_lock(&q->sendq.lock);
}
q->full = 0;
-ringdb: if (written)
- ring_tx_db(q->adap, &q->q, written);
+ringdb:
+ if (written)
+ cxgb4_ring_tx_db(q->adap, &q->q, written);
spin_unlock(&q->sendq.lock);
}
@@ -1644,7 +1643,7 @@ static void service_ofldq(struct sge_uld_txq *q)
*/
spin_unlock(&q->sendq.lock);
- reclaim_completed_tx(q->adap, &q->q, false);
+ cxgb4_reclaim_completed_tx(q->adap, &q->q, false);
flits = skb->priority; /* previously saved */
ndesc = flits_to_desc(flits);
@@ -1655,9 +1654,9 @@ static void service_ofldq(struct sge_uld_txq *q)
pos = (u64 *)&q->q.desc[q->q.pidx];
if (is_ofld_imm(skb))
- inline_tx_skb(skb, &q->q, pos);
- else if (map_skb(q->adap->pdev_dev, skb,
- (dma_addr_t *)skb->head)) {
+ cxgb4_inline_tx_skb(skb, &q->q, pos);
+ else if (cxgb4_map_skb(q->adap->pdev_dev, skb,
+ (dma_addr_t *)skb->head)) {
txq_stop_maperr(q);
spin_lock(&q->sendq.lock);
break;
@@ -1688,9 +1687,9 @@ static void service_ofldq(struct sge_uld_txq *q)
pos = (void *)txq->desc;
}
- write_sgl(skb, &q->q, (void *)pos,
- end, hdr_len,
- (dma_addr_t *)skb->head);
+ cxgb4_write_sgl(skb, &q->q, (void *)pos,
+ end, hdr_len,
+ (dma_addr_t *)skb->head);
#ifdef CONFIG_NEED_DMA_MAP_STATE
skb->dev = q->adap->port[0];
skb->destructor = deferred_unmap_destructor;
@@ -1704,7 +1703,7 @@ static void service_ofldq(struct sge_uld_txq *q)
txq_advance(&q->q, ndesc);
written += ndesc;
if (unlikely(written > 32)) {
- ring_tx_db(q->adap, &q->q, written);
+ cxgb4_ring_tx_db(q->adap, &q->q, written);
written = 0;
}
@@ -1719,7 +1718,7 @@ static void service_ofldq(struct sge_uld_txq *q)
kfree_skb(skb);
}
if (likely(written))
- ring_tx_db(q->adap, &q->q, written);
+ cxgb4_ring_tx_db(q->adap, &q->q, written);
/*Indicate that no thread is processing the Pending Send Queue
* currently.
--
1.8.3.1
Powered by blists - more mailing lists