[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <d1002e0511e7fbd1204cfe9bca3318599750139f.1657878101.git.ecree.xilinx@gmail.com>
Date: Fri, 15 Jul 2022 13:33:30 +0100
From: <ecree@...inx.com>
To: <davem@...emloft.net>, <kuba@...nel.org>, <pabeni@...hat.com>,
<linux-net-drivers@....com>
CC: <netdev@...r.kernel.org>, Edward Cree <ecree.xilinx@...il.com>
Subject: [PATCH net-next 08/10] sfc: support passing a representor to the EF100 TX path
From: Edward Cree <ecree.xilinx@...il.com>
A non-null efv in __ef100_enqueue_skb() indicates that the packet is
from that representor, should be transmitted with a suitable option
descriptor (to instruct the switch to deliver it to the representee),
and should not be accounted to the parent PF's stats or BQL.
Signed-off-by: Edward Cree <ecree.xilinx@...il.com>
---
drivers/net/ethernet/sfc/ef100_rep.h | 7 +++
drivers/net/ethernet/sfc/ef100_tx.c | 84 +++++++++++++++++++++++++--
drivers/net/ethernet/sfc/ef100_tx.h | 3 +
drivers/net/ethernet/sfc/net_driver.h | 1 +
drivers/net/ethernet/sfc/tx.c | 6 +-
drivers/net/ethernet/sfc/tx_common.c | 35 +++++++----
drivers/net/ethernet/sfc/tx_common.h | 3 +-
7 files changed, 122 insertions(+), 17 deletions(-)
diff --git a/drivers/net/ethernet/sfc/ef100_rep.h b/drivers/net/ethernet/sfc/ef100_rep.h
index a2f16bd59771..9365986f2841 100644
--- a/drivers/net/ethernet/sfc/ef100_rep.h
+++ b/drivers/net/ethernet/sfc/ef100_rep.h
@@ -15,6 +15,12 @@
#include "net_driver.h"
+struct efx_rep_sw_stats {
+ atomic64_t rx_packets, tx_packets;
+ atomic64_t rx_bytes, tx_bytes;
+ atomic64_t rx_dropped, tx_errors;
+};
+
/* Private data for an Efx representor */
struct efx_rep {
struct efx_nic *parent;
@@ -23,6 +29,7 @@ struct efx_rep {
u32 mport; /* m-port ID of corresponding VF */
unsigned int idx; /* VF index */
struct list_head list; /* entry on efx->vf_reps */
+ struct efx_rep_sw_stats stats;
};
int efx_ef100_vfrep_create(struct efx_nic *efx, unsigned int i);
diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c
index 26ef51d6b542..102ddc7e206a 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.c
+++ b/drivers/net/ethernet/sfc/ef100_tx.c
@@ -254,7 +254,8 @@ static void ef100_make_tso_desc(struct efx_nic *efx,
static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
const struct sk_buff *skb,
- unsigned int segment_count)
+ unsigned int segment_count,
+ struct efx_rep *efv)
{
unsigned int old_write_count = tx_queue->write_count;
unsigned int new_write_count = old_write_count;
@@ -272,6 +273,20 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
else
next_desc_type = ESE_GZ_TX_DESC_TYPE_SEND;
+ if (unlikely(efv)) {
+ /* Create TX override descriptor */
+ write_ptr = new_write_count & tx_queue->ptr_mask;
+ txd = ef100_tx_desc(tx_queue, write_ptr);
+ ++new_write_count;
+
+ tx_queue->packet_write_count = new_write_count;
+ EFX_POPULATE_OWORD_3(*txd,
+ ESF_GZ_TX_DESC_TYPE, ESE_GZ_TX_DESC_TYPE_PREFIX,
+ ESF_GZ_TX_PREFIX_EGRESS_MPORT, efv->mport,
+ ESF_GZ_TX_PREFIX_EGRESS_MPORT_EN, 1);
+ nr_descs--;
+ }
+
/* if it's a raw write (such as XDP) then always SEND single frames */
if (!skb)
nr_descs = 1;
@@ -306,6 +321,9 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
/* if it's a raw write (such as XDP) then always SEND */
next_desc_type = skb ? ESE_GZ_TX_DESC_TYPE_SEG :
ESE_GZ_TX_DESC_TYPE_SEND;
+ /* mark as an EFV buffer if applicable */
+ if (unlikely(efv))
+ buffer->flags |= EFX_TX_BUF_EFV;
} while (new_write_count != tx_queue->insert_count);
@@ -324,7 +342,7 @@ static void ef100_tx_make_descriptors(struct efx_tx_queue *tx_queue,
void ef100_tx_write(struct efx_tx_queue *tx_queue)
{
- ef100_tx_make_descriptors(tx_queue, NULL, 0);
+ ef100_tx_make_descriptors(tx_queue, NULL, 0, NULL);
ef100_tx_push_buffers(tx_queue);
}
@@ -350,6 +368,12 @@ void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event)
* function will free the SKB.
*/
int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+ return __ef100_enqueue_skb(tx_queue, skb, NULL);
+}
+
+int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ struct efx_rep *efv)
{
unsigned int old_insert_count = tx_queue->insert_count;
struct efx_nic *efx = tx_queue->efx;
@@ -376,16 +400,64 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
return 0;
}
+ if (unlikely(efv)) {
+ struct efx_tx_buffer *buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+
+ /* Drop representor packets if the queue is stopped.
+ * We currently don't assert backoff to representors so this is
+ * to make sure representor traffic can't starve the main
+ * net device.
+ * And, of course, if there are no TX descriptors left.
+ */
+ if (netif_tx_queue_stopped(tx_queue->core_txq) ||
+ unlikely(efx_tx_buffer_in_use(buffer))) {
+ atomic64_inc(&efv->stats.tx_errors);
+ rc = -ENOSPC;
+ goto err;
+ }
+
+ /* Also drop representor traffic if it could cause us to
+ * stop the queue. If we assert backoff and we haven't
+ * received traffic on the main net device recently then the
+ * TX watchdog can go off erroneously.
+ */
+ fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+ fill_level += efx_tx_max_skb_descs(efx);
+ if (fill_level > efx->txq_stop_thresh) {
+ struct efx_tx_queue *txq2;
+
+ /* Refresh cached fill level and re-check */
+ efx_for_each_channel_tx_queue(txq2, tx_queue->channel)
+ txq2->old_read_count = READ_ONCE(txq2->read_count);
+
+ fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
+ fill_level += efx_tx_max_skb_descs(efx);
+ if (fill_level > efx->txq_stop_thresh) {
+ atomic64_inc(&efv->stats.tx_errors);
+ rc = -ENOSPC;
+ goto err;
+ }
+ }
+
+ buffer->flags = EFX_TX_BUF_OPTION | EFX_TX_BUF_EFV;
+ tx_queue->insert_count++;
+ }
+
/* Map for DMA and create descriptors */
rc = efx_tx_map_data(tx_queue, skb, segments);
if (rc)
goto err;
- ef100_tx_make_descriptors(tx_queue, skb, segments);
+ ef100_tx_make_descriptors(tx_queue, skb, segments, efv);
fill_level = efx_channel_tx_old_fill_level(tx_queue->channel);
if (fill_level > efx->txq_stop_thresh) {
struct efx_tx_queue *txq2;
+ /* Because of checks above, representor traffic should
+ * not be able to stop the queue.
+ */
+ WARN_ON(efv);
+
netif_tx_stop_queue(tx_queue->core_txq);
/* Re-read after a memory barrier in case we've raced with
* the completion path. Otherwise there's a danger we'll never
@@ -404,8 +476,12 @@ int ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
/* If xmit_more then we don't need to push the doorbell, unless there
* are 256 descriptors already queued in which case we have to push to
* ensure we never push more than 256 at once.
+ *
+ * Always push for representor traffic, and don't account it to parent
+ * PF netdevice's BQL.
*/
- if (__netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
+ if (unlikely(efv) ||
+ __netdev_tx_sent_queue(tx_queue->core_txq, skb->len, xmit_more) ||
tx_queue->write_count - tx_queue->notify_count > 255)
ef100_tx_push_buffers(tx_queue);
diff --git a/drivers/net/ethernet/sfc/ef100_tx.h b/drivers/net/ethernet/sfc/ef100_tx.h
index ddc4b98fa6db..e9e11540fcde 100644
--- a/drivers/net/ethernet/sfc/ef100_tx.h
+++ b/drivers/net/ethernet/sfc/ef100_tx.h
@@ -13,6 +13,7 @@
#define EFX_EF100_TX_H
#include "net_driver.h"
+#include "ef100_rep.h"
int ef100_tx_probe(struct efx_tx_queue *tx_queue);
void ef100_tx_init(struct efx_tx_queue *tx_queue);
@@ -22,4 +23,6 @@ unsigned int ef100_tx_max_skb_descs(struct efx_nic *efx);
void ef100_ev_tx(struct efx_channel *channel, const efx_qword_t *p_event);
netdev_tx_t ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb);
+int __ef100_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+ struct efx_rep *efv);
#endif
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 80ee2c936f59..83631fab7994 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -178,6 +178,7 @@ struct efx_tx_buffer {
#define EFX_TX_BUF_OPTION 0x10 /* empty buffer for option descriptor */
#define EFX_TX_BUF_XDP 0x20 /* buffer was sent with XDP */
#define EFX_TX_BUF_TSO_V3 0x40 /* empty buffer for a TSO_V3 descriptor */
+#define EFX_TX_BUF_EFV 0x100 /* buffer was sent from representor */
/**
* struct efx_tx_queue - An Efx TX queue
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 79cc0bb76321..d12474042c84 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -559,6 +559,7 @@ netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
{
unsigned int pkts_compl = 0, bytes_compl = 0;
+ unsigned int efv_pkts_compl = 0;
unsigned int read_ptr;
bool finished = false;
@@ -580,7 +581,8 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
/* Need to check the flag before dequeueing. */
if (buffer->flags & EFX_TX_BUF_SKB)
finished = true;
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+ &efv_pkts_compl);
++tx_queue->read_count;
read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -589,7 +591,7 @@ void efx_xmit_done_single(struct efx_tx_queue *tx_queue)
tx_queue->pkts_compl += pkts_compl;
tx_queue->bytes_compl += bytes_compl;
- EFX_WARN_ON_PARANOID(pkts_compl != 1);
+ EFX_WARN_ON_PARANOID(pkts_compl + efv_pkts_compl != 1);
efx_xmit_done_check_empty(tx_queue);
}
diff --git a/drivers/net/ethernet/sfc/tx_common.c b/drivers/net/ethernet/sfc/tx_common.c
index 658ea2d34070..67e789b96c43 100644
--- a/drivers/net/ethernet/sfc/tx_common.c
+++ b/drivers/net/ethernet/sfc/tx_common.c
@@ -109,9 +109,11 @@ void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
/* Free any buffers left in the ring */
while (tx_queue->read_count != tx_queue->write_count) {
unsigned int pkts_compl = 0, bytes_compl = 0;
+ unsigned int efv_pkts_compl = 0;
buffer = &tx_queue->buffer[tx_queue->read_count & tx_queue->ptr_mask];
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+ &efv_pkts_compl);
++tx_queue->read_count;
}
@@ -146,7 +148,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer,
unsigned int *pkts_compl,
- unsigned int *bytes_compl)
+ unsigned int *bytes_compl,
+ unsigned int *efv_pkts_compl)
{
if (buffer->unmap_len) {
struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
@@ -164,9 +167,15 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
if (buffer->flags & EFX_TX_BUF_SKB) {
struct sk_buff *skb = (struct sk_buff *)buffer->skb;
- EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
- (*pkts_compl)++;
- (*bytes_compl) += skb->len;
+ if (unlikely(buffer->flags & EFX_TX_BUF_EFV)) {
+ EFX_WARN_ON_PARANOID(!efv_pkts_compl);
+ (*efv_pkts_compl)++;
+ } else {
+ EFX_WARN_ON_PARANOID(!pkts_compl || !bytes_compl);
+ (*pkts_compl)++;
+ (*bytes_compl) += skb->len;
+ }
+
if (tx_queue->timestamping &&
(tx_queue->completed_timestamp_major ||
tx_queue->completed_timestamp_minor)) {
@@ -199,7 +208,8 @@ void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
unsigned int index,
unsigned int *pkts_compl,
- unsigned int *bytes_compl)
+ unsigned int *bytes_compl,
+ unsigned int *efv_pkts_compl)
{
struct efx_nic *efx = tx_queue->efx;
unsigned int stop_index, read_ptr;
@@ -218,7 +228,8 @@ static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
return;
}
- efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+ efx_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl,
+ efv_pkts_compl);
++tx_queue->read_count;
read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
@@ -241,15 +252,17 @@ void efx_xmit_done_check_empty(struct efx_tx_queue *tx_queue)
void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
{
unsigned int fill_level, pkts_compl = 0, bytes_compl = 0;
+ unsigned int efv_pkts_compl = 0;
struct efx_nic *efx = tx_queue->efx;
EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
- efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+ efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl,
+ &efv_pkts_compl);
tx_queue->pkts_compl += pkts_compl;
tx_queue->bytes_compl += bytes_compl;
- if (pkts_compl > 1)
+ if (pkts_compl + efv_pkts_compl > 1)
++tx_queue->merge_events;
/* See if we need to restart the netif queue. This memory
@@ -274,6 +287,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
unsigned int insert_count)
{
+ unsigned int efv_pkts_compl = 0;
struct efx_tx_buffer *buffer;
unsigned int bytes_compl = 0;
unsigned int pkts_compl = 0;
@@ -282,7 +296,8 @@ void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
while (tx_queue->insert_count != insert_count) {
--tx_queue->insert_count;
buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
- efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
+ efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl,
+ &efv_pkts_compl);
}
}
diff --git a/drivers/net/ethernet/sfc/tx_common.h b/drivers/net/ethernet/sfc/tx_common.h
index bbab7f248250..d87aecbc7bf1 100644
--- a/drivers/net/ethernet/sfc/tx_common.h
+++ b/drivers/net/ethernet/sfc/tx_common.h
@@ -19,7 +19,8 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue);
void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
struct efx_tx_buffer *buffer,
unsigned int *pkts_compl,
- unsigned int *bytes_compl);
+ unsigned int *bytes_compl,
+ unsigned int *efv_pkts_compl);
static inline bool efx_tx_buffer_in_use(struct efx_tx_buffer *buffer)
{
Powered by blists - more mailing lists