[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20210628095950.ifbgzmsmpecmlol4@skbuf>
Date: Mon, 28 Jun 2021 09:59:51 +0000
From: Vladimir Oltean <vladimir.oltean@....com>
To: Vinicius Costa Gomes <vinicius.gomes@...el.com>
CC: "netdev@...r.kernel.org" <netdev@...r.kernel.org>,
"jhs@...atatu.com" <jhs@...atatu.com>,
"xiyou.wangcong@...il.com" <xiyou.wangcong@...il.com>,
"jiri@...nulli.us" <jiri@...nulli.us>,
"kuba@...nel.org" <kuba@...nel.org>, Po Liu <po.liu@....com>,
"intel-wired-lan@...ts.osuosl.org" <intel-wired-lan@...ts.osuosl.org>,
"anthony.l.nguyen@...el.com" <anthony.l.nguyen@...el.com>,
"mkubecek@...e.cz" <mkubecek@...e.cz>
Subject: Re: [PATCH net-next v4 12/12] igc: Add support for Frame Preemption
verification
On Fri, Jun 25, 2021 at 05:33:14PM -0700, Vinicius Costa Gomes wrote:
> Add support for sending/receiving Frame Preemption verification
> frames.
>
> The i225 hardware doesn't implement the process of verification
> internally, this is left to the driver.
>
> Add a simple implementation of the state machine defined in IEEE
> 802.3-2018, Section 99.4.7.
>
> For now, the state machine is started manually by the user, when
> enabling verification. Example:
>
> $ ethtool --set-frame-preemption IFACE disable-verify off
>
> The "verified" condition is set to true when the SMD-V frame is sent,
> and the SMD-R frame is received. So, it only tracks the transmission
> side. This seems to be what's expected from IEEE 802.3-2018.
>
> Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@...el.com>
> ---
> drivers/net/ethernet/intel/igc/igc.h | 15 ++
> drivers/net/ethernet/intel/igc/igc_defines.h | 13 ++
> drivers/net/ethernet/intel/igc/igc_ethtool.c | 20 +-
> drivers/net/ethernet/intel/igc/igc_main.c | 216 +++++++++++++++++++
> 4 files changed, 261 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
> index 9b2ddcbf65fb..84234efed781 100644
> --- a/drivers/net/ethernet/intel/igc/igc.h
> +++ b/drivers/net/ethernet/intel/igc/igc.h
> @@ -122,6 +122,13 @@ struct igc_ring {
> struct xsk_buff_pool *xsk_pool;
> } ____cacheline_internodealigned_in_smp;
>
> +enum frame_preemption_state {
> + FRAME_PREEMPTION_STATE_FAILED,
> + FRAME_PREEMPTION_STATE_DONE,
> + FRAME_PREEMPTION_STATE_START,
> + FRAME_PREEMPTION_STATE_SENT,
> +};
> +
> /* Board specific private data structure */
> struct igc_adapter {
> struct net_device *netdev;
> @@ -240,6 +247,14 @@ struct igc_adapter {
> struct timespec64 start;
> struct timespec64 period;
> } perout[IGC_N_PEROUT];
> +
> + struct delayed_work fp_verification_work;
> + unsigned long fp_start;
> + bool fp_received_smd_v;
> + bool fp_received_smd_r;
> + unsigned int fp_verify_cnt;
> + enum frame_preemption_state fp_tx_state;
> + bool fp_disable_verify;
> };
>
> void igc_up(struct igc_adapter *adapter);
> diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
> index a2ea057d8e6e..cf46f5d5a505 100644
> --- a/drivers/net/ethernet/intel/igc/igc_defines.h
> +++ b/drivers/net/ethernet/intel/igc/igc_defines.h
> @@ -268,6 +268,8 @@
> #define IGC_TXD_DTYP_C 0x00000000 /* Context Descriptor */
> #define IGC_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */
> #define IGC_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */
> +#define IGC_TXD_POPTS_SMD_V 0x10 /* Transmitted packet is a SMD-Verify */
> +#define IGC_TXD_POPTS_SMD_R 0x20 /* Transmitted packet is a SMD-Response */
> #define IGC_TXD_CMD_EOP 0x01000000 /* End of Packet */
> #define IGC_TXD_CMD_IC 0x04000000 /* Insert Checksum */
> #define IGC_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */
> @@ -327,9 +329,20 @@
>
> #define IGC_RXDEXT_STATERR_LB 0x00040000
>
> +#define IGC_RXD_STAT_SMD_V 0x2000 /* Received packet is SMD-Verify packet */
> +#define IGC_RXD_STAT_SMD_R 0x4000 /* Received packet is SMD-Response packet */
> +
So the i225 gives you the ability to select from multiple
Start-of-mPacket-Delimiter values on a per-TX descriptor basis?
And this is in addition to configuring that TX ring as preemptable I
guess? Because I notice that you're sending on the TX ring affine to the
current CPU that the verification work item is running on (which you
don't check anywhere that it is configured as going to the pMAC or not).
And on RX, it always gives you the kind of SMD that the packet had
(including the classic SFD for express packets)?
Cool.
It would be nice if I could connect back to back an i225 board with an
NXP LS1028A to see if the verification state machines pass both ways (on
LS1028A it is 100% hardware based, we just enable/disable the feature
and we can monitor the state changes via an interrupt).
> /* Advanced Receive Descriptor bit definitions */
> #define IGC_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */
>
> +#define IGC_RXDADV_STAT_SMD_TYPE_MASK 0x06000
> +#define IGC_RXDADV_STAT_SMD_TYPE_SHIFT 13
> +
> +#define IGC_SMD_TYPE_SFD 0x0
> +#define IGC_SMD_TYPE_SMD_V 0x1
> +#define IGC_SMD_TYPE_SMD_R 0x2
> +#define IGC_SMD_TYPE_COMPLETE 0x3
> +
> #define IGC_RXDEXT_STATERR_L4E 0x20000000
> #define IGC_RXDEXT_STATERR_IPE 0x40000000
> #define IGC_RXDEXT_STATERR_RXE 0x80000000
> diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> index 84d5afe92154..f52a7be3af66 100644
> --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
> +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
> @@ -1649,6 +1649,8 @@ static int igc_ethtool_get_preempt(struct net_device *netdev,
>
> fpcmd->enabled = adapter->frame_preemption_active;
> fpcmd->add_frag_size = adapter->add_frag_size;
> + fpcmd->verified = adapter->fp_tx_state == FRAME_PREEMPTION_STATE_DONE;
> + fpcmd->disable_verify = adapter->fp_disable_verify;
>
> return 0;
> }
> @@ -1664,10 +1666,22 @@ static int igc_ethtool_set_preempt(struct net_device *netdev,
> return -EINVAL;
> }
>
> - adapter->frame_preemption_active = fpcmd->enabled;
> - adapter->add_frag_size = fpcmd->add_frag_size;
> + if (!fpcmd->disable_verify && adapter->fp_disable_verify) {
> + adapter->fp_tx_state = FRAME_PREEMPTION_STATE_START;
> + schedule_delayed_work(&adapter->fp_verification_work, msecs_to_jiffies(10));
Not sure how much you'd like to tune this, but the spec has a
configurable verifyTime between 1 ms and 128 ms. You chose the default
value, so we should be ok for now.
> + }
>
> - return igc_tsn_offload_apply(adapter);
> + adapter->fp_disable_verify = fpcmd->disable_verify;
> +
> + if (adapter->frame_preemption_active != fpcmd->enabled ||
> + adapter->add_frag_size != fpcmd->add_frag_size) {
> + adapter->frame_preemption_active = fpcmd->enabled;
> + adapter->add_frag_size = fpcmd->add_frag_size;
> +
> + return igc_tsn_offload_apply(adapter);
> + }
> +
> + return 0;
> }
>
> static int igc_ethtool_begin(struct net_device *netdev)
> diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
> index 20dac04a02f2..ed55bd13e4a1 100644
> --- a/drivers/net/ethernet/intel/igc/igc_main.c
> +++ b/drivers/net/ethernet/intel/igc/igc_main.c
> @@ -28,6 +28,11 @@
> #define IGC_XDP_TX BIT(1)
> #define IGC_XDP_REDIRECT BIT(2)
>
> +#define IGC_FP_TIMEOUT msecs_to_jiffies(100)
> +#define IGC_MAX_VERIFY_CNT 3
> +
> +#define IGC_FP_SMD_FRAME_SIZE 60
> +
> static int debug = -1;
>
> MODULE_AUTHOR("Intel Corporation, <linux.nics@...el.com>");
> @@ -2169,6 +2174,79 @@ static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
> return 0;
> }
>
> +static int igc_fp_init_smd_frame(struct igc_ring *ring, struct igc_tx_buffer *buffer,
> + struct sk_buff *skb)
> +{
> + dma_addr_t dma;
> + unsigned int size;
> +
> + size = skb_headlen(skb);
> +
> + dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
> + if (dma_mapping_error(ring->dev, dma)) {
> + netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
> + return -ENOMEM;
> + }
> +
> + buffer->skb = skb;
> + buffer->protocol = 0;
> + buffer->bytecount = skb->len;
> + buffer->gso_segs = 1;
> + buffer->time_stamp = jiffies;
> + dma_unmap_len_set(buffer, len, skb->len);
> + dma_unmap_addr_set(buffer, dma, dma);
> +
> + return 0;
> +}
> +
> +static int igc_fp_init_tx_descriptor(struct igc_ring *ring,
> + struct sk_buff *skb, int type)
> +{
> + struct igc_tx_buffer *buffer;
> + union igc_adv_tx_desc *desc;
> + u32 cmd_type, olinfo_status;
> + int err;
> +
> + if (!igc_desc_unused(ring))
> + return -EBUSY;
> +
> + buffer = &ring->tx_buffer_info[ring->next_to_use];
> + err = igc_fp_init_smd_frame(ring, buffer, skb);
> + if (err)
> + return err;
> +
> + cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
> + IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
> + buffer->bytecount;
> + olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
> +
> + switch (type) {
> + case IGC_SMD_TYPE_SMD_V:
> + olinfo_status |= (IGC_TXD_POPTS_SMD_V << 8);
> + break;
> + case IGC_SMD_TYPE_SMD_R:
> + olinfo_status |= (IGC_TXD_POPTS_SMD_R << 8);
> + break;
> + default:
> + return -EINVAL;
> + }
> +
> + desc = IGC_TX_DESC(ring, ring->next_to_use);
> + desc->read.cmd_type_len = cpu_to_le32(cmd_type);
> + desc->read.olinfo_status = cpu_to_le32(olinfo_status);
> + desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
> +
> + netdev_tx_sent_queue(txring_txq(ring), skb->len);
> +
> + buffer->next_to_watch = desc;
> +
> + ring->next_to_use++;
> + if (ring->next_to_use == ring->count)
> + ring->next_to_use = 0;
> +
> + return 0;
> +}
> +
> static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
> int cpu)
> {
> @@ -2299,6 +2377,19 @@ static void igc_update_rx_stats(struct igc_q_vector *q_vector,
> q_vector->rx.total_bytes += bytes;
> }
>
> +static int igc_rx_desc_smd_type(union igc_adv_rx_desc *rx_desc)
> +{
> + u32 status = le32_to_cpu(rx_desc->wb.upper.status_error);
> +
> + return (status & IGC_RXDADV_STAT_SMD_TYPE_MASK)
> + >> IGC_RXDADV_STAT_SMD_TYPE_SHIFT;
> +}
> +
> +static bool igc_check_smd_frame(struct igc_rx_buffer *rx_buffer, unsigned int size)
> +{
> + return size == 60;
You should probably also verify that the contents is 60 octets of zeroes (sans the mCRC)?
> +}
> +
> static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
> {
> unsigned int total_bytes = 0, total_packets = 0;
> @@ -2315,6 +2406,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
> ktime_t timestamp = 0;
> struct xdp_buff xdp;
> int pkt_offset = 0;
> + int smd_type;
> void *pktbuf;
>
> /* return some buffers to hardware, one at a time is too slow */
> @@ -2346,6 +2438,22 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
> size -= IGC_TS_HDR_LEN;
> }
>
> + smd_type = igc_rx_desc_smd_type(rx_desc);
> +
> + if (smd_type == IGC_SMD_TYPE_SMD_V || smd_type == IGC_SMD_TYPE_SMD_R) {
I guess the performance people will love you for this change. You should
probably guard it by an "if (unlikely(disableVerify == false))" condition.
> + if (igc_check_smd_frame(rx_buffer, size)) {
> + adapter->fp_received_smd_v = smd_type == IGC_SMD_TYPE_SMD_V;
> + adapter->fp_received_smd_r = smd_type == IGC_SMD_TYPE_SMD_R;
> + schedule_delayed_work(&adapter->fp_verification_work, 0);
> + }
> +
> + /* Advance the ring next-to-clean */
> + igc_is_non_eop(rx_ring, rx_desc);
> +
> + cleaned_count++;
> + continue;
> + }
> +
> if (!skb) {
> xdp_init_buff(&xdp, truesize, &rx_ring->xdp_rxq);
> xdp_prepare_buff(&xdp, pktbuf - igc_rx_offset(rx_ring),
> @@ -5607,6 +5715,107 @@ static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
> return igc_tsn_offload_apply(adapter);
> }
>
> +/* I225 doesn't send the SMD frames automatically, we need to handle
> + * them ourselves.
> + */
> +static int igc_xmit_smd_frame(struct igc_adapter *adapter, int type)
> +{
> + int cpu = smp_processor_id();
> + struct netdev_queue *nq;
> + struct igc_ring *ring;
> + struct sk_buff *skb;
> + void *data;
> + int err;
> +
> + if (!netif_running(adapter->netdev))
> + return -ENOTCONN;
> +
> + /* FIXME: rename this function to something less specific, as
> + * it can be used outside XDP.
> + */
> + ring = igc_xdp_get_tx_ring(adapter, cpu);
> + nq = txring_txq(ring);
> +
> + skb = alloc_skb(IGC_FP_SMD_FRAME_SIZE, GFP_KERNEL);
> + if (!skb)
> + return -ENOMEM;
> +
> + data = skb_put(skb, IGC_FP_SMD_FRAME_SIZE);
> + memset(data, 0, IGC_FP_SMD_FRAME_SIZE);
> +
> + __netif_tx_lock(nq, cpu);
> +
> + err = igc_fp_init_tx_descriptor(ring, skb, type);
> +
> + igc_flush_tx_descriptors(ring);
> +
> + __netif_tx_unlock(nq);
> +
> + return err;
> +}
> +
> +static void igc_fp_verification_work(struct work_struct *work)
> +{
> + struct delayed_work *dwork = to_delayed_work(work);
> + struct igc_adapter *adapter;
> + int err;
> +
> + adapter = container_of(dwork, struct igc_adapter, fp_verification_work);
> +
> + if (adapter->fp_disable_verify)
> + goto done;
> +
> + switch (adapter->fp_tx_state) {
> + case FRAME_PREEMPTION_STATE_START:
> + adapter->fp_received_smd_r = false;
> + err = igc_xmit_smd_frame(adapter, IGC_SMD_TYPE_SMD_V);
> + if (err < 0)
> + netdev_err(adapter->netdev, "Error sending SMD-V frame\n");
On TX error should you really advance to the STATE_SENT?
> +
> + adapter->fp_tx_state = FRAME_PREEMPTION_STATE_SENT;
> + adapter->fp_start = jiffies;
> + schedule_delayed_work(&adapter->fp_verification_work, IGC_FP_TIMEOUT);
> + break;
> +
> + case FRAME_PREEMPTION_STATE_SENT:
> + if (adapter->fp_received_smd_r) {
> + adapter->fp_tx_state = FRAME_PREEMPTION_STATE_DONE;
> + adapter->fp_received_smd_r = false;
> + break;
> + }
> +
> + if (time_is_before_jiffies(adapter->fp_start + IGC_FP_TIMEOUT)) {
> + adapter->fp_verify_cnt++;
> + netdev_warn(adapter->netdev, "Timeout waiting for SMD-R frame\n");
> +
> + if (adapter->fp_verify_cnt > IGC_MAX_VERIFY_CNT) {
> + adapter->fp_verify_cnt = 0;
> + adapter->fp_tx_state = FRAME_PREEMPTION_STATE_FAILED;
> + netdev_err(adapter->netdev,
> + "Exceeded number of attempts for frame preemption verification\n");
> + } else {
> + adapter->fp_tx_state = FRAME_PREEMPTION_STATE_START;
> + }
> + schedule_delayed_work(&adapter->fp_verification_work, IGC_FP_TIMEOUT);
> + }
> +
> + break;
> +
> + case FRAME_PREEMPTION_STATE_FAILED:
> + case FRAME_PREEMPTION_STATE_DONE:
> + break;
> + }
> +
> +done:
> + if (adapter->fp_received_smd_v) {
> + err = igc_xmit_smd_frame(adapter, IGC_SMD_TYPE_SMD_R);
> + if (err < 0)
> + netdev_err(adapter->netdev, "Error sending SMD-R frame\n");
> +
> + adapter->fp_received_smd_v = false;
> + }
> +}
> +
> static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
> void *type_data)
> {
> @@ -6023,6 +6232,7 @@ static int igc_probe(struct pci_dev *pdev,
>
> INIT_WORK(&adapter->reset_task, igc_reset_task);
> INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
> + INIT_DELAYED_WORK(&adapter->fp_verification_work, igc_fp_verification_work);
>
> /* Initialize link properties that are user-changeable */
> adapter->fc_autoneg = true;
> @@ -6044,6 +6254,12 @@ static int igc_probe(struct pci_dev *pdev,
>
> igc_ptp_init(adapter);
>
> + /* FIXME: This sets the default to not do the verification
> + * automatically, when we have support in multiple
> + * controllers, this default can be changed.
> + */
> + adapter->fp_disable_verify = true;
> +
Hmmmmm. So we need to instruct our users to explicitly enable
verification in their ethtool-based scripts, since the default values
will vary wildly from one vendor to another. On LS1028A I see no reason
why verification would be disabled by default.
> /* reset the hardware with the new settings */
> igc_reset(adapter);
>
> --
> 2.32.0
>
Powered by blists - more mailing lists