lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Tue, 28 Jun 2022 21:48:06 +0200
From:   Alexander Lobakin <alexandr.lobakin@...el.com>
To:     Alexei Starovoitov <ast@...nel.org>,
        Daniel Borkmann <daniel@...earbox.net>,
        Andrii Nakryiko <andrii@...nel.org>
Cc:     Alexander Lobakin <alexandr.lobakin@...el.com>,
        Larysa Zaremba <larysa.zaremba@...el.com>,
        Michal Swiatkowski <michal.swiatkowski@...ux.intel.com>,
        Jesper Dangaard Brouer <hawk@...nel.org>,
        Björn Töpel <bjorn@...nel.org>,
        Magnus Karlsson <magnus.karlsson@...el.com>,
        Maciej Fijalkowski <maciej.fijalkowski@...el.com>,
        Jonathan Lemon <jonathan.lemon@...il.com>,
        Toke Hoiland-Jorgensen <toke@...hat.com>,
        Lorenzo Bianconi <lorenzo@...nel.org>,
        "David S. Miller" <davem@...emloft.net>,
        Eric Dumazet <edumazet@...gle.com>,
        Jakub Kicinski <kuba@...nel.org>,
        Paolo Abeni <pabeni@...hat.com>,
        Jesse Brandeburg <jesse.brandeburg@...el.com>,
        John Fastabend <john.fastabend@...il.com>,
        Yajun Deng <yajun.deng@...ux.dev>,
        Willem de Bruijn <willemb@...gle.com>, bpf@...r.kernel.org,
        netdev@...r.kernel.org, linux-kernel@...r.kernel.org,
        xdp-hints@...-project.net
Subject: [PATCH RFC bpf-next 46/52] net, ice: use an onstack &xdp_meta_generic_rx to store HW frame info

To be able to pass HW-provided frame metadata, such as hash,
checksum status etc., to BPF and XSK programs, unify the container
which is used to store it regardless of an XDP program presence or
a verdict returned by it. Use an intermediate onstack
&xdp_meta_generic_rx before filling skb fields and switch descriptor
parsing functions to use it instead of an &sk_buff.
This works the same way how &xdp_buff is being filled before forming
an skb. If metadata generation is enabled, the actual space in front
of a frame will be used in the upcoming changes.
Using &xdp_meta_generic_rx instead of full-blown &xdp_meta_generic
reduces text size by 32 bytes per function.

Signed-off-by: Alexander Lobakin <alexandr.lobakin@...el.com>
---
 drivers/net/ethernet/intel/ice/ice_ptp.c      |  19 ++--
 drivers/net/ethernet/intel/ice/ice_ptp.h      |  17 ++-
 drivers/net/ethernet/intel/ice/ice_txrx.c     |   4 +-
 drivers/net/ethernet/intel/ice/ice_txrx.h     |   1 +
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 105 ++++++++++--------
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h |  12 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c      |   4 +-
 7 files changed, 91 insertions(+), 71 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c
index ef9344ef0d8e..d4d955152682 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.c
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.c
@@ -1795,24 +1795,22 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr)
 
 /**
  * ice_ptp_rx_hwtstamp - Check for an Rx timestamp
- * @rx_ring: Ring to get the VSI info
  * @rx_desc: Receive descriptor
- * @skb: Particular skb to send timestamp with
+ * @rx_ring: Ring to get the VSI info
+ * @md: Metadata to set timestamp in
  *
  * The driver receives a notification in the receive descriptor with timestamp.
  * The timestamp is in ns, so we must convert the result first.
  */
-void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb)
+void ice_ptp_rx_hwtstamp(struct xdp_meta_generic *md,
+			 const union ice_32b_rx_flex_desc *rx_desc,
+			 const struct ice_rx_ring *rx_ring)
 {
 	u32 ts_high;
 	u64 ts_ns;
 
-	/* Populate timesync data into skb */
+	/* Populate timesync data into md */
 	if (rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID) {
-		struct skb_shared_hwtstamps *hwtstamps;
-
 		/* Use ice_ptp_extend_32b_ts directly, using the ring-specific
 		 * cached PHC value, rather than accessing the PF. This also
 		 * allows us to simply pass the upper 32bits of nanoseconds
@@ -1822,9 +1820,8 @@ ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
 		ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high);
 		ts_ns = ice_ptp_extend_32b_ts(rx_ring->cached_phctime, ts_high);
 
-		hwtstamps = skb_hwtstamps(skb);
-		memset(hwtstamps, 0, sizeof(*hwtstamps));
-		hwtstamps->hwtstamp = ns_to_ktime(ts_ns);
+		xdp_meta_rx_tstamp_present_set(md, 1);
+		xdp_meta_rx_tstamp_set(md, ts_ns);
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h
index 10e396abf130..488b6bb01605 100644
--- a/drivers/net/ethernet/intel/ice/ice_ptp.h
+++ b/drivers/net/ethernet/intel/ice/ice_ptp.h
@@ -228,8 +228,12 @@ struct ice_ptp {
 #define N_EXT_TS_E810_NO_SMA		2
 #define ETH_GLTSYN_ENA(_i)		(0x03000348 + ((_i) * 4))
 
-#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 struct ice_pf;
+struct ice_rx_ring;
+struct xdp_meta_generic;
+union ice_32b_rx_flex_desc;
+
+#if IS_ENABLED(CONFIG_PTP_1588_CLOCK)
 int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 int ice_ptp_get_ts_config(struct ice_pf *pf, struct ifreq *ifr);
 void ice_ptp_cfg_timestamp(struct ice_pf *pf, bool ena);
@@ -238,9 +242,9 @@ int ice_get_ptp_clock_index(struct ice_pf *pf);
 s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb);
 void ice_ptp_process_ts(struct ice_pf *pf);
 
-void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb);
+void ice_ptp_rx_hwtstamp(struct xdp_meta_generic *md,
+			 const union ice_32b_rx_flex_desc *rx_desc,
+			 const struct ice_rx_ring *rx_ring);
 void ice_ptp_reset(struct ice_pf *pf);
 void ice_ptp_prepare_for_reset(struct ice_pf *pf);
 void ice_ptp_init(struct ice_pf *pf);
@@ -271,8 +275,9 @@ ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb)
 
 static inline void ice_ptp_process_ts(struct ice_pf *pf) { }
 static inline void
-ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring,
-		    union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { }
+ice_ptp_rx_hwtstamp(struct xdp_meta_generic *md,
+		    const union ice_32b_rx_flex_desc *rx_desc,
+		    const struct ice_rx_ring *rx_ring) { }
 static inline void ice_ptp_reset(struct ice_pf *pf) { }
 static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { }
 static inline void ice_ptp_init(struct ice_pf *pf) { }
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index ffea5138a7e8..c679f7c30bdc 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1123,6 +1123,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 	/* start the loop to process Rx packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
+		struct xdp_meta_generic_rx md;
 		struct ice_rx_buf *rx_buf;
 		unsigned char *hard_start;
 		unsigned int size;
@@ -1239,7 +1240,8 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget)
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 
-		ice_process_skb_fields(rx_ring, rx_desc, skb);
+		ice_xdp_build_meta(&md, rx_desc, rx_ring, 0);
+		__xdp_populate_skb_meta_generic(skb, &md);
 
 		ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb);
 		ice_receive_skb(rx_ring, skb);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 1fc31ab0bf33..a814709deb50 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -4,6 +4,7 @@
 #ifndef _ICE_TXRX_H_
 #define _ICE_TXRX_H_
 
+#include <net/xdp_meta.h>
 #include "ice_type.h"
 
 #define ICE_DFLT_IRQ_WORK	256
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 92c001baa2cc..7550e2ed8936 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -43,36 +43,37 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val)
  * @decoded: the decoded ptype value from the descriptor
  *
  * Returns appropriate hash type (such as PKT_HASH_TYPE_L2/L3/L4) to be used by
- * skb_set_hash based on PTYPE as parsed by HW Rx pipeline and is part of
- * Rx desc.
+ * xdp_meta_rx_hash_type_set() based on PTYPE as parsed by HW Rx pipeline and
+ * is part of Rx desc.
  */
-static enum pkt_hash_types
+static u32
 ice_ptype_to_htype(struct ice_rx_ptype_decoded decoded)
 {
 	if (!decoded.known)
-		return PKT_HASH_TYPE_NONE;
+		return XDP_META_RX_HASH_NONE;
 	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY4)
-		return PKT_HASH_TYPE_L4;
+		return XDP_META_RX_HASH_L4;
 	if (decoded.payload_layer == ICE_RX_PTYPE_PAYLOAD_LAYER_PAY3)
-		return PKT_HASH_TYPE_L3;
+		return XDP_META_RX_HASH_L3;
 	if (decoded.outer_ip == ICE_RX_PTYPE_OUTER_L2)
-		return PKT_HASH_TYPE_L2;
+		return XDP_META_RX_HASH_L2;
 
-	return PKT_HASH_TYPE_NONE;
+	return XDP_META_RX_HASH_NONE;
 }
 
 /**
- * ice_rx_hash - set the hash value in the skb
+ * ice_rx_hash - set the hash value in the medatadata
+ * @md: pointer to current metadata
  * @rx_ring: descriptor ring
  * @rx_desc: specific descriptor
- * @skb: pointer to current skb
  * @decoded: the decoded ptype value from the descriptor
  */
-static void
-ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
-	    struct sk_buff *skb, struct ice_rx_ptype_decoded decoded)
+static void ice_rx_hash(struct xdp_meta_generic *md,
+			const struct ice_rx_ring *rx_ring,
+			const union ice_32b_rx_flex_desc *rx_desc,
+			struct ice_rx_ptype_decoded decoded)
 {
-	struct ice_32b_rx_flex_desc_nic *nic_mdid;
+	const struct ice_32b_rx_flex_desc_nic *nic_mdid;
 	u32 hash;
 
 	if (!(rx_ring->netdev->features & NETIF_F_RXHASH))
@@ -81,24 +82,24 @@ ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
 	if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC)
 		return;
 
-	nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc;
+	nic_mdid = (typeof(nic_mdid))rx_desc;
 	hash = le32_to_cpu(nic_mdid->rss_hash);
-	skb_set_hash(skb, hash, ice_ptype_to_htype(decoded));
+
+	xdp_meta_rx_hash_type_set(md, ice_ptype_to_htype(decoded));
+	xdp_meta_rx_hash_set(md, hash);
 }
 
 /**
- * ice_rx_csum - Indicate in skb if checksum is good
+ * ice_rx_csum - Indicate in metadata if checksum is good
+ * @md: metadata currently being filled
  * @ring: the ring we care about
- * @skb: skb currently being received and modified
  * @rx_desc: the receive descriptor
  * @decoded: the decoded packet type parsed by hardware
- *
- * skb->protocol must be set before this function is called
  */
-static void
-ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
-	    union ice_32b_rx_flex_desc *rx_desc,
-	    struct ice_rx_ptype_decoded decoded)
+static void ice_rx_csum(struct xdp_meta_generic *md,
+			const struct ice_rx_ring *ring,
+			const union ice_32b_rx_flex_desc *rx_desc,
+			struct ice_rx_ptype_decoded decoded)
 {
 	u16 rx_status0, rx_status1;
 	bool ipv4, ipv6;
@@ -106,10 +107,6 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
 	rx_status0 = le16_to_cpu(rx_desc->wb.status_error0);
 	rx_status1 = le16_to_cpu(rx_desc->wb.status_error1);
 
-	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
-	skb->ip_summed = CHECKSUM_NONE;
-	skb_checksum_none_assert(skb);
-
 	/* check if Rx checksum is enabled */
 	if (!(ring->netdev->features & NETIF_F_RXCSUM))
 		return;
@@ -149,14 +146,14 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
 	 * we are indicating we validated the inner checksum.
 	 */
 	if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
-		skb->csum_level = 1;
+		xdp_meta_rx_csum_level_set(md, 1);
 
 	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
 	switch (decoded.inner_prot) {
 	case ICE_RX_PTYPE_INNER_PROT_TCP:
 	case ICE_RX_PTYPE_INNER_PROT_UDP:
 	case ICE_RX_PTYPE_INNER_PROT_SCTP:
-		skb->ip_summed = CHECKSUM_UNNECESSARY;
+		xdp_meta_rx_csum_status_set(md, XDP_META_RX_CSUM_OK);
 		break;
 	default:
 		break;
@@ -167,7 +164,13 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb,
 	ring->vsi->back->hw_csum_rx_error++;
 }
 
-static void ice_rx_vlan(struct sk_buff *skb,
+#define xdp_meta_rx_vlan_from_feat(feat) ({			\
+	((feat) & NETIF_F_HW_VLAN_CTAG_RX) ? XDP_META_RX_CVID :	\
+	((feat) & NETIF_F_HW_VLAN_STAG_RX) ? XDP_META_RX_SVID :	\
+	XDP_META_RX_VLAN_NONE;					\
+})
+
+static void ice_rx_vlan(struct xdp_meta_generic *md,
 			const struct ice_rx_ring *rx_ring,
 			const union ice_32b_rx_flex_desc *rx_desc)
 {
@@ -181,42 +184,48 @@ static void ice_rx_vlan(struct sk_buff *skb,
 	if (!non_zero_vlan)
 		return;
 
-	if ((features & NETIF_F_HW_VLAN_CTAG_RX))
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
-	else if ((features & NETIF_F_HW_VLAN_STAG_RX))
-		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag);
+	xdp_meta_rx_vlan_type_set(md, xdp_meta_rx_vlan_from_feat(features));
+	xdp_meta_rx_vid_set(md, vlan_tag);
 }
 
 /**
- * ice_process_skb_fields - Populate skb header fields from Rx descriptor
- * @rx_ring: Rx descriptor ring packet is being transacted on
+ * __ice_xdp_build_meta - Populate XDP generic metadata fields from Rx desc
+ * @rx_md: pointer to the metadata structure to be populated
  * @rx_desc: pointer to the EOP Rx descriptor
- * @skb: pointer to current skb being populated
+ * @rx_ring: Rx descriptor ring packet is being transacted on
+ * @full_id: full ID (BTF ID + type ID) to fill in
  *
  * This function checks the ring, descriptor, and packet information in
  * order to populate the hash, checksum, VLAN, protocol, and
- * other fields within the skb.
+ * other fields within the metadata.
  */
-void
-ice_process_skb_fields(struct ice_rx_ring *rx_ring,
-		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb)
+void __ice_xdp_build_meta(struct xdp_meta_generic_rx *rx_md,
+			  const union ice_32b_rx_flex_desc *rx_desc,
+			  const struct ice_rx_ring *rx_ring,
+			  __le64 full_id)
 {
+	struct xdp_meta_generic *md = to_gen_md(rx_md);
 	struct ice_rx_ptype_decoded decoded;
 	u16 ptype;
 
-	skb_record_rx_queue(skb, rx_ring->q_index);
+	xdp_meta_init(&md->id, full_id);
+	md->rx_hash = 0;
+	md->rx_csum = 0;
+	md->rx_flags = 0;
+
+	xdp_meta_rx_qid_present_set(md, 1);
+	xdp_meta_rx_qid_set(md, rx_ring->q_index);
 
 	ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) &
 		ICE_RX_FLEX_DESC_PTYPE_M;
 	decoded = ice_decode_rx_desc_ptype(ptype);
 
-	ice_rx_hash(rx_ring, rx_desc, skb, decoded);
-	ice_rx_csum(rx_ring, skb, rx_desc, decoded);
-	ice_rx_vlan(skb, rx_ring, rx_desc);
+	ice_rx_hash(md, rx_ring, rx_desc, decoded);
+	ice_rx_csum(md, rx_ring, rx_desc, decoded);
+	ice_rx_vlan(md, rx_ring, rx_desc);
 
 	if (rx_ring->ptp_rx)
-		ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb);
+		ice_ptp_rx_hwtstamp(md, rx_desc, rx_ring);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index 45dc5ef79e28..b51e58b8e83d 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -92,9 +92,13 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res);
 int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring);
 int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring);
 void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val);
-void
-ice_process_skb_fields(struct ice_rx_ring *rx_ring,
-		       union ice_32b_rx_flex_desc *rx_desc,
-		       struct sk_buff *skb);
+
+void __ice_xdp_build_meta(struct xdp_meta_generic_rx *rx_md,
+			  const union ice_32b_rx_flex_desc *rx_desc,
+			  const struct ice_rx_ring *rx_ring,
+			  __le64 full_id);
+
+#define ice_xdp_build_meta(md, ...)					\
+	__ice_xdp_build_meta(to_rx_md(md), ##__VA_ARGS__)
 
 #endif /* !_ICE_TXRX_LIB_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 0a66128964e7..eade918723eb 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -603,6 +603,7 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
 		unsigned int size, xdp_res = 0;
+		struct xdp_meta_generic_rx md;
 		struct xdp_buff *xdp;
 		struct sk_buff *skb;
 		u16 stat_err_bits;
@@ -673,7 +674,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget)
 		total_rx_bytes += skb->len;
 		total_rx_packets++;
 
-		ice_process_skb_fields(rx_ring, rx_desc, skb);
+		ice_xdp_build_meta(&md, rx_desc, rx_ring, 0);
+		__xdp_populate_skb_meta_generic(skb, &md);
 		ice_receive_skb(rx_ring, skb);
 	}
 
-- 
2.36.1

Powered by blists - more mailing lists