lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <86137924-b3cb-3d96-51b1-19923252f092@brouer.com>
Date:   Mon, 14 Mar 2022 21:39:21 +0100
From:   "Jesper D. Brouer" <netdev@...uer.com>
To:     Felix Fietkau <nbd@....name>, netdev@...r.kernel.org,
        bpf <bpf@...r.kernel.org>
Cc:     brouer@...hat.com, Toke Hoiland Jorgensen <toke@...hat.com>,
        John Fastabend <john.fastabend@...il.com>,
        Jakub Kicinski <kuba@...nel.org>
Subject: Re: [PATCH] net: xdp: allow user space to request a smaller packet
 headroom requirement

(Cc. BPF list and other XDP maintainers)

On 14/03/2022 11.22, Felix Fietkau wrote:
> Most ethernet drivers allocate a packet headroom of NET_SKB_PAD. Since it is
> rounded up to L1 cache size, it ends up being at least 64 bytes on the most
> common platforms.
> On most ethernet drivers, having a guaranteed headroom of 256 bytes for XDP
> adds an extra forced pskb_expand_head call when enabling SKB XDP, which can
> be quite expensive.
> Many XDP programs need only very little headroom, so it can be beneficial
> to have a way to opt-out of the 256 bytes headroom requirement.

IMHO 64 bytes is too small.
We are using this area for struct xdp_frame and also for metadata
(XDP-hints).  This will limit us from growing this structures for
the sake of generic-XDP.

I'm fine with reducting this to 192 bytes, as most Intel drivers
have this headroom, and have defacto established that this is
a valid XDP headroom, even for native-XDP.

We could go a small as two cachelines 128 bytes, as if xdp_frame
and metadata grows above a cache-line (64 bytes) each, then we have
done something wrong (performance wise).


> Add an extra flag XDP_FLAGS_SMALL_HEADROOM that can be set when attaching
> the XDP program, which reduces the minimum headroom to 64 bytes.

I don't like a flags approach.

Multiple disadvantages.
  (a) Harder to use
  (b) Now reading a new cache-line in net_device

> Signed-off-by: Felix Fietkau <nbd@....name>
> ---
>   include/linux/netdevice.h          | 1 +
>   include/uapi/linux/bpf.h           | 1 +
>   include/uapi/linux/if_link.h       | 4 +++-
>   net/core/dev.c                     | 9 ++++++++-
>   tools/include/uapi/linux/if_link.h | 4 +++-
>   5 files changed, 16 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 0d994710b335..f6f270a5e301 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -2274,6 +2274,7 @@ struct net_device {
>   	bool			proto_down;
>   	unsigned		wol_enabled:1;
>   	unsigned		threaded:1;
> +	unsigned		xdp_small_headroom:1;
>   

Looks like we need to read this cache-line, in a XDP (generic) fastpath.

>   	struct list_head	net_notifier_list;
>   
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 4eebea830613..7635dfb02313 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -5688,6 +5688,7 @@ struct bpf_xdp_sock {
>   };
>   
>   #define XDP_PACKET_HEADROOM 256
> +#define XDP_PACKET_HEADROOM_SMALL 64

Define it 192 instead.

>   
>   /* User return codes for XDP prog type.
>    * A valid XDP program must return one of these defined values. All other
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index e003a0b9b4b2..acb996334910 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -1275,11 +1275,13 @@ enum {
>   #define XDP_FLAGS_DRV_MODE		(1U << 2)
>   #define XDP_FLAGS_HW_MODE		(1U << 3)
>   #define XDP_FLAGS_REPLACE		(1U << 4)
> +#define XDP_FLAGS_SMALL_HEADROOM	(1U << 5)
>   #define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
>   					 XDP_FLAGS_DRV_MODE | \
>   					 XDP_FLAGS_HW_MODE)
>   #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
> -					 XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
> +					 XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \
> +					 XDP_FLAGS_SMALL_HEADROOM)
>   
>   /* These are stored into IFLA_XDP_ATTACHED on dump. */
>   enum {
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 8d25ec5b3af7..cb12379b8f11 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4722,6 +4722,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
>   				     struct xdp_buff *xdp,
>   				     struct bpf_prog *xdp_prog)
>   {
> +	int min_headroom = XDP_PACKET_HEADROOM;
>   	u32 act = XDP_DROP;
>   
>   	/* Reinjected packets coming from act_mirred or similar should
> @@ -4730,12 +4731,15 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
>   	if (skb_is_redirected(skb))
>   		return XDP_PASS;
>   
> +	if (skb->dev->xdp_small_headroom)
> +		min_headroom = XDP_PACKET_HEADROOM_SMALL;
> +
>   	/* XDP packets must be linear and must have sufficient headroom
>   	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
>   	 * native XDP provides, thus we need to do it here as well.
>   	 */
>   	if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
> -	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
> +	    skb_headroom(skb) < min_headroom) {

Use define XDP_PACKET_HEADROOM_SMALL here directly.

>   		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
>   		int troom = skb->tail + skb->data_len - skb->end;
>   
> @@ -9135,6 +9139,9 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
>   			return err;
>   	}
>   
> +	if (mode == XDP_MODE_SKB)
> +		dev->xdp_small_headroom = !!(flags & XDP_FLAGS_SMALL_HEADROOM);
> +
>   	if (link)
>   		dev_xdp_set_link(dev, mode, link);
>   	else
> diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
> index e1ba2d51b717..0df737a6c489 100644
> --- a/tools/include/uapi/linux/if_link.h
> +++ b/tools/include/uapi/linux/if_link.h
> @@ -1185,11 +1185,13 @@ enum {
>   #define XDP_FLAGS_DRV_MODE		(1U << 2)
>   #define XDP_FLAGS_HW_MODE		(1U << 3)
>   #define XDP_FLAGS_REPLACE		(1U << 4)
> +#define XDP_FLAGS_SMALL_HEADROOM	(1U << 5)
>   #define XDP_FLAGS_MODES			(XDP_FLAGS_SKB_MODE | \
>   					 XDP_FLAGS_DRV_MODE | \
>   					 XDP_FLAGS_HW_MODE)
>   #define XDP_FLAGS_MASK			(XDP_FLAGS_UPDATE_IF_NOEXIST | \
> -					 XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
> +					 XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \
> +					 XDP_FLAGS_SMALL_HEADROOM)
>   
>   /* These are stored into IFLA_XDP_ATTACHED on dump. */
>   enum {
> 

--Jesper

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ