lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Fri, 24 Jan 2020 10:55:19 +0100
From:   Daniel Borkmann <daniel@...earbox.net>
To:     Luigi Rizzo <lrizzo@...gle.com>, netdev@...r.kernel.org
Cc:     Jesper Dangaard Brouer <hawk@...nel.org>,
        "David S. Miller" <davem@...emloft.net>, sameehj@...zon.com,
        toke@...hat.com
Subject: Re: [PATCH net-next] v2 net-xdp: netdev attribute to control
 xdpgeneric skb linearization

On 1/24/20 12:20 AM, Luigi Rizzo wrote:
> Add a netdevice flag to control skb linearization in generic xdp mode.
> Among the various mechanism to control the flag, the sysfs
> interface seems sufficiently simple and self-contained.
> The attribute can be modified through
> 	/sys/class/net/<DEVICE>/xdp_linearize
> The default is 1 (on)
> 
> On a kernel instrumented to grab timestamps around the linearization
> code in netif_receive_generic_xdp, and heavy netperf traffic with 1500b
> mtu, I see the following times (nanoseconds/pkt)
> 
> The receiver generally sees larger packets so the difference is more
> significant.
> 
> ns/pkt                   RECEIVER                 SENDER
> 
>                      p50     p90     p99       p50   p90    p99
> 
> LINEARIZATION:    600ns  1090ns  4900ns     149ns 249ns  460ns
> NO LINEARIZATION:  40ns    59ns    90ns      40ns  50ns  100ns
> 
> Signed-off-by: Luigi Rizzo <lrizzo@...gle.com>
> ---
>   include/linux/netdevice.h |  3 ++-
>   net/core/dev.c            |  5 +++--
>   net/core/net-sysfs.c      | 15 +++++++++++++++
>   3 files changed, 20 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 5ec3537fbdb1..b182f3cb0bf0 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -1959,7 +1959,8 @@ struct net_device {
>   
>   	struct netdev_rx_queue	*_rx;
>   	unsigned int		num_rx_queues;
> -	unsigned int		real_num_rx_queues;
> +	unsigned int		real_num_rx_queues:31;
> +	unsigned int		xdp_linearize : 1;
>   
>   	struct bpf_prog __rcu	*xdp_prog;
>   	unsigned long		gro_flush_timeout;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 4dcc1b390667..13a671e45b61 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -4484,8 +4484,8 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
>   	 * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
>   	 * native XDP provides, thus we need to do it here as well.
>   	 */
> -	if (skb_is_nonlinear(skb) ||
> -	    skb_headroom(skb) < XDP_PACKET_HEADROOM) {
> +	if (skb->dev->xdp_linearize && (skb_is_nonlinear(skb) ||
> +	    skb_headroom(skb) < XDP_PACKET_HEADROOM)) {
>   		int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
>   		int troom = skb->tail + skb->data_len - skb->end;

I still think in order for this knob to be generally useful, we would need to
provide an equivalent of bpf_skb_pull_data() helper, which in generic XDP would then
pull in more data from non-linear section, and in native XDP would be a "no-op" since
the frame is already linear. Otherwise, as mentioned in previous thread, users would
have no chance to examine headers if they are not pre-pulled by the driver.

> @@ -9756,6 +9756,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
>   	dev->gso_max_segs = GSO_MAX_SEGS;
>   	dev->upper_level = 1;
>   	dev->lower_level = 1;
> +	dev->xdp_linearize = 1;
>   
>   	INIT_LIST_HEAD(&dev->napi_list);
>   	INIT_LIST_HEAD(&dev->unreg_list);
> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
> index 4c826b8bf9b1..ec59aa296664 100644
> --- a/net/core/net-sysfs.c
> +++ b/net/core/net-sysfs.c
> @@ -442,6 +442,20 @@ static ssize_t proto_down_store(struct device *dev,
>   }
>   NETDEVICE_SHOW_RW(proto_down, fmt_dec);
>   
> +static int change_xdp_linearize(struct net_device *dev, unsigned long val)
> +{
> +	dev->xdp_linearize = !!val;
> +	return 0;
> +}
> +
> +static ssize_t xdp_linearize_store(struct device *dev,
> +				   struct device_attribute *attr,
> +				   const char *buf, size_t len)
> +{
> +	return netdev_store(dev, attr, buf, len, change_xdp_linearize);
> +}
> +NETDEVICE_SHOW_RW(xdp_linearize, fmt_dec);
> +
>   static ssize_t phys_port_id_show(struct device *dev,
>   				 struct device_attribute *attr, char *buf)
>   {
> @@ -536,6 +550,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
>   	&dev_attr_phys_port_name.attr,
>   	&dev_attr_phys_switch_id.attr,
>   	&dev_attr_proto_down.attr,
> +	&dev_attr_xdp_linearize.attr,
>   	&dev_attr_carrier_up_count.attr,
>   	&dev_attr_carrier_down_count.attr,
>   	NULL,
> 

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ