lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <75bd0a9fdcd1f48533e0cff7b2d3e95e362880a4.camel@redhat.com>
Date:   Thu, 06 Apr 2023 12:16:06 +0200
From:   Paolo Abeni <pabeni@...hat.com>
To:     Vladimir Nikishkin <vladimir@...ishkin.pw>, netdev@...r.kernel.org
Cc:     davem@...emloft.net, edumazet@...gle.com, kuba@...nel.org,
        eng.alaamohamedsoliman.am@...il.com, gnault@...hat.com,
        razor@...ckwall.org, idosch@...dia.com, liuhangbin@...il.com,
        eyal.birger@...il.com, jtoppins@...hat.com
Subject: Re: [PATCH net-next v6] vxlan: try to send a packet normally if
 local bypass fails

On Wed, 2023-04-05 at 13:01 +0800, Vladimir Nikishkin wrote:
> In vxlan_core, if an fdb entry is pointing to a local
> address with some port, the system tries to get the packet to
> deliver the packet to the vxlan directly, bypassing the network

The above sentence sounds confusing to me. Possibly:

... the system tries to deliver the packet to ...

?

> stack.
> 
> This patch makes it still try canonical delivery, if there is no
> linux kernel vxlan listening on this port. This will be useful
> for the cases when there is some userspace daemon expecting
> vxlan packets for post-processing, or some other implementation
> of vxlan.
> 
> Signed-off-by: Vladimir Nikishkin <vladimir@...ishkin.pw>
> ---
>  drivers/net/vxlan/vxlan_core.c                |  29 +++--
>  include/net/vxlan.h                           |   4 +-
>  include/uapi/linux/if_link.h                  |   1 +
>  tools/include/uapi/linux/if_link.h            |   2 +
>  tools/testing/selftests/net/Makefile          |   1 +
>  tools/testing/selftests/net/rtnetlink.sh      |   3 +
>  .../selftests/net/test_vxlan_nolocalbypass.sh | 102 ++++++++++++++++++
>  7 files changed, 135 insertions(+), 7 deletions(-)
>  create mode 100755 tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
> 
> diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
> index 561fe1b314f5..f9dfb179af58 100644
> --- a/drivers/net/vxlan/vxlan_core.c
> +++ b/drivers/net/vxlan/vxlan_core.c
> @@ -2341,7 +2341,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
>  				 union vxlan_addr *daddr,
>  				 __be16 dst_port, int dst_ifindex, __be32 vni,
>  				 struct dst_entry *dst,
> -				 u32 rt_flags)
> +				 u32 rt_flags, bool localbypass)

No need to add an additional argument, you can use vxlan->cfg.flags
instead

>  {
>  #if IS_ENABLED(CONFIG_IPV6)
>  	/* IPv6 rt-flags are checked against RTF_LOCAL, but the value of
> @@ -2355,11 +2355,13 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
>  	    !(rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))) {
>  		struct vxlan_dev *dst_vxlan;
>  
> -		dst_release(dst);
>  		dst_vxlan = vxlan_find_vni(vxlan->net, dst_ifindex, vni,
>  					   daddr->sa.sa_family, dst_port,
>  					   vxlan->cfg.flags);
>  		if (!dst_vxlan) {
> +			if (!localbypass)
> +				return 0;

A space here would make the code more clear

> +			dst_release(dst);
>  			dev->stats.tx_errors++;
>  			vxlan_vnifilter_count(vxlan, vni, NULL,
>  					      VXLAN_VNI_STATS_TX_ERRORS, 0);
> @@ -2367,6 +2369,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
>  
>  			return -ENOENT;
>  		}
> +		dst_release(dst);
>  		vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
>  		return 1;
>  	}
> @@ -2494,9 +2497,11 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>  
>  		if (!info) {
>  			/* Bypass encapsulation if the destination is local */
> +			bool localbypass = flags & VXLAN_F_LOCALBYPASS;

The new variable is not needed (see above). Anyway you need to add an
empty line between variable declarations and code - btw strange that
checkpatch did not complain.

>  			err = encap_bypass_if_local(skb, dev, vxlan, dst,
>  						    dst_port, ifindex, vni,
> -						    &rt->dst, rt->rt_flags);
> +						    &rt->dst, rt->rt_flags,
> +						    localbypass);
>  			if (err)
>  				goto out_unlock;
>  
> @@ -2568,10 +2573,10 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
>  
>  		if (!info) {
>  			u32 rt6i_flags = ((struct rt6_info *)ndst)->rt6i_flags;
> -
> +			bool localbypass = flags & VXLAN_F_LOCALBYPASS;
>  			err = encap_bypass_if_local(skb, dev, vxlan, dst,
>  						    dst_port, ifindex, vni,
> -						    ndst, rt6i_flags);
> +						    ndst, rt6i_flags, localbypass);
>  			if (err)
>  				goto out_unlock;
>  		}
> @@ -3202,6 +3207,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
>  	[IFLA_VXLAN_TTL_INHERIT]	= { .type = NLA_FLAG },
>  	[IFLA_VXLAN_DF]		= { .type = NLA_U8 },
>  	[IFLA_VXLAN_VNIFILTER]	= { .type = NLA_U8 },
> +	[IFLA_VXLAN_LOCALBYPASS]	= { .type = NLA_U8 },
>  };
>  
>  static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[],
> @@ -4011,6 +4017,14 @@ static int vxlan_nl2conf(struct nlattr *tb[], struct nlattr *data[],
>  			conf->flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
>  	}
>  
> +	if (data[IFLA_VXLAN_LOCALBYPASS]) {
> +		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_LOCALBYPASS,
> +				    VXLAN_F_LOCALBYPASS, changelink,
> +				    false, extack);
> +		if (err)
> +			return err;
> +	}
> +
>  	if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]) {
>  		err = vxlan_nl2flag(conf, data, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
>  				    VXLAN_F_UDP_ZERO_CSUM6_TX, changelink,
> @@ -4232,6 +4246,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */
>  		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */
> +		nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LOCALBYPASS */
>  		0;
>  }
>  
> @@ -4308,7 +4323,9 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
>  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX,
>  		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_TX)) ||
>  	    nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX,
> -		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)))
> +		       !!(vxlan->cfg.flags & VXLAN_F_REMCSUM_RX)) ||
> +	    nla_put_u8(skb, IFLA_VXLAN_LOCALBYPASS,
> +		       !!(vxlan->cfg.flags & VXLAN_F_LOCALBYPASS)))
>  		goto nla_put_failure;
>  
>  	if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports))
> diff --git a/include/net/vxlan.h b/include/net/vxlan.h
> index 20bd7d893e10..0be91ca78d3a 100644
> --- a/include/net/vxlan.h
> +++ b/include/net/vxlan.h
> @@ -328,6 +328,7 @@ struct vxlan_dev {
>  #define VXLAN_F_TTL_INHERIT		0x10000
>  #define VXLAN_F_VNIFILTER               0x20000
>  #define VXLAN_F_MDB			0x40000
> +#define VXLAN_F_LOCALBYPASS		0x80000
>  
>  /* Flags that are used in the receive path. These flags must match in
>   * order for a socket to be shareable
> @@ -348,7 +349,8 @@ struct vxlan_dev {
>  					 VXLAN_F_UDP_ZERO_CSUM6_TX |	\
>  					 VXLAN_F_UDP_ZERO_CSUM6_RX |	\
>  					 VXLAN_F_COLLECT_METADATA  |	\
> -					 VXLAN_F_VNIFILTER)
> +					 VXLAN_F_VNIFILTER         |    \
> +					 VXLAN_F_LOCALBYPASS)
>  
>  struct net_device *vxlan_dev_create(struct net *net, const char *name,
>  				    u8 name_assign_type, struct vxlan_config *conf);
> diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
> index 8d679688efe0..0fc56be5e19f 100644
> --- a/include/uapi/linux/if_link.h
> +++ b/include/uapi/linux/if_link.h
> @@ -827,6 +827,7 @@ enum {
>  	IFLA_VXLAN_TTL_INHERIT,
>  	IFLA_VXLAN_DF,
>  	IFLA_VXLAN_VNIFILTER, /* only applicable with COLLECT_METADATA mode */
> +	IFLA_VXLAN_LOCALBYPASS,
>  	__IFLA_VXLAN_MAX
>  };
>  #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
> diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
> index 39e659c83cfd..1253bd0aa90e 100644
> --- a/tools/include/uapi/linux/if_link.h
> +++ b/tools/include/uapi/linux/if_link.h
> @@ -748,6 +748,8 @@ enum {
>  	IFLA_VXLAN_GPE,
>  	IFLA_VXLAN_TTL_INHERIT,
>  	IFLA_VXLAN_DF,
> +	IFLA_VXLAN_VNIFILTER,
> +	IFLA_VXLAN_LOCALBYPASS,
>  	__IFLA_VXLAN_MAX
>  };
>  #define IFLA_VXLAN_MAX	(__IFLA_VXLAN_MAX - 1)
> diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
> index 1de34ec99290..7a9cfd0c92db 100644
> --- a/tools/testing/selftests/net/Makefile
> +++ b/tools/testing/selftests/net/Makefile
> @@ -83,6 +83,7 @@ TEST_GEN_FILES += nat6to4.o
>  TEST_GEN_FILES += ip_local_port_range
>  TEST_GEN_FILES += bind_wildcard
>  TEST_PROGS += test_vxlan_mdb.sh
> +TEST_PROGS += test_vxlan_nolocalbypass.sh
>  
>  TEST_FILES := settings
>  
> diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
> index 383ac6fc037d..09a5ef4bd42b 100755
> --- a/tools/testing/selftests/net/rtnetlink.sh
> +++ b/tools/testing/selftests/net/rtnetlink.sh
> @@ -505,6 +505,9 @@ kci_test_encap_vxlan()
>  	ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null
>  	check_fail $?
>  
> +	ip -netns "$testns" link set dev "$vxlan" type vxlan nolocalbypass 2>/dev/null
> +	check_fail $?

This will fail until a new version of 'ip' is deployed, right? I would
wait before adding this test case (or will move it in the
test_vxlan_nolocalbypass.sh, after the relevant check)

> +
>  	ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null
>  	check_fail $?
>  
> diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
> new file mode 100755
> index 000000000000..efa37af2da7b
> --- /dev/null
> +++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
> @@ -0,0 +1,102 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +
> +# This file is testing that the [no]localbypass option for a vxlan device is
> +# working. With the nolocalbypass option, packets to a local destination, which
> +# have no corresponding vxlan in the kernel, will be delivered to userspace, for
> +# any userspace process to process. In this test tcpdump plays the role of such a
> +# process. This is what the test 1 is checking.
> +# The test 2 checks that without the nolocalbypass (which is equivalent to the
> +# localbypass option), the packets do not reach userspace.
> +
> +EXIT_FAIL=1
> +ksft_skip=4
> +EXIT_SUCCESS=0
> +
> +if [ "$(id -u)" -ne 0 ];then
> +        echo "SKIP: Need root privileges"
> +        exit $ksft_skip;
> +fi
> +
> +if [ ! -x "$(command -v ip)" ]; then
> +        echo "SKIP: Could not run test without ip tool"
> +        exit $ksft_skip
> +fi
> +
> +if [ ! -x "$(command -v bridge)" ]; then
> +        echo "SKIP: Could not run test without bridge tool"
> +        exit $ksft_skip
> +fi

No need to check for the above tool presence. You can assume they are
present, as most net self-tests unconditionally relay on them.

> +
> +if [ ! -x "$(command -v tcpdump)" ]; then
> +        echo "SKIP: Could not run test without tcpdump tool"
> +        exit $ksft_skip
> +fi

I personally would opt for adding nft rules and checking the counters,
but no strong opinion on that.

> +
> +if [ ! -x "$(command -v grep)" ]; then
> +        echo "SKIP: Could not run test without grep tool"
> +        exit $ksft_skip
> +fi

Some self-tests check for 'grep' presence but most simply assume such
command is available. I would opt for the latter.

> +
> +ip link help vxlan 2>&1 | grep -q "localbypass"
> +if [ $? -ne 0 ]; then
> +   echo "SKIP: iproute2 bridge too old, missing VXLAN nolocalbypass support"
> +   exit $ksft_skip
> +fi
> +
> +

Additional empty line not needed

> +packetfile=/tmp/packets-"$(uuidgen)"
> +
> +# test 1: packets going to userspace
> +rm "$packetfile"
> +ip link del dev testvxlan0
> +ip link add testvxlan0 type vxlan  \
> +  id 100 \
> +  dstport 4789 \
> +  srcport 4789 4790 \
> +  nolearning noproxy \
> +  nolocalbypass
> +ip link set up dev testvxlan0
> +bridge fdb add 00:00:00:00:00:00 dev testvxlan0 dst 127.0.0.1 port 4792
> +ip address add 172.16.100.1/24 dev testvxlan0
> +tcpdump -i lo 'udp and port 4792' > "$packetfile" &
> +tcpdump_pid=$!
> +timeout 5 ping -c 5 172.16.100.2
> +kill "$tcpdump_pid"
> +ip link del dev testvxlan0
> +
> +if grep -q "VXLAN" "$packetfile" ; then
> +  echo 'Positive test passed'
> +else
> +  echo 'Positive test failed'
> +  exit $EXIT_FAIL
> +fi
> +rm "$packetfile"

It's better if you additionally remove the generate file(s) in a
cleanup function set as trap for exit:

cleanup()
{
	rm -f ${packetfile}
}

trap cleanup EXIT


Cheers,

Paolo

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ