Message-ID: <20190214060305.GD10595@mini-arch>
Date: Wed, 13 Feb 2019 22:03:05 -0800
From: Stanislav Fomichev <sdf@...ichev.me>
To: Peter Oskolkov <posk@...gle.com>
Cc: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>, netdev@...r.kernel.org,
Peter Oskolkov <posk@...k.io>, David Ahern <dsahern@...il.com>,
Willem de Bruijn <willemb@...gle.com>
Subject: Re: [PATCH bpf-next v11 7/7] selftests: bpf: add test_lwt_ip_encap selftest
On 02/13, Peter Oskolkov wrote:
> This patch adds a bpf self-test to cover BPF_LWT_ENCAP_IP mode
> in bpf_lwt_push_encap.
>
> Covered:
> - encapping in LWT_IN and LWT_XMIT
> - IPv4 and IPv6
>
> A follow-up patch will add GSO and VRF-enabled tests.
>
> Signed-off-by: Peter Oskolkov <posk@...gle.com>
> ---
> tools/testing/selftests/bpf/Makefile | 3 +-
> .../selftests/bpf/progs/test_lwt_ip_encap.c | 85 +++++
> .../selftests/bpf/test_lwt_ip_encap.sh | 311 ++++++++++++++++++
> 3 files changed, 398 insertions(+), 1 deletion(-)
> create mode 100644 tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
> create mode 100755 tools/testing/selftests/bpf/test_lwt_ip_encap.sh
>
> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
> index c3edf47da05d..ccffaa0a0787 100644
> --- a/tools/testing/selftests/bpf/Makefile
> +++ b/tools/testing/selftests/bpf/Makefile
> @@ -50,7 +50,8 @@ TEST_PROGS := test_kmod.sh \
> test_lirc_mode2.sh \
> test_skb_cgroup_id.sh \
> test_flow_dissector.sh \
> - test_xdp_vlan.sh
> + test_xdp_vlan.sh \
> + test_lwt_ip_encap.sh
>
> TEST_PROGS_EXTENDED := with_addr.sh \
> with_tunnels.sh \
> diff --git a/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
> new file mode 100644
> index 000000000000..c957d6dfe6d7
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_lwt_ip_encap.c
> @@ -0,0 +1,85 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <stddef.h>
> +#include <string.h>
> +#include <linux/bpf.h>
> +#include <linux/ip.h>
> +#include <linux/ipv6.h>
> +#include "bpf_helpers.h"
> +#include "bpf_endian.h"
> +
> +struct grehdr {
> + __be16 flags;
> + __be16 protocol;
> +};
> +
> +SEC("encap_gre")
> +int bpf_lwt_encap_gre(struct __sk_buff *skb)
> +{
> + struct encap_hdr {
> + struct iphdr iph;
> + struct grehdr greh;
> + } hdr;
> + int err;
> +
> + memset(&hdr, 0, sizeof(struct encap_hdr));
> +
> + hdr.iph.ihl = 5;
> + hdr.iph.version = 4;
> + hdr.iph.ttl = 0x40;
> + hdr.iph.protocol = 47; /* IPPROTO_GRE */
[...]
> +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
> + hdr.iph.saddr = 0x640110ac; /* 172.16.1.100 */
> + hdr.iph.daddr = 0x641010ac; /* 172.16.16.100 */
> +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
> + hdr.iph.saddr = 0xac100164; /* 172.16.1.100 */
> + hdr.iph.daddr = 0xac101064; /* 172.16.16.100 */
> +#else
> +#error "Fix your compiler's __BYTE_ORDER__?!"
> +#endif
Nit: why not just:
hdr.iph.saddr = bpf_htonl(0xac100164); /* 172.16.1.100 */
hdr.iph.daddr = bpf_htonl(0xac101064); /* 172.16.16.100 */
?
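That would also let you drop the #if/#elif/#error block entirely: bpf_endian.h
is already included above, and bpf_htonl()/bpf_htons() handle the byte order
on both little- and big-endian builds. Untested, but the hunk would then read
something like:

	hdr.iph.ttl = 0x40;
	hdr.iph.protocol = 47; /* IPPROTO_GRE */
	/* bpf_htonl() takes care of the byte order, no #ifdef needed */
	hdr.iph.saddr = bpf_htonl(0xac100164); /* 172.16.1.100 */
	hdr.iph.daddr = bpf_htonl(0xac101064); /* 172.16.16.100 */
	hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr));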
> + hdr.iph.tot_len = bpf_htons(skb->len + sizeof(struct encap_hdr));
> +
> + hdr.greh.protocol = skb->protocol;
> +
> + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
> + sizeof(struct encap_hdr));
> + if (err)
> + return BPF_DROP;
> +
> + return BPF_LWT_REROUTE;
> +}
> +
> +SEC("encap_gre6")
> +int bpf_lwt_encap_gre6(struct __sk_buff *skb)
> +{
> + struct encap_hdr {
> + struct ipv6hdr ip6hdr;
> + struct grehdr greh;
> + } hdr;
> + int err;
> +
> + memset(&hdr, 0, sizeof(struct encap_hdr));
> +
> + hdr.ip6hdr.version = 6;
> + hdr.ip6hdr.payload_len = bpf_htons(skb->len + sizeof(struct grehdr));
> + hdr.ip6hdr.nexthdr = 47; /* IPPROTO_GRE */
> + hdr.ip6hdr.hop_limit = 0x40;
> + /* fb01::1 */
> + hdr.ip6hdr.saddr.s6_addr[0] = 0xfb;
> + hdr.ip6hdr.saddr.s6_addr[1] = 1;
> + hdr.ip6hdr.saddr.s6_addr[15] = 1;
> + /* fb10::1 */
> + hdr.ip6hdr.daddr.s6_addr[0] = 0xfb;
> + hdr.ip6hdr.daddr.s6_addr[1] = 0x10;
> + hdr.ip6hdr.daddr.s6_addr[15] = 1;
> +
> + hdr.greh.protocol = skb->protocol;
> +
> + err = bpf_lwt_push_encap(skb, BPF_LWT_ENCAP_IP, &hdr,
> + sizeof(struct encap_hdr));
> + if (err)
> + return BPF_DROP;
> +
> + return BPF_LWT_REROUTE;
> +}
> +
> +char _license[] SEC("license") = "GPL";
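One more drive-by note, feel free to ignore: the SEC() names above are what
the script below passes to iproute2, e.g. for the egress IPv4 case roughly:

	ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit \
		obj test_lwt_ip_encap.o sec encap_gre dev veth1

so if the section names ever change, the script has to change with them.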
> diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
> new file mode 100755
> index 000000000000..4ca714e23ab0
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
> @@ -0,0 +1,311 @@
> +#!/bin/bash
> +# SPDX-License-Identifier: GPL-2.0
> +#
> +# Setup/topology:
> +#
> +# NS1 NS2 NS3
> +# veth1 <---> veth2 veth3 <---> veth4 (the top route)
> +# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
> +#
> +# each vethN gets IPv[4|6]_N address
> +#
> +# IPv*_SRC = IPv*_1
> +# IPv*_DST = IPv*_4
> +#
> +# all tests test pings from IPv*_SRC to IPv*_DST
> +#
> +# by default, routes are configured to allow packets to go
> +# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
> +#
> +# a GRE device is installed in NS3 with IPv*_GRE, and
> +# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
> +# (the bottom route)
> +#
> +# Tests:
> +#
> +# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
> +# from IP*_SRC to IP*_DST can work is via IPv*_GRE
> +#
> +# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
> +# that encaps the packets with an IP/GRE header to route to IPv*_GRE
> +#
> +# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
> +# ping replies go DST->SRC directly
> +#
> +# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
> +# that encaps the packets with an IP/GRE header to route to IPv*_GRE
> +#
> +# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
> +# ping replies go DST->SRC directly
> +
> +set -e # exit on error
> +
> +if [[ $EUID -ne 0 ]]; then
> + echo "This script must be run as root"
> + echo "FAIL"
> + exit 1
> +fi
> +
> +readonly NS1="ns1-$(mktemp -u XXXXXX)"
> +readonly NS2="ns2-$(mktemp -u XXXXXX)"
> +readonly NS3="ns3-$(mktemp -u XXXXXX)"
> +
> +readonly IPv4_1="172.16.1.100"
> +readonly IPv4_2="172.16.2.100"
> +readonly IPv4_3="172.16.3.100"
> +readonly IPv4_4="172.16.4.100"
> +readonly IPv4_5="172.16.5.100"
> +readonly IPv4_6="172.16.6.100"
> +readonly IPv4_7="172.16.7.100"
> +readonly IPv4_8="172.16.8.100"
> +readonly IPv4_GRE="172.16.16.100"
> +
> +readonly IPv4_SRC=$IPv4_1
> +readonly IPv4_DST=$IPv4_4
> +
> +readonly IPv6_1="fb01::1"
> +readonly IPv6_2="fb02::1"
> +readonly IPv6_3="fb03::1"
> +readonly IPv6_4="fb04::1"
> +readonly IPv6_5="fb05::1"
> +readonly IPv6_6="fb06::1"
> +readonly IPv6_7="fb07::1"
> +readonly IPv6_8="fb08::1"
> +readonly IPv6_GRE="fb10::1"
> +
> +readonly IPv6_SRC=$IPv6_1
> +readonly IPv6_DST=$IPv6_4
> +
> +setup() {
> +set -e # exit on error
> + # create devices and namespaces
> + ip netns add "${NS1}"
> + ip netns add "${NS2}"
> + ip netns add "${NS3}"
> +
> + ip link add veth1 type veth peer name veth2
> + ip link add veth3 type veth peer name veth4
> + ip link add veth5 type veth peer name veth6
> + ip link add veth7 type veth peer name veth8
> +
> + ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
> + ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
> +
> + ip link set veth1 netns ${NS1}
> + ip link set veth2 netns ${NS2}
> + ip link set veth3 netns ${NS2}
> + ip link set veth4 netns ${NS3}
> + ip link set veth5 netns ${NS1}
> + ip link set veth6 netns ${NS2}
> + ip link set veth7 netns ${NS2}
> + ip link set veth8 netns ${NS3}
> +
> + # configure addresses: the top route (1-2-3-4)
> + ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
> + ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
> + ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
> + ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
> + ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
> + ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
> + ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
> + ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
> +
> + # configure addresses: the bottom route (5-6-7-8)
> + ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
> + ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
> + ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
> + ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
> + ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
> + ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
> + ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
> + ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
> +
> +
> + ip -netns ${NS1} link set dev veth1 up
> + ip -netns ${NS2} link set dev veth2 up
> + ip -netns ${NS2} link set dev veth3 up
> + ip -netns ${NS3} link set dev veth4 up
> + ip -netns ${NS1} link set dev veth5 up
> + ip -netns ${NS2} link set dev veth6 up
> + ip -netns ${NS2} link set dev veth7 up
> + ip -netns ${NS3} link set dev veth8 up
> +
> + # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
> + # the bottom route to specific bottom addresses
> +
> + # NS1
> + # top route
> + ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1
> + ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} # go top by default
> + ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1
> + ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} # go top by default
> + # bottom route
> + ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5
> + ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6}
> + ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6}
> + ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5
> + ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6}
> + ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6}
> +
> + # NS2
> + # top route
> + ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2
> + ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3
> + ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2
> + ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3
> + # bottom route
> + ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6
> + ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7
> + ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6
> + ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7
> +
> + # NS3
> + # top route
> + ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
> + ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
> + ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
> + ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
> + ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
> + ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
> + # bottom route
> + ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
> + ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
> + ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
> + ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
> + ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
> + ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
> +
> + # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
> + ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
> + ip -netns ${NS3} link set gre_dev up
> + ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
> + ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6}
> + ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8}
> +
> +
> + # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
> + ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
> + ip -netns ${NS3} link set gre6_dev up
> + ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
> + ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6}
> + ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8}
> +
> + # rp_filter gets confused by what these tests are doing, so disable it
> + ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
> + ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
> + ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
> +}
> +
> +cleanup() {
> + ip netns del ${NS1} 2> /dev/null
> + ip netns del ${NS2} 2> /dev/null
> + ip netns del ${NS3} 2> /dev/null
> +}
> +
> +trap cleanup EXIT
> +
> +test_ping() {
> + local readonly PROTO=$1
> + local readonly EXPECTED=$2
> + local RET=0
> +
> + set +e
> + if [ "${PROTO}" == "IPv4" ] ; then
> + ip netns exec ${NS1} ping -c 1 -W 1 -I ${IPv4_SRC} ${IPv4_DST} 2>&1 > /dev/null
> + RET=$?
> + elif [ "${PROTO}" == "IPv6" ] ; then
> + ip netns exec ${NS1} ping6 -c 1 -W 6 -I ${IPv6_SRC} ${IPv6_DST} 2>&1 > /dev/null
> + RET=$?
> + else
> + echo "test_ping: unknown PROTO: ${PROTO}"
> + exit 1
> + fi
> + set -e
> +
> + if [ "0" != "${RET}" ]; then
> + RET=1
> + fi
> +
> + if [ "${EXPECTED}" != "${RET}" ] ; then
> + echo "FAIL: test_ping: ${RET}"
> + exit 1
> + fi
> +}
> +
> +test_egress() {
> + local readonly ENCAP=$1
> + echo "starting egress ${ENCAP} encap test"
> + setup
> +
> + # need to wait a bit for IPv6 to autoconf, otherwise
> + # ping6 sometimes fails with "unable to bind to address"
> +
> + # by default, pings work
> + test_ping IPv4 0
> + test_ping IPv6 0
> +
> + # remove NS2->DST routes, ping fails
> + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
> + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
> + test_ping IPv4 1
> + test_ping IPv6 1
> +
> + # install replacement routes (LWT/eBPF), pings succeed
> + if [ "${ENCAP}" == "IPv4" ] ; then
> + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
> + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre dev veth1
> + elif [ "${ENCAP}" == "IPv6" ] ; then
> + ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
> + ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj test_lwt_ip_encap.o sec encap_gre6 dev veth1
> + else
> + echo "FAIL: unknown encap ${ENCAP}"
> + fi
> + test_ping IPv4 0
> + test_ping IPv6 0
> +
> + cleanup
> + echo "PASS"
> +}
> +
> +test_ingress() {
> + local readonly ENCAP=$1
> + echo "starting ingress ${ENCAP} encap test"
> + setup
> +
> + # need to wait a bit for IPv6 to autoconf, otherwise
> + # ping6 sometimes fails with "unable to bind to address"
> +
> + # by default, pings work
> + test_ping IPv4 0
> + test_ping IPv6 0
> +
> + # remove NS2->DST routes, pings fail
> + ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3
> + ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3
> + test_ping IPv4 1
> + test_ping IPv6 1
> +
> + # install replacement routes (LWT/eBPF), pings succeed
> + if [ "${ENCAP}" == "IPv4" ] ; then
> + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
> + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre dev veth2
> + elif [ "${ENCAP}" == "IPv6" ] ; then
> + ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
> + ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj test_lwt_ip_encap.o sec encap_gre6 dev veth2
> + else
> + echo "FAIL: unknown encap ${ENCAP}"
> + fi
> + test_ping IPv4 0
> + test_ping IPv6 0
> +
> + cleanup
> + echo "PASS"
> +}
> +
> +test_egress IPv4
> +test_egress IPv6
> +
> +test_ingress IPv4
> +test_ingress IPv6
> +
> +echo "all tests passed"
> --
> 2.20.1.791.gb4d0f1c61a-goog
>