netdev - Re: [PATCHv2 bpf-next] samples/bpf: add xdp program on egress for xdp_redirect

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <54642499-57d7-5f03-f51e-c0be72fb89de@fb.com>
Date:   Thu, 26 Nov 2020 22:31:56 -0800
From:   Yonghong Song <yhs@...com>
To:     Hangbin Liu <liuhangbin@...il.com>, <bpf@...r.kernel.org>
CC:     <netdev@...r.kernel.org>, Daniel Borkmann <daniel@...earbox.net>,
        Jesper Dangaard Brouer <brouer@...hat.com>,
        John Fastabend <john.fastabend@...il.com>,
        Toke Høiland-Jørgensen <toke@...hat.com>,
        Tariq Toukan <tariqt@...lanox.com>,
        Maciej Fijalkowski <maciej.fijalkowski@...el.com>
Subject: Re: [PATCHv2 bpf-next] samples/bpf: add xdp program on egress for
 xdp_redirect_map



On 11/26/20 12:43 AM, Hangbin Liu wrote:
> Current sample test xdp_redirect_map only count pkts on ingress. But we
> can't know whether the pkts are redirected or dropped. So add a counter
> on egress interface so we could know how many pkts are redirect in fact.
> 
> sample result:
> 
> $ ./xdp_redirect_map -X veth1 veth2
> input: 5 output: 6
> libbpf: elf: skipping unrecognized data section(9) .rodata.str1.16
> libbpf: elf: skipping unrecognized data section(23) .eh_frame
> libbpf: elf: skipping relo section(24) .rel.eh_frame for section(23) .eh_frame
> in ifindex 5:          1 pkt/s, out ifindex 6:          1 pkt/s
> in ifindex 5:          1 pkt/s, out ifindex 6:          1 pkt/s
> in ifindex 5:          0 pkt/s, out ifindex 6:          0 pkt/s
> in ifindex 5:         68 pkt/s, out ifindex 6:         68 pkt/s
> in ifindex 5:         91 pkt/s, out ifindex 6:         91 pkt/s
> in ifindex 5:         91 pkt/s, out ifindex 6:         91 pkt/s
> in ifindex 5:         66 pkt/s, out ifindex 6:         66 pkt/s
> 
> Signed-off-by: Hangbin Liu <liuhangbin@...il.com>
> ---
> v2:
> a) use pkt counter instead of IP ttl modification on egress program
> b) make the egress program selectable by option -X
> 
> ---
>   samples/bpf/xdp_redirect_map_kern.c |  26 +++--
>   samples/bpf/xdp_redirect_map_user.c | 142 ++++++++++++++++++----------
>   2 files changed, 113 insertions(+), 55 deletions(-)
> 
> diff --git a/samples/bpf/xdp_redirect_map_kern.c b/samples/bpf/xdp_redirect_map_kern.c
> index 6489352ab7a4..fd6704a4f7e2 100644
> --- a/samples/bpf/xdp_redirect_map_kern.c
> +++ b/samples/bpf/xdp_redirect_map_kern.c
> @@ -22,19 +22,19 @@
>   struct {
>   	__uint(type, BPF_MAP_TYPE_DEVMAP);
>   	__uint(key_size, sizeof(int));
> -	__uint(value_size, sizeof(int));
> +	__uint(value_size, sizeof(struct bpf_devmap_val));
>   	__uint(max_entries, 100);
>   } tx_port SEC(".maps");
>   
> -/* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
> - * feedback.  Redirect TX errors can be caught via a tracepoint.
> +/* Count RX/TX packets, use key 0 for rx pkt count, key 1 for tx
> + * pkt count.
>    */
>   struct {
>   	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
>   	__type(key, u32);
>   	__type(value, long);
> -	__uint(max_entries, 1);
> -} rxcnt SEC(".maps");
> +	__uint(max_entries, 2);
> +} pktcnt SEC(".maps");
>   
>   static void swap_src_dst_mac(void *data)
>   {
> @@ -72,7 +72,7 @@ int xdp_redirect_map_prog(struct xdp_md *ctx)
>   	vport = 0;
>   
>   	/* count packet in global counter */
> -	value = bpf_map_lookup_elem(&rxcnt, &key);
> +	value = bpf_map_lookup_elem(&pktcnt, &key);
>   	if (value)
>   		*value += 1;
>   
> @@ -82,6 +82,20 @@ int xdp_redirect_map_prog(struct xdp_md *ctx)
>   	return bpf_redirect_map(&tx_port, vport, 0);
>   }
>   
> +SEC("xdp_devmap/map_prog")
> +int xdp_devmap_prog(struct xdp_md *ctx)
> +{
> +	long *value;
> +	u32 key = 1;
> +
> +	/* count packet in global counter */
> +	value = bpf_map_lookup_elem(&pktcnt, &key);
> +	if (value)
> +		*value += 1;
> +
> +	return XDP_PASS;
> +}
> +
>   /* Redirect require an XDP bpf_prog loaded on the TX device */
>   SEC("xdp_redirect_dummy")
>   int xdp_redirect_dummy_prog(struct xdp_md *ctx)
> diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c
> index 35e16dee613e..8bdec0865e1d 100644
> --- a/samples/bpf/xdp_redirect_map_user.c
> +++ b/samples/bpf/xdp_redirect_map_user.c
> @@ -21,12 +21,13 @@
>   
>   static int ifindex_in;
>   static int ifindex_out;
> -static bool ifindex_out_xdp_dummy_attached = true;
> +static bool ifindex_out_xdp_dummy_attached = false;
> +static bool xdp_prog_attached = false;

Maybe xdp_devmap_prog_attached? Feel xdp_prog_attached
is too generic since actually it controls xdp_devmap program
attachment.

>   static __u32 prog_id;
>   static __u32 dummy_prog_id;
>   
>   static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
> -static int rxcnt_map_fd;
> +static int pktcnt_map_fd;
>   
>   static void int_exit(int sig)
>   {
> @@ -60,26 +61,46 @@ static void int_exit(int sig)
>   	exit(0);
>   }
>   
> -static void poll_stats(int interval, int ifindex)
> +static void poll_stats(int interval, int if_ingress, int if_egress)
>   {
>   	unsigned int nr_cpus = bpf_num_possible_cpus();
> -	__u64 values[nr_cpus], prev[nr_cpus];
> +	__u64 values[nr_cpus], in_prev[nr_cpus], e_prev[nr_cpus];
> +	__u64 sum;
> +	__u32 key;
> +	int i;
>   
> -	memset(prev, 0, sizeof(prev));
> +	memset(in_prev, 0, sizeof(in_prev));
> +	memset(e_prev, 0, sizeof(e_prev));
>   
>   	while (1) {
> -		__u64 sum = 0;
> -		__u32 key = 0;
> -		int i;
> +		sum = 0;
> +		key = 0;
>   
>   		sleep(interval);
> -		assert(bpf_map_lookup_elem(rxcnt_map_fd, &key, values) == 0);
> -		for (i = 0; i < nr_cpus; i++)
> -			sum += (values[i] - prev[i]);
> -		if (sum)
> -			printf("ifindex %i: %10llu pkt/s\n",
> -			       ifindex, sum / interval);
> -		memcpy(prev, values, sizeof(values));
> +		if (bpf_map_lookup_elem(pktcnt_map_fd, &key, values) == 0) {

When we could have a failure here? If it indeed failed maybe it signals
something wrong and the process should fail?

> +			for (i = 0; i < nr_cpus; i++)
> +				sum += (values[i] - in_prev[i]);
> +			if (sum)
> +				printf("in ifindex %i: %10llu pkt/s",
> +				       if_ingress, sum / interval);
> +			memcpy(in_prev, values, sizeof(values));
> +		}
> +
> +		if (!xdp_prog_attached) {
> +			printf("\n");
> +			continue;
> +		}
> +
> +		sum = 0;
> +		key = 1;
> +		if (bpf_map_lookup_elem(pktcnt_map_fd, &key, values) == 0) {

same as the above, if bpf_map_lookup_elem() failed, maybe we should 
signal a failure?

> +			for (i = 0; i < nr_cpus; i++)
> +				sum += (values[i] - e_prev[i]);
> +			if (sum)
> +				printf(", out ifindex %i: %10llu pkt/s\n",
> +				       if_egress, sum / interval);
> +			memcpy(e_prev, values, sizeof(values));
> +		}
>   	}
>   }
>   
[...]