netdev - Re: [PATCH RFC v4 bpf-next 03/11] xdp: Add xdp_txq_info to xdp_buff

lists.openwall.net		lists / announce owl-users owl-dev john-users john-dev passwdqc-users yescrypt popa3d-users / oss-security kernel-hardening musl sabotage tlsify passwords / crypt-dev xvendor / Bugtraq Full-Disclosure linux-kernel linux-netdev linux-ext4 linux-hardening linux-cve-announce PHC
Open Source and information security mailing list archives
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20200227090046.3e3177b3@carbon>
Date:   Thu, 27 Feb 2020 09:00:46 +0100
From:   Jesper Dangaard Brouer <brouer@...hat.com>
To:     David Ahern <dsahern@...nel.org>
Cc:     netdev@...r.kernel.org, davem@...emloft.net, kuba@...nel.org,
        prashantbhole.linux@...il.com, jasowang@...hat.com,
        toke@...hat.com, mst@...hat.com, toshiaki.makita1@...il.com,
        daniel@...earbox.net, john.fastabend@...il.com, ast@...nel.org,
        kafai@...com, songliubraving@...com, yhs@...com, andriin@...com,
        dsahern@...il.com, David Ahern <dahern@...italocean.com>,
        brouer@...hat.com
Subject: Re: [PATCH RFC v4 bpf-next 03/11] xdp: Add xdp_txq_info to xdp_buff

On Wed, 26 Feb 2020 20:20:05 -0700
David Ahern <dsahern@...nel.org> wrote:

> From: David Ahern <dahern@...italocean.com>
> 
> Add xdp_txq_info as the Tx counterpart to xdp_rxq_info. At the
> moment only the device is added. Other fields (queue_index)
> can be added as use cases arise.
> 
> From a UAPI perspective, egress_ifindex is a union with ingress_ifindex
> since only one applies based on where the program is attached.
> 
> Signed-off-by: David Ahern <dahern@...italocean.com>
> ---
>  include/net/xdp.h        |  5 +++++
>  include/uapi/linux/bpf.h |  6 ++++--
>  net/core/filter.c        | 27 +++++++++++++++++++--------
>  3 files changed, 28 insertions(+), 10 deletions(-)
> 
> diff --git a/include/net/xdp.h b/include/net/xdp.h
> index 40c6d3398458..5584b9db86fe 100644
> --- a/include/net/xdp.h
> +++ b/include/net/xdp.h
> @@ -63,6 +63,10 @@ struct xdp_rxq_info {
>  	struct xdp_mem_info mem;
>  } ____cacheline_aligned; /* perf critical, avoid false-sharing */
>  
> +struct xdp_txq_info {
> +	struct net_device *dev;
> +};
> +
>  struct xdp_buff {
>  	void *data;
>  	void *data_end;
> @@ -70,6 +74,7 @@ struct xdp_buff {
>  	void *data_hard_start;
>  	unsigned long handle;
>  	struct xdp_rxq_info *rxq;
> +	struct xdp_txq_info *txq;
>  };
>  
>  struct xdp_frame {
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 7850f8683b81..5e3f8aefad41 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -3334,8 +3334,10 @@ struct xdp_md {
>  	__u32 data;
>  	__u32 data_end;
>  	__u32 data_meta;
> -	/* Below access go through struct xdp_rxq_info */
> -	__u32 ingress_ifindex; /* rxq->dev->ifindex */
> +	union {
> +		__u32 ingress_ifindex; /* rxq->dev->ifindex */
> +		__u32 egress_ifindex;  /* txq->dev->ifindex */
> +	};

Are we sure it is wise to "union share" (struct) xdp_md as the
XDP-context in XDP programs with different expected_attach_type?
This allows the XDP-programmer to code an EGRESS program that accesses
ctx->ingress_ifindex, which under the hood will be translated to
ctx->egress_ifindex, because from the compiler's PoV this is just an
offset.

We are setting up the XDP-programmer for a long debugging session, as
she will be expecting to read 'ingress_ifindex', but will be getting
'egress_ifindex'.  (The compiler cannot warn her, and the access is
also correct as seen from the verifier.)


>  	__u32 rx_queue_index;  /* rxq->queue_index  */

So, the TX program can still read 'rx_queue_index'; is this wise?
(It should be easy to catch and reject this below.)


>  };
>  
> diff --git a/net/core/filter.c b/net/core/filter.c
> index c7cc98c55621..d1c65dccd671 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -7716,14 +7716,25 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
>  				      offsetof(struct xdp_buff, data_end));
>  		break;
>  	case offsetof(struct xdp_md, ingress_ifindex):
> -		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
> -				      si->dst_reg, si->src_reg,
> -				      offsetof(struct xdp_buff, rxq));
> -		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
> -				      si->dst_reg, si->dst_reg,
> -				      offsetof(struct xdp_rxq_info, dev));
> -		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> -				      offsetof(struct net_device, ifindex));
> +		if (prog->expected_attach_type == BPF_XDP_EGRESS) {
> +			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
> +					      si->dst_reg, si->src_reg,
> +					      offsetof(struct xdp_buff, txq));
> +			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
> +					      si->dst_reg, si->dst_reg,
> +					      offsetof(struct xdp_txq_info, dev));
> +			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> +					      offsetof(struct net_device, ifindex));
> +		} else {
> +			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),
> +					      si->dst_reg, si->src_reg,
> +					      offsetof(struct xdp_buff, rxq));
> +			*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_rxq_info, dev),
> +					      si->dst_reg, si->dst_reg,
> +					      offsetof(struct xdp_rxq_info, dev));
> +			*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
> +					      offsetof(struct net_device, ifindex));
> +		}
>  		break;
>  	case offsetof(struct xdp_md, rx_queue_index):
>  		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, rxq),

We can catch and disallow access to rx_queue_index from programs with
expected_attach_type BPF_XDP_EGRESS here.  But then we are adding more
code to handle/separate egress from normal RX/ingress.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer