[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CAF=yD-KFgbkYcGBHZPEnPj4Y+hAG-oVCGu2XGP+wHp--L=Spmw@mail.gmail.com>
Date: Thu, 10 Jan 2019 15:41:57 -0500
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Martin KaFai Lau <kafai@...com>
Cc: Network Development <netdev@...r.kernel.org>,
Alexei Starovoitov <ast@...com>,
Daniel Borkmann <daniel@...earbox.net>, tgraf@...g.ch
Subject: Re: [PATCH net 1/2] bpf: Fix bpf_redirect to an ipip/ip6tnl dev
On Wed, Nov 9, 2016 at 6:57 PM Martin KaFai Lau <kafai@...com> wrote:
>
> If the bpf program calls bpf_redirect(dev, 0) and dev is
> an ipip/ip6tnl, it currently includes the mac header.
> e.g. If dev is ipip, the end result is IP-EthHdr-IP instead
> of IP-IP.
>
> The fix is to pull the mac header. At ingress, skb_postpull_rcsum()
> is not needed because the ethhdr should have been pulled once already
> and then got pushed back just before calling the bpf_prog.
> At egress, this patch calls skb_postpull_rcsum().
>
> If bpf_redirect(dev, BPF_F_INGRESS) is called,
> it also fails now because it calls dev_forward_skb() which
> eventually calls eth_type_trans(skb, dev). The eth_type_trans()
> will set skb->type = PACKET_OTHERHOST because the mac address
> does not match the redirecting dev->dev_addr. The PACKET_OTHERHOST
> will eventually cause the ip_rcv() errors out. To fix this,
> ____dev_forward_skb() is added.
>
> Joint work with Daniel Borkmann.
>
> Fixes: cfc7381b3002 ("ip_tunnel: add collect_md mode to IPIP tunnel")
> Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels")
> Acked-by: Daniel Borkmann <daniel@...earbox.net>
> Acked-by: Alexei Starovoitov <ast@...com>
> Signed-off-by: Martin KaFai Lau <kafai@...com>
> ---
> include/linux/netdevice.h | 15 +++++++++++
> net/core/dev.c | 17 +++++-------
> net/core/filter.c | 68 +++++++++++++++++++++++++++++++++++++++++------
> 3 files changed, 81 insertions(+), 19 deletions(-)
>
> diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
> index 91ee364..bf04a46 100644
> --- a/include/linux/netdevice.h
> +++ b/include/linux/netdevice.h
> @@ -3354,6 +3354,21 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
> bool is_skb_forwardable(const struct net_device *dev,
> const struct sk_buff *skb);
>
> +static __always_inline int ____dev_forward_skb(struct net_device *dev,
> + struct sk_buff *skb)
> +{
> + if (skb_orphan_frags(skb, GFP_ATOMIC) ||
> + unlikely(!is_skb_forwardable(dev, skb))) {
> + atomic_long_inc(&dev->rx_dropped);
> + kfree_skb(skb);
> + return NET_RX_DROP;
> + }
> +
> + skb_scrub_packet(skb, true);
> + skb->priority = 0;
> + return 0;
> +}
> +
> void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev);
>
> extern int netdev_budget;
> diff --git a/net/core/dev.c b/net/core/dev.c
> index eaad4c2..6666b28 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -1766,19 +1766,14 @@ EXPORT_SYMBOL_GPL(is_skb_forwardable);
>
> int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
> {
> - if (skb_orphan_frags(skb, GFP_ATOMIC) ||
> - unlikely(!is_skb_forwardable(dev, skb))) {
> - atomic_long_inc(&dev->rx_dropped);
> - kfree_skb(skb);
> - return NET_RX_DROP;
> - }
> + int ret = ____dev_forward_skb(dev, skb);
>
> - skb_scrub_packet(skb, true);
> - skb->priority = 0;
> - skb->protocol = eth_type_trans(skb, dev);
> - skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
> + if (likely(!ret)) {
> + skb->protocol = eth_type_trans(skb, dev);
> + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
> + }
>
> - return 0;
> + return ret;
> }
> EXPORT_SYMBOL_GPL(__dev_forward_skb);
>
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 00351cd..b391209 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -1628,6 +1628,19 @@ static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
> return dev_forward_skb(dev, skb);
> }
>
> +static inline int __bpf_rx_skb_no_mac(struct net_device *dev,
> + struct sk_buff *skb)
> +{
> + int ret = ____dev_forward_skb(dev, skb);
> +
> + if (likely(!ret)) {
> + skb->dev = dev;
> + ret = netif_rx(skb);
> + }
> +
> + return ret;
> +}
> +
> static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
> {
> int ret;
> @@ -1647,6 +1660,51 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb)
> return ret;
> }
>
> +static int __bpf_redirect_no_mac(struct sk_buff *skb, struct net_device *dev,
> + u32 flags)
> +{
> + /* skb->mac_len is not set on normal egress */
> + unsigned int mlen = skb->network_header - skb->mac_header;
> +
> + __skb_pull(skb, mlen);
syzbot found an issue when redirecting an LWT_XMIT program using
BPF_PROG_TEST_RUN.
That path produces packets with skb->data at skb->network_header, as
is_l2 is false in bpf_prog_test_run_skb.
I think the correct fix here is to pull from skb->data up to
skb->network_header, instead of unconditionally from skb->mac_header:
- unsigned int mlen = skb->network_header - skb->mac_header;
+ unsigned int mlen = skb_network_offset(skb);
- __skb_pull(skb, mlen);
-
- /* At ingress, the mac header has already been pulled once.
- * At egress, skb_pospull_rcsum has to be done in case that
- * the skb is originated from ingress (i.e. a forwarded skb)
- * to ensure that rcsum starts at net header.
- */
- if (!skb_at_tc_ingress(skb))
- skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ if (mlen) {
+ __skb_pull(skb, mlen);
+
+ /* At ingress, the mac header has already been pulled once.
+ * At egress, skb_pospull_rcsum has to be done in case that
+ * the skb is originated from ingress (i.e. a forwarded skb)
+ * to ensure that rcsum starts at net header.
+ */
+ if (!skb_at_tc_ingress(skb))
+ skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
+ }
Comments, concerns?
> +
> + /* At ingress, the mac header has already been pulled once.
> + * At egress, skb_pospull_rcsum has to be done in case that
> + * the skb is originated from ingress (i.e. a forwarded skb)
> + * to ensure that rcsum starts at net header.
> + */
> + if (!skb_at_tc_ingress(skb))
> + skb_postpull_rcsum(skb, skb_mac_header(skb), mlen);
> + skb_pop_mac_header(skb);
> + skb_reset_mac_len(skb);
> + return flags & BPF_F_INGRESS ?
> + __bpf_rx_skb_no_mac(dev, skb) : __bpf_tx_skb(dev, skb);
> +}
> +
> +static int __bpf_redirect_common(struct sk_buff *skb, struct net_device *dev,
> + u32 flags)
> +{
This function has since been extended with check
+ /* Verify that a link layer header is carried */
+ if (unlikely(skb->mac_header >= skb->network_header)) {
+ kfree_skb(skb);
+ return -ERANGE;
+ }
+
I haven't checked yet, but I think that this will still be incorrect
from LWT_XMIT to a destination that expects skb->data at
skb->mac_header.
> + bpf_push_mac_rcsum(skb);
> + return flags & BPF_F_INGRESS ?
> + __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb);
> +}
Powered by blists - more mailing lists