[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+FuTSewa5gCcDJzfQc4j6oQVU6d1kpPqL+D9ZQ9BQUi6Zp9Nw@mail.gmail.com>
Date:   Fri, 29 Jul 2022 08:52:04 +0200
From:   Willem de Bruijn <willemdebruijn.kernel@...il.com>
To:     Cezar Bulinaru <cbulinaru@...il.com>
Cc:     davem@...emloft.net, kuba@...nel.org, netdev@...r.kernel.org
Subject: Re: [PATCH v2] net: tap: NULL pointer derefence in
 dev_parse_header_protocol when skb->dev is null
On Fri, Jul 29, 2022 at 7:17 AM Cezar Bulinaru <cbulinaru@...il.com> wrote:
>
> Fixes a NULL pointer dereference bug triggered from the tap driver.
> When tap_get_user calls virtio_net_hdr_to_skb the skb->dev is null
> (in tap.c skb->dev is set after the call to virtio_net_hdr_to_skb)
> virtio_net_hdr_to_skb calls dev_parse_header_protocol which
> needs skb->dev field to be valid.
>
> The line that triggers the bug is in dev_parse_header_protocol
> (dev is at offset 0x10 from skb and is stored in RAX register)
>   if (!dev->header_ops || !dev->header_ops->parse_protocol)
>   22e1:   mov    0x10(%rbx),%rax
>   22e5:   mov    0x230(%rax),%rax
>
> Setting skb->dev before the call in tap.c fixes the issue.
>
> BUG: kernel NULL pointer dereference, address: 0000000000000230
> RIP: 0010:virtio_net_hdr_to_skb.constprop.0+0x335/0x410 [tap]
> Code: c0 0f 85 b7 fd ff ff eb d4 41 39 c6 77 cf 29 c6 48 89 df 44 01 f6 e8 7a 79 83 c1 48 85 c0 0f 85 d9 fd ff ff eb b7 48 8b 43 10 <48> 8b 80 30 02 00 00 48 85 c0 74 55 48 8b 40 28 48 85 c0 74 4c 48
> RSP: 0018:ffffc90005c27c38 EFLAGS: 00010246
> RAX: 0000000000000000 RBX: ffff888298f25300 RCX: 0000000000000010
> RDX: 0000000000000005 RSI: ffffc90005c27cb6 RDI: ffff888298f25300
> RBP: ffffc90005c27c80 R08: 00000000ffffffea R09: 00000000000007e8
> R10: ffff88858ec77458 R11: 0000000000000000 R12: 0000000000000001
> R13: 0000000000000014 R14: ffffc90005c27e08 R15: ffffc90005c27cb6
> FS:  0000000000000000(0000) GS:ffff88858ec40000(0000) knlGS:0000000000000000
> CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000230 CR3: 0000000281408006 CR4: 00000000003706e0
> Call Trace:
>  tap_get_user+0x3f1/0x540 [tap]
>  tap_sendmsg+0x56/0x362 [tap]
>  ? get_tx_bufs+0xc2/0x1e0 [vhost_net]
>  handle_tx_copy+0x114/0x670 [vhost_net]
>  handle_tx+0xb0/0xe0 [vhost_net]
>  handle_tx_kick+0x15/0x20 [vhost_net]
>  vhost_worker+0x7b/0xc0 [vhost]
>  ? vhost_vring_call_reset+0x40/0x40 [vhost]
>  kthread+0xfa/0x120
>  ? kthread_complete_and_exit+0x20/0x20
>  ret_from_fork+0x1f/0x30
>
> Signed-off-by: Cezar Bulinaru <cbulinaru@...il.com>
This was likely introduced when dev_parse_header_protocol started being
called in virtio_net_hdr_to_skb, so
Fixes: 924a9bc362a5 ("net: check if protocol extracted by
virtio_net_hdr_set_proto is correct")
> ---
>  drivers/net/tap.c                    |  21 +-
>  tools/testing/selftests/net/Makefile |   2 +-
>  tools/testing/selftests/net/tap.c    | 395 +++++++++++++++++++++++++++
Is there prior art in mixing fixes and tests? Should the test go to
net or always to net-next?
>  3 files changed, 409 insertions(+), 9 deletions(-)
Please also add the new test binary to .gitignore
>  create mode 100644 tools/testing/selftests/net/tap.c
>
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index c3d42062559d..557236d51d01 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -716,10 +716,20 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
>         skb_reset_mac_header(skb);
>         skb->protocol = eth_hdr(skb)->h_proto;
>
> +       rcu_read_lock();
> +       tap = rcu_dereference(q->tap);
> +       if (tap) {
> +               skb->dev = tap->dev;
> +       } else {
> +               kfree_skb(skb);
> +               goto post_send;
So little is done after post_send that this could also just return.
One small issue is that the code now returns success even on packets
that previously would have been dropped on error in
virtio_net_hdr_to_skb. That seems acceptable in this case.
> +       }
> +
>         if (vnet_hdr_len) {
>                 err = virtio_net_hdr_to_skb(skb, &vnet_hdr,
>                                             tap_is_little_endian(q));
>                 if (err) {
> +                       rcu_read_unlock();
>                         drop_reason = SKB_DROP_REASON_DEV_HDR;
>                         goto err_kfree;
>                 }
> @@ -732,8 +742,6 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
>             __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
>                 skb_set_network_header(skb, depth);
>
> -       rcu_read_lock();
> -       tap = rcu_dereference(q->tap);
>         /* copy skb_ubuf_info for callback when skb has no error */
>         if (zerocopy) {
>                 skb_zcopy_init(skb, msg_control);
> @@ -742,12 +750,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
>                 uarg->callback(NULL, uarg, false);
>         }
>
> -       if (tap) {
> -               skb->dev = tap->dev;
> -               dev_queue_xmit(skb);
> -       } else {
> -               kfree_skb(skb);
> -       }
> +       dev_queue_xmit(skb);
> +
> +post_send:
>         rcu_read_unlock();
>
>         return total_len;
> diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
> index db05b3764b77..71e3f9f7f2d6 100644
> --- a/tools/testing/selftests/net/Makefile
> +++ b/tools/testing/selftests/net/Makefile
> @@ -54,7 +54,7 @@ TEST_GEN_FILES += ipsec
>  TEST_GEN_FILES += ioam6_parser
>  TEST_GEN_FILES += gro
>  TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
> -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun
> +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
>  TEST_GEN_FILES += toeplitz
>  TEST_GEN_FILES += cmsg_sender
>  TEST_GEN_FILES += stress_reuseport_listen
> diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
> new file mode 100644
> index 000000000000..5851b333d705
> --- /dev/null
> +++ b/tools/testing/selftests/net/tap.c
> @@ -0,0 +1,395 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#define _GNU_SOURCE
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <net/if.h>
> +#include <linux/if_tun.h>
> +#include <linux/netlink.h>
> +#include <linux/rtnetlink.h>
> +#include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <linux/virtio_net.h>
> +#include <netinet/ip.h>
> +#include <netinet/udp.h>
> +#include "../kselftest_harness.h"
> +
> +static const char param_dev_tap_name[] = "xmacvtap0";
> +static const char param_dev_dummy_name[] = "xdummy0";
> +static unsigned char param_hwaddr_src[] = { 0x00, 0xfe, 0x98, 0x14, 0x22, 0x42 };
> +static unsigned char param_hwaddr_dest[] = {
> +       0x00, 0xfe, 0x98, 0x94, 0xd2, 0x43
> +};
> +
> +#define MAX_RTNL_PAYLOAD (2048)
> +#define PKT_DATA 0xCB
> +#define TEST_PACKET_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU)
> +
> +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
> +                                unsigned short len)
> +{
> +       struct rtattr *rta =
> +               (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
> +       rta->rta_type = type;
> +       rta->rta_len = RTA_LENGTH(len);
> +       nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
> +       return rta;
> +}
> +
> +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
> +{
> +       return rtattr_add(nh, type, 0);
> +}
> +
> +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
> +{
> +       uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
> +
> +       attr->rta_len = end - (uint8_t *)attr;
> +}
> +
> +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
> +                                    const char *s)
> +{
> +       struct rtattr *rta = rtattr_add(nh, type, strlen(s));
> +
> +       memcpy(RTA_DATA(rta), s, strlen(s));
> +       return rta;
> +}
> +
> +static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
> +                                      const char *s)
> +{
> +       struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
> +
> +       strcpy(RTA_DATA(rta), s);
> +       return rta;
> +}
> +
> +static struct rtattr *rtattr_add_any(struct nlmsghdr *nh, unsigned short type,
> +                                    const void *arr, size_t len)
> +{
> +       struct rtattr *rta = rtattr_add(nh, type, len);
> +
> +       memcpy(RTA_DATA(rta), arr, len);
> +       return rta;
> +}
> +
> +static int dev_create(const char *dev, const char *link_type,
> +                     int (*fill_rtattr)(struct nlmsghdr *nh),
> +                     int (*fill_info_data)(struct nlmsghdr *nh))
> +{
> +       struct {
> +               struct nlmsghdr nh;
> +               struct ifinfomsg info;
> +               unsigned char data[MAX_RTNL_PAYLOAD];
> +       } req;
> +       struct rtattr *link_info, *info_data;
> +       int ret, rtnl;
> +
> +       rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
> +       if (rtnl < 0) {
> +               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
> +               return 1;
> +       }
> +
> +       memset(&req, 0, sizeof(req));
> +       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
> +       req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
> +       req.nh.nlmsg_type = RTM_NEWLINK;
> +
> +       req.info.ifi_family = AF_UNSPEC;
> +       req.info.ifi_type = 1;
> +       req.info.ifi_index = 0;
> +       req.info.ifi_flags = IFF_BROADCAST | IFF_UP;
> +       req.info.ifi_change = 0xffffffff;
> +
> +       rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
> +
> +       if (fill_rtattr) {
> +               ret = fill_rtattr(&req.nh);
> +               if (ret)
> +                       return ret;
> +       }
> +
> +       link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
> +
> +       rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
> +
> +       if (fill_info_data) {
> +               info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
> +               ret = fill_info_data(&req.nh);
> +               if (ret)
> +                       return ret;
> +               rtattr_end(&req.nh, info_data);
> +       }
> +
> +       rtattr_end(&req.nh, link_info);
> +
> +       ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
> +       if (ret < 0)
> +               fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
> +       ret = (unsigned int)ret != req.nh.nlmsg_len;
> +
> +       close(rtnl);
> +       return ret;
> +}
> +
> +static int dev_delete(const char *dev)
> +{
> +       struct {
> +               struct nlmsghdr nh;
> +               struct ifinfomsg info;
> +               unsigned char data[MAX_RTNL_PAYLOAD];
> +       } req;
> +       int ret, rtnl;
> +
> +       rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
> +       if (rtnl < 0) {
> +               fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
> +               return 1;
> +       }
> +
> +       memset(&req, 0, sizeof(req));
> +       req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
> +       req.nh.nlmsg_flags = NLM_F_REQUEST;
> +       req.nh.nlmsg_type = RTM_DELLINK;
> +
> +       req.info.ifi_family = AF_UNSPEC;
> +
> +       rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
> +
> +       ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
> +       if (ret < 0)
> +               fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
> +
> +       ret = (unsigned int)ret != req.nh.nlmsg_len;
> +
> +       close(rtnl);
> +       return ret;
> +}
> +
> +static int macvtap_fill_rtattr(struct nlmsghdr *nh)
> +{
> +       int ifindex;
> +
> +       ifindex = if_nametoindex(param_dev_dummy_name);
> +       if (ifindex == 0) {
> +               fprintf(stderr, "%s: ifindex  %s\n", __func__, strerror(errno));
> +               return -errno;
> +       }
> +
> +       rtattr_add_any(nh, IFLA_LINK, &ifindex, sizeof(ifindex));
> +       rtattr_add_any(nh, IFLA_ADDRESS, param_hwaddr_src, ETH_ALEN);
> +
> +       return 0;
> +}
> +
> +static int opentap(const char *devname)
> +{
> +       int ifindex;
> +       char buf[256];
> +       int fd;
> +       struct ifreq ifr;
> +
> +       ifindex = if_nametoindex(devname);
> +       if (ifindex == 0) {
> +               fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
> +               return -errno;
> +       }
> +
> +       sprintf(buf, "/dev/tap%d", ifindex);
> +       fd = open(buf, O_RDWR | O_NONBLOCK);
> +       if (fd < 0) {
> +               fprintf(stderr, "%s: open %s\n", __func__, strerror(errno));
> +               return -errno;
> +       }
> +
> +       memset(&ifr, 0, sizeof(ifr));
> +       strcpy(ifr.ifr_name, devname);
> +       ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_MULTI_QUEUE;
> +       if (ioctl(fd, TUNSETIFF, &ifr, sizeof(ifr)) < 0)
> +               return -errno;
> +       return fd;
> +}
> +
> +FIXTURE(tap)
> +{
> +       int fd;
> +};
> +
> +FIXTURE_SETUP(tap)
> +{
> +       int ret;
> +
> +       ret = dev_create(param_dev_dummy_name, "dummy", NULL, NULL);
> +       EXPECT_EQ(ret, 0);
> +
> +       ret = dev_create(param_dev_tap_name, "macvtap", macvtap_fill_rtattr,
> +                        NULL);
> +       EXPECT_EQ(ret, 0);
> +
> +       self->fd = opentap(param_dev_tap_name);
> +       ASSERT_GE(self->fd, 0);
> +}
> +
> +FIXTURE_TEARDOWN(tap)
> +{
> +       int ret;
> +
> +       if (self->fd != -1)
> +               close(self->fd);
> +
> +       ret = dev_delete(param_dev_dummy_name);
> +       EXPECT_EQ(ret, 0);
> +
> +       ret = dev_delete(param_dev_tap_name);
> +       EXPECT_EQ(ret, 0);
> +}
> +
> +size_t build_eth(uint8_t *buf, uint16_t proto)
> +{
> +       struct ethhdr *eth = (struct ethhdr *)buf;
> +
> +       eth->h_proto = htons(proto);
> +       memcpy(eth->h_source, param_hwaddr_src, ETH_ALEN);
> +       memcpy(eth->h_dest, param_hwaddr_dest, ETH_ALEN);
> +
> +       return ETH_HLEN;
> +}
> +
> +static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
> +{
> +       unsigned long sum = 0;
> +       int i;
> +
> +       for (i = 0; i < num_u16; i++)
> +               sum += start[i];
> +
> +       return sum;
> +}
> +
> +static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
> +                             unsigned long sum)
> +{
> +       sum += add_csum_hword(start, num_u16);
> +
> +       while (sum >> 16)
> +               sum = (sum & 0xffff) + (sum >> 16);
> +
> +       return ~sum;
> +}
> +
> +static int build_ipv4_header(uint8_t *buf, int payload_len)
> +{
> +       struct iphdr *iph = (struct iphdr *)buf;
> +
> +       iph->ihl = 5;
> +       iph->version = 4;
> +       iph->ttl = 8;
> +       iph->tot_len =
> +               htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
> +       iph->id = htons(1337);
> +       iph->protocol = IPPROTO_UDP;
> +       iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
> +       iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
> +       iph->check = build_ip_csum((const uint16_t *)iph, iph->ihl << 1, 0);
> +
> +       return iph->ihl << 2;
> +}
> +
> +static int build_udp_header(uint8_t *buf, int payload_len, bool csum_off)
> +{
> +       const int alen = sizeof(uint32_t);
> +       struct udphdr *udph = (struct udphdr *)buf;
> +       int len = sizeof(*udph) + payload_len;
> +
> +       udph->source = htons(22);
> +       udph->dest = htons(58822);
> +       udph->len = htons(len);
> +
> +       if (csum_off)
> +               udph->check = build_ip_csum((const uint16_t *)buf - (2 * alen),
> +                                           alen,
> +                                           htons(IPPROTO_UDP) + udph->len);
> +       else
> +               udph->check = 0;
> +
> +       return sizeof(*udph);
> +}
> +
> +size_t build_test_packet_valid_udp_csum(uint8_t *buf, size_t payload_len)
> +{
> +       uint8_t *cur = buf;
> +       struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
> +
> +       vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
> +       vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> +       vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
> +       vh->csum_offset = __builtin_offsetof(struct udphdr, check);
> +       vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
> +       vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
> +       cur += sizeof(*vh);
> +
> +       cur += build_eth(cur, ETH_P_IP);
> +       cur += build_ipv4_header(cur, payload_len);
> +       cur += build_udp_header(cur, payload_len, true);
> +       memset(cur, PKT_DATA, payload_len);
Need to compute the udp checksum after configuring the payload.
> +       cur += payload_len;
> +
> +       return cur - buf;
> +}
> +
> +size_t build_test_packet_crash_tap_invalid_eth_proto(uint8_t *buf,
> +                                                    size_t payload_len)
> +{
> +       uint8_t *cur = buf;
> +       struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
> +
> +       vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
> +       vh->flags = 0;
> +       vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
> +       vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
> +       cur += sizeof(*vh);
Thanks for the test. It helps to see which packet triggers the bug.
This is indeed a non-standard packet (GSO without NEEDS_CSUM) that
enters that branch in virtio_net_hdr_to_skb.
This function is so similar to the previous one. Could they be
deduplicated? That would make the difference between the normal packet
and the bad packet even clearer.
> +
> +       cur += build_eth(cur, 0);
> +       cur += sizeof(struct iphdr) + sizeof(struct udphdr);
> +       cur += build_ipv4_header(cur, payload_len);
> +       cur += build_udp_header(cur, payload_len, true);
> +       memset(cur, PKT_DATA, payload_len);
> +       cur += payload_len;
> +
> +       return cur - buf;
> +}
> +
> +TEST_F(tap, test_packet_valid_udp_csum)
> +{
> +       uint8_t pkt[TEST_PACKET_SZ];
> +       size_t off;
> +       int ret;
> +
> +       memset(pkt, 0, sizeof(pkt));
> +       off = build_test_packet_valid_udp_csum(pkt, 1024);
> +       ret = write(self->fd, pkt, off);
> +       ASSERT_EQ(ret, off);
> +}
> +
> +TEST_F(tap, test_packet_crash_tap_invalid_eth_proto)
> +{
> +       uint8_t pkt[TEST_PACKET_SZ];
> +       size_t off;
> +       int ret;
> +
> +       memset(pkt, 0, sizeof(pkt));
> +       off = build_test_packet_crash_tap_invalid_eth_proto(pkt, 1024);
> +       ret = write(self->fd, pkt, off);
> +       ASSERT_EQ(ret, -1);
> +       ASSERT_EQ(errno, EINVAL);
> +}
> +
> +TEST_HARNESS_MAIN
> --
> 2.34.1
>
Powered by blists - more mailing lists
 
