[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CA+FuTSewa5gCcDJzfQc4j6oQVU6d1kpPqL+D9ZQ9BQUi6Zp9Nw@mail.gmail.com>
Date: Fri, 29 Jul 2022 08:52:04 +0200
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Cezar Bulinaru <cbulinaru@...il.com>
Cc: davem@...emloft.net, kuba@...nel.org, netdev@...r.kernel.org
Subject: Re: [PATCH v2] net: tap: NULL pointer derefence in
dev_parse_header_protocol when skb->dev is null
On Fri, Jul 29, 2022 at 7:17 AM Cezar Bulinaru <cbulinaru@...il.com> wrote:
>
> Fixes a NULL pointer dereference bug triggered from the tap driver.
> When tap_get_user calls virtio_net_hdr_to_skb, skb->dev is NULL
> (in tap.c, skb->dev is set only after the call to virtio_net_hdr_to_skb).
> virtio_net_hdr_to_skb calls dev_parse_header_protocol, which
> needs the skb->dev field to be valid.
>
> The line that triggers the bug is in dev_parse_header_protocol
> (dev is at offset 0x10 from skb and is stored in the RAX register):
> if (!dev->header_ops || !dev->header_ops->parse_protocol)
> 22e1: mov 0x10(%rbx),%rax
> 22e5: mov 0x230(%rax),%rax
>
> Setting skb->dev before the call in tap.c fixes the issue.
>
> BUG: kernel NULL pointer dereference, address: 0000000000000230
> RIP: 0010:virtio_net_hdr_to_skb.constprop.0+0x335/0x410 [tap]
> Code: c0 0f 85 b7 fd ff ff eb d4 41 39 c6 77 cf 29 c6 48 89 df 44 01 f6 e8 7a 79 83 c1 48 85 c0 0f 85 d9 fd ff ff eb b7 48 8b 43 10 <48> 8b 80 30 02 00 00 48 85 c0 74 55 48 8b 40 28 48 85 c0 74 4c 48
> RSP: 0018:ffffc90005c27c38 EFLAGS: 00010246
> RAX: 0000000000000000 RBX: ffff888298f25300 RCX: 0000000000000010
> RDX: 0000000000000005 RSI: ffffc90005c27cb6 RDI: ffff888298f25300
> RBP: ffffc90005c27c80 R08: 00000000ffffffea R09: 00000000000007e8
> R10: ffff88858ec77458 R11: 0000000000000000 R12: 0000000000000001
> R13: 0000000000000014 R14: ffffc90005c27e08 R15: ffffc90005c27cb6
> FS: 0000000000000000(0000) GS:ffff88858ec40000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 0000000000000230 CR3: 0000000281408006 CR4: 00000000003706e0
> Call Trace:
> tap_get_user+0x3f1/0x540 [tap]
> tap_sendmsg+0x56/0x362 [tap]
> ? get_tx_bufs+0xc2/0x1e0 [vhost_net]
> handle_tx_copy+0x114/0x670 [vhost_net]
> handle_tx+0xb0/0xe0 [vhost_net]
> handle_tx_kick+0x15/0x20 [vhost_net]
> vhost_worker+0x7b/0xc0 [vhost]
> ? vhost_vring_call_reset+0x40/0x40 [vhost]
> kthread+0xfa/0x120
> ? kthread_complete_and_exit+0x20/0x20
> ret_from_fork+0x1f/0x30
>
> Signed-off-by: Cezar Bulinaru <cbulinaru@...il.com>
This was likely introduced when dev_parse_header_protocol started being
called in virtio_net_hdr_to_skb, so
Fixes: 924a9bc362a5 ("net: check if protocol extracted by
virtio_net_hdr_set_proto is correct")
> ---
> drivers/net/tap.c | 21 +-
> tools/testing/selftests/net/Makefile | 2 +-
> tools/testing/selftests/net/tap.c | 395 +++++++++++++++++++++++++++
Is there prior art in mixing fixes and tests? Should the test go to
net or always to net-next?
> 3 files changed, 409 insertions(+), 9 deletions(-)
Please also add the new test binary to .gitignore.
> create mode 100644 tools/testing/selftests/net/tap.c
>
> diff --git a/drivers/net/tap.c b/drivers/net/tap.c
> index c3d42062559d..557236d51d01 100644
> --- a/drivers/net/tap.c
> +++ b/drivers/net/tap.c
> @@ -716,10 +716,20 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
> skb_reset_mac_header(skb);
> skb->protocol = eth_hdr(skb)->h_proto;
>
> + rcu_read_lock();
> + tap = rcu_dereference(q->tap);
> + if (tap) {
> + skb->dev = tap->dev;
> + } else {
> + kfree_skb(skb);
> + goto post_send;
So little is done after post_send that this could also just return.
One small issue is that the code now returns success even on packets
that previously would have been dropped on error in
virtio_net_hdr_to_skb. That seems acceptable in this case.
> + }
> +
> if (vnet_hdr_len) {
> err = virtio_net_hdr_to_skb(skb, &vnet_hdr,
> tap_is_little_endian(q));
> if (err) {
> + rcu_read_unlock();
> drop_reason = SKB_DROP_REASON_DEV_HDR;
> goto err_kfree;
> }
> @@ -732,8 +742,6 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
> __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
> skb_set_network_header(skb, depth);
>
> - rcu_read_lock();
> - tap = rcu_dereference(q->tap);
> /* copy skb_ubuf_info for callback when skb has no error */
> if (zerocopy) {
> skb_zcopy_init(skb, msg_control);
> @@ -742,12 +750,9 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control,
> uarg->callback(NULL, uarg, false);
> }
>
> - if (tap) {
> - skb->dev = tap->dev;
> - dev_queue_xmit(skb);
> - } else {
> - kfree_skb(skb);
> - }
> + dev_queue_xmit(skb);
> +
> +post_send:
> rcu_read_unlock();
>
> return total_len;
> diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
> index db05b3764b77..71e3f9f7f2d6 100644
> --- a/tools/testing/selftests/net/Makefile
> +++ b/tools/testing/selftests/net/Makefile
> @@ -54,7 +54,7 @@ TEST_GEN_FILES += ipsec
> TEST_GEN_FILES += ioam6_parser
> TEST_GEN_FILES += gro
> TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
> -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun
> +TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
> TEST_GEN_FILES += toeplitz
> TEST_GEN_FILES += cmsg_sender
> TEST_GEN_FILES += stress_reuseport_listen
> diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
> new file mode 100644
> index 000000000000..5851b333d705
> --- /dev/null
> +++ b/tools/testing/selftests/net/tap.c
> @@ -0,0 +1,395 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#define _GNU_SOURCE
> +
> +#include <errno.h>
> +#include <fcntl.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <unistd.h>
> +#include <net/if.h>
> +#include <linux/if_tun.h>
> +#include <linux/netlink.h>
> +#include <linux/rtnetlink.h>
> +#include <sys/ioctl.h>
> +#include <sys/socket.h>
> +#include <linux/virtio_net.h>
> +#include <netinet/ip.h>
> +#include <netinet/udp.h>
> +#include "../kselftest_harness.h"
> +
> +static const char param_dev_tap_name[] = "xmacvtap0";
> +static const char param_dev_dummy_name[] = "xdummy0";
> +static unsigned char param_hwaddr_src[] = { 0x00, 0xfe, 0x98, 0x14, 0x22, 0x42 };
> +static unsigned char param_hwaddr_dest[] = {
> + 0x00, 0xfe, 0x98, 0x94, 0xd2, 0x43
> +};
> +
> +#define MAX_RTNL_PAYLOAD (2048)
> +#define PKT_DATA 0xCB
> +#define TEST_PACKET_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU)
> +
> +static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
> + unsigned short len)
> +{
> + struct rtattr *rta =
> + (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
> + rta->rta_type = type;
> + rta->rta_len = RTA_LENGTH(len);
> + nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
> + return rta;
> +}
> +
> +static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
> +{
> + return rtattr_add(nh, type, 0);
> +}
> +
> +static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
> +{
> + uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
> +
> + attr->rta_len = end - (uint8_t *)attr;
> +}
> +
> +static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
> + const char *s)
> +{
> + struct rtattr *rta = rtattr_add(nh, type, strlen(s));
> +
> + memcpy(RTA_DATA(rta), s, strlen(s));
> + return rta;
> +}
> +
> +static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
> + const char *s)
> +{
> + struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
> +
> + strcpy(RTA_DATA(rta), s);
> + return rta;
> +}
> +
> +static struct rtattr *rtattr_add_any(struct nlmsghdr *nh, unsigned short type,
> + const void *arr, size_t len)
> +{
> + struct rtattr *rta = rtattr_add(nh, type, len);
> +
> + memcpy(RTA_DATA(rta), arr, len);
> + return rta;
> +}
> +
> +static int dev_create(const char *dev, const char *link_type,
> + int (*fill_rtattr)(struct nlmsghdr *nh),
> + int (*fill_info_data)(struct nlmsghdr *nh))
> +{
> + struct {
> + struct nlmsghdr nh;
> + struct ifinfomsg info;
> + unsigned char data[MAX_RTNL_PAYLOAD];
> + } req;
> + struct rtattr *link_info, *info_data;
> + int ret, rtnl;
> +
> + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
> + if (rtnl < 0) {
> + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
> + return 1;
> + }
> +
> + memset(&req, 0, sizeof(req));
> + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
> + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
> + req.nh.nlmsg_type = RTM_NEWLINK;
> +
> + req.info.ifi_family = AF_UNSPEC;
> + req.info.ifi_type = 1;
> + req.info.ifi_index = 0;
> + req.info.ifi_flags = IFF_BROADCAST | IFF_UP;
> + req.info.ifi_change = 0xffffffff;
> +
> + rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
> +
> + if (fill_rtattr) {
> + ret = fill_rtattr(&req.nh);
> + if (ret)
> + return ret;
> + }
> +
> + link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
> +
> + rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
> +
> + if (fill_info_data) {
> + info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
> + ret = fill_info_data(&req.nh);
> + if (ret)
> + return ret;
> + rtattr_end(&req.nh, info_data);
> + }
> +
> + rtattr_end(&req.nh, link_info);
> +
> + ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
> + if (ret < 0)
> + fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
> + ret = (unsigned int)ret != req.nh.nlmsg_len;
> +
> + close(rtnl);
> + return ret;
> +}
> +
> +static int dev_delete(const char *dev)
> +{
> + struct {
> + struct nlmsghdr nh;
> + struct ifinfomsg info;
> + unsigned char data[MAX_RTNL_PAYLOAD];
> + } req;
> + int ret, rtnl;
> +
> + rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
> + if (rtnl < 0) {
> + fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
> + return 1;
> + }
> +
> + memset(&req, 0, sizeof(req));
> + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
> + req.nh.nlmsg_flags = NLM_F_REQUEST;
> + req.nh.nlmsg_type = RTM_DELLINK;
> +
> + req.info.ifi_family = AF_UNSPEC;
> +
> + rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
> +
> + ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
> + if (ret < 0)
> + fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
> +
> + ret = (unsigned int)ret != req.nh.nlmsg_len;
> +
> + close(rtnl);
> + return ret;
> +}
> +
> +static int macvtap_fill_rtattr(struct nlmsghdr *nh)
> +{
> + int ifindex;
> +
> + ifindex = if_nametoindex(param_dev_dummy_name);
> + if (ifindex == 0) {
> + fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
> + return -errno;
> + }
> +
> + rtattr_add_any(nh, IFLA_LINK, &ifindex, sizeof(ifindex));
> + rtattr_add_any(nh, IFLA_ADDRESS, param_hwaddr_src, ETH_ALEN);
> +
> + return 0;
> +}
> +
> +static int opentap(const char *devname)
> +{
> + int ifindex;
> + char buf[256];
> + int fd;
> + struct ifreq ifr;
> +
> + ifindex = if_nametoindex(devname);
> + if (ifindex == 0) {
> + fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
> + return -errno;
> + }
> +
> + sprintf(buf, "/dev/tap%d", ifindex);
> + fd = open(buf, O_RDWR | O_NONBLOCK);
> + if (fd < 0) {
> + fprintf(stderr, "%s: open %s\n", __func__, strerror(errno));
> + return -errno;
> + }
> +
> + memset(&ifr, 0, sizeof(ifr));
> + strcpy(ifr.ifr_name, devname);
> + ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_MULTI_QUEUE;
> + if (ioctl(fd, TUNSETIFF, &ifr, sizeof(ifr)) < 0)
> + return -errno;
> + return fd;
> +}
> +
> +FIXTURE(tap)
> +{
> + int fd;
> +};
> +
> +FIXTURE_SETUP(tap)
> +{
> + int ret;
> +
> + ret = dev_create(param_dev_dummy_name, "dummy", NULL, NULL);
> + EXPECT_EQ(ret, 0);
> +
> + ret = dev_create(param_dev_tap_name, "macvtap", macvtap_fill_rtattr,
> + NULL);
> + EXPECT_EQ(ret, 0);
> +
> + self->fd = opentap(param_dev_tap_name);
> + ASSERT_GE(self->fd, 0);
> +}
> +
> +FIXTURE_TEARDOWN(tap)
> +{
> + int ret;
> +
> + if (self->fd != -1)
> + close(self->fd);
> +
> + ret = dev_delete(param_dev_dummy_name);
> + EXPECT_EQ(ret, 0);
> +
> + ret = dev_delete(param_dev_tap_name);
> + EXPECT_EQ(ret, 0);
> +}
> +
> +size_t build_eth(uint8_t *buf, uint16_t proto)
> +{
> + struct ethhdr *eth = (struct ethhdr *)buf;
> +
> + eth->h_proto = htons(proto);
> + memcpy(eth->h_source, param_hwaddr_src, ETH_ALEN);
> + memcpy(eth->h_dest, param_hwaddr_dest, ETH_ALEN);
> +
> + return ETH_HLEN;
> +}
> +
> +static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
> +{
> + unsigned long sum = 0;
> + int i;
> +
> + for (i = 0; i < num_u16; i++)
> + sum += start[i];
> +
> + return sum;
> +}
> +
> +static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
> + unsigned long sum)
> +{
> + sum += add_csum_hword(start, num_u16);
> +
> + while (sum >> 16)
> + sum = (sum & 0xffff) + (sum >> 16);
> +
> + return ~sum;
> +}
> +
> +static int build_ipv4_header(uint8_t *buf, int payload_len)
> +{
> + struct iphdr *iph = (struct iphdr *)buf;
> +
> + iph->ihl = 5;
> + iph->version = 4;
> + iph->ttl = 8;
> + iph->tot_len =
> + htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
> + iph->id = htons(1337);
> + iph->protocol = IPPROTO_UDP;
> + iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
> + iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
> + iph->check = build_ip_csum((const uint16_t *)iph, iph->ihl << 1, 0);
> +
> + return iph->ihl << 2;
> +}
> +
> +static int build_udp_header(uint8_t *buf, int payload_len, bool csum_off)
> +{
> + const int alen = sizeof(uint32_t);
> + struct udphdr *udph = (struct udphdr *)buf;
> + int len = sizeof(*udph) + payload_len;
> +
> + udph->source = htons(22);
> + udph->dest = htons(58822);
> + udph->len = htons(len);
> +
> + if (csum_off)
> + udph->check = build_ip_csum((const uint16_t *)buf - (2 * alen),
> + alen,
> + htons(IPPROTO_UDP) + udph->len);
> + else
> + udph->check = 0;
> +
> + return sizeof(*udph);
> +}
> +
> +size_t build_test_packet_valid_udp_csum(uint8_t *buf, size_t payload_len)
> +{
> + uint8_t *cur = buf;
> + struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
> +
> + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
> + vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
> + vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
> + vh->csum_offset = __builtin_offsetof(struct udphdr, check);
> + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
> + vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
> + cur += sizeof(*vh);
> +
> + cur += build_eth(cur, ETH_P_IP);
> + cur += build_ipv4_header(cur, payload_len);
> + cur += build_udp_header(cur, payload_len, true);
> + memset(cur, PKT_DATA, payload_len);
Need to compute the udp checksum after configuring the payload.
> + cur += payload_len;
> +
> + return cur - buf;
> +}
> +
> +size_t build_test_packet_crash_tap_invalid_eth_proto(uint8_t *buf,
> + size_t payload_len)
> +{
> + uint8_t *cur = buf;
> + struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
> +
> + vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
> + vh->flags = 0;
> + vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
> + vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
> + cur += sizeof(*vh);
Thanks for the test. It helps to see which packet triggers the bug.
This is indeed a non-standard packet (GSO without NEEDS_CSUM) that
enters that branch in virtio_net_hdr_to_skb.
This function is so similar to the previous one. Could they be
deduplicated? That would make the difference between the normal packet
and the bad packet even clearer.
> +
> + cur += build_eth(cur, 0);
> + cur += sizeof(struct iphdr) + sizeof(struct udphdr);
> + cur += build_ipv4_header(cur, payload_len);
> + cur += build_udp_header(cur, payload_len, true);
> + memset(cur, PKT_DATA, payload_len);
> + cur += payload_len;
> +
> + return cur - buf;
> +}
> +
> +TEST_F(tap, test_packet_valid_udp_csum)
> +{
> + uint8_t pkt[TEST_PACKET_SZ];
> + size_t off;
> + int ret;
> +
> + memset(pkt, 0, sizeof(pkt));
> + off = build_test_packet_valid_udp_csum(pkt, 1024);
> + ret = write(self->fd, pkt, off);
> + ASSERT_EQ(ret, off);
> +}
> +
> +TEST_F(tap, test_packet_crash_tap_invalid_eth_proto)
> +{
> + uint8_t pkt[TEST_PACKET_SZ];
> + size_t off;
> + int ret;
> +
> + memset(pkt, 0, sizeof(pkt));
> + off = build_test_packet_crash_tap_invalid_eth_proto(pkt, 1024);
> + ret = write(self->fd, pkt, off);
> + ASSERT_EQ(ret, -1);
> + ASSERT_EQ(errno, EINVAL);
> +}
> +
> +TEST_HARNESS_MAIN
> --
> 2.34.1
>
Powered by blists - more mailing lists