[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <542aa2c2-fd54-1e6c-f2f4-46fcc2e6f6ee@redhat.com>
Date: Thu, 5 Sep 2019 11:10:46 +0800
From: Jason Wang <jasowang@...hat.com>
To: Yang Yingliang <yangyingliang@...wei.com>
Cc: David Miller <davem@...emloft.net>, netdev@...r.kernel.org,
eric dumazet <eric.dumazet@...il.com>,
xiyou wangcong <xiyou.wangcong@...il.com>,
weiyongjun1@...wei.com
Subject: Re: [PATCH v3] tun: fix use-after-free when register netdev failed
On 2019/9/5 上午10:03, Yang Yingliang wrote:
>
>
> On 2019/9/3 18:50, Jason Wang wrote:
>>
>> ----- Original Message -----
>>>
>>> On 2019/9/3 14:06, Jason Wang wrote:
>>>> On 2019/9/3 下午1:42, Yang Yingliang wrote:
>>>>>
>>>>> On 2019/9/3 11:03, Jason Wang wrote:
>>>>>> On 2019/9/3 上午9:45, Yang Yingliang wrote:
>>>>>>>
>>>>>>> On 2019/9/2 13:32, Jason Wang wrote:
>>>>>>>> On 2019/8/23 下午5:36, Yang Yingliang wrote:
>>>>>>>>>
>>>>>>>>> On 2019/8/23 11:05, Jason Wang wrote:
>>>>>>>>>> ----- Original Message -----
>>>>>>>>>>> On 2019/8/22 14:07, Yang Yingliang wrote:
>>>>>>>>>>>> On 2019/8/22 10:13, Jason Wang wrote:
>>>>>>>>>>>>> On 2019/8/20 上午10:28, Jason Wang wrote:
>>>>>>>>>>>>>> On 2019/8/20 上午9:25, David Miller wrote:
>>>>>>>>>>>>>>> From: Yang Yingliang <yangyingliang@...wei.com>
>>>>>>>>>>>>>>> Date: Mon, 19 Aug 2019 21:31:19 +0800
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>>> Call tun_attach() after register_netdevice() to make sure
>>>>>>>>>>>>>>>> tfile->tun
>>>>>>>>>>>>>>>> is not published until the netdevice is registered. So the
>>>>>>>>>>>>>>>> read/write
>>>>>>>>>>>>>>>> thread can not use the tun pointer that may freed by
>>>>>>>>>>>>>>>> free_netdev().
>>>>>>>>>>>>>>>> (The tun and dev pointer are allocated by
>>>>>>>>>>>>>>>> alloc_netdev_mqs(), they
>>>>>>>>>>>>>>>> can
>>>>>>>>>>>>>>>> be freed by netdev_freemem().)
>>>>>>>>>>>>>>> register_netdevice() must always be the last operation in
>>>>>>>>>>>>>>> the order of
>>>>>>>>>>>>>>> network device setup.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> At the point register_netdevice() is called, the device is
>>>>>>>>>>>>>>> visible
>>>>>>>>>>>>>>> globally
>>>>>>>>>>>>>>> and therefore all of it's software state must be fully
>>>>>>>>>>>>>>> initialized and
>>>>>>>>>>>>>>> ready for us.
>>>>>>>>>>>>>>>
>>>>>>>>>>>>>>> You're going to have to find another solution to these
>>>>>>>>>>>>>>> problems.
>>>>>>>>>>>>>> The device is loosely coupled with sockets/queues. Each
>>>>>>>>>>>>>> side is
>>>>>>>>>>>>>> allowed to be go away without caring the other side. So
>>>>>>>>>>>>>> in this
>>>>>>>>>>>>>> case, there's a small window that network stack think the
>>>>>>>>>>>>>> device has
>>>>>>>>>>>>>> one queue but actually not, the code can then safely drop
>>>>>>>>>>>>>> them.
>>>>>>>>>>>>>> Maybe it's ok here with some comments?
>>>>>>>>>>>>>>
>>>>>>>>>>>>>> Or if not, we can try to hold the device before tun_attach
>>>>>>>>>>>>>> and drop
>>>>>>>>>>>>>> it after register_netdevice().
>>>>>>>>>>>>> Hi Yang:
>>>>>>>>>>>>>
>>>>>>>>>>>>> I think maybe we can try to hold refcnt instead of playing
>>>>>>>>>>>>> real num
>>>>>>>>>>>>> queues here. Do you want to post a V4?
>>>>>>>>>>>> I think the refcnt can prevent freeing the memory in this
>>>>>>>>>>>> case.
>>>>>>>>>>>> When register_netdevice() failed, free_netdev() will be called
>>>>>>>>>>>> directly,
>>>>>>>>>>>> dev->pcpu_refcnt and dev are freed without checking refcnt of
>>>>>>>>>>>> dev.
>>>>>>>>>>> How about using patch-v1 that using a flag to check whether the
>>>>>>>>>>> device
>>>>>>>>>>> registered successfully.
>>>>>>>>>>>
>>>>>>>>>> As I said, it lacks sufficient locks or barriers. To be clear, I
>>>>>>>>>> meant
>>>>>>>>>> something like (compile-test only):
>>>>>>>>>>
>>>>>>>>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>>>>>>>>> index db16d7a13e00..e52678f9f049 100644
>>>>>>>>>> --- a/drivers/net/tun.c
>>>>>>>>>> +++ b/drivers/net/tun.c
>>>>>>>>>> @@ -2828,6 +2828,7 @@ static int tun_set_iff(struct net *net,
>>>>>>>>>> struct file *file, struct ifreq *ifr)
>>>>>>>>>> (ifr->ifr_flags & TUN_FEATURES);
>>>>>>>>>> INIT_LIST_HEAD(&tun->disabled);
>>>>>>>>>> + dev_hold(dev);
>>>>>>>>>> err = tun_attach(tun, file, false,
>>>>>>>>>> ifr->ifr_flags & IFF_NAPI,
>>>>>>>>>> ifr->ifr_flags &
>>>>>>>>>> IFF_NAPI_FRAGS);
>>>>>>>>>> if (err < 0)
>>>>>>>>>> @@ -2836,6 +2837,7 @@ static int tun_set_iff(struct net *net,
>>>>>>>>>> struct file *file, struct ifreq *ifr)
>>>>>>>>>> err = register_netdevice(tun->dev);
>>>>>>>>>> if (err < 0)
>>>>>>>>>> goto err_detach;
>>>>>>>>>> + dev_put(dev);
>>>>>>>>>> }
>>>>>>>>>> netif_carrier_on(tun->dev);
>>>>>>>>>> @@ -2852,11 +2854,13 @@ static int tun_set_iff(struct net *net,
>>>>>>>>>> struct file *file, struct ifreq *ifr)
>>>>>>>>>> return 0;
>>>>>>>>>> err_detach:
>>>>>>>>>> + dev_put(dev);
>>>>>>>>>> tun_detach_all(dev);
>>>>>>>>>> /* register_netdevice() already called
>>>>>>>>>> tun_free_netdev() */
>>>>>>>>>> goto err_free_dev;
>>>>>>>>>> err_free_flow:
>>>>>>>>>> + dev_put(dev);
>>>>>>>>>> tun_flow_uninit(tun);
>>>>>>>>>> security_tun_dev_free_security(tun->security);
>>>>>>>>>> err_free_stat:
>>>>>>>>>>
>>>>>>>>>> What's your thought?
>>>>>>>>> The dev pointer are freed without checking the refcount in
>>>>>>>>> free_netdev() called by err_free_dev
>>>>>>>>>
>>>>>>>>> path, so I don't understand how the refcount protects this
>>>>>>>>> pointer.
>>>>>>>>>
>>>>>>>> The refcount are guaranteed to be zero there, isn't it?
>>>>>>> No, it's not.
>>>>>>>
>>>>>>> err_free_dev:
>>>>>>> free_netdev(dev);
>>>>>>>
>>>>>>> void free_netdev(struct net_device *dev)
>>>>>>> {
>>>>>>> ...
>>>>>>> /* pcpu_refcnt can be freed without checking refcount */
>>>>>>> free_percpu(dev->pcpu_refcnt);
>>>>>>> dev->pcpu_refcnt = NULL;
>>>>>>>
>>>>>>> /* Compatibility with error handling in drivers */
>>>>>>> if (dev->reg_state == NETREG_UNINITIALIZED) {
>>>>>>> /* dev can be freed without checking refcount */
>>>>>>> netdev_freemem(dev);
>>>>>>> return;
>>>>>>> }
>>>>>>> ...
>>>>>>> }
>>>>>>
>>>>>> Right, but what I meant is in my patch, when code reaches
>>>>>> free_netdev() the refcnt is zero. What did I miss?
>>>>> Yes, but it can't fix the UAF problem.
>>>>
>>>> Well, it looks to me that the dev_put() in tun_put() won't release the
>>>> device in this case.
>>> The device is not released in tun_put().
>>> This is how the UAF occurs:
>>>
>>> CPUA CPUB
>>> tun_set_iff()
>>> alloc_netdev_mqs()
>>> tun_attach()
>>>
>>> tun_chr_read_iter()
>>> tun_get()
>>> tun_do_read()
>>>
>>> tun_ring_recv()
>>> register_netdevice() <-- inject error
>>> goto err_detach
>>> tun_detach_all() <-- set RCV_SHUTDOWN
>>> free_netdev() <-- called from
>>> err_free_dev path
>>> netdev_freemem() <-- free the memory
>>> without check refcount
>>> (In this path, the refcount cannot prevent
>>> freeing the memory of dev, and the memory
>>> will be used by dev_put() called by
>>> tun_chr_read_iter() on CPUB.)
>>> (Break from
>>>
>>> tun_ring_recv(),
>>> because
>>> RCV_SHUTDOWN
>>> is set)
>>> tun_put()
>>> dev_put() <--
>>> use the
>>> memory freed by
>>> netdev_freemem()
>>>
>>>
>> My bad, thanks for the patience. Since all evil come from the
>> tfile->tun, how about delay the publishing of tfile->tun until the
>> success of registration to make sure dev_put() and dev_hold() work.
>> (Compile test only)
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index db16d7a13e00..aab0be40d443 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -787,7 +787,8 @@ static void tun_detach_all(struct net_device *dev)
>> }
>> static int tun_attach(struct tun_struct *tun, struct file *file,
>> - bool skip_filter, bool napi, bool napi_frags)
>> + bool skip_filter, bool napi, bool napi_frags,
>> + bool publish_tun)
>> {
>> struct tun_file *tfile = file->private_data;
>> struct net_device *dev = tun->dev;
>> @@ -870,7 +871,8 @@ static int tun_attach(struct tun_struct *tun,
>> struct file *file,
>> * initialized tfile; otherwise we risk using half-initialized
>> * object.
>> */
>> - rcu_assign_pointer(tfile->tun, tun);
>> + if (publish_tun)
>> + rcu_assign_pointer(tfile->tun, tun);
>> rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
>> tun->numqueues++;
>> tun_set_real_num_queues(tun);
>> @@ -2730,7 +2732,7 @@ static int tun_set_iff(struct net *net, struct
>> file *file, struct ifreq *ifr)
>> err = tun_attach(tun, file, ifr->ifr_flags & IFF_NOFILTER,
>> ifr->ifr_flags & IFF_NAPI,
>> - ifr->ifr_flags & IFF_NAPI_FRAGS);
>> + ifr->ifr_flags & IFF_NAPI_FRAGS, true);
>> if (err < 0)
>> return err;
>> @@ -2829,13 +2831,17 @@ static int tun_set_iff(struct net *net,
>> struct file *file, struct ifreq *ifr)
>> INIT_LIST_HEAD(&tun->disabled);
>> err = tun_attach(tun, file, false, ifr->ifr_flags & IFF_NAPI,
>> - ifr->ifr_flags & IFF_NAPI_FRAGS);
>> + ifr->ifr_flags & IFF_NAPI_FRAGS, false);
>> if (err < 0)
>> goto err_free_flow;
>> err = register_netdevice(tun->dev);
>> if (err < 0)
>> goto err_detach;
>> + /* free_netdev() won't check refcnt, to aovid race
>> + * with dev_put() we need publish tun after registration.
>> + */
>> + rcu_assign_pointer(tfile->tun, tun);
>> }
>> netif_carrier_on(tun->dev);
>> @@ -2978,7 +2984,7 @@ static int tun_set_queue(struct file *file,
>> struct ifreq *ifr)
>> if (ret < 0)
>> goto unlock;
>> ret = tun_attach(tun, file, false, tun->flags & IFF_NAPI,
>> - tun->flags & IFF_NAPI_FRAGS);
>> + tun->flags & IFF_NAPI_FRAGS, true);
>> } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) {
>> tun = rtnl_dereference(tfile->tun);
>> if (!tun || !(tun->flags & IFF_MULTI_QUEUE) ||
>> tfile->detached)
> I tested this patch, it can fix this UAF.
>
> But as Eric replied in my patch v1, tun_get() will return NULL as long
> as tun_set_iff() (TUNSETIFF ioctl())
> has not yet been called.
Isn't this the expected behavior. Without TUNSETIFF, it means the
netdevice is not attached, tun_get() should return NULL here.
> This could break some applications, since tun_get() is used from poll()
> and other syscalls.
>
> I think it should return '-EAGIAN' instead of '-EBADFD' in this way. I
> did some change in patch v1,
> if it's OK, I will send a v4.
>
> drivers/net/tun.c | 34 ++++++++++++++++++++++++++++++----
> 1 file changed, 30 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> index db16d7a13e00..0abc654010e3 100644
> --- a/drivers/net/tun.c
> +++ b/drivers/net/tun.c
> @@ -115,6 +115,7 @@ do { \
> /* High bits in flags field are unused. */
> #define TUN_VNET_LE 0x80000000
> #define TUN_VNET_BE 0x40000000
> +#define TUN_DEV_REGISTERED 0x20000000
>
> #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
> IFF_MULTI_QUEUE | IFF_NAPI | IFF_NAPI_FRAGS)
> @@ -719,8 +720,10 @@ static void __tun_detach(struct tun_file *tfile,
> bool clean)
> netif_carrier_off(tun->dev);
>
> if (!(tun->flags & IFF_PERSIST) &&
> - tun->dev->reg_state == NETREG_REGISTERED)
> + tun->dev->reg_state == NETREG_REGISTERED) {
> + tun->flags &= ~TUN_DEV_REGISTERED;
As I said for previous versions. It's not good that try to invent new
internal state like this, and you need carefully to deal with the
synchronization, it could be lock or barriers. Consider the
synchronization of tun is already complex, let's better try to avoid
adding more but using exist mechanism, e.g pointer publishing through RCU.
Thanks
> unregister_netdevice(tun->dev);
> + }
> }
> if (tun)
> xdp_rxq_info_unreg(&tfile->xdp_rxq);
> @@ -884,8 +887,12 @@ static struct tun_struct *tun_get(struct tun_file
> *tfile)
>
> rcu_read_lock();
> tun = rcu_dereference(tfile->tun);
> - if (tun)
> - dev_hold(tun->dev);
> + if (tun) {
> + if (tun->flags & TUN_DEV_REGISTERED)
> + dev_hold(tun->dev);
> + else
> + tun = ERR_PTR(-EAGAIN);
> + }
> rcu_read_unlock();
>
> return tun;
> @@ -1428,7 +1435,7 @@ static __poll_t tun_chr_poll(struct file *file,
> poll_table *wait)
> struct sock *sk;
> __poll_t mask = 0;
>
> - if (!tun)
> + if (IS_ERR_OR_NULL(tun))
> return EPOLLERR;
>
> sk = tfile->socket.sk;
> @@ -2017,6 +2024,9 @@ static ssize_t tun_chr_write_iter(struct kiocb
> *iocb, struct iov_iter *from)
> if (!tun)
> return -EBADFD;
>
> + if (IS_ERR(tun))
> + return PTR_ERR(tun);
> +
> result = tun_get_user(tun, tfile, NULL, from,
> file->f_flags & O_NONBLOCK, false);
>
> @@ -2242,6 +2252,10 @@ static ssize_t tun_chr_read_iter(struct kiocb
> *iocb, struct iov_iter *to)
>
> if (!tun)
> return -EBADFD;
> +
> + if (IS_ERR(tun))
> + return PTR_ERR(tun);
> +
> ret = tun_do_read(tun, tfile, to, file->f_flags & O_NONBLOCK, NULL);
> ret = min_t(ssize_t, ret, len);
> if (ret > 0)
> @@ -2525,6 +2539,9 @@ static int tun_sendmsg(struct socket *sock,
> struct msghdr *m, size_t total_len)
> if (!tun)
> return -EBADFD;
>
> + if (IS_ERR(tun))
> + return PTR_ERR(tun);
> +
> if (ctl && (ctl->type == TUN_MSG_PTR)) {
> struct tun_page tpage;
> int n = ctl->num;
> @@ -2573,6 +2590,11 @@ static int tun_recvmsg(struct socket *sock,
> struct msghdr *m, size_t total_len,
> goto out_free;
> }
>
> + if (IS_ERR(tun)) {
> + ret = PTR_ERR(tun);
> + goto out_free;
> + }
> +
> if (flags & ~(MSG_DONTWAIT|MSG_TRUNC|MSG_ERRQUEUE)) {
> ret = -EINVAL;
> goto out_put_tun;
> @@ -2622,6 +2644,9 @@ static int tun_peek_len(struct socket *sock)
> if (!tun)
> return 0;
>
> + if (IS_ERR(tun))
> + return PTR_ERR(tun);
> +
> ret = PTR_RING_PEEK_CALL(&tfile->tx_ring, tun_ptr_peek_len);
> tun_put(tun);
>
> @@ -2836,6 +2861,7 @@ static int tun_set_iff(struct net *net, struct
> file *file, struct ifreq *ifr)
> err = register_netdevice(tun->dev);
> if (err < 0)
> goto err_detach;
> + tun->flags |= TUN_DEV_REGISTERED;
> }
>
> netif_carrier_on(tun->dev);
Download attachment "pEpkey.asc" of type "application/pgp-keys" (2453 bytes)
Powered by blists - more mailing lists