[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a311521e-0017-4964-5a8a-20355b708763@gmail.com>
Date: Mon, 7 Jan 2019 13:10:15 -0800
From: Eric Dumazet <eric.dumazet@...il.com>
To: Stanislav Fomichev <sdf@...ichev.me>,
Eric Dumazet <edumazet@...gle.com>
Cc: Stanislav Fomichev <sdf@...gle.com>,
netdev <netdev@...r.kernel.org>,
David Miller <davem@...emloft.net>,
Jason Wang <jasowang@...hat.com>,
Jesper Dangaard Brouer <brouer@...hat.com>,
"Michael S. Tsirkin" <mst@...hat.com>,
syzbot <syzkaller@...glegroups.com>
Subject: Re: [PATCH net 1/2] tun: hold napi_mutex for all napi operations
On 01/07/2019 01:02 PM, Stanislav Fomichev wrote:
> On 01/07, Eric Dumazet wrote:
>> On Mon, Jan 7, 2019 at 12:02 PM Stanislav Fomichev <sdf@...gle.com> wrote:
>>>
>>> BUG: unable to handle kernel NULL pointer dereference at 00000000000000d1
>>> Call Trace:
>>> ? napi_gro_frags+0xa7/0x2c0
>>> tun_get_user+0xb50/0xf20
>>> tun_chr_write_iter+0x53/0x70
>>> new_sync_write+0xff/0x160
>>> vfs_write+0x191/0x1e0
>>> __x64_sys_write+0x5e/0xd0
>>> do_syscall_64+0x47/0xf0
>>> entry_SYSCALL_64_after_hwframe+0x44/0xa9
>>>
>>> I think there is a subtle race between sending a packet via tap and
>>> attaching it:
>>>
>>> CPU0: CPU1:
>>> tun_chr_ioctl(TUNSETIFF)
>>> tun_set_iff
>>> tun_attach
>>> rcu_assign_pointer(tfile->tun, tun);
>>> tun_fops->write_iter()
>>> tun_chr_write_iter
>>> tun_napi_alloc_frags
>>> napi_get_frags
>>> napi->skb = napi_alloc_skb
>>> tun_napi_init
>>> netif_napi_add
>>> napi->skb = NULL
>>> napi->skb is NULL here
>>> napi_gro_frags
>>> napi_frags_skb
>>> skb = napi->skb
>>> skb_reset_mac_header(skb)
>>> panic()
>>>
>>> To fix, do the following:
>>> * Move rcu_assign_pointer(tfile->tun, tun) to be the last thing we do
>>> in tun_attach(); this should guarantee that when we call tun_get()
>>> we always get an initialized object
>>> * As another safeguard, always grab napi_mutex whenever doing any
>>> napi operation; this should prevent napi state change between
>>> calls to napi_get_frags and napi_gro_frags
>>>
>>> Reported-by: syzbot <syzkaller@...glegroups.com>
>>> Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
>>>
>>> Signed-off-by: Stanislav Fomichev <sdf@...gle.com>
>>> ---
>>> drivers/net/tun.c | 18 +++++++++++++++---
>>> 1 file changed, 15 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>>> index a4fdad475594..7875f06011f2 100644
>>> --- a/drivers/net/tun.c
>>> +++ b/drivers/net/tun.c
>>> @@ -323,22 +323,30 @@ static void tun_napi_init(struct tun_struct *tun, struct tun_file *tfile,
>>> tfile->napi_enabled = napi_en;
>>> tfile->napi_frags_enabled = napi_en && napi_frags;
>>> if (napi_en) {
>>> + mutex_lock(&tfile->napi_mutex);
>>> netif_napi_add(tun->dev, &tfile->napi, tun_napi_poll,
>>> NAPI_POLL_WEIGHT);
>>> napi_enable(&tfile->napi);
>>> + mutex_unlock(&tfile->napi_mutex);
>>> }
>>> }
>>>
>>> static void tun_napi_disable(struct tun_file *tfile)
>>> {
>>> - if (tfile->napi_enabled)
>>> + if (tfile->napi_enabled) {
>>> + mutex_lock(&tfile->napi_mutex);
>>> napi_disable(&tfile->napi);
>>> + mutex_unlock(&tfile->napi_mutex);
>>> + }
>>> }
>>>
>>> static void tun_napi_del(struct tun_file *tfile)
>>> {
>>> - if (tfile->napi_enabled)
>>> + if (tfile->napi_enabled) {
>>> + mutex_lock(&tfile->napi_mutex);
>>> netif_napi_del(&tfile->napi);
>>> + mutex_unlock(&tfile->napi_mutex);
>>> + }
>>> }
>>>
>>> static bool tun_napi_frags_enabled(const struct tun_file *tfile)
>>> @@ -856,7 +864,6 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
>>> err = 0;
>>> }
>>>
>>> - rcu_assign_pointer(tfile->tun, tun);
>>> rcu_assign_pointer(tun->tfiles[tun->numqueues], tfile);
>>> tun->numqueues++;
>>>
>>> @@ -876,6 +883,11 @@ static int tun_attach(struct tun_struct *tun, struct file *file,
>>> * refcnt.
>>> */
>>>
>>> + /* All tun_fops depend on tun_get() returning non-null pointer.
>>> + * Thus, assigning tun to a tfile should be the last init operation,
>>> + * otherwise we risk using half-initialized object.
>>> + */
>>> + rcu_assign_pointer(tfile->tun, tun);
>>> out:
>>> return err;
>>> }
>>
>> Hmmm I believe the issue is different : We need to call
>> tun_napi_init() before doing the publish in the tun->tfiles[] array
>>
>> My patch was :
> Still fails with your patch.
>
> Maybe the best way is to move both of those publishes (tfile->tun and
> tun->tfiles[]) to the end of tun_attach? It looks like tfile->tun
> is a publish for syscall side (most of tun_socket_ops and tun_fops call
> tun_get which looks at tun->dev and bail out if it's NULL) and tun->tfiles
> is (mostly, but not really) for napi side.
>
> Here is a repro I'm using if you want to poke it:
>
> syz-execprog -threaded -collide -repeat=0 -procs=6 <rep.syz>
>
> # See https://goo.gl/kgGztJ for information about syzkaller reproducers.
> #{"threaded":true,"collide":true,"repeat":true,"procs":6,"sandbox":"none","fault_call":-1,"tun":true,"tmpdir":true,"cgroups":true,"netdev":true,"resetnet":true,"segv":true}
> openat$tun(0xffffffffffffff9c, 0x0, 0x0, 0x0)
> r0 = openat$tun(0xffffffffffffff9c,
> &(0x7f0000001a00)='/dev/net/tun\x00', 0x0, 0x0)
> ioctl$TUNSETIFF(r0, 0x400454ca, &(0x7f0000000300)={"6e72300100",
> 0x1132})
> r1 = socket$kcm(0x2, 0x3, 0x2)
> ioctl$PERF_EVENT_IOC_SET_FILTER(r1, 0x8914,
> &(0x7f0000000780)="6e7230010060a19ef9d2c673d9a1571cb9e1369bcd61ef7e49793ae18712eceb1daa769497800b7fbbd35b170c10751d39aeb660d863e49b8c4f3b3dad48902b5b2d6cfd0abd372c63bcf5d70df3fd4d2e8d443c88bc0e5637dd82fc3435bed4de5d693c9a781c863e05d8a6f8689a5be29216061f3ff53f8b6b396678e7ba155ef9152d7e43b1eccb2331eb8eb1ed5586dcf8b3b0b999361a44ff2c22c2abbef42dd24eabe6723346a6e46c0499a21442d8d00dcb57f013ff7595edd0ff076930de3675d34117a44eb0e4f832936da44e57e43a3e36bd48d2a85bf4fd4a804e83f2f3cf378a435af5e287d4e27337b4ada11b26219832ec6b2b38446b3b95fe3771e9f42ca30fb21e12f0a3d8bc2d85454af9fcc0232d8fd909448b01f46c593d31ea1c926465e35a4199079c3ca41128b17cb01fbf5b522be0fd02022ada37fecc14b6c8c8831883b85a1106f2f867020d529f17a350f20dd3bf51a98cfda70c2e3638a483fd3f87940bb478b07c4c110394c0093d17955089f2ca97bbe075124c9b1ff6500d536a95d96f03d48596e008bf0a028b539cec796cec9bf585eb80fe3e0d26")
> perf_event_open$cgroup(&(0x7f0000000440)={0x7, 0x70, 0x1, 0x0, 0x6e36,
> 0x38000000, 0x0, 0x1, 0x21060, 0x0, 0x0, 0x0, 0x20, 0x3, 0xfc50, 0x9,
> 0x8000, 0x0, 0x0, 0x0, 0x46c8, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
> 0x0, 0x0, 0x8, 0x3f, 0x401, 0x8001, 0x8, 0x9, 0x0, 0x0, 0x1, 0x1,
> @perf_config_ext={0x100000000, 0x6}, 0x10, 0x5, 0x2, 0x7,
> 0xfffffffffffffff9, 0x0, 0x7}, 0xffffffffffffffff, 0x5,
> 0xffffffffffffffff, 0x0)
> # 0x2 = O_RDWR
> r2 = openat$tun(0xffffffffffffff9c,
> &(0x7f0000001a00)='/dev/net/tun\x00', 0x2, 0x0)
> # IFF_TAP 0x0002
> # IFF_NAPI 0x0010
> # IFF_NAPI_FRAGS 0x0020
> # IFF_MULTI_QUEUE 0x0100
> # IFF_MULTICAST = 0x1000
> ioctl$TUNSETIFF(r2, 0x400454ca, &(0x7f0000000300)={"6e72300100",
> 0x1132})
> write$cgroup_int(r2, &(0x7f0000000000), 0x17b)
>
>
I dunno, I would prefer to not throw all these
mutex_lock(&tfile->napi_mutex)/mutex_unlock(&tfile->napi_mutex) all over the places.
Please publish a minimal patches, or a patch series explaining why each fix is needed.
I do not really see why tun_napi_disable() and/or tun_napi_del() needs extra synchronization.
Powered by blists - more mailing lists