[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <39597d43-7522-38e7-1b37-82c4a84158aa@mojatatu.com>
Date: Fri, 1 Sep 2023 15:09:22 -0300
From: Pedro Tammela <pctammela@...atatu.com>
To: Jamal Hadi Salim <jhs@...atatu.com>, davem@...emloft.net,
kuba@...nel.org, edumazet@...gle.com, pabeni@...hat.com
Cc: jiri@...nulli.us, xiyou.wangcong@...il.com, netdev@...r.kernel.org,
sec@...is.email, paolo.valente@...more.it
Subject: Re: [PATCH net 1/1] net: sched: sch_qfq: Fix UAF in qfq_dequeue()
On 01/09/2023 13:22, Jamal Hadi Salim wrote:
> From: valis <sec@...is.email>
>
> When the plug qdisc is used as a class of the qfq qdisc it could trigger a
> UAF. This issue can be reproduced with following commands:
>
> tc qdisc add dev lo root handle 1: qfq
> tc class add dev lo parent 1: classid 1:1 qfq weight 1 maxpkt 512
> tc qdisc add dev lo parent 1:1 handle 2: plug
> tc filter add dev lo parent 1: basic classid 1:1
> ping -c1 127.0.0.1
Not sure if net or net-next material, but I would also demote the
WARN_ON_ONCE
in qfq_dequeue to a pr_warn[0], as it would just crash kernels that set
panic_on_warn and honestly it's not that bad of a misconfiguration (a
plain tc qdisc/class del would fix it).
[0] https://elixir.bootlin.com/linux/latest/source/net/sched/sch_qfq.c#L1001
>
> and boom:
>
> [ 285.353793] BUG: KASAN: slab-use-after-free in qfq_dequeue+0xa7/0x7f0
> [ 285.354910] Read of size 4 at addr ffff8880bad312a8 by task ping/144
> [ 285.355903]
> [ 285.356165] CPU: 1 PID: 144 Comm: ping Not tainted 6.5.0-rc3+ #4
> [ 285.357112] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
> [ 285.358376] Call Trace:
> [ 285.358773] <IRQ>
> [ 285.359109] dump_stack_lvl+0x44/0x60
> [ 285.359708] print_address_description.constprop.0+0x2c/0x3c0
> [ 285.360611] kasan_report+0x10c/0x120
> [ 285.361195] ? qfq_dequeue+0xa7/0x7f0
> [ 285.361780] qfq_dequeue+0xa7/0x7f0
> [ 285.362342] __qdisc_run+0xf1/0x970
> [ 285.362903] net_tx_action+0x28e/0x460
> [ 285.363502] __do_softirq+0x11b/0x3de
> [ 285.364097] do_softirq.part.0+0x72/0x90
> [ 285.364721] </IRQ>
> [ 285.365072] <TASK>
> [ 285.365422] __local_bh_enable_ip+0x77/0x90
> [ 285.366079] __dev_queue_xmit+0x95f/0x1550
> [ 285.366732] ? __pfx_csum_and_copy_from_iter+0x10/0x10
> [ 285.367526] ? __pfx___dev_queue_xmit+0x10/0x10
> [ 285.368259] ? __build_skb_around+0x129/0x190
> [ 285.368960] ? ip_generic_getfrag+0x12c/0x170
> [ 285.369653] ? __pfx_ip_generic_getfrag+0x10/0x10
> [ 285.370390] ? csum_partial+0x8/0x20
> [ 285.370961] ? raw_getfrag+0xe5/0x140
> [ 285.371559] ip_finish_output2+0x539/0xa40
> [ 285.372222] ? __pfx_ip_finish_output2+0x10/0x10
> [ 285.372954] ip_output+0x113/0x1e0
> [ 285.373512] ? __pfx_ip_output+0x10/0x10
> [ 285.374130] ? icmp_out_count+0x49/0x60
> [ 285.374739] ? __pfx_ip_finish_output+0x10/0x10
> [ 285.375457] ip_push_pending_frames+0xf3/0x100
> [ 285.376173] raw_sendmsg+0xef5/0x12d0
> [ 285.376760] ? do_syscall_64+0x40/0x90
> [ 285.377359] ? __static_call_text_end+0x136578/0x136578
> [ 285.378173] ? do_syscall_64+0x40/0x90
> [ 285.378772] ? kasan_enable_current+0x11/0x20
> [ 285.379469] ? __pfx_raw_sendmsg+0x10/0x10
> [ 285.380137] ? __sock_create+0x13e/0x270
> [ 285.380673] ? __sys_socket+0xf3/0x180
> [ 285.381174] ? __x64_sys_socket+0x3d/0x50
> [ 285.381725] ? entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> [ 285.382425] ? __rcu_read_unlock+0x48/0x70
> [ 285.382975] ? ip4_datagram_release_cb+0xd8/0x380
> [ 285.383608] ? __pfx_ip4_datagram_release_cb+0x10/0x10
> [ 285.384295] ? preempt_count_sub+0x14/0xc0
> [ 285.384844] ? __list_del_entry_valid+0x76/0x140
> [ 285.385467] ? _raw_spin_lock_bh+0x87/0xe0
> [ 285.386014] ? __pfx__raw_spin_lock_bh+0x10/0x10
> [ 285.386645] ? release_sock+0xa0/0xd0
> [ 285.387148] ? preempt_count_sub+0x14/0xc0
> [ 285.387712] ? freeze_secondary_cpus+0x348/0x3c0
> [ 285.388341] ? aa_sk_perm+0x177/0x390
> [ 285.388856] ? __pfx_aa_sk_perm+0x10/0x10
> [ 285.389441] ? check_stack_object+0x22/0x70
> [ 285.390032] ? inet_send_prepare+0x2f/0x120
> [ 285.390603] ? __pfx_inet_sendmsg+0x10/0x10
> [ 285.391172] sock_sendmsg+0xcc/0xe0
> [ 285.391667] __sys_sendto+0x190/0x230
> [ 285.392168] ? __pfx___sys_sendto+0x10/0x10
> [ 285.392727] ? kvm_clock_get_cycles+0x14/0x30
> [ 285.393328] ? set_normalized_timespec64+0x57/0x70
> [ 285.393980] ? _raw_spin_unlock_irq+0x1b/0x40
> [ 285.394578] ? __x64_sys_clock_gettime+0x11c/0x160
> [ 285.395225] ? __pfx___x64_sys_clock_gettime+0x10/0x10
> [ 285.395908] ? _copy_to_user+0x3e/0x60
> [ 285.396432] ? exit_to_user_mode_prepare+0x1a/0x120
> [ 285.397086] ? syscall_exit_to_user_mode+0x22/0x50
> [ 285.397734] ? do_syscall_64+0x71/0x90
> [ 285.398258] __x64_sys_sendto+0x74/0x90
> [ 285.398786] do_syscall_64+0x64/0x90
> [ 285.399273] ? exit_to_user_mode_prepare+0x1a/0x120
> [ 285.399949] ? syscall_exit_to_user_mode+0x22/0x50
> [ 285.400605] ? do_syscall_64+0x71/0x90
> [ 285.401124] entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> [ 285.401807] RIP: 0033:0x495726
> [ 285.402233] Code: ff ff ff f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 11 b8 2c 00 00 00 0f 09
> [ 285.404683] RSP: 002b:00007ffcc25fb618 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
> [ 285.405677] RAX: ffffffffffffffda RBX: 0000000000000040 RCX: 0000000000495726
> [ 285.406628] RDX: 0000000000000040 RSI: 0000000002518750 RDI: 0000000000000000
> [ 285.407565] RBP: 00000000005205ef R08: 00000000005f8838 R09: 000000000000001c
> [ 285.408523] R10: 0000000000000000 R11: 0000000000000246 R12: 0000000002517634
> [ 285.409460] R13: 00007ffcc25fb6f0 R14: 0000000000000003 R15: 0000000000000000
> [ 285.410403] </TASK>
> [ 285.410704]
> [ 285.410929] Allocated by task 144:
> [ 285.411402] kasan_save_stack+0x1e/0x40
> [ 285.411926] kasan_set_track+0x21/0x30
> [ 285.412442] __kasan_slab_alloc+0x55/0x70
> [ 285.412973] kmem_cache_alloc_node+0x187/0x3d0
> [ 285.413567] __alloc_skb+0x1b4/0x230
> [ 285.414060] __ip_append_data+0x17f7/0x1b60
> [ 285.414633] ip_append_data+0x97/0xf0
> [ 285.415144] raw_sendmsg+0x5a8/0x12d0
> [ 285.415640] sock_sendmsg+0xcc/0xe0
> [ 285.416117] __sys_sendto+0x190/0x230
> [ 285.416626] __x64_sys_sendto+0x74/0x90
> [ 285.417145] do_syscall_64+0x64/0x90
> [ 285.417624] entry_SYSCALL_64_after_hwframe+0x6e/0xd8
> [ 285.418306]
> [ 285.418531] Freed by task 144:
> [ 285.418960] kasan_save_stack+0x1e/0x40
> [ 285.419469] kasan_set_track+0x21/0x30
> [ 285.419988] kasan_save_free_info+0x27/0x40
> [ 285.420556] ____kasan_slab_free+0x109/0x1a0
> [ 285.421146] kmem_cache_free+0x1c2/0x450
> [ 285.421680] __netif_receive_skb_core+0x2ce/0x1870
> [ 285.422333] __netif_receive_skb_one_core+0x97/0x140
> [ 285.423003] process_backlog+0x100/0x2f0
> [ 285.423537] __napi_poll+0x5c/0x2d0
> [ 285.424023] net_rx_action+0x2be/0x560
> [ 285.424510] __do_softirq+0x11b/0x3de
> [ 285.425034]
> [ 285.425254] The buggy address belongs to the object at ffff8880bad31280
> [ 285.425254] which belongs to the cache skbuff_head_cache of size 224
> [ 285.426993] The buggy address is located 40 bytes inside of
> [ 285.426993] freed 224-byte region [ffff8880bad31280, ffff8880bad31360)
> [ 285.428572]
> [ 285.428798] The buggy address belongs to the physical page:
> [ 285.429540] page:00000000f4b77674 refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0xbad31
> [ 285.430758] flags: 0x100000000000200(slab|node=0|zone=1)
> [ 285.431447] page_type: 0xffffffff()
> [ 285.431934] raw: 0100000000000200 ffff88810094a8c0 dead000000000122 0000000000000000
> [ 285.432757] raw: 0000000000000000 00000000800c000c 00000001ffffffff 0000000000000000
> [ 285.433562] page dumped because: kasan: bad access detected
> [ 285.434144]
> [ 285.434320] Memory state around the buggy address:
> [ 285.434828] ffff8880bad31180: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 285.435580] ffff8880bad31200: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 285.436264] >ffff8880bad31280: fa fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> [ 285.436777] ^
> [ 285.437106] ffff8880bad31300: fb fb fb fb fb fb fb fb fb fb fb fb fc fc fc fc
> [ 285.437616] ffff8880bad31380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> [ 285.438126] ==================================================================
> [ 285.438662] Disabling lock debugging due to kernel taint
>
> Fix this by:
> 1. Changing sch_plug's .peek handler to qdisc_peek_dequeued(), a
> function compatible with non-work-conserving qdiscs
> 2. Checking the return value of qdisc_dequeue_peeked() in sch_qfq.
>
> Fixes: 462dbc9101ac ("pkt_sched: QFQ Plus: fair-queueing service at DRR cost")
> Reported-by: valis <sec@...is.email>
> Signed-off-by: valis <sec@...is.email>
> Signed-off-by: Jamal Hadi Salim <jhs@...atatu.com>
> ---
> net/sched/sch_plug.c | 2 +-
> net/sched/sch_qfq.c | 22 +++++++++++++++++-----
> 2 files changed, 18 insertions(+), 6 deletions(-)
>
> diff --git a/net/sched/sch_plug.c b/net/sched/sch_plug.c
> index ea8c4a7174bb..35f49edf63db 100644
> --- a/net/sched/sch_plug.c
> +++ b/net/sched/sch_plug.c
> @@ -207,7 +207,7 @@ static struct Qdisc_ops plug_qdisc_ops __read_mostly = {
> .priv_size = sizeof(struct plug_sched_data),
> .enqueue = plug_enqueue,
> .dequeue = plug_dequeue,
> - .peek = qdisc_peek_head,
> + .peek = qdisc_peek_dequeued,
> .init = plug_init,
> .change = plug_change,
> .reset = qdisc_reset_queue,
> diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
> index befaf74b33ca..09d2955baab1 100644
> --- a/net/sched/sch_qfq.c
> +++ b/net/sched/sch_qfq.c
> @@ -974,10 +974,13 @@ static void qfq_update_eligible(struct qfq_sched *q)
> }
>
> /* Dequeue head packet of the head class in the DRR queue of the aggregate. */
> -static void agg_dequeue(struct qfq_aggregate *agg,
> - struct qfq_class *cl, unsigned int len)
> +static struct sk_buff *agg_dequeue(struct qfq_aggregate *agg,
> + struct qfq_class *cl, unsigned int len)
> {
> - qdisc_dequeue_peeked(cl->qdisc);
> + struct sk_buff *skb = qdisc_dequeue_peeked(cl->qdisc);
> +
> + if (!skb)
> + return NULL;
>
> cl->deficit -= (int) len;
>
> @@ -987,6 +990,8 @@ static void agg_dequeue(struct qfq_aggregate *agg,
> cl->deficit += agg->lmax;
> list_move_tail(&cl->alist, &agg->active);
> }
> +
> + return skb;
> }
>
> static inline struct sk_buff *qfq_peek_skb(struct qfq_aggregate *agg,
> @@ -1132,11 +1137,18 @@ static struct sk_buff *qfq_dequeue(struct Qdisc *sch)
> if (!skb)
> return NULL;
>
> - qdisc_qstats_backlog_dec(sch, skb);
> sch->q.qlen--;
> +
> + skb = agg_dequeue(in_serv_agg, cl, len);
> +
> + if (!skb) {
> + sch->q.qlen++;
> + return NULL;
> + }
> +
> + qdisc_qstats_backlog_dec(sch, skb);
> qdisc_bstats_update(sch, skb);
>
> - agg_dequeue(in_serv_agg, cl, len);
> /* If lmax is lowered, through qfq_change_class, for a class
> * owning pending packets with larger size than the new value
> * of lmax, then the following condition may hold.
Powered by blists - more mailing lists