[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <6888d4d07c92c_15cf79294cb@willemb.c.googlers.com.notmuch>
Date: Tue, 29 Jul 2025 10:04:00 -0400
From: Willem de Bruijn <willemdebruijn.kernel@...il.com>
To: Sharath Chandra Vurukala <quic_sharathv@...cinc.com>,
davem@...emloft.net,
dsahern@...nel.org,
edumazet@...gle.com,
kuba@...nel.org,
pabeni@...hat.com,
netdev@...r.kernel.org
Cc: quic_kapandey@...cinc.com,
quic_subashab@...cinc.com
Subject: Re: [PATCH v2] net: Add locking to protect skb->dev access in
ip_output
Sharath Chandra Vurukala wrote:
> In ip_output() skb->dev is updated from the skb_dst(skb)->dev
> this can become invalid when the interface is unregistered and freed,
>
> Introduced new skb_dst_dev_rcu() function to be used instead of
> skb_dst_dev() within rcu_locks in outout.This will ensure that
> all the skb's associated with the dev being deregistered will
> be transnmitted out first, before freeing the dev.
>
> Multiple panic call stacks were observed when UL traffic was run
> in concurrency with device deregistration from different functions,
> pasting one sample for reference.
>
> [496733.627565][T13385] Call trace:
> [496733.627570][T13385] bpf_prog_ce7c9180c3b128ea_cgroupskb_egres+0x24c/0x7f0
> [496733.627581][T13385] __cgroup_bpf_run_filter_skb+0x128/0x498
> [496733.627595][T13385] ip_finish_output+0xa4/0xf4
> [496733.627605][T13385] ip_output+0x100/0x1a0
> [496733.627613][T13385] ip_send_skb+0x68/0x100
> [496733.627618][T13385] udp_send_skb+0x1c4/0x384
> [496733.627625][T13385] udp_sendmsg+0x7b0/0x898
> [496733.627631][T13385] inet_sendmsg+0x5c/0x7c
> [496733.627639][T13385] __sys_sendto+0x174/0x1e4
> [496733.627647][T13385] __arm64_sys_sendto+0x28/0x3c
> [496733.627653][T13385] invoke_syscall+0x58/0x11c
> [496733.627662][T13385] el0_svc_common+0x88/0xf4
> [496733.627669][T13385] do_el0_svc+0x2c/0xb0
> [496733.627676][T13385] el0_svc+0x2c/0xa4
> [496733.627683][T13385] el0t_64_sync_handler+0x68/0xb4
> [496733.627689][T13385] el0t_64_sync+0x1a4/0x1a8
>
> Changes in v2:
> - Addressed review comments from Eric Dumazet
> - Used READ_ONCE() to prevent potential load/store tearing
> - Added skb_dst_dev_rcu() and used along with rcu_read_lock() in ip_output
>
> Signed-off-by: Sharath Chandra Vurukala <quic_sharathv@...cinc.com>
> ---
> include/net/dst.h | 12 ++++++++++++
> net/ipv4/ip_output.c | 17 ++++++++++++-----
> 2 files changed, 24 insertions(+), 5 deletions(-)
>
> diff --git a/include/net/dst.h b/include/net/dst.h
> index 00467c1b5093..692ebb1b3f42 100644
> --- a/include/net/dst.h
> +++ b/include/net/dst.h
> @@ -568,11 +568,23 @@ static inline struct net_device *dst_dev(const struct dst_entry *dst)
> return READ_ONCE(dst->dev);
> }
>
> +static inline struct net_device *dst_dev_rcu(const struct dst_entry *dst)
> +{
> + /* In the future, use rcu_dereference(dst->dev) */
> + WARN_ON(!rcu_read_lock_held());
WARN_ON_ONCE or even DEBUG_NET_WARN_ON_ONCE
> + return READ_ONCE(dst->dev);
> +}
> +
> static inline struct net_device *skb_dst_dev(const struct sk_buff *skb)
> {
> return dst_dev(skb_dst(skb));
> }
>
> +static inline struct net_device *skb_dst_dev_rcu(const struct sk_buff *skb)
> +{
> + return dst_dev_rcu(skb_dst(skb));
> +}
> +
> static inline struct net *skb_dst_dev_net(const struct sk_buff *skb)
> {
> return dev_net(skb_dst_dev(skb));
> diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
> index 10a1d182fd84..d70d79b71897 100644
> --- a/net/ipv4/ip_output.c
> +++ b/net/ipv4/ip_output.c
> @@ -425,15 +425,22 @@ int ip_mc_output(struct net *net, struct sock *sk, struct sk_buff *skb)
>
> int ip_output(struct net *net, struct sock *sk, struct sk_buff *skb)
> {
> - struct net_device *dev = skb_dst_dev(skb), *indev = skb->dev;
> + struct net_device *dev, *indev = skb->dev;
> + int ret_val;
>
> + IP_UPD_PO_STATS(net, IPSTATS_MIB_OUT, skb->len);
Why introduce this?
> +
> + rcu_read_lock();
> + dev = skb_dst_dev_rcu(skb);
> skb->dev = dev;
> skb->protocol = htons(ETH_P_IP);
>
> - return NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> - net, sk, skb, indev, dev,
> - ip_finish_output,
> - !(IPCB(skb)->flags & IPSKB_REROUTED));
> + ret_val = NF_HOOK_COND(NFPROTO_IPV4, NF_INET_POST_ROUTING,
> + net, sk, skb, indev, dev,
> + ip_finish_output,
> + !(IPCB(skb)->flags & IPSKB_REROUTED));
> + rcu_read_unlock();
> + return ret_val;
> }
> EXPORT_SYMBOL(ip_output);
>
Powered by blists - more mailing lists