[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <07490c75-86c3-4488-8adb-7740b14feb30@virtuozzo.com>
Date: Tue, 9 Jan 2024 12:57:36 +0800
From: Pavel Tikhomirov <ptikhomirov@...tuozzo.com>
To: Florian Westphal <fw@...len.de>
Cc: "David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>, netdev@...r.kernel.org,
linux-kernel@...r.kernel.org, kernel@...nvz.org
Subject: Re: [PATCH] neighbour: purge nf_bridged skb from foreign device neigh
On 08/01/2024 19:26, Pavel Tikhomirov wrote:
>
>
> On 08/01/2024 19:15, Florian Westphal wrote:
>> Pavel Tikhomirov <ptikhomirov@...tuozzo.com> wrote:
>>> An skb can be added to a neigh->arp_queue while waiting for an ARP
>>> reply, and the original skb's skb->dev can be different from the neigh's
>>> neigh->dev. For instance, when bridging a dnated skb from one veth to
>>> another, the skb would be added to a neigh->arp_queue of the bridge.
>>>
>>> There is no explicit mechanism that prevents the original skb->dev link
>>> of such an skb from being freed under us. For instance, neigh_flush_dev
>>> does not clean up skbs from a different device's neigh queue. But that
>>> original link can be used and lead to a crash, e.g. on this stack:
>>>
>>> arp_process
>>> neigh_update
>>> skb = __skb_dequeue(&neigh->arp_queue)
>>> neigh_resolve_output(..., skb)
>>> ...
>>> br_nf_dev_xmit
>>> br_nf_pre_routing_finish_bridge_slow
>>> skb->dev = nf_bridge->physindev
>>> br_handle_frame_finish
>>>
>>> So let's improve neigh_flush_dev to also purge skbs when the device
>>> equal to their skb->nf_bridge->physindev gets destroyed.
>>
>> Can we fix this by replacing physindev pointer with plain
>> ifindex instead? There are not too many places that need to
>> peek into the original net_device struct, so I don't think
>> the additional dev_get_by_index_rcu() would be an issue.
>
> I will work on it, thanks for a good idea!
>
If we replace nf_bridge->physindev completely, we would need to do
something like this in every place physindev was used:
diff --git a/include/linux/netfilter_bridge.h
b/include/linux/netfilter_bridge.h
index f980edfdd2783..105fbdb029261 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -56,11 +56,15 @@ static inline int nf_bridge_get_physoutif(const
struct sk_buff *skb)
}
static inline struct net_device *
-nf_bridge_get_physindev(const struct sk_buff *skb)
+nf_bridge_get_physindev_rcu(const struct sk_buff *skb)
{
const struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
+ struct net_device *dev;
- return nf_bridge ? nf_bridge->physindev : NULL;
+ if (!nf_bridge || !skb->dev)
+ return 0;
+
+ return dev_get_by_index_rcu(skb->dev->net, nf_bridge->physindev_if);
}
static inline struct net_device *
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a5ae952454c89..51e7cdf9b51c9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -295,7 +295,7 @@ struct nf_bridge_info {
u8 bridged_dnat:1;
u8 sabotage_in_done:1;
__u16 frag_max_size;
- struct net_device *physindev;
+ int *physindev_if;
/* always valid & non-NULL from FORWARD on, for physdev match */
struct net_device *physoutdev;
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c
b/net/ipv4/netfilter/nf_reject_ipv4.c
index f01b038fc1cda..01b3eb169772e 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -289,7 +289,8 @@ void nf_send_reset(struct net *net, struct sock *sk,
struct sk_buff *oldskb,
* build the eth header using the original destination's MAC as the
* source, and send the RST packet directly.
*/
- br_indev = nf_bridge_get_physindev(oldskb);
+ rcu_read_lock_bh();
+ br_indev = nf_bridge_get_physindev_rcu(oldskb);
if (br_indev) {
struct ethhdr *oeth = eth_hdr(oldskb);
@@ -297,12 +298,19 @@ void nf_send_reset(struct net *net, struct sock
*sk, struct sk_buff *oldskb,
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol),
- oeth->h_source, oeth->h_dest,
nskb->len) < 0)
+ oeth->h_source, oeth->h_dest,
nskb->len) < 0) {
+ rcu_read_unlock_bh();
goto free_nskb;
+ }
dev_queue_xmit(nskb);
- } else
+ rcu_read_unlock_bh();
+ } else {
+ rcu_read_unlock_bh();
#endif
ip_local_out(net, nskb->sk, nskb);
+#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ }
+#endif
return;
Does it sound good?
Or maybe instead we could keep the existing physindev and add an extra
physindev_if field alongside it, so that dev_get_by_index_rcu is only
needed inside br_nf_pre_routing_finish_bridge_slow to double-check the
->physindev link?
Sorry in advance if I'm missing anything obvious.
--
Best regards, Tikhomirov Pavel
Senior Software Developer, Virtuozzo.
Powered by blists - more mailing lists