[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <47DEACF7.10202@openvz.org>
Date: Mon, 17 Mar 2008 20:40:07 +0300
From: Pavel Emelyanov <xemul@...nvz.org>
To: Phil Oester <kernel@...uxace.com>
CC: netdev@...r.kernel.org
Subject: Re: 2.6.25-rc: Null dereference in ip_defrag
Phil Oester wrote:
> Been seeing occasional panics in my testing of 2.6.25-rc in ip_defrag.
> Offending line in ip_defrag is here:
>
> net = skb->dev->nd_net
Yikes :(
> where dev is NULL. Bisected the problem down to commit
> ac18e7509e7df327e30d6e073a787d922eaf211d ([NETNS][FRAGS]: Make the
> inet_frag_queue lookup work in namespaces).
>
> To prevent panic, I added the below patch (whitespace damaged):
>
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -568,6 +568,14 @@ int ip_defrag(struct sk_buff *skb, u32 user)
>
> IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
>
> + if (!skb->dev) {
> + printk("ip_defrag_bug: %u.%u.%u.%u -> %u.%u.%u.%u\n",
> + NIPQUAD(ip_hdr(skb)->saddr), NIPQUAD(ip_hdr(skb)->daddr));
> + WARN_ON(1);
> + kfree_skb(skb);
> + return -ENOMEM;
> + }
> +
>
> And the packets causing the problem are all multicast fragments being
> generated by Quagga's OSPFD (see debug output below). Tried manually generating
> some multicast fragments with iperf, but couldn't reproduce it.
>
> Any ideas?
This is the same as the problem fixed here:
commit 4136cd523eb0c0bd53173e16fd7406d31d05824f
[IPV4]: route: fix crash ip_route_input
The sk_buff sometimes does not have a valid dev when it reaches ip_defrag() :(
and you have to set up conntrack rules to make packets go this way.
But unlike the above problem, here we cannot even trust skb->dst to
be non-NULL...
Can you check with this patch, please (untested, but should work)?
diff --git a/include/net/ip.h b/include/net/ip.h
index 9f50d4f..fc6b650 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -330,7 +330,8 @@ enum ip_defrag_users
IP_DEFRAG_VS_FWD
};
-int ip_defrag(struct sk_buff *skb, u32 user);
+struct net;
+int ip_defrag(struct net *net, struct sk_buff *skb, u32 user);
int ip_frag_mem(struct net *net);
int ip_frag_nqueues(struct net *net);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index a2e92f9..aad6652 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -561,14 +561,12 @@ out_fail:
}
/* Process an incoming IP datagram fragment. */
-int ip_defrag(struct sk_buff *skb, u32 user)
+int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
{
struct ipq *qp;
- struct net *net;
IP_INC_STATS_BH(IPSTATS_MIB_REASMREQDS);
- net = skb->dev->nd_net;
/* Start by cleaning up the memory. */
if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh)
ip_evictor(net);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 6563139..25c6846 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -172,7 +172,8 @@ int ip_call_ra_chain(struct sk_buff *skb)
(!sk->sk_bound_dev_if ||
sk->sk_bound_dev_if == skb->dev->ifindex)) {
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) {
+ if (ip_defrag(skb->dev->nd_net, skb,
+ IP_DEFRAG_CALL_RA_CHAIN)) {
read_unlock(&ip_ra_lock);
return 1;
}
@@ -256,7 +257,7 @@ int ip_local_deliver(struct sk_buff *skb)
*/
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER))
+ if (ip_defrag(skb->dev->nd_net, skb, IP_DEFRAG_LOCAL_DELIVER))
return 0;
}
diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
index 963981a..d2c3c0f 100644
--- a/net/ipv4/ipvs/ip_vs_core.c
+++ b/net/ipv4/ipvs/ip_vs_core.c
@@ -506,7 +506,7 @@ __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
static inline int ip_vs_gather_frags(struct sk_buff *skb, u_int32_t user)
{
- int err = ip_defrag(skb, user);
+ int err = ip_defrag(&init_net, skb, user);
if (!err)
ip_send_check(ip_hdr(skb));
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index a65b845..00a0f63 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -58,14 +58,15 @@ static int ipv4_print_tuple(struct seq_file *s,
}
/* Returns new sk_buff, or NULL */
-static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int nf_ct_ipv4_gather_frags(struct net *net, struct sk_buff *skb,
+ u_int32_t user)
{
int err;
skb_orphan(skb);
local_bh_disable();
- err = ip_defrag(skb, user);
+ err = ip_defrag(net, skb, user);
local_bh_enable();
if (!err)
@@ -138,6 +139,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ u_int32_t user;
+ struct net *net;
+
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
if (skb->nfct)
@@ -145,10 +149,14 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
/* Gather fragments. */
if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (nf_ct_ipv4_gather_frags(skb,
- hooknum == NF_INET_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT))
+ if (hooknum == NF_INET_PRE_ROUTING) {
+ user = IP_DEFRAG_CONNTRACK_IN;
+ net = in->nd_net;
+ } else {
+ user = IP_DEFRAG_CONNTRACK_OUT;
+ net = out->nd_net;
+ }
+ if (nf_ct_ipv4_gather_frags(net, skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
> Phil
>
>
> ip_defrag_bug: 10.253.13.122 -> 224.0.0.5
> ------------[ cut here ]------------
> WARNING: at net/ipv4/ip_fragment.c:574 ip_defrag+0x9d/0xa0d()
> Pid: 1662, comm: ospfd Not tainted 2.6.25-rc4 #4
>
> Call Trace:
> [<ffffffff8022742a>] warn_on_slowpath+0x53/0x66
> [<ffffffff80228230>] ? printk+0x67/0x69
> [<ffffffff803b36f5>] ? skb_release_data+0xa8/0xad
> [<ffffffff803b35b3>] ? __kfree_skb+0x74/0x78
> [<ffffffff803d77c7>] ip_defrag+0x9d/0xa0d
> [<ffffffff803b15c6>] ? sock_def_write_space+0x18/0x89
> [<ffffffff80404324>] ipv4_conntrack_defrag+0x67/0x96
> [<ffffffff803caa7b>] nf_iterate+0x41/0x81
> [<ffffffff803f29b0>] ? dst_output+0x0/0x10
> [<ffffffff803cab19>] nf_hook_slow+0x5e/0xbe
> [<ffffffff803f29b0>] ? dst_output+0x0/0x10
> [<ffffffff803f3b6e>] raw_sendmsg+0x586/0x758
> [<ffffffff803fb169>] inet_sendmsg+0x46/0x53
> [<ffffffff803adef9>] sock_sendmsg+0xdf/0xf8
> [<ffffffff80421cd1>] ? _spin_lock_bh+0x11/0x29
> [<ffffffff803afbfc>] ? release_sock+0x9b/0xa3
> [<ffffffff80238b37>] ? autoremove_wake_function+0x0/0x38
> [<ffffffff803ad359>] ? move_addr_to_kernel+0x25/0x35
> [<ffffffff803c4c6f>] ? verify_compat_iovec+0x60/0x9e
> [<ffffffff803ae0f3>] sys_sendmsg+0x1e1/0x253
> [<ffffffff802332ee>] ? getrusage+0x1c9/0x1e6
> [<ffffffff804206d4>] ? thread_return+0x3d/0x9c
> [<ffffffff803c45d0>] compat_sys_sendmsg+0xf/0x11
> [<ffffffff803c4e82>] compat_sys_socketcall+0x13f/0x158
> [<ffffffff8021cc12>] sysenter_do_call+0x1b/0x66
>
> ---[ end trace 48218d00aa061d3c ]---
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@...r.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists