2.6.23-stable review patch. If anyone has any objections, please let us know. ------------------ From: Eric Dumazet [IPV4] ROUTE: ip_rt_dump() is unecessary slow [ Upstream commit: d8c9283089287341c85a0a69de32c2287a990e71 ] I noticed "ip route list cache x.y.z.t" can be *very* slow. While strace-ing -T it I also noticed that first part of route cache is fetched quite fast : recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3772 <0.000047> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3736 <0.000042> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3740 <0.000055> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3712 <0.000043> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\ 202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3732 <0.000053> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3708 <0.000052> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202 GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3680 <0.000041> while the part at the end of the table is more expensive: recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3656 <0.003857> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\204\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3772 <0.003891> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3712 <0.003765> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3700 <0.003879> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3676 <0.003797> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"p\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\2\0\2\0"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3724 <0.003856> recvmsg(3, {msg_name(12)={sa_family=AF_NETLINK, pid=0, groups=00000000}, msg_iov(1)=[{"\234\0\0\0\30\0\2\0\254i\202GXm\0\0\2 \0\376\0\0\1\0\2"..., 16384}], msg_controllen=0, msg_flags=0}, 0) = 3736 <0.003848> The following patch corrects this performance/latency problem, removing quadratic behavior. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman --- net/ipv4/route.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2648,11 +2648,10 @@ int ip_rt_dump(struct sk_buff *skb, str int idx, s_idx; s_h = cb->args[0]; + if (s_h < 0) + s_h = 0; s_idx = idx = cb->args[1]; - for (h = 0; h <= rt_hash_mask; h++) { - if (h < s_h) continue; - if (h > s_h) - s_idx = 0; + for (h = s_h; h <= rt_hash_mask; h++) { rcu_read_lock_bh(); for (rt = rcu_dereference(rt_hash_table[h].chain), idx = 0; rt; rt = rcu_dereference(rt->u.dst.rt_next), idx++) { @@ -2669,6 +2668,7 @@ int ip_rt_dump(struct sk_buff *skb, str dst_release(xchg(&skb->dst, NULL)); } rcu_read_unlock_bh(); + s_idx = 0; } done: -- -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/