[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241213130212.1783302-5-edumazet@google.com>
Date: Fri, 13 Dec 2024 13:02:12 +0000
From: Eric Dumazet <edumazet@...gle.com>
To: "David S . Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>
Cc: netdev@...r.kernel.org, Simon Horman <horms@...nel.org>,
David Ahern <dsahern@...nel.org>, Kuniyuki Iwashima <kuniyu@...zon.com>, eric.dumazet@...il.com,
Eric Dumazet <edumazet@...gle.com>
Subject: [PATCH net-next 4/4] inetpeer: do not get a refcount in inet_getpeer()
All inet_getpeer() callers except ip4_frag_init() don't need
to acquire a permanent refcount on the inetpeer.
They can switch to full RCU protection.
Move the refcount_inc_not_zero() into ip4_frag_init(),
so that all the other callers no longer have to
perform a pair of expensive atomic operations on
a possibly contended cache line.
inet_putpeer() no longer needs to be exported.
After this patch, my DUT can receive 8,400,000 UDP packets
per second targetting closed ports, using 50% cpu cycles
less than before.
Fixes: 8c2bd38b95f7 ("icmp: change the order of rate limits")
Signed-off-by: Eric Dumazet <edumazet@...gle.com>
---
net/ipv4/icmp.c | 4 ++--
net/ipv4/inetpeer.c | 8 ++------
net/ipv4/ip_fragment.c | 15 ++++++++++-----
net/ipv4/route.c | 13 +++++++------
net/ipv6/icmp.c | 4 ++--
net/ipv6/ip6_output.c | 4 ++--
net/ipv6/ndisc.c | 6 ++++--
7 files changed, 29 insertions(+), 25 deletions(-)
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 5eeb9f569a706cf2766d74bcf1a667c8930804f2..7a1b1af2edcae0b0648ef3c3411b4ef36e6d9b14 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -322,11 +322,11 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
goto out;
vif = l3mdev_master_ifindex(dst->dev);
+ rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif);
rc = inet_peer_xrlim_allow(peer,
READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
out:
if (!rc)
__ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 67827c9bf2c8f3ba842ff1dc3b7e1fc2976e6ef1..b025eaba501305635ae46672ff3c7de75c4fcc08 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -109,8 +109,6 @@ static struct inet_peer *lookup(const struct inetpeer_addr *daddr,
p = rb_entry(parent, struct inet_peer, rb_node);
cmp = inetpeer_addr_cmp(daddr, &p->daddr);
if (cmp == 0) {
- if (!refcount_inc_not_zero(&p->refcnt))
- break;
now = jiffies;
if (READ_ONCE(p->dtime) != now)
WRITE_ONCE(p->dtime, now);
@@ -169,6 +167,7 @@ static void inet_peer_gc(struct inet_peer_base *base,
}
}
+/* Must be called under RCU : No refcount change is done here. */
struct inet_peer *inet_getpeer(struct inet_peer_base *base,
const struct inetpeer_addr *daddr)
{
@@ -180,11 +179,9 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
/* Attempt a lockless lookup first.
* Because of a concurrent writer, we might not find an existing entry.
*/
- rcu_read_lock();
seq = read_seqbegin(&base->lock);
p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp);
invalidated = read_seqretry(&base->lock, seq);
- rcu_read_unlock();
if (p)
return p;
@@ -202,7 +199,7 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base,
if (p) {
p->daddr = *daddr;
p->dtime = (__u32)jiffies;
- refcount_set(&p->refcnt, 2);
+ refcount_set(&p->refcnt, 1);
atomic_set(&p->rid, 0);
p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW;
p->rate_tokens = 0;
@@ -230,7 +227,6 @@ void inet_putpeer(struct inet_peer *p)
if (refcount_dec_and_test(&p->refcnt))
kfree_rcu(p, rcu);
}
-EXPORT_SYMBOL_GPL(inet_putpeer);
/*
* Check transmit rate limitation for given message.
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 46e1171299f22ccf0b201eabbff5d3279a0703d8..7a435746a22dee9f11c0dc732a8b5a7724f4eea3 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -82,15 +82,20 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
- struct net *net = q->fqdir->net;
-
const struct frag_v4_compare_key *key = a;
+ struct net *net = q->fqdir->net;
+ struct inet_peer *p = NULL;
q->key.v4 = *key;
qp->ecn = 0;
- qp->peer = q->fqdir->max_dist ?
- inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif) :
- NULL;
+ if (q->fqdir->max_dist) {
+ rcu_read_lock();
+ p = inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif);
+ if (p && !refcount_inc_not_zero(&p->refcnt))
+ p = NULL;
+ rcu_read_unlock();
+ }
+ qp->peer = p;
}
static void ip4_frag_free(struct inet_frag_queue *q)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 297a9939c6e74beffc592dbdd7266281fe842440..d2086648dcf180375c8d7981dfb72f87e50957f6 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -870,11 +870,11 @@ void ip_rt_send_redirect(struct sk_buff *skb)
}
log_martians = IN_DEV_LOG_MARTIANS(in_dev);
vif = l3mdev_master_ifindex_rcu(rt->dst.dev);
- rcu_read_unlock();
net = dev_net(rt->dst.dev);
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, vif);
if (!peer) {
+ rcu_read_unlock();
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST,
rt_nexthop(rt, ip_hdr(skb)->daddr));
return;
@@ -893,7 +893,7 @@ void ip_rt_send_redirect(struct sk_buff *skb)
*/
if (peer->n_redirects >= ip_rt_redirect_number) {
peer->rate_last = jiffies;
- goto out_put_peer;
+ goto out_unlock;
}
/* Check for load limit; set rate_last to the latest sent
@@ -914,8 +914,8 @@ void ip_rt_send_redirect(struct sk_buff *skb)
&ip_hdr(skb)->saddr, inet_iif(skb),
&ip_hdr(skb)->daddr, &gw);
}
-out_put_peer:
- inet_putpeer(peer);
+out_unlock:
+ rcu_read_unlock();
}
static int ip_error(struct sk_buff *skb)
@@ -975,9 +975,9 @@ static int ip_error(struct sk_buff *skb)
break;
}
+ rcu_read_lock();
peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr,
l3mdev_master_ifindex(skb->dev));
-
send = true;
if (peer) {
now = jiffies;
@@ -989,8 +989,9 @@ static int ip_error(struct sk_buff *skb)
peer->rate_tokens -= ip_rt_error_cost;
else
send = false;
- inet_putpeer(peer);
}
+ rcu_read_unlock();
+
if (send)
icmp_send(skb, ICMP_DEST_UNREACH, code, 0);
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 4593e3992c67b84e3a10f30be28762974094d21f..a6984a29fdb9dd972a11ca9f8d5e794c443bac6f 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -222,10 +222,10 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (rt->rt6i_dst.plen < 128)
tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
+ rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr);
res = inet_peer_xrlim_allow(peer, tmo);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
}
if (!res)
__ICMP6_INC_STATS(net, ip6_dst_idev(dst),
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 2cbcfe70654b5cd90c433a24c47ef5496c604d0d..06cab008b8277f1b6e56541e91fc92f999221ac5 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -613,6 +613,7 @@ int ip6_forward(struct sk_buff *skb)
else
target = &hdr->daddr;
+ rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr);
/* Limit redirects both by destination (here)
@@ -620,8 +621,7 @@ int ip6_forward(struct sk_buff *skb)
*/
if (inet_peer_xrlim_allow(peer, 1*HZ))
ndisc_send_redirect(skb, target);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
} else {
int addrtype = ipv6_addr_type(&hdr->saddr);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index f113554d13325453cd04ce4e5686d837943e96ff..d044c67019de6da1eb29dee875cf8cda30210ceb 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1731,10 +1731,12 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target)
"Redirect: destination is not a neighbour\n");
goto release;
}
+
+ rcu_read_lock();
peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr);
ret = inet_peer_xrlim_allow(peer, 1*HZ);
- if (peer)
- inet_putpeer(peer);
+ rcu_read_unlock();
+
if (!ret)
goto release;
--
2.47.1.613.gc27f4b7a9f-goog
Powered by blists - more mailing lists