[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20241023105345.1376856-4-steffen.klassert@secunet.com>
Date: Wed, 23 Oct 2024 12:53:44 +0200
From: Steffen Klassert <steffen.klassert@...unet.com>
To: Tobias Brunner <tobias@...ongswan.org>, Antony Antony
<antony.antony@...unet.com>, Daniel Xu <dxu@...uu.xyz>, Paul Wouters
<paul@...ats.ca>, Simon Horman <horms@...nel.org>, Sabrina Dubroca
<sd@...asysnail.net>
CC: Steffen Klassert <steffen.klassert@...unet.com>, <netdev@...r.kernel.org>,
<devel@...ux-ipsec.org>
Subject: [PATCH v3 ipsec-next 3/4] xfrm: Add an inbound percpu state cache.
Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we add a percpu cache to cache the used inbound xfrm states.
Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
---
include/net/netns/xfrm.h | 1 +
include/net/xfrm.h | 5 ++++
net/ipv4/esp4_offload.c | 6 ++---
net/ipv6/esp6_offload.c | 6 ++---
net/xfrm/xfrm_input.c | 2 +-
net/xfrm/xfrm_state.c | 57 ++++++++++++++++++++++++++++++++++++++++
6 files changed, 70 insertions(+), 7 deletions(-)
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index d489d9250bff..4e0702598d52 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -43,6 +43,7 @@ struct netns_xfrm {
struct hlist_head __rcu *state_bysrc;
struct hlist_head __rcu *state_byspi;
struct hlist_head __rcu *state_byseq;
+ struct hlist_head __percpu *state_cache_input;
unsigned int state_hmask;
unsigned int state_num;
struct work_struct state_hash_work;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 0710efb8c143..ecc35536853e 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -185,6 +185,7 @@ struct xfrm_state {
struct hlist_node byspi;
struct hlist_node byseq;
struct hlist_node state_cache;
+ struct hlist_node state_cache_input;
refcount_t refcnt;
spinlock_t lock;
@@ -1645,6 +1646,10 @@ int xfrm_state_update(struct xfrm_state *x);
struct xfrm_state *xfrm_state_lookup(struct net *net, u32 mark,
const xfrm_address_t *daddr, __be32 spi,
u8 proto, unsigned short family);
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family);
struct xfrm_state *xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index 80c4ea0e12f4..e0d94270da28 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -53,9 +53,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ip_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ip_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 919ebfabbe4e..7b41fb4f00b5 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -80,9 +80,9 @@ static struct sk_buff *esp6_gro_receive(struct list_head *head,
if (sp->len == XFRM_MAX_DEPTH)
goto out_reset;
- x = xfrm_state_lookup(dev_net(skb->dev), skb->mark,
- (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
- spi, IPPROTO_ESP, AF_INET6);
+ x = xfrm_input_state_lookup(dev_net(skb->dev), skb->mark,
+ (xfrm_address_t *)&ipv6_hdr(skb)->daddr,
+ spi, IPPROTO_ESP, AF_INET6);
if (unlikely(x && x->dir && x->dir != XFRM_SA_DIR_IN)) {
/* non-offload path will record the error and audit log */
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index 749e7eea99e4..841a60a6fbfe 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -572,7 +572,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
goto drop;
}
- x = xfrm_state_lookup(net, mark, daddr, spi, nexthdr, family);
+ x = xfrm_input_state_lookup(net, mark, daddr, spi, nexthdr, family);
if (x == NULL) {
secpath_reset(skb);
XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES);
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index a2047825f6c8..e3266a5d4f90 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -754,6 +754,9 @@ int __xfrm_state_delete(struct xfrm_state *x)
hlist_del_rcu(&x->byseq);
if (!hlist_unhashed(&x->state_cache))
hlist_del_rcu(&x->state_cache);
+ if (!hlist_unhashed(&x->state_cache_input))
+ hlist_del_rcu(&x->state_cache_input);
+
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1106,6 +1109,52 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark,
return NULL;
}
+struct xfrm_state *xfrm_input_state_lookup(struct net *net, u32 mark,
+ const xfrm_address_t *daddr,
+ __be32 spi, u8 proto,
+ unsigned short family)
+{
+ struct hlist_head *state_cache_input;
+ struct xfrm_state *x = NULL;
+ int cpu = get_cpu();
+
+ state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu);
+
+ rcu_read_lock();
+ hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) {
+ if (x->props.family != family ||
+ x->id.spi != spi ||
+ x->id.proto != proto ||
+ !xfrm_addr_equal(&x->id.daddr, daddr, family))
+ continue;
+
+ if ((mark & x->mark.m) != x->mark.v)
+ continue;
+ if (!xfrm_state_hold_rcu(x))
+ continue;
+ goto out;
+ }
+
+ x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family);
+
+ if (x && x->km.state == XFRM_STATE_VALID) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache_input)) {
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ } else {
+ hlist_del_rcu(&x->state_cache_input);
+ hlist_add_head_rcu(&x->state_cache_input, state_cache_input);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
+out:
+ rcu_read_unlock();
+ put_cpu();
+ return x;
+}
+EXPORT_SYMBOL(xfrm_input_state_lookup);
+
static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark,
const xfrm_address_t *daddr,
const xfrm_address_t *saddr,
@@ -3079,6 +3128,11 @@ int __net_init xfrm_state_init(struct net *net)
net->xfrm.state_byseq = xfrm_hash_alloc(sz);
if (!net->xfrm.state_byseq)
goto out_byseq;
+
+ net->xfrm.state_cache_input = alloc_percpu(struct hlist_head);
+ if (!net->xfrm.state_cache_input)
+ goto out_state_cache_input;
+
net->xfrm.state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
net->xfrm.state_num = 0;
@@ -3088,6 +3142,8 @@ int __net_init xfrm_state_init(struct net *net)
&net->xfrm.xfrm_state_lock);
return 0;
+out_state_cache_input:
+ xfrm_hash_free(net->xfrm.state_byseq, sz);
out_byseq:
xfrm_hash_free(net->xfrm.state_byspi, sz);
out_byspi:
@@ -3117,6 +3173,7 @@ void xfrm_state_fini(struct net *net)
xfrm_hash_free(net->xfrm.state_bysrc, sz);
WARN_ON(!hlist_empty(net->xfrm.state_bydst));
xfrm_hash_free(net->xfrm.state_bydst, sz);
+ free_percpu(net->xfrm.state_cache_input);
}
#ifdef CONFIG_AUDITSYSCALL
--
2.34.1
Powered by blists - more mailing lists