[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231214113645.2416005-3-steffen.klassert@secunet.com>
Date: Thu, 14 Dec 2023 12:36:44 +0100
From: Steffen Klassert <steffen.klassert@...unet.com>
To: <netdev@...r.kernel.org>, <devel@...ux-ipsec.org>
CC: Steffen Klassert <steffen.klassert@...unet.com>
Subject: [PATCH RFC ipsec-next 2/3] xfrm: Cache used outbound xfrm states at the policy.
Now that we can have percpu xfrm states, the number of active
states might increase. To get a better lookup performance,
we cache the used xfrm states at the policy for outbound
IPsec traffic.
Signed-off-by: Steffen Klassert <steffen.klassert@...unet.com>
---
include/net/xfrm.h | 3 +++
net/xfrm/xfrm_policy.c | 12 ++++++++++
net/xfrm/xfrm_state.c | 54 ++++++++++++++++++++++++++++++++++++++++++
3 files changed, 69 insertions(+)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8c5ea1802fda..5c4d67d0a4de 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -179,6 +179,7 @@ struct xfrm_state {
struct hlist_node bysrc;
struct hlist_node byspi;
struct hlist_node byseq;
+ struct hlist_node state_cache;
refcount_t refcnt;
spinlock_t lock;
@@ -522,6 +523,8 @@ struct xfrm_policy {
struct hlist_node bydst;
struct hlist_node byidx;
+ struct hlist_head state_cache_list;
+
/* This lock only affects elements except for entry. */
rwlock_t lock;
refcount_t refcnt;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index c13dc3ef7910..4f0b2892195b 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -409,6 +409,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
if (policy) {
write_pnet(&policy->xp_net, net);
INIT_LIST_HEAD(&policy->walk.all);
+ INIT_HLIST_HEAD(&policy->state_cache_list);
INIT_HLIST_NODE(&policy->bydst_inexact_list);
INIT_HLIST_NODE(&policy->bydst);
INIT_HLIST_NODE(&policy->byidx);
@@ -451,6 +452,9 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
static void xfrm_policy_kill(struct xfrm_policy *policy)
{
+ struct net *net = xp_net(policy);
+ struct xfrm_state *x;
+
write_lock_bh(&policy->lock);
policy->walk.dead = 1;
write_unlock_bh(&policy->lock);
@@ -464,6 +468,13 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
if (del_timer(&policy->timer))
xfrm_pol_put(policy);
+ /* XXX: Flush state cache */
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ hlist_for_each_entry_rcu(x, &policy->state_cache_list, state_cache) {
+ hlist_del_init_rcu(&x->state_cache);
+ }
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+
xfrm_pol_put(policy);
}
@@ -3250,6 +3261,7 @@ struct dst_entry *xfrm_lookup_with_ifid(struct net *net,
dst_release(dst);
dst = dst_orig;
}
+
ok:
xfrm_pols_put(pols, drop_pols);
if (dst && dst->xfrm &&
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index b5caac4678f7..416d08f15568 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -663,6 +663,7 @@ struct xfrm_state *xfrm_state_alloc(struct net *net)
refcount_set(&x->refcnt, 1);
atomic_set(&x->tunnel_users, 0);
INIT_LIST_HEAD(&x->km.all);
+ INIT_HLIST_NODE(&x->state_cache);
INIT_HLIST_NODE(&x->bydst);
INIT_HLIST_NODE(&x->bysrc);
INIT_HLIST_NODE(&x->byspi);
@@ -707,12 +708,15 @@ int __xfrm_state_delete(struct xfrm_state *x)
if (x->km.state != XFRM_STATE_DEAD) {
x->km.state = XFRM_STATE_DEAD;
+
spin_lock(&net->xfrm.xfrm_state_lock);
list_del(&x->km.all);
hlist_del_rcu(&x->bydst);
hlist_del_rcu(&x->bysrc);
if (x->km.seq)
hlist_del_rcu(&x->byseq);
+ if (!hlist_unhashed(&x->state_cache))
+ hlist_del_rcu(&x->state_cache);
if (x->id.spi)
hlist_del_rcu(&x->byspi);
net->xfrm.state_num--;
@@ -1160,6 +1164,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
unsigned short encap_family = tmpl->encap_family;
unsigned int sequence;
struct km_event c;
+ bool cached = false;
unsigned int pcpu_id = get_cpu();
put_cpu();
@@ -1168,6 +1173,45 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation);
rcu_read_lock();
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_state_addr_check(x, daddr, saddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, encap_family,
+ &best, &acquire_in_progress, &error);
+ }
+
+ if (best)
+ goto cached;
+
+ hlist_for_each_entry_rcu(x, &pol->state_cache_list, state_cache) {
+ if (x->props.family == encap_family &&
+ x->props.reqid == tmpl->reqid &&
+ (mark & x->mark.m) == x->mark.v &&
+ x->if_id == if_id &&
+ !(x->props.flags & XFRM_STATE_WILDRECV) &&
+ xfrm_addr_equal(&x->id.daddr, daddr, encap_family) &&
+ tmpl->mode == x->props.mode &&
+ tmpl->id.proto == x->id.proto &&
+ (tmpl->id.spi == x->id.spi || !tmpl->id.spi))
+ xfrm_state_look_at(pol, x, fl, family,
+ &best, &acquire_in_progress, &error);
+ }
+
+cached:
+ if (best)
+ goto found;
+ else if (error)
+ best = NULL;
+ else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */
+ WARN_ON(1);
+
h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family);
hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) {
#ifdef CONFIG_XFRM_OFFLOAD
@@ -1317,6 +1361,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
XFRM_STATE_INSERT(bysrc, &x->bysrc,
net->xfrm.state_bysrc + h,
x->xso.type);
+ INIT_HLIST_NODE(&x->state_cache);
if (x->id.spi) {
h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family);
XFRM_STATE_INSERT(byspi, &x->byspi,
@@ -1363,6 +1408,15 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr,
} else {
*err = acquire_in_progress ? -EAGAIN : error;
}
+
+ if (x && x->km.state == XFRM_STATE_VALID && !cached &&
+ (!(pol->flags & XFRM_POLICY_CPU_ACQUIRE) || x->pcpu_num == pcpu_id)) {
+ spin_lock_bh(&net->xfrm.xfrm_state_lock);
+ if (hlist_unhashed(&x->state_cache))
+ hlist_add_head_rcu(&x->state_cache, &pol->state_cache_list);
+ spin_unlock_bh(&net->xfrm.xfrm_state_lock);
+ }
+
rcu_read_unlock();
if (to_put)
xfrm_state_put(to_put);
--
2.34.1
Powered by blists - more mailing lists