[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <a4210935-7dc3-42fd-b8bd-68ea8151058e@intel.com>
Date: Thu, 13 Mar 2025 15:53:23 -0700
From: Jacob Keller <jacob.e.keller@...el.com>
To: Eric Dumazet <edumazet@...gle.com>, "David S . Miller"
<davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni
<pabeni@...hat.com>
CC: David Ahern <dsahern@...nel.org>, Simon Horman <horms@...nel.org>,
<netdev@...r.kernel.org>, <eric.dumazet@...il.com>
Subject: Re: [PATCH v2 net-next 4/4] inet: frags: save a pair of atomic
operations in reassembly
On 3/12/2025 1:22 AM, Eric Dumazet wrote:
> As mentioned in commit 648700f76b03 ("inet: frags:
> use rhashtables for reassembly units"):
>
> A followup patch will even remove the refcount hold/release
> left from prior implementation and save a couple of atomic
> operations.
>
> This patch implements this idea, seven years later.
>
Easy for things to slip through the cracks and get forgotten.
Reviewed-by: Jacob Keller <jacob.e.keller@...el.com>
> Signed-off-by: Eric Dumazet <edumazet@...gle.com>
> ---
> net/ieee802154/6lowpan/reassembly.c | 5 ++++-
> net/ipv4/inet_fragment.c | 18 ++++++++----------
> net/ipv4/ip_fragment.c | 5 ++++-
> net/ipv6/netfilter/nf_conntrack_reasm.c | 5 ++++-
> net/ipv6/reassembly.c | 9 ++++-----
> 5 files changed, 24 insertions(+), 18 deletions(-)
>
> diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c
> index c5f86cff06ee7f95556955f994b859c86a315646..d4b983d170382e616ea58821b197da89885a6bb2 100644
> --- a/net/ieee802154/6lowpan/reassembly.c
> +++ b/net/ieee802154/6lowpan/reassembly.c
> @@ -304,17 +304,20 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
> goto err;
> }
>
> + rcu_read_lock();
> fq = fq_find(net, cb, &hdr.source, &hdr.dest);
> if (fq != NULL) {
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&fq->q.lock);
> ret = lowpan_frag_queue(fq, skb, frag_type, &refs);
> spin_unlock(&fq->q.lock);
>
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> return ret;
> }
> + rcu_read_unlock();
>
> err:
> kfree_skb(skb);
> diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
> index 5eb18605001387e7f23b8661dc9f24a533ab1600..19fae4811ab2803bed2faa4900869f883cb3073c 100644
> --- a/net/ipv4/inet_fragment.c
> +++ b/net/ipv4/inet_fragment.c
> @@ -327,7 +327,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
>
> timer_setup(&q->timer, f->frag_expire, 0);
> spin_lock_init(&q->lock);
> - refcount_set(&q->refcnt, 3);
> + /* One reference for the timer, one for the hash table. */
> + refcount_set(&q->refcnt, 2);
>
> return q;
> }
> @@ -349,7 +350,11 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
> *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
> &q->node, f->rhash_params);
> if (*prev) {
> - int refs = 2;
> + /* We could not insert in the hash table,
> + * we need to cancel what inet_frag_alloc()
> + * anticipated.
> + */
> + int refs = 1;
>
> q->flags |= INET_FRAG_COMPLETE;
> inet_frag_kill(q, &refs);
> @@ -359,7 +364,6 @@ static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
> return q;
> }
>
> -/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
> struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
> {
> /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */
> @@ -369,17 +373,11 @@ struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
> if (!high_thresh || frag_mem_limit(fqdir) > high_thresh)
> return NULL;
>
> - rcu_read_lock();
> -
> prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
> if (!prev)
> fq = inet_frag_create(fqdir, key, &prev);
> - if (!IS_ERR_OR_NULL(prev)) {
> + if (!IS_ERR_OR_NULL(prev))
> fq = prev;
> - if (!refcount_inc_not_zero(&fq->refcnt))
> - fq = NULL;
> - }
> - rcu_read_unlock();
> return fq;
> }
> EXPORT_SYMBOL(inet_frag_find);
> diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
> index c5f3c810706fb328c3d8a4d8501424df0dceaa8e..77f395b28ec748bcd85b8dfa0a8c0b8a74684103 100644
> --- a/net/ipv4/ip_fragment.c
> +++ b/net/ipv4/ip_fragment.c
> @@ -483,18 +483,21 @@ int ip_defrag(struct net *net, struct sk_buff *skb, u32 user)
> __IP_INC_STATS(net, IPSTATS_MIB_REASMREQDS);
>
> /* Lookup (or create) queue header */
> + rcu_read_lock();
> qp = ip_find(net, ip_hdr(skb), user, vif);
> if (qp) {
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&qp->q.lock);
>
> ret = ip_frag_queue(qp, skb, &refs);
>
> spin_unlock(&qp->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&qp->q, refs);
> return ret;
> }
> + rcu_read_unlock();
>
> __IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
> kfree_skb(skb);
> diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
> index f33acb730dc5807205811c2675efd27a9ee99222..d6bd8f7079bb74ec99030201163332ed5c6d2eec 100644
> --- a/net/ipv6/netfilter/nf_conntrack_reasm.c
> +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
> @@ -450,7 +450,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> struct frag_hdr *fhdr;
> struct frag_queue *fq;
> struct ipv6hdr *hdr;
> - int refs = 1;
> + int refs = 0;
> u8 prevhdr;
>
> /* Jumbo payload inhibits frag. header */
> @@ -477,9 +477,11 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> hdr = ipv6_hdr(skb);
> fhdr = (struct frag_hdr *)skb_transport_header(skb);
>
> + rcu_read_lock();
> fq = fq_find(net, fhdr->identification, user, hdr,
> skb->dev ? skb->dev->ifindex : 0);
> if (fq == NULL) {
> + rcu_read_unlock();
> pr_debug("Can't find and can't create new queue\n");
> return -ENOMEM;
> }
> @@ -493,6 +495,7 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
> }
>
> spin_unlock_bh(&fq->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> return ret;
> }
> diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
> index 7560380bd5871217d476f2e0e39332926c458bc1..49740898bc1370ff0ca89928750c6de85a45303f 100644
> --- a/net/ipv6/reassembly.c
> +++ b/net/ipv6/reassembly.c
> @@ -305,9 +305,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
> skb_postpush_rcsum(skb, skb_network_header(skb),
> skb_network_header_len(skb));
>
> - rcu_read_lock();
> __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMOKS);
> - rcu_read_unlock();
> fq->q.rb_fragments = RB_ROOT;
> fq->q.fragments_tail = NULL;
> fq->q.last_run_head = NULL;
> @@ -319,9 +317,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
> out_oom:
> net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n");
> out_fail:
> - rcu_read_lock();
> __IP6_INC_STATS(net, __in6_dev_stats_get(dev, skb), IPSTATS_MIB_REASMFAILS);
> - rcu_read_unlock();
> inet_frag_kill(&fq->q, refs);
> return -1;
> }
> @@ -379,10 +375,11 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> }
>
> iif = skb->dev ? skb->dev->ifindex : 0;
> + rcu_read_lock();
> fq = fq_find(net, fhdr->identification, hdr, iif);
> if (fq) {
> u32 prob_offset = 0;
> - int ret, refs = 1;
> + int ret, refs = 0;
>
> spin_lock(&fq->q.lock);
>
> @@ -391,6 +388,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> &prob_offset, &refs);
>
> spin_unlock(&fq->q.lock);
> + rcu_read_unlock();
> inet_frag_putn(&fq->q, refs);
> if (prob_offset) {
> __IP6_INC_STATS(net, __in6_dev_get_safely(skb->dev),
> @@ -400,6 +398,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
> }
> return ret;
> }
> + rcu_read_unlock();
>
> __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS);
> kfree_skb(skb);
Powered by blists - more mailing lists