[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iLZUkQrsfqvEZGmz9ZVoVk1CNQzaZyCcJ53o9e2-1GTPQ@mail.gmail.com>
Date: Tue, 26 Aug 2025 00:32:53 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: Qingfang Deng <dqfext@...il.com>
Cc: Michal Ostrowski <mostrows@...thlink.net>, Andrew Lunn <andrew+netdev@...n.ch>,
"David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next 1/2] pppoe: remove rwlock usage
On Mon, Aug 25, 2025 at 7:34 PM Qingfang Deng <dqfext@...il.com> wrote:
>
> Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for
> lookups and a spinlock for updates. This removes rwlock usage and allows
> lockless readers on the fast path.
>
> - Mark hash table and list pointers as __rcu.
> - Use spin_lock() to protect writers.
> - Readers use rcu_dereference() under rcu_read_lock(). All known callers
> of get_item() already hold the RCU read lock, so no additional locking
> is needed.
> - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace
> period.
>
> Signed-off-by: Qingfang Deng <dqfext@...il.com>
> ---
> drivers/net/ppp/pppoe.c | 83 ++++++++++++++++++++++------------------
> include/linux/if_pppox.h | 2 +-
> 2 files changed, 46 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
> index 410effa42ade..f99533c80b66 100644
> --- a/drivers/net/ppp/pppoe.c
> +++ b/drivers/net/ppp/pppoe.c
> @@ -100,8 +100,8 @@ struct pppoe_net {
> * as well, moreover in case of SMP less locking
> * controversy here
> */
> - struct pppox_sock *hash_table[PPPOE_HASH_SIZE];
> - rwlock_t hash_lock;
> + struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE];
> + spinlock_t hash_lock;
> };
>
> /*
> @@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
> int hash = hash_item(sid, addr);
> struct pppox_sock *ret;
>
> - ret = pn->hash_table[hash];
> + ret = rcu_dereference(pn->hash_table[hash]);
> while (ret) {
> if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
> ret->pppoe_ifindex == ifindex)
> return ret;
>
> - ret = ret->next;
> + ret = rcu_dereference(ret->next);
> }
>
> return NULL;
> @@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
> static int __set_item(struct pppoe_net *pn, struct pppox_sock *po)
> {
> int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
> - struct pppox_sock *ret;
> + struct pppox_sock *ret, *first;
>
> - ret = pn->hash_table[hash];
> + first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
> + ret = first;
> while (ret) {
> if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) &&
> ret->pppoe_ifindex == po->pppoe_ifindex)
> return -EALREADY;
>
> - ret = ret->next;
> + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
> }
>
> - po->next = pn->hash_table[hash];
> - pn->hash_table[hash] = po;
> + RCU_INIT_POINTER(po->next, first);
> + rcu_assign_pointer(pn->hash_table[hash], po);
>
> return 0;
> }
> @@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid,
> char *addr, int ifindex)
> {
> int hash = hash_item(sid, addr);
> - struct pppox_sock *ret, **src;
> + struct pppox_sock *ret, __rcu **src;
>
> - ret = pn->hash_table[hash];
> + ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
> src = &pn->hash_table[hash];
>
> while (ret) {
> if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
> ret->pppoe_ifindex == ifindex) {
> - *src = ret->next;
> + struct pppox_sock *next;
> +
> + next = rcu_dereference_protected(ret->next,
> + lockdep_is_held(&pn->hash_lock));
> + rcu_assign_pointer(*src, next);
> break;
> }
>
> src = &ret->next;
> - ret = ret->next;
> + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
> }
> }
>
> @@ -225,11 +230,9 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid,
> {
> struct pppox_sock *po;
>
> - read_lock_bh(&pn->hash_lock);
> po = __get_item(pn, sid, addr, ifindex);
> if (po)
> sock_hold(sk_pppox(po));
Are you sure that RCU rules make sure sk_refcnt can not be zero ?
sock_hold() will crash otherwise.
if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt))
po = NULL;
I will send fixes to drivers/net/pptp.c, net/l2tp/l2tp_ppp.c,
net/phonet/socket.c, net/qrtr/af_qrtr.c, net/tipc/socket.c
Powered by blists - more mailing lists