lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <CANn89iLZUkQrsfqvEZGmz9ZVoVk1CNQzaZyCcJ53o9e2-1GTPQ@mail.gmail.com>
Date: Tue, 26 Aug 2025 00:32:53 -0700
From: Eric Dumazet <edumazet@...gle.com>
To: Qingfang Deng <dqfext@...il.com>
Cc: Michal Ostrowski <mostrows@...thlink.net>, Andrew Lunn <andrew+netdev@...n.ch>, 
	"David S. Miller" <davem@...emloft.net>, Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>, 
	netdev@...r.kernel.org, linux-kernel@...r.kernel.org
Subject: Re: [PATCH net-next 1/2] pppoe: remove rwlock usage

On Mon, Aug 25, 2025 at 7:34 PM Qingfang Deng <dqfext@...il.com> wrote:
>
> Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for
> lookups and a spinlock for updates. This removes rwlock usage and allows
> lockless readers on the fast path.
>
> - Mark hash table and list pointers as __rcu.
> - Use spin_lock() to protect writers.
> - Readers use rcu_dereference() under rcu_read_lock(). All known callers
>   of get_item() already hold the RCU read lock, so no additional locking
>   is needed.
> - Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace
>   period.
>
> Signed-off-by: Qingfang Deng <dqfext@...il.com>
> ---
>  drivers/net/ppp/pppoe.c  | 83 ++++++++++++++++++++++------------------
>  include/linux/if_pppox.h |  2 +-
>  2 files changed, 46 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
> index 410effa42ade..f99533c80b66 100644
> --- a/drivers/net/ppp/pppoe.c
> +++ b/drivers/net/ppp/pppoe.c
> @@ -100,8 +100,8 @@ struct pppoe_net {
>          * as well, moreover in case of SMP less locking
>          * controversy here
>          */
> -       struct pppox_sock *hash_table[PPPOE_HASH_SIZE];
> -       rwlock_t hash_lock;
> +       struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE];
> +       spinlock_t hash_lock;
>  };
>
>  /*
> @@ -162,13 +162,13 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
>         int hash = hash_item(sid, addr);
>         struct pppox_sock *ret;
>
> -       ret = pn->hash_table[hash];
> +       ret = rcu_dereference(pn->hash_table[hash]);
>         while (ret) {
>                 if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
>                     ret->pppoe_ifindex == ifindex)
>                         return ret;
>
> -               ret = ret->next;
> +               ret = rcu_dereference(ret->next);
>         }
>
>         return NULL;
> @@ -177,19 +177,20 @@ static struct pppox_sock *__get_item(struct pppoe_net *pn, __be16 sid,
>  static int __set_item(struct pppoe_net *pn, struct pppox_sock *po)
>  {
>         int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote);
> -       struct pppox_sock *ret;
> +       struct pppox_sock *ret, *first;
>
> -       ret = pn->hash_table[hash];
> +       first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
> +       ret = first;
>         while (ret) {
>                 if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) &&
>                     ret->pppoe_ifindex == po->pppoe_ifindex)
>                         return -EALREADY;
>
> -               ret = ret->next;
> +               ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
>         }
>
> -       po->next = pn->hash_table[hash];
> -       pn->hash_table[hash] = po;
> +       RCU_INIT_POINTER(po->next, first);
> +       rcu_assign_pointer(pn->hash_table[hash], po);
>
>         return 0;
>  }
> @@ -198,20 +199,24 @@ static void __delete_item(struct pppoe_net *pn, __be16 sid,
>                                         char *addr, int ifindex)
>  {
>         int hash = hash_item(sid, addr);
> -       struct pppox_sock *ret, **src;
> +       struct pppox_sock *ret, __rcu **src;
>
> -       ret = pn->hash_table[hash];
> +       ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock));
>         src = &pn->hash_table[hash];
>
>         while (ret) {
>                 if (cmp_addr(&ret->pppoe_pa, sid, addr) &&
>                     ret->pppoe_ifindex == ifindex) {
> -                       *src = ret->next;
> +                       struct pppox_sock *next;
> +
> +                       next = rcu_dereference_protected(ret->next,
> +                                                        lockdep_is_held(&pn->hash_lock));
> +                       rcu_assign_pointer(*src, next);
>                         break;
>                 }
>
>                 src = &ret->next;
> -               ret = ret->next;
> +               ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock));
>         }
>  }
>
> @@ -225,11 +230,9 @@ static inline struct pppox_sock *get_item(struct pppoe_net *pn, __be16 sid,
>  {
>         struct pppox_sock *po;
>
> -       read_lock_bh(&pn->hash_lock);
>         po = __get_item(pn, sid, addr, ifindex);
>         if (po)
>                 sock_hold(sk_pppox(po));

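Are you sure the RCU rules guarantee that sk_refcnt cannot be zero here? With lockless readers and SOCK_RCU_FREE, the lookup can still find a socket whose last reference has already been dropped but which has not yet been freed.

sock_hold() will crash otherwise.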

Something like this is needed instead:

if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt))
    po = NULL;

I will send fixes to drivers/net/pptp.c, net/l2tp/l2tp_ppp.c,
net/phonet/socket.c, net/qrtr/af_qrtr.c, net/tipc/socket.c

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ