[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <f5507529-e61c-4b81-ab93-4ea6c8df46e9@openvpn.net>
Date: Fri, 17 Jan 2025 13:59:35 +0100
From: Antonio Quartulli <antonio@...nvpn.net>
To: Sabrina Dubroca <sd@...asysnail.net>, ryazanov.s.a@...il.com
Cc: netdev@...r.kernel.org, Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>, Paolo Abeni <pabeni@...hat.com>,
Donald Hunter <donald.hunter@...il.com>, Shuah Khan <shuah@...nel.org>,
Andrew Lunn <andrew+netdev@...n.ch>, Simon Horman <horms@...nel.org>,
linux-kernel@...r.kernel.org, linux-kselftest@...r.kernel.org,
Xiao Liang <shaw.leon@...il.com>
Subject: Re: [PATCH net-next v18 20/25] ovpn: implement peer
add/get/dump/delete via netlink
On 17/01/2025 12:48, Sabrina Dubroca wrote:
> 2025-01-13, 10:31:39 +0100, Antonio Quartulli wrote:
>> int ovpn_nl_peer_new_doit(struct sk_buff *skb, struct genl_info *info)
>> {
>> - return -EOPNOTSUPP;
>> + struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
>> + struct ovpn_priv *ovpn = info->user_ptr[0];
>> + struct ovpn_socket *ovpn_sock;
>> + struct socket *sock = NULL;
>> + struct ovpn_peer *peer;
>> + u32 sockfd, peer_id;
>> + int ret;
>> +
>> + /* peers can only be added when the interface is up and running */
>> + if (!netif_running(ovpn->dev))
>> + return -ENETDOWN;
>
> Since we're not under rtnl_lock here, the device could go down while
> we're creating this peer, and we may end up with a down device that
> has a peer anyway.
hmm, indeed. This means we must hold the rtnl_lock to prevent ending up
in an inconsistent state.
>
> I'm not sure what this (and the peer flushing on NETDEV_DOWN) is
> trying to accomplish. Is it a problem to keep peers when the netdevice
> is down?
This is the result of my discussion with Sergey that started in v11 5/23:
https://lore.kernel.org/r/netdev/20241029-b4-ovpn-v11-5-de4698c73a25@openvpn.net/
The idea was to match operational state with actual connectivity to peer(s).
Originally I wanted to simply keep the carrier always on, but after
further discussion (including the meaning of the openvpn option
--persist-tun) we agreed on following the logic where an UP device has a
peer connected (logic is slightly different between MP and P2P).
I am not extremely happy with the resulting complexity, but it seemed to
be a blocker for Sergey.
>
>> +
>> + if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
>> + return -EINVAL;
>> +
>> + ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
>> + ovpn_peer_nl_policy, info->extack);
>> + if (ret)
>> + return ret;
>> +
>> + ret = ovpn_nl_peer_precheck(ovpn, info, attrs);
>> + if (ret < 0)
>> + return ret;
>> +
>> + if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
>> + OVPN_A_PEER_SOCKET))
>> + return -EINVAL;
>> +
>> + peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
>> + peer = ovpn_peer_new(ovpn, peer_id);
>> + if (IS_ERR(peer)) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "cannot create new peer object for peer %u: %ld",
>> + peer_id, PTR_ERR(peer));
>> + return PTR_ERR(peer);
>> + }
>> +
>> + /* lookup the fd in the kernel table and extract the socket object */
>> + sockfd = nla_get_u32(attrs[OVPN_A_PEER_SOCKET]);
>> + /* sockfd_lookup() increases sock's refcounter */
>> + sock = sockfd_lookup(sockfd, &ret);
>> + if (!sock) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "cannot lookup peer socket (fd=%u): %d",
>> + sockfd, ret);
>> + return -ENOTSOCK;
>
> All those returns should be "goto peer_release" (and setting ret) so
> that we don't leak peer.
indeed
>
>> + }
>> +
>> + /* Only when using UDP as transport protocol the remote endpoint
>> + * can be configured so that ovpn knows where to send packets to.
>> + *
>> + * In case of TCP, the socket is connected to the peer and ovpn
>> + * will just send bytes over it, without the need to specify a
>> + * destination.
>> + */
>> + if (sock->sk->sk_protocol != IPPROTO_UDP &&
>> + (attrs[OVPN_A_PEER_REMOTE_IPV4] ||
>> + attrs[OVPN_A_PEER_REMOTE_IPV6])) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "unexpected remote IP address for non UDP socket");
>> + sockfd_put(sock);
>> + return -EINVAL;
>
> goto peer_release
>
>> + }
>> +
>> + ovpn_sock = ovpn_socket_new(sock, peer);
>> + if (IS_ERR(ovpn_sock)) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "cannot encapsulate socket: %ld",
>> + PTR_ERR(ovpn_sock));
>> + sockfd_put(sock);
>> + return -ENOTSOCK;
>
> goto peer_release
>
>> + }
>> +
>> + peer->sock = ovpn_sock;
>> +
>> + ret = ovpn_nl_peer_modify(peer, info, attrs);
>> + if (ret < 0)
>> + goto peer_release;
>> +
>> + ret = ovpn_peer_add(ovpn, peer);
>> + if (ret < 0) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "cannot add new peer (id=%u) to hashtable: %d\n",
>> + peer->id, ret);
>> + goto peer_release;
>> + }
>> +
>> + return 0;
>> +
>> +peer_release:
>> + /* release right away because peer is not used in any context */
>> + ovpn_peer_release(peer);
>> +
>> + return ret;
>> }
>
>
> [...]
>> int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
>> {
>> - return -EOPNOTSUPP;
>> + struct nlattr *attrs[OVPN_A_PEER_MAX + 1];
>> + struct ovpn_priv *ovpn = info->user_ptr[0];
>> + struct ovpn_peer *peer;
>> + u32 peer_id;
>> + int ret;
>> +
>> + if (GENL_REQ_ATTR_CHECK(info, OVPN_A_PEER))
>> + return -EINVAL;
>> +
>> + ret = nla_parse_nested(attrs, OVPN_A_PEER_MAX, info->attrs[OVPN_A_PEER],
>> + ovpn_peer_nl_policy, info->extack);
>> + if (ret)
>> + return ret;
>> +
>> + if (NL_REQ_ATTR_CHECK(info->extack, info->attrs[OVPN_A_PEER], attrs,
>> + OVPN_A_PEER_ID))
>> + return -EINVAL;
>> +
>> + peer_id = nla_get_u32(attrs[OVPN_A_PEER_ID]);
>> + peer = ovpn_peer_get_by_id(ovpn, peer_id);
>> + if (!peer) {
>> + NL_SET_ERR_MSG_FMT_MOD(info->extack,
>> + "cannot find peer with id %u", peer_id);
>> + return -ENOENT;
>> + }
>> +
>> + netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
>> + ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
>
> With the delayed socket release (which is similar to what was in v11,
> but now with refcounting on the netdevice which should make
> rtnl_link_unregister in ovpn_cleanup wait [*]), we may return to
> userspace as if the peer was gone, but the socket hasn't been detached
> yet.
>
> A userspace application that tries to remove the peer and immediately
> re-create it with the same socket could get EBUSY if the workqueue
> hasn't done its job yet. That would be quite confusing to the
> application.
This may happen only for TCP, because in the UDP case we would increase
the refcounter and keep the socket attached.
However, re-attaching the same TCP socket is hardly going to happen (in
TCP we have one socket per peer, therefore if the peer is going away,
we're most likely killing the socket too).
This said, the complexity added by the completion seems quite tiny,
therefore I'll add the code you are suggesting below.
>
> So I would add a completion to wait here until the socket has been
> fully detached. Something like below.
>
> [*] I don't think the current refcounting fully protects against that,
> I'll comment on 05/25
ok!
>
>
> -------- 8< --------
>
> diff --git a/drivers/net/ovpn/netlink.c b/drivers/net/ovpn/netlink.c
> index 72357bb5f30b..19aa4ee6d468 100644
> --- a/drivers/net/ovpn/netlink.c
> +++ b/drivers/net/ovpn/netlink.c
> @@ -733,6 +733,9 @@ int ovpn_nl_peer_del_doit(struct sk_buff *skb, struct genl_info *info)
>
> netdev_dbg(ovpn->dev, "del peer %u\n", peer->id);
> ret = ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_USERSPACE);
> + if (ret >= 0 && peer->sock)
> + wait_for_completion(&peer->sock_detach);
> +
> ovpn_peer_put(peer);
>
> return ret;
> diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
> index b032390047fe..6120521d0c32 100644
> --- a/drivers/net/ovpn/peer.c
> +++ b/drivers/net/ovpn/peer.c
> @@ -92,6 +92,7 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_priv *ovpn, u32 id)
> ovpn_peer_stats_init(&peer->vpn_stats);
> ovpn_peer_stats_init(&peer->link_stats);
> INIT_WORK(&peer->keepalive_work, ovpn_peer_keepalive_send);
> + init_completion(&peer->sock_detach);
>
> ret = dst_cache_init(&peer->dst_cache, GFP_KERNEL);
> if (ret < 0) {
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index 7a062cc5a5a4..8c54bf5709ef 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -112,6 +112,7 @@ struct ovpn_peer {
> struct rcu_head rcu;
> struct work_struct remove_work;
> struct work_struct keepalive_work;
> + struct completion sock_detach;
> };
>
> /**
> diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
> index a5c3bc834a35..7cefac42c3be 100644
> --- a/drivers/net/ovpn/socket.c
> +++ b/drivers/net/ovpn/socket.c
> @@ -31,6 +31,8 @@ static void ovpn_socket_release_kref(struct kref *kref)
>
> sockfd_put(sock->sock);
> kfree_rcu(sock, rcu);
> +
> + complete(&sock->peer->sock_detach);
> }
>
> /**
> @@ -181,12 +183,12 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
>
> ovpn_sock->sock = sock;
> kref_init(&ovpn_sock->refcount);
> + ovpn_sock->peer = peer;
>
> /* TCP sockets are per-peer, therefore they are linked to their unique
> * peer
> */
> if (sock->sk->sk_protocol == IPPROTO_TCP) {
> - ovpn_sock->peer = peer;
> ovpn_peer_hold(peer);
> } else if (sock->sk->sk_protocol == IPPROTO_UDP) {
> /* in UDP we only link the ovpn instance since the socket is
> diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
> index 15827e347f53..3f5a35fd9048 100644
> --- a/drivers/net/ovpn/socket.h
> +++ b/drivers/net/ovpn/socket.h
> @@ -28,12 +28,12 @@ struct ovpn_peer;
> * @rcu: member used to schedule RCU destructor callback
> */
> struct ovpn_socket {
> + struct ovpn_peer *peer;
> union {
> struct {
> struct ovpn_priv *ovpn;
> netdevice_tracker dev_tracker;
> };
> - struct ovpn_peer *peer;
> };
>
> struct socket *sock;
>
>
Thanks! will add this.
Regards,
--
Antonio Quartulli
OpenVPN Inc.
Powered by blists - more mailing lists