[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <ZkIosadLULByXFKc@hog>
Date: Mon, 13 May 2024 16:50:25 +0200
From: Sabrina Dubroca <sd@...asysnail.net>
To: Antonio Quartulli <antonio@...nvpn.net>
Cc: netdev@...r.kernel.org, Jakub Kicinski <kuba@...nel.org>,
Sergey Ryazanov <ryazanov.s.a@...il.com>,
Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>,
Andrew Lunn <andrew@...n.ch>, Esben Haabendal <esben@...nix.com>
Subject: Re: [PATCH net-next v3 13/24] ovpn: implement TCP transport
2024-05-06, 03:16:26 +0200, Antonio Quartulli wrote:
> @@ -307,6 +308,7 @@ static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
> /* Process packets in TX queue in a transport-specific way.
> *
> * UDP transport - encrypt and send across the tunnel.
> + * TCP transport - encrypt and put into TCP TX queue.
> */
> void ovpn_encrypt_work(struct work_struct *work)
> {
> @@ -340,6 +342,9 @@ void ovpn_encrypt_work(struct work_struct *work)
> ovpn_udp_send_skb(peer->ovpn, peer,
> curr);
> break;
> + case IPPROTO_TCP:
> + ovpn_tcp_send_skb(peer, curr);
> + break;
> default:
> /* no transport configured yet */
> consume_skb(skb);
> diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
> index 9ae9844dd281..a04d6e55a473 100644
> --- a/drivers/net/ovpn/main.c
> +++ b/drivers/net/ovpn/main.c
> @@ -23,6 +23,7 @@
> #include "io.h"
> #include "packet.h"
> #include "peer.h"
> +#include "tcp.h"
>
> /* Driver info */
> #define DRV_DESCRIPTION "OpenVPN data channel offload (ovpn)"
> @@ -247,8 +248,14 @@ static struct pernet_operations ovpn_pernet_ops = {
>
> static int __init ovpn_init(void)
> {
> - int err = register_netdevice_notifier(&ovpn_netdev_notifier);
> + int err = ovpn_tcp_init();
>
> + if (err) {
ovpn_tcp_init cannot fail (and if it could, you'd need to clean up
when register_netdevice_notifier fails). I'd make ovpn_tcp_init void
and kill this check.
> + pr_err("ovpn: cannot initialize TCP component: %d\n", err);
> + return err;
> + }
> +
> + err = register_netdevice_notifier(&ovpn_netdev_notifier);
> if (err) {
> pr_err("ovpn: can't register netdevice notifier: %d\n", err);
> return err;
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index b5ff59a4b40f..ac4907705d98 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -33,6 +33,16 @@
> * @netif_rx_ring: queue of packets to be sent to the netdevice via NAPI
> * @napi: NAPI object
> * @sock: the socket being used to talk to this peer
> + * @tcp.tx_ring: queue for packets to be forwarded to userspace (TCP only)
> + * @tcp.tx_work: work for processing outgoing socket data (TCP only)
> + * @tcp.rx_work: wok for processing incoming socket data (TCP only)
tcp.rx_work is never actually used.
If you keep it: s/wok/work/
> + * @tcp.raw_len: next packet length as read from the stream (TCP only)
> + * @tcp.skb: next packet being filled with data from the stream (TCP only)
> + * @tcp.offset: position of the next byte to write in the skb (TCP only)
> + * @tcp.data_len: next packet length converted to host order (TCP only)
It would be nice to add information about whether they're used for TX or RX.
> + * @tcp.sk_cb.sk_data_ready: pointer to original cb
> + * @tcp.sk_cb.sk_write_space: pointer to original cb
> + * @tcp.sk_cb.prot: pointer to original prot object
> * @crypto: the crypto configuration (ciphers, keys, etc..)
> * @dst_cache: cache for dst_entry used to send to peer
> * @bind: remote peer binding
> @@ -59,6 +69,25 @@ struct ovpn_peer {
> struct ptr_ring netif_rx_ring;
> struct napi_struct napi;
> struct ovpn_socket *sock;
> + /* state of the TCP reading. Needed to keep track of how much of a
> + * single packet has already been read from the stream and how much is
> + * missing
> + */
> + struct {
> + struct ptr_ring tx_ring;
> + struct work_struct tx_work;
> + struct work_struct rx_work;
> +
> + u8 raw_len[sizeof(u16)];
Why not u16 or __be16 for this one?
> + struct sk_buff *skb;
> + u16 offset;
> + u16 data_len;
> + struct {
> + void (*sk_data_ready)(struct sock *sk);
> + void (*sk_write_space)(struct sock *sk);
> + struct proto *prot;
> + } sk_cb;
> + } tcp;
> struct ovpn_crypto_state crypto;
> struct dst_cache dst_cache;
> struct ovpn_bind __rcu *bind;
> diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
> new file mode 100644
> index 000000000000..ba92811e12ff
> --- /dev/null
> +++ b/drivers/net/ovpn/skb.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* OpenVPN data channel offload
> + *
> + * Copyright (C) 2020-2024 OpenVPN, Inc.
> + *
> + * Author: Antonio Quartulli <antonio@...nvpn.net>
> + * James Yonan <james@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_SKB_H_
> +#define _NET_OVPN_SKB_H_
> +
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/ip.h>
> +#include <linux/skbuff.h>
> +#include <linux/socket.h>
> +#include <linux/types.h>
> +
> +#define OVPN_SKB_CB(skb) ((struct ovpn_skb_cb *)&((skb)->cb))
> +
> +struct ovpn_skb_cb {
> + union {
> + struct in_addr ipv4;
> + struct in6_addr ipv6;
> + } local;
> + sa_family_t sa_fam;
> +};
> +
> +/* Return IP protocol version from skb header.
> + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
> + */
> +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
A dupe of this function exists in drivers/net/ovpn/io.c. I guess you
can just introduce skb.h from the start (with only
ovpn_ip_check_protocol at first).
> +{
> + __be16 proto = 0;
> +
> + /* skb could be non-linear,
> + * make sure IP header is in non-fragmented part
> + */
> + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> + return 0;
> +
> + if (ip_hdr(skb)->version == 4)
> + proto = htons(ETH_P_IP);
> + else if (ip_hdr(skb)->version == 6)
> + proto = htons(ETH_P_IPV6);
> +
> + return proto;
> +}
> +
> +#endif /* _NET_OVPN_SKB_H_ */
> diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
> index e099a61b03fa..004db5b13663 100644
> --- a/drivers/net/ovpn/socket.c
> +++ b/drivers/net/ovpn/socket.c
> @@ -16,6 +16,7 @@
> #include "packet.h"
> #include "peer.h"
> #include "socket.h"
> +#include "tcp.h"
> #include "udp.h"
>
> /* Finalize release of socket, called after RCU grace period */
> @@ -26,6 +27,8 @@ static void ovpn_socket_detach(struct socket *sock)
>
> if (sock->sk->sk_protocol == IPPROTO_UDP)
> ovpn_udp_socket_detach(sock);
> + else if (sock->sk->sk_protocol == IPPROTO_TCP)
> + ovpn_tcp_socket_detach(sock);
>
> sockfd_put(sock);
> }
> @@ -69,6 +72,8 @@ static int ovpn_socket_attach(struct socket *sock, struct ovpn_peer *peer)
>
> if (sock->sk->sk_protocol == IPPROTO_UDP)
> ret = ovpn_udp_socket_attach(sock, peer->ovpn);
> + else if (sock->sk->sk_protocol == IPPROTO_TCP)
> + ret = ovpn_tcp_socket_attach(sock, peer);
>
> return ret;
> }
> @@ -124,6 +129,21 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
> ovpn_sock->sock = sock;
The line above this is:
ovpn_sock->ovpn = peer->ovpn;
It's technically fine since you then overwrite this with peer in case
we're on TCP, but ovpn_sock->ovpn only exists on UDP since you moved
it into a union in this patch.
> kref_init(&ovpn_sock->refcount);
>
> + /* TCP sockets are per-peer, therefore they are linked to their unique
> + * peer
> + */
> + if (sock->sk->sk_protocol == IPPROTO_TCP) {
> + ovpn_sock->peer = peer;
> + ret = ptr_ring_init(&ovpn_sock->recv_ring, OVPN_QUEUE_LEN,
> + GFP_KERNEL);
> + if (ret < 0) {
> + netdev_err(peer->ovpn->dev, "%s: cannot allocate TCP recv ring\n",
> + __func__);
Should you also call ovpn_socket_detach here? (as well as when the
kzalloc for ovpn_sock fails a bit earlier)
> + kfree(ovpn_sock);
> + return ERR_PTR(ret);
> + }
> + }
> +
> rcu_assign_sk_user_data(sock->sk, ovpn_sock);
>
> return ovpn_sock;
> diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
> index 0d23de5a9344..88c6271ba5c7 100644
> --- a/drivers/net/ovpn/socket.h
> +++ b/drivers/net/ovpn/socket.h
> @@ -21,12 +21,25 @@ struct ovpn_peer;
> /**
> * struct ovpn_socket - a kernel socket referenced in the ovpn code
> * @ovpn: ovpn instance owning this socket (UDP only)
> + * @peer: unique peer transmitting over this socket (TCP only)
> + * @recv_ring: queue where non-data packets directed to userspace are stored
> * @sock: the low level sock object
> * @refcount: amount of contexts currently referencing this object
> * @rcu: member used to schedule RCU destructor callback
> */
> struct ovpn_socket {
> - struct ovpn_struct *ovpn;
> + union {
> + /* the VPN session object owning this socket (UDP only) */
nit: Probably not needed
> + struct ovpn_struct *ovpn;
> +
> + /* TCP only */
> + struct {
> + /** @peer: unique peer transmitting over this socket */
Is kdoc upset about peer but not recv_ring?
> + struct ovpn_peer *peer;
> + struct ptr_ring recv_ring;
> + };
> + };
> +
> struct socket *sock;
> struct kref refcount;
> struct rcu_head rcu;
> diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
> new file mode 100644
> index 000000000000..84ad7cd4fc4f
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.c
> @@ -0,0 +1,511 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* OpenVPN data channel offload
> + *
> + * Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + * Author: Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#include <linux/ptr_ring.h>
> +#include <linux/skbuff.h>
> +#include <net/tcp.h>
> +#include <net/route.h>
> +
> +#include "ovpnstruct.h"
> +#include "main.h"
> +#include "io.h"
> +#include "packet.h"
> +#include "peer.h"
> +#include "proto.h"
> +#include "skb.h"
> +#include "socket.h"
> +#include "tcp.h"
> +
> +static struct proto ovpn_tcp_prot;
> +
> +static int ovpn_tcp_read_sock(read_descriptor_t *desc, struct sk_buff *in_skb,
> + unsigned int in_offset, size_t in_len)
> +{
> + struct sock *sk = desc->arg.data;
> + struct ovpn_socket *sock;
> + struct ovpn_skb_cb *cb;
> + struct ovpn_peer *peer;
> + size_t chunk, copied = 0;
> + void *data;
> + u16 len;
> + int st;
> +
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + rcu_read_unlock();
You can't just release rcu_read_lock and keep using sock (here and in
the rest of this file). Either you keep rcu_read_lock, or you can take
a reference on the ovpn_socket.
Anyway, this looks like you're reinventing strparser. Overall this is
very similar to net/xfrm/espintcp.c, but the receive side of espintcp
uses strp and is much shorter (recv_ring looks equivalent to
ike_queue, both sending a few messages to userspace -- look for
strp_init, espintcp_rcv, espintcp_parse in that file).
> +/* Set TCP encapsulation callbacks */
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer)
> +{
> + void *old_data;
> + int ret;
> +
> + INIT_WORK(&peer->tcp.tx_work, ovpn_tcp_tx_work);
> +
> + ret = ptr_ring_init(&peer->tcp.tx_ring, OVPN_QUEUE_LEN, GFP_KERNEL);
> + if (ret < 0) {
> + netdev_err(peer->ovpn->dev, "cannot allocate TCP TX ring\n");
> + return ret;
> + }
> +
> + peer->tcp.skb = NULL;
> + peer->tcp.offset = 0;
> + peer->tcp.data_len = 0;
> +
> + write_lock_bh(&sock->sk->sk_callback_lock);
> +
> + /* make sure no pre-existing encapsulation handler exists */
> + rcu_read_lock();
> + old_data = rcu_dereference_sk_user_data(sock->sk);
> + rcu_read_unlock();
> + if (old_data) {
> + netdev_err(peer->ovpn->dev,
> + "provided socket already taken by other user\n");
> + ret = -EBUSY;
> + goto err;
The UDP code differentiates "socket already owned by this interface"
from "already taken by other user". That doesn't apply to TCP?
> +int __init ovpn_tcp_init(void)
> +{
> + /* We need to substitute the recvmsg and the sock_is_readable
> + * callbacks in the sk_prot member of the sock object for TCP
> + * sockets.
> + *
> + * However sock->sk_prot is a pointer to a static variable and
> + * therefore we can't directly modify it, otherwise every socket
> + * pointing to it will be affected.
> + *
> + * For this reason we create our own static copy and modify what
> + * we need. Then we make sk_prot point to this copy
> + * (in ovpn_tcp_socket_attach())
> + */
> + ovpn_tcp_prot = tcp_prot;
Don't you need a separate variant for IPv6, like TLS does?
> + ovpn_tcp_prot.recvmsg = ovpn_tcp_recvmsg;
You don't need to replace ->sendmsg as well? The userspace client is
not expected to send messages?
--
Sabrina
Powered by blists - more mailing lists