lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4b42fa39-f204-481d-a097-7d41da53f7d6@openvpn.net>
Date: Mon, 13 May 2024 15:37:54 +0200
From: Antonio Quartulli <antonio@...nvpn.net>
To: Simon Horman <horms@...nel.org>
Cc: Jakub Kicinski <kuba@...nel.org>, Sergey Ryazanov
 <ryazanov.s.a@...il.com>, Paolo Abeni <pabeni@...hat.com>,
 Eric Dumazet <edumazet@...gle.com>, Andrew Lunn <andrew@...n.ch>,
 Esben Haabendal <esben@...nix.com>, netdev@...r.kernel.org
Subject: Re: [PATCH net-next v3 13/24] ovpn: implement TCP transport

Hi Simon,

On 06/05/2024 03:16, Antonio Quartulli wrote:
[...]
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index b5ff59a4b40f..ac4907705d98 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -33,6 +33,16 @@
>    * @netif_rx_ring: queue of packets to be sent to the netdevice via NAPI
>    * @napi: NAPI object
>    * @sock: the socket being used to talk to this peer
> + * @tcp.tx_ring: queue for packets to be forwarded to userspace (TCP only)
> + * @tcp.tx_work: work for processing outgoing socket data (TCP only)
> + * @tcp.rx_work: work for processing incoming socket data (TCP only)
> + * @tcp.raw_len: next packet length as read from the stream (TCP only)

can you please help me with the following warning from kerneldoc?
As you can see below, raw_len is an array.

Could that be the reason why the script isn't picking it up correctly?

drivers/net/ovpn/peer.h:101: warning: Function parameter or struct 
member 'raw_len' not described in 'ovpn_peer'
drivers/net/ovpn/peer.h:101: warning: Excess struct member 'tcp.raw_len' 
description in 'ovpn_peer'

(line number may differ as I am in the middle of a rebase)

Regards,


> + * @tcp.skb: next packet being filled with data from the stream (TCP only)
> + * @tcp.offset: position of the next byte to write in the skb (TCP only)
> + * @tcp.data_len: next packet length converted to host order (TCP only)
> + * @tcp.sk_cb.sk_data_ready: pointer to original cb
> + * @tcp.sk_cb.sk_write_space: pointer to original cb
> + * @tcp.sk_cb.prot: pointer to original prot object
>    * @crypto: the crypto configuration (ciphers, keys, etc..)
>    * @dst_cache: cache for dst_entry used to send to peer
>    * @bind: remote peer binding
> @@ -59,6 +69,25 @@ struct ovpn_peer {
>   	struct ptr_ring netif_rx_ring;
>   	struct napi_struct napi;
>   	struct ovpn_socket *sock;
> +	/* state of the TCP reading. Needed to keep track of how much of a
> +	 * single packet has already been read from the stream and how much is
> +	 * missing
> +	 */
> +	struct {
> +		struct ptr_ring tx_ring;
> +		struct work_struct tx_work;
> +		struct work_struct rx_work;
> +
> +		u8 raw_len[sizeof(u16)];
> +		struct sk_buff *skb;
> +		u16 offset;
> +		u16 data_len;
> +		struct {
> +			void (*sk_data_ready)(struct sock *sk);
> +			void (*sk_write_space)(struct sock *sk);
> +			struct proto *prot;
> +		} sk_cb;
> +	} tcp;
>   	struct ovpn_crypto_state crypto;
>   	struct dst_cache dst_cache;
>   	struct ovpn_bind __rcu *bind;
> diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
> new file mode 100644
> index 000000000000..ba92811e12ff
> --- /dev/null
> +++ b/drivers/net/ovpn/skb.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2020-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@...nvpn.net>
> + *		James Yonan <james@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_SKB_H_
> +#define _NET_OVPN_SKB_H_
> +
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/ip.h>
> +#include <linux/skbuff.h>
> +#include <linux/socket.h>
> +#include <linux/types.h>
> +
> +#define OVPN_SKB_CB(skb) ((struct ovpn_skb_cb *)&((skb)->cb))
> +
> +struct ovpn_skb_cb {
> +	union {
> +		struct in_addr ipv4;
> +		struct in6_addr ipv6;
> +	} local;
> +	sa_family_t sa_fam;
> +};
> +
> +/* Return IP protocol version from skb header.
> + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
> + */
> +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
> +{
> +	__be16 proto = 0;
> +
> +	/* skb could be non-linear,
> +	 * make sure IP header is in non-fragmented part
> +	 */
> +	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> +		return 0;
> +
> +	if (ip_hdr(skb)->version == 4)
> +		proto = htons(ETH_P_IP);
> +	else if (ip_hdr(skb)->version == 6)
> +		proto = htons(ETH_P_IPV6);
> +
> +	return proto;
> +}
> +
> +#endif /* _NET_OVPN_SKB_H_ */
> diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
> index e099a61b03fa..004db5b13663 100644
> --- a/drivers/net/ovpn/socket.c
> +++ b/drivers/net/ovpn/socket.c
> @@ -16,6 +16,7 @@
>   #include "packet.h"
>   #include "peer.h"
>   #include "socket.h"
> +#include "tcp.h"
>   #include "udp.h"
>   
>   /* Finalize release of socket, called after RCU grace period */
> @@ -26,6 +27,8 @@ static void ovpn_socket_detach(struct socket *sock)
>   
>   	if (sock->sk->sk_protocol == IPPROTO_UDP)
>   		ovpn_udp_socket_detach(sock);
> +	else if (sock->sk->sk_protocol == IPPROTO_TCP)
> +		ovpn_tcp_socket_detach(sock);
>   
>   	sockfd_put(sock);
>   }
> @@ -69,6 +72,8 @@ static int ovpn_socket_attach(struct socket *sock, struct ovpn_peer *peer)
>   
>   	if (sock->sk->sk_protocol == IPPROTO_UDP)
>   		ret = ovpn_udp_socket_attach(sock, peer->ovpn);
> +	else if (sock->sk->sk_protocol == IPPROTO_TCP)
> +		ret = ovpn_tcp_socket_attach(sock, peer);
>   
>   	return ret;
>   }
> @@ -124,6 +129,21 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
>   	ovpn_sock->sock = sock;
>   	kref_init(&ovpn_sock->refcount);
>   
> +	/* TCP sockets are per-peer, therefore they are linked to their unique
> +	 * peer
> +	 */
> +	if (sock->sk->sk_protocol == IPPROTO_TCP) {
> +		ovpn_sock->peer = peer;
> +		ret = ptr_ring_init(&ovpn_sock->recv_ring, OVPN_QUEUE_LEN,
> +				    GFP_KERNEL);
> +		if (ret < 0) {
> +			netdev_err(peer->ovpn->dev, "%s: cannot allocate TCP recv ring\n",
> +				   __func__);
> +			kfree(ovpn_sock);
> +			return ERR_PTR(ret);
> +		}
> +	}
> +
>   	rcu_assign_sk_user_data(sock->sk, ovpn_sock);
>   
>   	return ovpn_sock;
> diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
> index 0d23de5a9344..88c6271ba5c7 100644
> --- a/drivers/net/ovpn/socket.h
> +++ b/drivers/net/ovpn/socket.h
> @@ -21,12 +21,25 @@ struct ovpn_peer;
>   /**
>    * struct ovpn_socket - a kernel socket referenced in the ovpn code
>    * @ovpn: ovpn instance owning this socket (UDP only)
> + * @peer: unique peer transmitting over this socket (TCP only)
> + * @recv_ring: queue where non-data packets directed to userspace are stored
>    * @sock: the low level sock object
>    * @refcount: amount of contexts currently referencing this object
>    * @rcu: member used to schedule RCU destructor callback
>    */
>   struct ovpn_socket {
> -	struct ovpn_struct *ovpn;
> +	union {
> +		/* the VPN session object owning this socket (UDP only) */
> +		struct ovpn_struct *ovpn;
> +
> +		/* TCP only */
> +		struct {
> +			/** @peer: unique peer transmitting over this socket */
> +			struct ovpn_peer *peer;
> +			struct ptr_ring recv_ring;
> +		};
> +	};
> +
>   	struct socket *sock;
>   	struct kref refcount;
>   	struct rcu_head rcu;
> diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
> new file mode 100644
> index 000000000000..84ad7cd4fc4f
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.c
> @@ -0,0 +1,511 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#include <linux/ptr_ring.h>
> +#include <linux/skbuff.h>
> +#include <net/tcp.h>
> +#include <net/route.h>
> +
> +#include "ovpnstruct.h"
> +#include "main.h"
> +#include "io.h"
> +#include "packet.h"
> +#include "peer.h"
> +#include "proto.h"
> +#include "skb.h"
> +#include "socket.h"
> +#include "tcp.h"
> +
> +static struct proto ovpn_tcp_prot;
> +
> +static int ovpn_tcp_read_sock(read_descriptor_t *desc, struct sk_buff *in_skb,
> +			      unsigned int in_offset, size_t in_len)
> +{
> +	struct sock *sk = desc->arg.data;
> +	struct ovpn_socket *sock;
> +	struct ovpn_skb_cb *cb;
> +	struct ovpn_peer *peer;
> +	size_t chunk, copied = 0;
> +	void *data;
> +	u16 len;
> +	int st;
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference_sk_user_data(sk);
> +	rcu_read_unlock();
> +
> +	if (unlikely(!sock || !sock->peer)) {
> +		pr_err("ovpn: read_sock triggered for socket with no metadata\n");
> +		desc->error = -EINVAL;
> +		return 0;
> +	}
> +
> +	peer = sock->peer;
> +
> +	while (in_len > 0) {
> +		/* no skb allocated means that we have to read (or finish
> +		 * reading) the 2 bytes prefix containing the actual packet
> +		 * size.
> +		 */
> +		if (!peer->tcp.skb) {
> +			chunk = min_t(size_t, in_len,
> +				      sizeof(u16) - peer->tcp.offset);
> +			WARN_ON(skb_copy_bits(in_skb, in_offset,
> +					      peer->tcp.raw_len +
> +					      peer->tcp.offset, chunk) < 0);
> +			peer->tcp.offset += chunk;
> +
> +			/* keep on reading until we got the whole packet size */
> +			if (peer->tcp.offset != sizeof(u16))
> +				goto next_read;
> +
> +			len = ntohs(*(__be16 *)peer->tcp.raw_len);
> +			/* invalid packet length: this is a fatal TCP error */
> +			if (!len) {
> +				netdev_err(peer->ovpn->dev,
> +					   "%s: received invalid packet length: %d\n",
> +					   __func__, len);
> +				desc->error = -EINVAL;
> +				goto err;
> +			}
> +
> +			/* add 2 bytes to allocated space (and immediately
> +			 * reserve them) for packet length prepending, in case
> +			 * the skb has to be forwarded to userspace
> +			 */
> +			peer->tcp.skb =
> +				netdev_alloc_skb_ip_align(peer->ovpn->dev,
> +							  len + sizeof(u16));
> +			if (!peer->tcp.skb) {
> +				desc->error = -ENOMEM;
> +				goto err;
> +			}
> +			skb_reserve(peer->tcp.skb, sizeof(u16));
> +
> +			peer->tcp.offset = 0;
> +			peer->tcp.data_len = len;
> +		} else {
> +			chunk = min_t(size_t, in_len,
> +				      peer->tcp.data_len - peer->tcp.offset);
> +
> +			/* extend skb to accommodate the new chunk and copy it
> +			 * from the input skb
> +			 */
> +			data = skb_put(peer->tcp.skb, chunk);
> +			WARN_ON(skb_copy_bits(in_skb, in_offset, data,
> +					      chunk) < 0);
> +			peer->tcp.offset += chunk;
> +
> +			/* keep on reading until we get the full packet */
> +			if (peer->tcp.offset != peer->tcp.data_len)
> +				goto next_read;
> +
> +			/* do not perform IP caching for TCP connections */
> +			cb = OVPN_SKB_CB(peer->tcp.skb);
> +			cb->sa_fam = AF_UNSPEC;
> +
> +			/* At this point we know the packet is from a configured
> +			 * peer.
> +			 * DATA_V2 packets are handled in kernel space, the rest
> +			 * goes to user space.
> +			 *
> +			 * Queue skb for sending to userspace via recvmsg on the
> +			 * socket
> +			 */
> +			if (likely(ovpn_opcode_from_skb(peer->tcp.skb, 0) ==
> +				   OVPN_DATA_V2)) {
> +				/* hold reference to peer as required by
> +				 * ovpn_recv().
> +				 *
> +				 * NOTE: in this context we should already be
> +				 * holding a reference to this peer, therefore
> +				 * ovpn_peer_hold() is not expected to fail
> +				 */
> +				WARN_ON(!ovpn_peer_hold(peer));
> +				st = ovpn_recv(peer->ovpn, peer, peer->tcp.skb);
> +				if (unlikely(st < 0))
> +					ovpn_peer_put(peer);
> +
> +			} else {
> +				/* prepend skb with packet len. this way
> +				 * userspace can parse the packet as if it just
> +				 * arrived from the remote endpoint
> +				 */
> +				void *raw_len = __skb_push(peer->tcp.skb,
> +							   sizeof(u16));
> +
> +				memcpy(raw_len, peer->tcp.raw_len, sizeof(u16));
> +
> +				st = ptr_ring_produce_bh(&peer->sock->recv_ring,
> +							 peer->tcp.skb);
> +				if (likely(!st))
> +					peer->tcp.sk_cb.sk_data_ready(sk);
> +			}
> +
> +			/* skb not consumed - free it now */
> +			if (unlikely(st < 0))
> +				kfree_skb(peer->tcp.skb);
> +
> +			peer->tcp.skb = NULL;
> +			peer->tcp.offset = 0;
> +			peer->tcp.data_len = 0;
> +		}
> +next_read:
> +		in_len -= chunk;
> +		in_offset += chunk;
> +		copied += chunk;
> +	}
> +
> +	return copied;
> +err:
> +	netdev_err(peer->ovpn->dev, "cannot process incoming TCP data: %d\n",
> +		   desc->error);
> +	ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
> +	return 0;
> +}
> +
> +static void ovpn_tcp_data_ready(struct sock *sk)
> +{
> +	struct socket *sock = sk->sk_socket;
> +	read_descriptor_t desc;
> +
> +	if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
> +		return;
> +
> +	desc.arg.data = sk;
> +	desc.error = 0;
> +	desc.count = 1;
> +
> +	sock->ops->read_sock(sk, &desc, ovpn_tcp_read_sock);
> +}
> +
> +static void ovpn_tcp_write_space(struct sock *sk)
> +{
> +	struct ovpn_socket *sock;
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference_sk_user_data(sk);
> +	rcu_read_unlock();
> +
> +	if (!sock || !sock->peer)
> +		return;
> +
> +	queue_work(sock->peer->ovpn->events_wq, &sock->peer->tcp.tx_work);
> +}
> +
> +static bool ovpn_tcp_sock_is_readable(struct sock *sk)
> +
> +{
> +	struct ovpn_socket *sock;
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference_sk_user_data(sk);
> +	rcu_read_unlock();
> +
> +	if (!sock || !sock->peer)
> +		return false;
> +
> +	return !ptr_ring_empty_bh(&sock->recv_ring);
> +}
> +
> +static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> +			    int flags, int *addr_len)
> +{
> +	bool tmp = flags & MSG_DONTWAIT;
> +	DEFINE_WAIT_FUNC(wait, woken_wake_function);
> +	int ret, chunk, copied = 0;
> +	struct ovpn_socket *sock;
> +	struct sk_buff *skb;
> +	long timeo;
> +
> +	if (unlikely(flags & MSG_ERRQUEUE))
> +		return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
> +
> +	timeo = sock_rcvtimeo(sk, tmp);
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference_sk_user_data(sk);
> +	rcu_read_unlock();
> +
> +	if (!sock || !sock->peer) {
> +		ret = -EBADF;
> +		goto unlock;
> +	}
> +
> +	while (ptr_ring_empty_bh(&sock->recv_ring)) {
> +		if (sk->sk_shutdown & RCV_SHUTDOWN)
> +			return 0;
> +
> +		if (sock_flag(sk, SOCK_DONE))
> +			return 0;
> +
> +		if (!timeo) {
> +			ret = -EAGAIN;
> +			goto unlock;
> +		}
> +
> +		add_wait_queue(sk_sleep(sk), &wait);
> +		sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> +		sk_wait_event(sk, &timeo, !ptr_ring_empty_bh(&sock->recv_ring),
> +			      &wait);
> +		sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> +		remove_wait_queue(sk_sleep(sk), &wait);
> +
> +		/* take care of signals */
> +		if (signal_pending(current)) {
> +			ret = sock_intr_errno(timeo);
> +			goto unlock;
> +		}
> +	}
> +
> +	while (len && (skb = __ptr_ring_peek(&sock->recv_ring))) {
> +		chunk = min_t(size_t, len, skb->len);
> +		ret = skb_copy_datagram_msg(skb, 0, msg, chunk);
> +		if (ret < 0) {
> +			pr_err("ovpn: cannot copy TCP data to userspace: %d\n",
> +			       ret);
> +			kfree_skb(skb);
> +			goto unlock;
> +		}
> +
> +		__skb_pull(skb, chunk);
> +
> +		if (!skb->len) {
> +			/* skb was entirely consumed and can now be removed from
> +			 * the ring
> +			 */
> +			__ptr_ring_discard_one(&sock->recv_ring);
> +			consume_skb(skb);
> +		}
> +
> +		len -= chunk;
> +		copied += chunk;
> +	}
> +	ret = copied;
> +
> +unlock:
> +	return ret ? : -EAGAIN;
> +}
> +
> +static void ovpn_destroy_skb(void *skb)
> +{
> +	consume_skb(skb);
> +}
> +
> +void ovpn_tcp_socket_detach(struct socket *sock)
> +{
> +	struct ovpn_socket *ovpn_sock;
> +	struct ovpn_peer *peer;
> +
> +	if (!sock)
> +		return;
> +
> +	rcu_read_lock();
> +	ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
> +	rcu_read_unlock();
> +
> +	if (!ovpn_sock->peer)
> +		return;
> +
> +	peer = ovpn_sock->peer;
> +
> +	/* restore CBs that were saved in ovpn_tcp_socket_attach() */
> +	write_lock_bh(&sock->sk->sk_callback_lock);
> +	sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
> +	sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
> +	sock->sk->sk_prot = peer->tcp.sk_cb.prot;
> +	rcu_assign_sk_user_data(sock->sk, NULL);
> +	write_unlock_bh(&sock->sk->sk_callback_lock);
> +
> +	/* cancel any ongoing work. Done after removing the CBs so that these
> +	 * workers cannot be re-armed
> +	 */
> +	cancel_work_sync(&peer->tcp.tx_work);
> +
> +	ptr_ring_cleanup(&ovpn_sock->recv_ring, ovpn_destroy_skb);
> +	ptr_ring_cleanup(&peer->tcp.tx_ring, ovpn_destroy_skb);
> +}
> +
> +/* Try to send one skb (or part of it) over the TCP stream.
> + *
> + * Return 0 on success or a negative error code otherwise.
> + *
> + * Note that the skb is modified by putting away the data being sent, therefore
> + * the caller should check if skb->len is zero to understand if the full skb was
> + * sent or not.
> + */
> +static int ovpn_tcp_send_one(struct ovpn_peer *peer, struct sk_buff *skb)
> +{
> +	struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
> +	struct kvec iv = { 0 };
> +	int ret;
> +
> +	if (skb_linearize(skb) < 0) {
> +		net_err_ratelimited("%s: can't linearize packet\n", __func__);
> +		return -ENOMEM;
> +	}
> +
> +	/* initialize iv structure now as skb_linearize() may have changed
> +	 * skb->data
> +	 */
> +	iv.iov_base = skb->data;
> +	iv.iov_len = skb->len;
> +
> +	ret = kernel_sendmsg(peer->sock->sock, &msg, &iv, 1, iv.iov_len);
> +	if (ret > 0) {
> +		__skb_pull(skb, ret);
> +
> +		/* since we update per-cpu stats in process context,
> +		 * we need to disable softirqs
> +		 */
> +		local_bh_disable();
> +		dev_sw_netstats_tx_add(peer->ovpn->dev, 1, ret);
> +		local_bh_enable();
> +
> +		return 0;
> +	}
> +
> +	return ret;
> +}
> +
> +/* Process packets in TCP TX queue */
> +static void ovpn_tcp_tx_work(struct work_struct *work)
> +{
> +	struct ovpn_peer *peer;
> +	struct sk_buff *skb;
> +	int ret;
> +
> +	peer = container_of(work, struct ovpn_peer, tcp.tx_work);
> +	while ((skb = __ptr_ring_peek(&peer->tcp.tx_ring))) {
> +		ret = ovpn_tcp_send_one(peer, skb);
> +		if (ret < 0 && ret != -EAGAIN) {
> +			net_warn_ratelimited("%s: cannot send TCP packet to peer %u: %d\n",
> +					     __func__, peer->id, ret);
> +			/* in case of TCP error stop sending loop and delete
> +			 * peer
> +			 */
> +			ovpn_peer_del(peer,
> +				      OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
> +			break;
> +		} else if (!skb->len) {
> +			/* skb was entirely consumed and can now be removed from
> +			 * the ring
> +			 */
> +			__ptr_ring_discard_one(&peer->tcp.tx_ring);
> +			consume_skb(skb);
> +		}
> +
> +		/* give a chance to be rescheduled if needed */
> +		cond_resched();
> +	}
> +}
> +
> +/* Put packet into TCP TX queue and schedule a consumer */
> +void ovpn_queue_tcp_skb(struct ovpn_peer *peer, struct sk_buff *skb)
> +{
> +	int ret;
> +
> +	ret = ptr_ring_produce_bh(&peer->tcp.tx_ring, skb);
> +	if (ret < 0) {
> +		kfree_skb_list(skb);
> +		return;
> +	}
> +
> +	queue_work(peer->ovpn->events_wq, &peer->tcp.tx_work);
> +}
> +
> +/* Set TCP encapsulation callbacks */
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer)
> +{
> +	void *old_data;
> +	int ret;
> +
> +	INIT_WORK(&peer->tcp.tx_work, ovpn_tcp_tx_work);
> +
> +	ret = ptr_ring_init(&peer->tcp.tx_ring, OVPN_QUEUE_LEN, GFP_KERNEL);
> +	if (ret < 0) {
> +		netdev_err(peer->ovpn->dev, "cannot allocate TCP TX ring\n");
> +		return ret;
> +	}
> +
> +	peer->tcp.skb = NULL;
> +	peer->tcp.offset = 0;
> +	peer->tcp.data_len = 0;
> +
> +	write_lock_bh(&sock->sk->sk_callback_lock);
> +
> +	/* make sure no pre-existing encapsulation handler exists */
> +	rcu_read_lock();
> +	old_data = rcu_dereference_sk_user_data(sock->sk);
> +	rcu_read_unlock();
> +	if (old_data) {
> +		netdev_err(peer->ovpn->dev,
> +			   "provided socket already taken by other user\n");
> +		ret = -EBUSY;
> +		goto err;
> +	}
> +
> +	/* sanity check */
> +	if (sock->sk->sk_protocol != IPPROTO_TCP) {
> +		netdev_err(peer->ovpn->dev,
> +			   "provided socket is UDP but expected TCP\n");
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	/* only fully connected sockets are expected. Connection should be
> +	 * handled in userspace
> +	 */
> +	if (sock->sk->sk_state != TCP_ESTABLISHED) {
> +		netdev_err(peer->ovpn->dev,
> +			   "provided TCP socket is not in ESTABLISHED state: %d\n",
> +			   sock->sk->sk_state);
> +		ret = -EINVAL;
> +		goto err;
> +	}
> +
> +	/* save current CBs so that they can be restored upon socket release */
> +	peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
> +	peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
> +	peer->tcp.sk_cb.prot = sock->sk->sk_prot;
> +
> +	/* assign our static CBs */
> +	sock->sk->sk_data_ready = ovpn_tcp_data_ready;
> +	sock->sk->sk_write_space = ovpn_tcp_write_space;
> +	sock->sk->sk_prot = &ovpn_tcp_prot;
> +
> +	write_unlock_bh(&sock->sk->sk_callback_lock);
> +
> +	return 0;
> +err:
> +	write_unlock_bh(&sock->sk->sk_callback_lock);
> +	ptr_ring_cleanup(&peer->tcp.tx_ring, NULL);
> +
> +	return ret;
> +}
> +
> +int __init ovpn_tcp_init(void)
> +{
> +	/* We need to substitute the recvmsg and the sock_is_readable
> +	 * callbacks in the sk_prot member of the sock object for TCP
> +	 * sockets.
> +	 *
> +	 * However sock->sk_prot is a pointer to a static variable and
> +	 * therefore we can't directly modify it, otherwise every socket
> +	 * pointing to it will be affected.
> +	 *
> +	 * For this reason we create our own static copy and modify what
> +	 * we need. Then we make sk_prot point to this copy
> +	 * (in ovpn_tcp_socket_attach())
> +	 */
> +	ovpn_tcp_prot = tcp_prot;
> +	ovpn_tcp_prot.recvmsg = ovpn_tcp_recvmsg;
> +	ovpn_tcp_prot.sock_is_readable = ovpn_tcp_sock_is_readable;
> +
> +	return 0;
> +}
> diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h
> new file mode 100644
> index 000000000000..7e73f6e76e6c
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.h
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_TCP_H_
> +#define _NET_OVPN_TCP_H_
> +
> +#include <linux/net.h>
> +#include <linux/skbuff.h>
> +#include <linux/types.h>
> +#include <linux/workqueue.h>
> +
> +#include "peer.h"
> +
> +/* Initialize TCP static objects */
> +int __init ovpn_tcp_init(void);
> +
> +void ovpn_queue_tcp_skb(struct ovpn_peer *peer, struct sk_buff *skb);
> +
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer);
> +void ovpn_tcp_socket_detach(struct socket *sock);
> +
> +/* Prepare skb and enqueue it for sending to peer.
> + *
> + * Preparation consists of prepending the skb payload with its size.
> + * Required by the OpenVPN protocol in order to extract packets from
> + * the TCP stream on the receiver side.
> + */
> +static inline void ovpn_tcp_send_skb(struct ovpn_peer *peer,
> +				     struct sk_buff *skb)
> +{
> +	u16 len = skb->len;
> +
> +	*(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
> +	ovpn_queue_tcp_skb(peer, skb);
> +}
> +
> +#endif /* _NET_OVPN_TCP_H_ */

-- 
Antonio Quartulli
OpenVPN Inc.

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ