[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <4b42fa39-f204-481d-a097-7d41da53f7d6@openvpn.net>
Date: Mon, 13 May 2024 15:37:54 +0200
From: Antonio Quartulli <antonio@...nvpn.net>
To: Simon Horman <horms@...nel.org>
Cc: Jakub Kicinski <kuba@...nel.org>, Sergey Ryazanov
<ryazanov.s.a@...il.com>, Paolo Abeni <pabeni@...hat.com>,
Eric Dumazet <edumazet@...gle.com>, Andrew Lunn <andrew@...n.ch>,
Esben Haabendal <esben@...nix.com>, netdev@...r.kernel.org
Subject: Re: [PATCH net-next v3 13/24] ovpn: implement TCP transport
Hi Simon,
On 06/05/2024 03:16, Antonio Quartulli wrote:
[...]
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index b5ff59a4b40f..ac4907705d98 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -33,6 +33,16 @@
> * @netif_rx_ring: queue of packets to be sent to the netdevice via NAPI
> * @napi: NAPI object
> * @sock: the socket being used to talk to this peer
> + * @tcp.tx_ring: queue for packets to be forwarded to userspace (TCP only)
> + * @tcp.tx_work: work for processing outgoing socket data (TCP only)
> + * @tcp.rx_work: wok for processing incoming socket data (TCP only)
> + * @tcp.raw_len: next packet length as read from the stream (TCP only)
can you please help me with the following warning from kerneldoc?
As you can see below, raw_len is an array.
May that be the reason why the script isn't picking it up correctly?
drivers/net/ovpn/peer.h:101: warning: Function parameter or struct
member 'raw_len' not described in 'ovpn_peer'
drivers/net/ovpn/peer.h:101: warning: Excess struct member 'tcp.raw_len'
description in 'ovpn_peer'
(line number may differ as I am in the middle of a rebase)
Regards,
> + * @tcp.skb: next packet being filled with data from the stream (TCP only)
> + * @tcp.offset: position of the next byte to write in the skb (TCP only)
> + * @tcp.data_len: next packet length converted to host order (TCP only)
> + * @tcp.sk_cb.sk_data_ready: pointer to original cb
> + * @tcp.sk_cb.sk_write_space: pointer to original cb
> + * @tcp.sk_cb.prot: pointer to original prot object
> * @crypto: the crypto configuration (ciphers, keys, etc..)
> * @dst_cache: cache for dst_entry used to send to peer
> * @bind: remote peer binding
> @@ -59,6 +69,25 @@ struct ovpn_peer {
> struct ptr_ring netif_rx_ring;
> struct napi_struct napi;
> struct ovpn_socket *sock;
> + /* state of the TCP reading. Needed to keep track of how much of a
> + * single packet has already been read from the stream and how much is
> + * missing
> + */
> + struct {
> + struct ptr_ring tx_ring;
> + struct work_struct tx_work;
> + struct work_struct rx_work;
> +
> + u8 raw_len[sizeof(u16)];
> + struct sk_buff *skb;
> + u16 offset;
> + u16 data_len;
> + struct {
> + void (*sk_data_ready)(struct sock *sk);
> + void (*sk_write_space)(struct sock *sk);
> + struct proto *prot;
> + } sk_cb;
> + } tcp;
> struct ovpn_crypto_state crypto;
> struct dst_cache dst_cache;
> struct ovpn_bind __rcu *bind;
> diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
> new file mode 100644
> index 000000000000..ba92811e12ff
> --- /dev/null
> +++ b/drivers/net/ovpn/skb.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* OpenVPN data channel offload
> + *
> + * Copyright (C) 2020-2024 OpenVPN, Inc.
> + *
> + * Author: Antonio Quartulli <antonio@...nvpn.net>
> + * James Yonan <james@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_SKB_H_
> +#define _NET_OVPN_SKB_H_
> +
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/ip.h>
> +#include <linux/skbuff.h>
> +#include <linux/socket.h>
> +#include <linux/types.h>
> +
> +#define OVPN_SKB_CB(skb) ((struct ovpn_skb_cb *)&((skb)->cb))
> +
> +struct ovpn_skb_cb {
> + union {
> + struct in_addr ipv4;
> + struct in6_addr ipv6;
> + } local;
> + sa_family_t sa_fam;
> +};
> +
> +/* Return IP protocol version from skb header.
> + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
> + */
> +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
> +{
> + __be16 proto = 0;
> +
> + /* skb could be non-linear,
> + * make sure IP header is in non-fragmented part
> + */
> + if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> + return 0;
> +
> + if (ip_hdr(skb)->version == 4)
> + proto = htons(ETH_P_IP);
> + else if (ip_hdr(skb)->version == 6)
> + proto = htons(ETH_P_IPV6);
> +
> + return proto;
> +}
> +
> +#endif /* _NET_OVPN_SKB_H_ */
> diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
> index e099a61b03fa..004db5b13663 100644
> --- a/drivers/net/ovpn/socket.c
> +++ b/drivers/net/ovpn/socket.c
> @@ -16,6 +16,7 @@
> #include "packet.h"
> #include "peer.h"
> #include "socket.h"
> +#include "tcp.h"
> #include "udp.h"
>
> /* Finalize release of socket, called after RCU grace period */
> @@ -26,6 +27,8 @@ static void ovpn_socket_detach(struct socket *sock)
>
> if (sock->sk->sk_protocol == IPPROTO_UDP)
> ovpn_udp_socket_detach(sock);
> + else if (sock->sk->sk_protocol == IPPROTO_TCP)
> + ovpn_tcp_socket_detach(sock);
>
> sockfd_put(sock);
> }
> @@ -69,6 +72,8 @@ static int ovpn_socket_attach(struct socket *sock, struct ovpn_peer *peer)
>
> if (sock->sk->sk_protocol == IPPROTO_UDP)
> ret = ovpn_udp_socket_attach(sock, peer->ovpn);
> + else if (sock->sk->sk_protocol == IPPROTO_TCP)
> + ret = ovpn_tcp_socket_attach(sock, peer);
>
> return ret;
> }
> @@ -124,6 +129,21 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
> ovpn_sock->sock = sock;
> kref_init(&ovpn_sock->refcount);
>
> + /* TCP sockets are per-peer, therefore they are linked to their unique
> + * peer
> + */
> + if (sock->sk->sk_protocol == IPPROTO_TCP) {
> + ovpn_sock->peer = peer;
> + ret = ptr_ring_init(&ovpn_sock->recv_ring, OVPN_QUEUE_LEN,
> + GFP_KERNEL);
> + if (ret < 0) {
> + netdev_err(peer->ovpn->dev, "%s: cannot allocate TCP recv ring\n",
> + __func__);
> + kfree(ovpn_sock);
> + return ERR_PTR(ret);
> + }
> + }
> +
> rcu_assign_sk_user_data(sock->sk, ovpn_sock);
>
> return ovpn_sock;
> diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
> index 0d23de5a9344..88c6271ba5c7 100644
> --- a/drivers/net/ovpn/socket.h
> +++ b/drivers/net/ovpn/socket.h
> @@ -21,12 +21,25 @@ struct ovpn_peer;
> /**
> * struct ovpn_socket - a kernel socket referenced in the ovpn code
> * @ovpn: ovpn instance owning this socket (UDP only)
> + * @peer: unique peer transmitting over this socket (TCP only)
> + * @recv_ring: queue where non-data packets directed to userspace are stored
> * @sock: the low level sock object
> * @refcount: amount of contexts currently referencing this object
> * @rcu: member used to schedule RCU destructor callback
> */
> struct ovpn_socket {
> - struct ovpn_struct *ovpn;
> + union {
> + /* the VPN session object owning this socket (UDP only) */
> + struct ovpn_struct *ovpn;
> +
> + /* TCP only */
> + struct {
> + /** @peer: unique peer transmitting over this socket */
> + struct ovpn_peer *peer;
> + struct ptr_ring recv_ring;
> + };
> + };
> +
> struct socket *sock;
> struct kref refcount;
> struct rcu_head rcu;
> diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
> new file mode 100644
> index 000000000000..84ad7cd4fc4f
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.c
> @@ -0,0 +1,511 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* OpenVPN data channel offload
> + *
> + * Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + * Author: Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#include <linux/ptr_ring.h>
> +#include <linux/skbuff.h>
> +#include <net/tcp.h>
> +#include <net/route.h>
> +
> +#include "ovpnstruct.h"
> +#include "main.h"
> +#include "io.h"
> +#include "packet.h"
> +#include "peer.h"
> +#include "proto.h"
> +#include "skb.h"
> +#include "socket.h"
> +#include "tcp.h"
> +
> +static struct proto ovpn_tcp_prot;
> +
> +static int ovpn_tcp_read_sock(read_descriptor_t *desc, struct sk_buff *in_skb,
> + unsigned int in_offset, size_t in_len)
> +{
> + struct sock *sk = desc->arg.data;
> + struct ovpn_socket *sock;
> + struct ovpn_skb_cb *cb;
> + struct ovpn_peer *peer;
> + size_t chunk, copied = 0;
> + void *data;
> + u16 len;
> + int st;
> +
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + rcu_read_unlock();
> +
> + if (unlikely(!sock || !sock->peer)) {
> + pr_err("ovpn: read_sock triggered for socket with no metadata\n");
> + desc->error = -EINVAL;
> + return 0;
> + }
> +
> + peer = sock->peer;
> +
> + while (in_len > 0) {
> + /* no skb allocated means that we have to read (or finish
> + * reading) the 2 bytes prefix containing the actual packet
> + * size.
> + */
> + if (!peer->tcp.skb) {
> + chunk = min_t(size_t, in_len,
> + sizeof(u16) - peer->tcp.offset);
> + WARN_ON(skb_copy_bits(in_skb, in_offset,
> + peer->tcp.raw_len +
> + peer->tcp.offset, chunk) < 0);
> + peer->tcp.offset += chunk;
> +
> + /* keep on reading until we got the whole packet size */
> + if (peer->tcp.offset != sizeof(u16))
> + goto next_read;
> +
> + len = ntohs(*(__be16 *)peer->tcp.raw_len);
> + /* invalid packet length: this is a fatal TCP error */
> + if (!len) {
> + netdev_err(peer->ovpn->dev,
> + "%s: received invalid packet length: %d\n",
> + __func__, len);
> + desc->error = -EINVAL;
> + goto err;
> + }
> +
> + /* add 2 bytes to allocated space (and immediately
> + * reserve them) for packet length prepending, in case
> + * the skb has to be forwarded to userspace
> + */
> + peer->tcp.skb =
> + netdev_alloc_skb_ip_align(peer->ovpn->dev,
> + len + sizeof(u16));
> + if (!peer->tcp.skb) {
> + desc->error = -ENOMEM;
> + goto err;
> + }
> + skb_reserve(peer->tcp.skb, sizeof(u16));
> +
> + peer->tcp.offset = 0;
> + peer->tcp.data_len = len;
> + } else {
> + chunk = min_t(size_t, in_len,
> + peer->tcp.data_len - peer->tcp.offset);
> +
> + /* extend skb to accommodate the new chunk and copy it
> + * from the input skb
> + */
> + data = skb_put(peer->tcp.skb, chunk);
> + WARN_ON(skb_copy_bits(in_skb, in_offset, data,
> + chunk) < 0);
> + peer->tcp.offset += chunk;
> +
> + /* keep on reading until we get the full packet */
> + if (peer->tcp.offset != peer->tcp.data_len)
> + goto next_read;
> +
> + /* do not perform IP caching for TCP connections */
> + cb = OVPN_SKB_CB(peer->tcp.skb);
> + cb->sa_fam = AF_UNSPEC;
> +
> + /* At this point we know the packet is from a configured
> + * peer.
> + * DATA_V2 packets are handled in kernel space, the rest
> + * goes to user space.
> + *
> + * Queue skb for sending to userspace via recvmsg on the
> + * socket
> + */
> + if (likely(ovpn_opcode_from_skb(peer->tcp.skb, 0) ==
> + OVPN_DATA_V2)) {
> + /* hold reference to peer as required by
> + * ovpn_recv().
> + *
> + * NOTE: in this context we should already be
> + * holding a reference to this peer, therefore
> + * ovpn_peer_hold() is not expected to fail
> + */
> + WARN_ON(!ovpn_peer_hold(peer));
> + st = ovpn_recv(peer->ovpn, peer, peer->tcp.skb);
> + if (unlikely(st < 0))
> + ovpn_peer_put(peer);
> +
> + } else {
> + /* prepend skb with packet len. this way
> + * userspace can parse the packet as if it just
> + * arrived from the remote endpoint
> + */
> + void *raw_len = __skb_push(peer->tcp.skb,
> + sizeof(u16));
> +
> + memcpy(raw_len, peer->tcp.raw_len, sizeof(u16));
> +
> + st = ptr_ring_produce_bh(&peer->sock->recv_ring,
> + peer->tcp.skb);
> + if (likely(!st))
> + peer->tcp.sk_cb.sk_data_ready(sk);
> + }
> +
> + /* skb not consumed - free it now */
> + if (unlikely(st < 0))
> + kfree_skb(peer->tcp.skb);
> +
> + peer->tcp.skb = NULL;
> + peer->tcp.offset = 0;
> + peer->tcp.data_len = 0;
> + }
> +next_read:
> + in_len -= chunk;
> + in_offset += chunk;
> + copied += chunk;
> + }
> +
> + return copied;
> +err:
> + netdev_err(peer->ovpn->dev, "cannot process incoming TCP data: %d\n",
> + desc->error);
> + ovpn_peer_del(peer, OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
> + return 0;
> +}
> +
> +static void ovpn_tcp_data_ready(struct sock *sk)
> +{
> + struct socket *sock = sk->sk_socket;
> + read_descriptor_t desc;
> +
> + if (unlikely(!sock || !sock->ops || !sock->ops->read_sock))
> + return;
> +
> + desc.arg.data = sk;
> + desc.error = 0;
> + desc.count = 1;
> +
> + sock->ops->read_sock(sk, &desc, ovpn_tcp_read_sock);
> +}
> +
> +static void ovpn_tcp_write_space(struct sock *sk)
> +{
> + struct ovpn_socket *sock;
> +
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + rcu_read_unlock();
> +
> + if (!sock || !sock->peer)
> + return;
> +
> + queue_work(sock->peer->ovpn->events_wq, &sock->peer->tcp.tx_work);
> +}
> +
> +static bool ovpn_tcp_sock_is_readable(struct sock *sk)
> +
> +{
> + struct ovpn_socket *sock;
> +
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + rcu_read_unlock();
> +
> + if (!sock || !sock->peer)
> + return false;
> +
> + return !ptr_ring_empty_bh(&sock->recv_ring);
> +}
> +
> +static int ovpn_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
> + int flags, int *addr_len)
> +{
> + bool tmp = flags & MSG_DONTWAIT;
> + DEFINE_WAIT_FUNC(wait, woken_wake_function);
> + int ret, chunk, copied = 0;
> + struct ovpn_socket *sock;
> + struct sk_buff *skb;
> + long timeo;
> +
> + if (unlikely(flags & MSG_ERRQUEUE))
> + return sock_recv_errqueue(sk, msg, len, SOL_IP, IP_RECVERR);
> +
> + timeo = sock_rcvtimeo(sk, tmp);
> +
> + rcu_read_lock();
> + sock = rcu_dereference_sk_user_data(sk);
> + rcu_read_unlock();
> +
> + if (!sock || !sock->peer) {
> + ret = -EBADF;
> + goto unlock;
> + }
> +
> + while (ptr_ring_empty_bh(&sock->recv_ring)) {
> + if (sk->sk_shutdown & RCV_SHUTDOWN)
> + return 0;
> +
> + if (sock_flag(sk, SOCK_DONE))
> + return 0;
> +
> + if (!timeo) {
> + ret = -EAGAIN;
> + goto unlock;
> + }
> +
> + add_wait_queue(sk_sleep(sk), &wait);
> + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> + sk_wait_event(sk, &timeo, !ptr_ring_empty_bh(&sock->recv_ring),
> + &wait);
> + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
> + remove_wait_queue(sk_sleep(sk), &wait);
> +
> + /* take care of signals */
> + if (signal_pending(current)) {
> + ret = sock_intr_errno(timeo);
> + goto unlock;
> + }
> + }
> +
> + while (len && (skb = __ptr_ring_peek(&sock->recv_ring))) {
> + chunk = min_t(size_t, len, skb->len);
> + ret = skb_copy_datagram_msg(skb, 0, msg, chunk);
> + if (ret < 0) {
> + pr_err("ovpn: cannot copy TCP data to userspace: %d\n",
> + ret);
> + kfree_skb(skb);
> + goto unlock;
> + }
> +
> + __skb_pull(skb, chunk);
> +
> + if (!skb->len) {
> + /* skb was entirely consumed and can now be removed from
> + * the ring
> + */
> + __ptr_ring_discard_one(&sock->recv_ring);
> + consume_skb(skb);
> + }
> +
> + len -= chunk;
> + copied += chunk;
> + }
> + ret = copied;
> +
> +unlock:
> + return ret ? : -EAGAIN;
> +}
> +
> +static void ovpn_destroy_skb(void *skb)
> +{
> + consume_skb(skb);
> +}
> +
> +void ovpn_tcp_socket_detach(struct socket *sock)
> +{
> + struct ovpn_socket *ovpn_sock;
> + struct ovpn_peer *peer;
> +
> + if (!sock)
> + return;
> +
> + rcu_read_lock();
> + ovpn_sock = rcu_dereference_sk_user_data(sock->sk);
> + rcu_read_unlock();
> +
> + if (!ovpn_sock->peer)
> + return;
> +
> + peer = ovpn_sock->peer;
> +
> + /* restore CBs that were saved in ovpn_sock_set_tcp_cb() */
> + write_lock_bh(&sock->sk->sk_callback_lock);
> + sock->sk->sk_data_ready = peer->tcp.sk_cb.sk_data_ready;
> + sock->sk->sk_write_space = peer->tcp.sk_cb.sk_write_space;
> + sock->sk->sk_prot = peer->tcp.sk_cb.prot;
> + rcu_assign_sk_user_data(sock->sk, NULL);
> + write_unlock_bh(&sock->sk->sk_callback_lock);
> +
> + /* cancel any ongoing work. Done after removing the CBs so that these
> + * workers cannot be re-armed
> + */
> + cancel_work_sync(&peer->tcp.tx_work);
> +
> + ptr_ring_cleanup(&ovpn_sock->recv_ring, ovpn_destroy_skb);
> + ptr_ring_cleanup(&peer->tcp.tx_ring, ovpn_destroy_skb);
> +}
> +
> +/* Try to send one skb (or part of it) over the TCP stream.
> + *
> + * Return 0 on success or a negative error code otherwise.
> + *
> + * Note that the skb is modified by putting away the data being sent, therefore
> + * the caller should check if skb->len is zero to understand if the full skb was
> + * sent or not.
> + */
> +static int ovpn_tcp_send_one(struct ovpn_peer *peer, struct sk_buff *skb)
> +{
> + struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
> + struct kvec iv = { 0 };
> + int ret;
> +
> + if (skb_linearize(skb) < 0) {
> + net_err_ratelimited("%s: can't linearize packet\n", __func__);
> + return -ENOMEM;
> + }
> +
> + /* initialize iv structure now as skb_linearize() may have changed
> + * skb->data
> + */
> + iv.iov_base = skb->data;
> + iv.iov_len = skb->len;
> +
> + ret = kernel_sendmsg(peer->sock->sock, &msg, &iv, 1, iv.iov_len);
> + if (ret > 0) {
> + __skb_pull(skb, ret);
> +
> + /* since we update per-cpu stats in process context,
> + * we need to disable softirqs
> + */
> + local_bh_disable();
> + dev_sw_netstats_tx_add(peer->ovpn->dev, 1, ret);
> + local_bh_enable();
> +
> + return 0;
> + }
> +
> + return ret;
> +}
> +
> +/* Process packets in TCP TX queue */
> +static void ovpn_tcp_tx_work(struct work_struct *work)
> +{
> + struct ovpn_peer *peer;
> + struct sk_buff *skb;
> + int ret;
> +
> + peer = container_of(work, struct ovpn_peer, tcp.tx_work);
> + while ((skb = __ptr_ring_peek(&peer->tcp.tx_ring))) {
> + ret = ovpn_tcp_send_one(peer, skb);
> + if (ret < 0 && ret != -EAGAIN) {
> + net_warn_ratelimited("%s: cannot send TCP packet to peer %u: %d\n",
> + __func__, peer->id, ret);
> + /* in case of TCP error stop sending loop and delete
> + * peer
> + */
> + ovpn_peer_del(peer,
> + OVPN_DEL_PEER_REASON_TRANSPORT_ERROR);
> + break;
> + } else if (!skb->len) {
> + /* skb was entirely consumed and can now be removed from
> + * the ring
> + */
> + __ptr_ring_discard_one(&peer->tcp.tx_ring);
> + consume_skb(skb);
> + }
> +
> + /* give a chance to be rescheduled if needed */
> + cond_resched();
> + }
> +}
> +
> +/* Put packet into TCP TX queue and schedule a consumer */
> +void ovpn_queue_tcp_skb(struct ovpn_peer *peer, struct sk_buff *skb)
> +{
> + int ret;
> +
> + ret = ptr_ring_produce_bh(&peer->tcp.tx_ring, skb);
> + if (ret < 0) {
> + kfree_skb_list(skb);
> + return;
> + }
> +
> + queue_work(peer->ovpn->events_wq, &peer->tcp.tx_work);
> +}
> +
> +/* Set TCP encapsulation callbacks */
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer)
> +{
> + void *old_data;
> + int ret;
> +
> + INIT_WORK(&peer->tcp.tx_work, ovpn_tcp_tx_work);
> +
> + ret = ptr_ring_init(&peer->tcp.tx_ring, OVPN_QUEUE_LEN, GFP_KERNEL);
> + if (ret < 0) {
> + netdev_err(peer->ovpn->dev, "cannot allocate TCP TX ring\n");
> + return ret;
> + }
> +
> + peer->tcp.skb = NULL;
> + peer->tcp.offset = 0;
> + peer->tcp.data_len = 0;
> +
> + write_lock_bh(&sock->sk->sk_callback_lock);
> +
> + /* make sure no pre-existing encapsulation handler exists */
> + rcu_read_lock();
> + old_data = rcu_dereference_sk_user_data(sock->sk);
> + rcu_read_unlock();
> + if (old_data) {
> + netdev_err(peer->ovpn->dev,
> + "provided socket already taken by other user\n");
> + ret = -EBUSY;
> + goto err;
> + }
> +
> + /* sanity check */
> + if (sock->sk->sk_protocol != IPPROTO_TCP) {
> + netdev_err(peer->ovpn->dev,
> + "provided socket is UDP but expected TCP\n");
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + /* only a fully connected socket are expected. Connection should be
> + * handled in userspace
> + */
> + if (sock->sk->sk_state != TCP_ESTABLISHED) {
> + netdev_err(peer->ovpn->dev,
> + "provided TCP socket is not in ESTABLISHED state: %d\n",
> + sock->sk->sk_state);
> + ret = -EINVAL;
> + goto err;
> + }
> +
> + /* save current CBs so that they can be restored upon socket release */
> + peer->tcp.sk_cb.sk_data_ready = sock->sk->sk_data_ready;
> + peer->tcp.sk_cb.sk_write_space = sock->sk->sk_write_space;
> + peer->tcp.sk_cb.prot = sock->sk->sk_prot;
> +
> + /* assign our static CBs */
> + sock->sk->sk_data_ready = ovpn_tcp_data_ready;
> + sock->sk->sk_write_space = ovpn_tcp_write_space;
> + sock->sk->sk_prot = &ovpn_tcp_prot;
> +
> + write_unlock_bh(&sock->sk->sk_callback_lock);
> +
> + return 0;
> +err:
> + write_unlock_bh(&sock->sk->sk_callback_lock);
> + ptr_ring_cleanup(&peer->tcp.tx_ring, NULL);
> +
> + return ret;
> +}
> +
> +int __init ovpn_tcp_init(void)
> +{
> + /* We need to substitute the recvmsg and the sock_is_readable
> + * callbacks in the sk_prot member of the sock object for TCP
> + * sockets.
> + *
> + * However sock->sk_prot is a pointer to a static variable and
> + * therefore we can't directly modify it, otherwise every socket
> + * pointing to it will be affected.
> + *
> + * For this reason we create our own static copy and modify what
> + * we need. Then we make sk_prot point to this copy
> + * (in ovpn_tcp_socket_attach())
> + */
> + ovpn_tcp_prot = tcp_prot;
> + ovpn_tcp_prot.recvmsg = ovpn_tcp_recvmsg;
> + ovpn_tcp_prot.sock_is_readable = ovpn_tcp_sock_is_readable;
> +
> + return 0;
> +}
> diff --git a/drivers/net/ovpn/tcp.h b/drivers/net/ovpn/tcp.h
> new file mode 100644
> index 000000000000..7e73f6e76e6c
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.h
> @@ -0,0 +1,42 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/* OpenVPN data channel offload
> + *
> + * Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + * Author: Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_TCP_H_
> +#define _NET_OVPN_TCP_H_
> +
> +#include <linux/net.h>
> +#include <linux/skbuff.h>
> +#include <linux/types.h>
> +#include <linux/workqueue.h>
> +
> +#include "peer.h"
> +
> +/* Initialize TCP static objects */
> +int __init ovpn_tcp_init(void);
> +
> +void ovpn_queue_tcp_skb(struct ovpn_peer *peer, struct sk_buff *skb);
> +
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer);
> +void ovpn_tcp_socket_detach(struct socket *sock);
> +
> +/* Prepare skb and enqueue it for sending to peer.
> + *
> + * Preparation consist in prepending the skb payload with its size.
> + * Required by the OpenVPN protocol in order to extract packets from
> + * the TCP stream on the receiver side.
> + */
> +static inline void ovpn_tcp_send_skb(struct ovpn_peer *peer,
> + struct sk_buff *skb)
> +{
> + u16 len = skb->len;
> +
> + *(__be16 *)__skb_push(skb, sizeof(u16)) = htons(len);
> + ovpn_queue_tcp_skb(peer, skb);
> +}
> +
> +#endif /* _NET_OVPN_TCP_H_ */
--
Antonio Quartulli
OpenVPN Inc.
Powered by blists - more mailing lists