lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date: Mon, 13 May 2024 16:50:25 +0200
From: Sabrina Dubroca <sd@...asysnail.net>
To: Antonio Quartulli <antonio@...nvpn.net>
Cc: netdev@...r.kernel.org, Jakub Kicinski <kuba@...nel.org>,
	Sergey Ryazanov <ryazanov.s.a@...il.com>,
	Paolo Abeni <pabeni@...hat.com>, Eric Dumazet <edumazet@...gle.com>,
	Andrew Lunn <andrew@...n.ch>, Esben Haabendal <esben@...nix.com>
Subject: Re: [PATCH net-next v3 13/24] ovpn: implement TCP transport

2024-05-06, 03:16:26 +0200, Antonio Quartulli wrote:
> @@ -307,6 +308,7 @@ static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
>  /* Process packets in TX queue in a transport-specific way.
>   *
>   * UDP transport - encrypt and send across the tunnel.
> + * TCP transport - encrypt and put into TCP TX queue.
>   */
>  void ovpn_encrypt_work(struct work_struct *work)
>  {
> @@ -340,6 +342,9 @@ void ovpn_encrypt_work(struct work_struct *work)
>  					ovpn_udp_send_skb(peer->ovpn, peer,
>  							  curr);
>  					break;
> +				case IPPROTO_TCP:
> +					ovpn_tcp_send_skb(peer, curr);
> +					break;
>  				default:
>  					/* no transport configured yet */
>  					consume_skb(skb);
> diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
> index 9ae9844dd281..a04d6e55a473 100644
> --- a/drivers/net/ovpn/main.c
> +++ b/drivers/net/ovpn/main.c
> @@ -23,6 +23,7 @@
>  #include "io.h"
>  #include "packet.h"
>  #include "peer.h"
> +#include "tcp.h"
>  
>  /* Driver info */
>  #define DRV_DESCRIPTION	"OpenVPN data channel offload (ovpn)"
> @@ -247,8 +248,14 @@ static struct pernet_operations ovpn_pernet_ops = {
>  
>  static int __init ovpn_init(void)
>  {
> -	int err = register_netdevice_notifier(&ovpn_netdev_notifier);
> +	int err = ovpn_tcp_init();
>  
> +	if (err) {

ovpn_tcp_init cannot fail (and if it could, you'd need to clean up
when register_netdevice_notifier fails). I'd make ovpn_tcp_init void
and kill this check.

> +		pr_err("ovpn: cannot initialize TCP component: %d\n", err);
> +		return err;
> +	}
> +
> +	err = register_netdevice_notifier(&ovpn_netdev_notifier);
>  	if (err) {
>  		pr_err("ovpn: can't register netdevice notifier: %d\n", err);
>  		return err;
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index b5ff59a4b40f..ac4907705d98 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -33,6 +33,16 @@
>   * @netif_rx_ring: queue of packets to be sent to the netdevice via NAPI
>   * @napi: NAPI object
>   * @sock: the socket being used to talk to this peer
> + * @tcp.tx_ring: queue for packets to be forwarded to userspace (TCP only)
> + * @tcp.tx_work: work for processing outgoing socket data (TCP only)
> + * @tcp.rx_work: wok for processing incoming socket data (TCP only)

Never actually used.
If you keep it: s/wok/work/

> + * @tcp.raw_len: next packet length as read from the stream (TCP only)
> + * @tcp.skb: next packet being filled with data from the stream (TCP only)
> + * @tcp.offset: position of the next byte to write in the skb (TCP only)
> + * @tcp.data_len: next packet length converted to host order (TCP only)

It would be nice to add information about whether they're used for TX or RX.

> + * @tcp.sk_cb.sk_data_ready: pointer to original cb
> + * @tcp.sk_cb.sk_write_space: pointer to original cb
> + * @tcp.sk_cb.prot: pointer to original prot object
>   * @crypto: the crypto configuration (ciphers, keys, etc..)
>   * @dst_cache: cache for dst_entry used to send to peer
>   * @bind: remote peer binding
> @@ -59,6 +69,25 @@ struct ovpn_peer {
>  	struct ptr_ring netif_rx_ring;
>  	struct napi_struct napi;
>  	struct ovpn_socket *sock;
> +	/* state of the TCP reading. Needed to keep track of how much of a
> +	 * single packet has already been read from the stream and how much is
> +	 * missing
> +	 */
> +	struct {
> +		struct ptr_ring tx_ring;
> +		struct work_struct tx_work;
> +		struct work_struct rx_work;
> +
> +		u8 raw_len[sizeof(u16)];

Why not u16 or __be16 for this one?

> +		struct sk_buff *skb;
> +		u16 offset;
> +		u16 data_len;
> +		struct {
> +			void (*sk_data_ready)(struct sock *sk);
> +			void (*sk_write_space)(struct sock *sk);
> +			struct proto *prot;
> +		} sk_cb;
> +	} tcp;
>  	struct ovpn_crypto_state crypto;
>  	struct dst_cache dst_cache;
>  	struct ovpn_bind __rcu *bind;
> diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
> new file mode 100644
> index 000000000000..ba92811e12ff
> --- /dev/null
> +++ b/drivers/net/ovpn/skb.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2020-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@...nvpn.net>
> + *		James Yonan <james@...nvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_SKB_H_
> +#define _NET_OVPN_SKB_H_
> +
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/ip.h>
> +#include <linux/skbuff.h>
> +#include <linux/socket.h>
> +#include <linux/types.h>
> +
> +#define OVPN_SKB_CB(skb) ((struct ovpn_skb_cb *)&((skb)->cb))
> +
> +struct ovpn_skb_cb {
> +	union {
> +		struct in_addr ipv4;
> +		struct in6_addr ipv6;
> +	} local;
> +	sa_family_t sa_fam;
> +};
> +
> +/* Return IP protocol version from skb header.
> + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
> + */
> +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)

A dupe of this function exists in drivers/net/ovpn/io.c. I guess you
can just introduce skb.h from the start (with only
ovpn_ip_check_protocol at first).

> +{
> +	__be16 proto = 0;
> +
> +	/* skb could be non-linear,
> +	 * make sure IP header is in non-fragmented part
> +	 */
> +	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> +		return 0;
> +
> +	if (ip_hdr(skb)->version == 4)
> +		proto = htons(ETH_P_IP);
> +	else if (ip_hdr(skb)->version == 6)
> +		proto = htons(ETH_P_IPV6);
> +
> +	return proto;
> +}
> +
> +#endif /* _NET_OVPN_SKB_H_ */
> diff --git a/drivers/net/ovpn/socket.c b/drivers/net/ovpn/socket.c
> index e099a61b03fa..004db5b13663 100644
> --- a/drivers/net/ovpn/socket.c
> +++ b/drivers/net/ovpn/socket.c
> @@ -16,6 +16,7 @@
>  #include "packet.h"
>  #include "peer.h"
>  #include "socket.h"
> +#include "tcp.h"
>  #include "udp.h"
>  
>  /* Finalize release of socket, called after RCU grace period */
> @@ -26,6 +27,8 @@ static void ovpn_socket_detach(struct socket *sock)
>  
>  	if (sock->sk->sk_protocol == IPPROTO_UDP)
>  		ovpn_udp_socket_detach(sock);
> +	else if (sock->sk->sk_protocol == IPPROTO_TCP)
> +		ovpn_tcp_socket_detach(sock);
>  
>  	sockfd_put(sock);
>  }
> @@ -69,6 +72,8 @@ static int ovpn_socket_attach(struct socket *sock, struct ovpn_peer *peer)
>  
>  	if (sock->sk->sk_protocol == IPPROTO_UDP)
>  		ret = ovpn_udp_socket_attach(sock, peer->ovpn);
> +	else if (sock->sk->sk_protocol == IPPROTO_TCP)
> +		ret = ovpn_tcp_socket_attach(sock, peer);
>  
>  	return ret;
>  }
> @@ -124,6 +129,21 @@ struct ovpn_socket *ovpn_socket_new(struct socket *sock, struct ovpn_peer *peer)
>  	ovpn_sock->sock = sock;

The line above this is:

    ovpn_sock->ovpn = peer->ovpn;

It's technically fine since you then overwrite this with peer in case
we're on TCP, but ovpn_sock->ovpn only exists on UDP since you moved
it into a union in this patch.

>  	kref_init(&ovpn_sock->refcount);
>  
> +	/* TCP sockets are per-peer, therefore they are linked to their unique
> +	 * peer
> +	 */
> +	if (sock->sk->sk_protocol == IPPROTO_TCP) {
> +		ovpn_sock->peer = peer;
> +		ret = ptr_ring_init(&ovpn_sock->recv_ring, OVPN_QUEUE_LEN,
> +				    GFP_KERNEL);
> +		if (ret < 0) {
> +			netdev_err(peer->ovpn->dev, "%s: cannot allocate TCP recv ring\n",
> +				   __func__);

Should you also call ovpn_socket_detach here? (as well when the
kzalloc for ovpn_sock fails a bit earlier)

> +			kfree(ovpn_sock);
> +			return ERR_PTR(ret);
> +		}
> +	}
> +
>  	rcu_assign_sk_user_data(sock->sk, ovpn_sock);
>  
>  	return ovpn_sock;
> diff --git a/drivers/net/ovpn/socket.h b/drivers/net/ovpn/socket.h
> index 0d23de5a9344..88c6271ba5c7 100644
> --- a/drivers/net/ovpn/socket.h
> +++ b/drivers/net/ovpn/socket.h
> @@ -21,12 +21,25 @@ struct ovpn_peer;
>  /**
>   * struct ovpn_socket - a kernel socket referenced in the ovpn code
>   * @ovpn: ovpn instance owning this socket (UDP only)
> + * @peer: unique peer transmitting over this socket (TCP only)
> + * @recv_ring: queue where non-data packets directed to userspace are stored
>   * @sock: the low level sock object
>   * @refcount: amount of contexts currently referencing this object
>   * @rcu: member used to schedule RCU destructor callback
>   */
>  struct ovpn_socket {
> -	struct ovpn_struct *ovpn;
> +	union {
> +		/* the VPN session object owning this socket (UDP only) */

nit: Probably not needed

> +		struct ovpn_struct *ovpn;
> +
> +		/* TCP only */
> +		struct {
> +			/** @peer: unique peer transmitting over this socket */

Is kdoc upset about peer but not recv_ring?

> +			struct ovpn_peer *peer;
> +			struct ptr_ring recv_ring;
> +		};
> +	};
> +
>  	struct socket *sock;
>  	struct kref refcount;
>  	struct rcu_head rcu;
> diff --git a/drivers/net/ovpn/tcp.c b/drivers/net/ovpn/tcp.c
> new file mode 100644
> index 000000000000..84ad7cd4fc4f
> --- /dev/null
> +++ b/drivers/net/ovpn/tcp.c
> @@ -0,0 +1,511 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2019-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@...nvpn.net>
> + */
> +
> +#include <linux/ptr_ring.h>
> +#include <linux/skbuff.h>
> +#include <net/tcp.h>
> +#include <net/route.h>
> +
> +#include "ovpnstruct.h"
> +#include "main.h"
> +#include "io.h"
> +#include "packet.h"
> +#include "peer.h"
> +#include "proto.h"
> +#include "skb.h"
> +#include "socket.h"
> +#include "tcp.h"
> +
> +static struct proto ovpn_tcp_prot;
> +
> +static int ovpn_tcp_read_sock(read_descriptor_t *desc, struct sk_buff *in_skb,
> +			      unsigned int in_offset, size_t in_len)
> +{
> +	struct sock *sk = desc->arg.data;
> +	struct ovpn_socket *sock;
> +	struct ovpn_skb_cb *cb;
> +	struct ovpn_peer *peer;
> +	size_t chunk, copied = 0;
> +	void *data;
> +	u16 len;
> +	int st;
> +
> +	rcu_read_lock();
> +	sock = rcu_dereference_sk_user_data(sk);
> +	rcu_read_unlock();

You can't just release rcu_read_lock and keep using sock (here and in
the rest of this file). Either you keep rcu_read_lock, or you can take
a reference on the ovpn_socket.


Anyway, this looks like you're reinventing strparser. Overall this is
very similar to net/xfrm/espintcp.c, but the receive side of espintcp
uses strp and is much shorter (recv_ring looks equivalent to
ike_queue, both sending a few messages to userspace -- look for
strp_init, espintcp_rcv, espintcp_parse in that file).

> +/* Set TCP encapsulation callbacks */
> +int ovpn_tcp_socket_attach(struct socket *sock, struct ovpn_peer *peer)
> +{
> +	void *old_data;
> +	int ret;
> +
> +	INIT_WORK(&peer->tcp.tx_work, ovpn_tcp_tx_work);
> +
> +	ret = ptr_ring_init(&peer->tcp.tx_ring, OVPN_QUEUE_LEN, GFP_KERNEL);
> +	if (ret < 0) {
> +		netdev_err(peer->ovpn->dev, "cannot allocate TCP TX ring\n");
> +		return ret;
> +	}
> +
> +	peer->tcp.skb = NULL;
> +	peer->tcp.offset = 0;
> +	peer->tcp.data_len = 0;
> +
> +	write_lock_bh(&sock->sk->sk_callback_lock);
> +
> +	/* make sure no pre-existing encapsulation handler exists */
> +	rcu_read_lock();
> +	old_data = rcu_dereference_sk_user_data(sock->sk);
> +	rcu_read_unlock();
> +	if (old_data) {
> +		netdev_err(peer->ovpn->dev,
> +			   "provided socket already taken by other user\n");
> +		ret = -EBUSY;
> +		goto err;

The UDP code differentiates "socket already owned by this interface"
from "already taken by other user". That doesn't apply to TCP?



> +int __init ovpn_tcp_init(void)
> +{
> +	/* We need to substitute the recvmsg and the sock_is_readable
> +	 * callbacks in the sk_prot member of the sock object for TCP
> +	 * sockets.
> +	 *
> +	 * However sock->sk_prot is a pointer to a static variable and
> +	 * therefore we can't directly modify it, otherwise every socket
> +	 * pointing to it will be affected.
> +	 *
> +	 * For this reason we create our own static copy and modify what
> +	 * we need. Then we make sk_prot point to this copy
> +	 * (in ovpn_tcp_socket_attach())
> +	 */
> +	ovpn_tcp_prot = tcp_prot;

Don't you need a separate variant for IPv6, like TLS does?

> +	ovpn_tcp_prot.recvmsg = ovpn_tcp_recvmsg;

You don't need to replace ->sendmsg as well? The userspace client is
not expected to send messages?

-- 
Sabrina


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ