lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 27 Jun 2019 10:36:42 -0700
From:   John Fastabend <john.fastabend@...il.com>
To:     daniel@...earbox.io, jakub.kicinski@...ronome.com, ast@...nel.org
Cc:     netdev@...r.kernel.org, edumazet@...gle.com,
        john.fastabend@...il.com, bpf@...r.kernel.org
Subject: [PATCH 1/2] tls: remove close callback sock unlock/lock and
 flush_sync

The tls close() callback currently drops the sock lock, makes a
cancel_delayed_work_sync() call, and then relocks the sock. This
seems suspect at best. The lock_sock() is applied to stop concurrent
operations on the socket while tearing the sock down. Further we
will need to add support for unhash() shortly and this complicates
matters because the lock may or may not be held then.

So to fix the above situation and simplify the next patch to add
unhash this patch creates a function tls_sk_proto_cleanup() that
tears down the socket without calling lock_sock/release_sock. In
order to flush the workqueue then we do the following,

  - Add a new bit to ctx, BIT_TX_CLOSING that is set when the
    tls resources are being removed.
  - Check this bit before scheduling any new work. This way we
    avoid queueing new work after tear down has started.
  - With the BIT_TX_CLOSING ensuring no new work is being added
    convert the cancel_delayed_work_sync to flush_delayed_work()
  - Finally call tlx_tx_records() to complete any available records
    before,
  - releasing and removing tls ctx.

The above is implemented for the software case namely any of
the following configurations from build_protos,

   prot[TLS_SW][TLS_BASE]
   prot[TLS_BASE][TLS_SW]
   prot[TLS_SW][TLS_SW]

The implication is a follow up patch is needed to resolve the
hardware offload case.

Tested with net selftests and bpf selftests.

Signed-off-by: John Fastabend <john.fastabend@...il.com>
---
 include/net/tls.h  |    4 ++--
 net/tls/tls_main.c |   54 ++++++++++++++++++++++++++--------------------------
 net/tls/tls_sw.c   |   50 ++++++++++++++++++++++++++++++++----------------
 3 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index 4a55ce6a303f..6fe1f5c96f4a 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -105,9 +105,7 @@ struct tls_device {
 enum {
 	TLS_BASE,
 	TLS_SW,
-#ifdef CONFIG_TLS_DEVICE
 	TLS_HW,
-#endif
 	TLS_HW_RECORD,
 	TLS_NUM_CONFIG,
 };
@@ -160,6 +158,7 @@ struct tls_sw_context_tx {
 	int async_capable;
 
 #define BIT_TX_SCHEDULED	0
+#define BIT_TX_CLOSING		1
 	unsigned long tx_bitmask;
 };
 
@@ -327,6 +326,7 @@ void tls_sw_close(struct sock *sk, long timeout);
 void tls_sw_free_resources_tx(struct sock *sk);
 void tls_sw_free_resources_rx(struct sock *sk);
 void tls_sw_release_resources_rx(struct sock *sk);
+void tls_sw_release_strp_rx(struct tls_context *tls_ctx);
 int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 		   int nonblock, int flags, int *addr_len);
 bool tls_sw_stream_read(const struct sock *sk);
diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c
index fc81ae18cc44..51cb19e24dd9 100644
--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -261,24 +261,9 @@ static void tls_ctx_free(struct tls_context *ctx)
 	kfree(ctx);
 }
 
-static void tls_sk_proto_close(struct sock *sk, long timeout)
+static void tls_sk_proto_cleanup(struct sock *sk,
+				 struct tls_context *ctx, long timeo)
 {
-	struct tls_context *ctx = tls_get_ctx(sk);
-	long timeo = sock_sndtimeo(sk, 0);
-	void (*sk_proto_close)(struct sock *sk, long timeout);
-	bool free_ctx = false;
-
-	lock_sock(sk);
-	sk_proto_close = ctx->sk_proto_close;
-
-	if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)
-		goto skip_tx_cleanup;
-
-	if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
-		free_ctx = true;
-		goto skip_tx_cleanup;
-	}
-
 	if (!tls_complete_pending_work(sk, ctx, 0, &timeo))
 		tls_handle_open_record(sk, 0);
 
@@ -299,22 +284,37 @@ static void tls_sk_proto_close(struct sock *sk, long timeout)
 #ifdef CONFIG_TLS_DEVICE
 	if (ctx->rx_conf == TLS_HW)
 		tls_device_offload_cleanup_rx(sk);
-
-	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW) {
-#else
-	{
 #endif
-		tls_ctx_free(ctx);
-		ctx = NULL;
+}
+
+static void tls_sk_proto_close(struct sock *sk, long timeout)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+	long timeo = sock_sndtimeo(sk, 0);
+	void (*sk_proto_close)(struct sock *sk, long timeout);
+	bool free_ctx = false;
+
+	lock_sock(sk);
+	sk_proto_close = ctx->sk_proto_close;
+
+	if (ctx->tx_conf == TLS_HW_RECORD && ctx->rx_conf == TLS_HW_RECORD)
+		goto skip_tx_cleanup;
+
+	if (ctx->tx_conf == TLS_BASE && ctx->rx_conf == TLS_BASE) {
+		free_ctx = true;
+		goto skip_tx_cleanup;
 	}
 
+	tls_sk_proto_cleanup(sk, ctx, timeo);
+
 skip_tx_cleanup:
 	release_sock(sk);
+	if (ctx->rx_conf == TLS_SW)
+		tls_sw_release_strp_rx(ctx);
 	sk_proto_close(sk, timeout);
-	/* free ctx for TLS_HW_RECORD, used by tcp_set_state
-	 * for sk->sk_prot->unhash [tls_hw_unhash]
-	 */
-	if (free_ctx)
+
+	if (ctx->tx_conf != TLS_HW && ctx->rx_conf != TLS_HW &&
+	    ctx->tx_conf != TLS_HW_RECORD && ctx->rx_conf != TLS_HW_RECORD)
 		tls_ctx_free(ctx);
 }
 
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 455a782c7658..d234a6b818e6 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -473,7 +473,8 @@ static void tls_encrypt_done(struct crypto_async_request *req, int err)
 		return;
 
 	/* Schedule the transmission */
-	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
+	if (!test_and_set_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask) &&
+	    !test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
 		schedule_delayed_work(&ctx->tx_work.work, 1);
 }
 
@@ -2058,16 +2059,26 @@ void tls_sw_free_resources_tx(struct sock *sk)
 	struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
 	struct tls_rec *rec, *tmp;
 
+	/* Set TX CLOSING bit to stop tx_work from being scheduled
+	 * while tearing down TX context. We will flush any pending
+	 * work before free'ing ctx anyways. If already set then
+	 * another call is already free'ing resources.
+	 */
+	if (test_and_set_bit(BIT_TX_CLOSING, &ctx->tx_bitmask))
+		return;
+
 	/* Wait for any pending async encryptions to complete */
 	smp_store_mb(ctx->async_notify, true);
 	if (atomic_read(&ctx->encrypt_pending))
 		crypto_wait_req(-EINPROGRESS, &ctx->async_wait);
 
-	release_sock(sk);
-	cancel_delayed_work_sync(&ctx->tx_work.work);
-	lock_sock(sk);
-
-	/* Tx whatever records we can transmit and abandon the rest */
+	/* Flush work queue and then Tx whatever records we can
+	 * transmit and abandon the rest, lock_sock(sk) must be
+	 * held here. We ensure no further work is enqueue by
+	 * checking CLOSING bit before queueing new work and
+	 * setting it above.
+	 */
+	flush_delayed_work(&ctx->tx_work.work);
 	tls_tx_records(sk, -1);
 
 	/* Free up un-sent records in tx_list. First, free
@@ -2111,22 +2122,22 @@ void tls_sw_release_resources_rx(struct sock *sk)
 		write_lock_bh(&sk->sk_callback_lock);
 		sk->sk_data_ready = ctx->saved_data_ready;
 		write_unlock_bh(&sk->sk_callback_lock);
-		release_sock(sk);
-		strp_done(&ctx->strp);
-		lock_sock(sk);
 	}
 }
 
-void tls_sw_free_resources_rx(struct sock *sk)
+void tls_sw_release_strp_rx(struct tls_context *tls_ctx)
 {
-	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
 
-	tls_sw_release_resources_rx(sk);
-
+	strp_done(&ctx->strp);
 	kfree(ctx);
 }
 
+void tls_sw_free_resources_rx(struct sock *sk)
+{
+	tls_sw_release_resources_rx(sk);
+}
+
 /* The work handler to transmitt the encrypted records in tx_list */
 static void tx_work_handler(struct work_struct *work)
 {
@@ -2140,9 +2151,14 @@ static void tx_work_handler(struct work_struct *work)
 	if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
 		return;
 
-	lock_sock(sk);
+	/* If we are running from a socket close operation then the
+	 * lock is already held so we do not need to hold it.
+	 */
+	if (likely(!test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask)))
+		lock_sock(sk);
 	tls_tx_records(sk, -1);
-	release_sock(sk);
+	if (likely(!test_bit(BIT_TX_CLOSING, &ctx->tx_bitmask)))
+		release_sock(sk);
 }
 
 void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
@@ -2152,8 +2168,8 @@ void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)
 	/* Schedule the transmission if tx list is ready */
 	if (is_tx_ready(tx_ctx) && !sk->sk_write_pending) {
 		/* Schedule the transmission */
-		if (!test_and_set_bit(BIT_TX_SCHEDULED,
-				      &tx_ctx->tx_bitmask))
+		if (!test_and_set_bit(BIT_TX_SCHEDULED, &tx_ctx->tx_bitmask) &&
+		    !test_bit(BIT_TX_CLOSING, &tx_ctx->tx_bitmask))
 			schedule_delayed_work(&tx_ctx->tx_work.work, 0);
 	}
 }

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ