lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Date:   Thu, 20 Dec 2018 11:35:35 -0800
From:   John Fastabend <john.fastabend@...il.com>
To:     ast@...nel.org, daniel@...earbox.net
Cc:     netdev@...r.kernel.org, john.fastabend@...il.com
Subject: [PATCH bpf-next 7/8] bpf: sk_msg, sock{map|hash} redirect through ULP

A sockmap program that redirects through a kTLS ULP enabled socket
will not work correctly because the ULP layer is skipped. This
fixes the behavior to call through the ULP layer on redirect to
ensure any operations required on the data stream at the ULP layer
continue to be applied.

To do this we add an internal flag MSG_SENDPAGE_NOPOLICY to avoid
calling the BPF layer on a redirected message. This is
required to avoid calling the BPF layer multiple times (possibly
recursively) which is not the current/expected behavior without
ULPs. In the future we may add a redirect flag if users _do_
want the policy applied again but this would need to work for both
ULP and non-ULP sockets and be opt-in to avoid breaking existing
programs.

Also to avoid polluting the flag space with an internal flag we
reuse the flag space overlapping MSG_SENDPAGE_NOPOLICY with
MSG_WAITFORONE. Here WAITFORONE is specific to recv path and
SENDPAGE_NOPOLICY is only used for sendpage hooks. The last thing
to verify is user space API is masked correctly to ensure the flag
can not be set by user. (Note this needs to be true regardless
because we have internal flags already in-use that user space
should not be able to set). But for completeness we have two UAPI
paths into sendpage, sendfile and splice.

In the sendfile case the function do_sendfile() zero's flags,

./fs/read_write.c:
 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
		   	    size_t count, loff_t max)
 {
   ...
   fl = 0;
#if 0
   /*
    * We need to debate whether we can enable this or not. The
    * man page documents EAGAIN return for the output at least,
    * and the application is arguably buggy if it doesn't expect
    * EAGAIN on a non-blocking file descriptor.
    */
    if (in.file->f_flags & O_NONBLOCK)
	fl = SPLICE_F_NONBLOCK;
#endif
    file_start_write(out.file);
    retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
 }

In the splice case the pipe_to_sendpage "actor" is used which
masks flags with SPLICE_F_MORE.

./fs/splice.c:
 static int pipe_to_sendpage(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, struct splice_desc *sd)
 {
   ...
   more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0;
   ...
 }

Confirming what we expect that internal flags  are in fact internal
to socket side.

Fixes: d3b18ad31f93 ("tls: add bpf support to sk_msg handling")
Signed-off-by: John Fastabend <john.fastabend@...il.com>
---
 include/linux/socket.h |  1 +
 include/net/tls.h      |  9 +++++++++
 net/ipv4/tcp_bpf.c     | 13 ++++++++++++-
 net/tls/tls_sw.c       | 43 ++++++++++++++++++++++++++++++-------------
 4 files changed, 52 insertions(+), 14 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 8b571e9..84c48a3 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -286,6 +286,7 @@ struct ucred {
 #define MSG_NOSIGNAL	0x4000	/* Do not generate SIGPIPE */
 #define MSG_MORE	0x8000	/* Sender will send more */
 #define MSG_WAITFORONE	0x10000	/* recvmmsg(): block until 1+ packets avail */
+#define MSG_SENDPAGE_NOPOLICY 0x10000 /* sendpage() internal : do no apply policy */
 #define MSG_SENDPAGE_NOTLAST 0x20000 /* sendpage() internal : not the last page */
 #define MSG_BATCH	0x40000 /* sendmmsg(): more messages coming */
 #define MSG_EOF         MSG_FIN
diff --git a/include/net/tls.h b/include/net/tls.h
index bab5627..23601f3 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -454,6 +454,15 @@ tls_offload_ctx_tx(const struct tls_context *tls_ctx)
 	return (struct tls_offload_context_tx *)tls_ctx->priv_ctx_tx;
 }
 
+static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (!ctx)
+		return false;
+	return !!tls_sw_ctx_tx(ctx);
+}
+
 static inline struct tls_offload_context_rx *
 tls_offload_ctx_rx(const struct tls_context *tls_ctx)
 {
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 8750334..1bb7321 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -8,6 +8,7 @@
 #include <linux/wait.h>
 
 #include <net/inet_common.h>
+#include <net/tls.h>
 
 static bool tcp_bpf_stream_read(const struct sock *sk)
 {
@@ -218,6 +219,8 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
 	u32 off;
 
 	while (1) {
+		bool has_tx_ulp;
+
 		sge = sk_msg_elem(msg, msg->sg.start);
 		size = (apply && apply_bytes < sge->length) ?
 			apply_bytes : sge->length;
@@ -226,7 +229,15 @@ static int tcp_bpf_push(struct sock *sk, struct sk_msg *msg, u32 apply_bytes,
 
 		tcp_rate_check_app_limited(sk);
 retry:
-		ret = do_tcp_sendpages(sk, page, off, size, flags);
+		has_tx_ulp = tls_sw_has_ctx_tx(sk);
+		if (has_tx_ulp) {
+			flags |= MSG_SENDPAGE_NOPOLICY;
+			ret = kernel_sendpage_locked(sk,
+						     page, off, size, flags);
+		} else {
+			ret = do_tcp_sendpages(sk, page, off, size, flags);
+		}
+
 		if (ret <= 0)
 			return ret;
 		if (apply)
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index d4ecc66..5aee9ae 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -686,12 +686,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
 	struct sk_psock *psock;
 	struct sock *sk_redir;
 	struct tls_rec *rec;
+	bool enospc, policy;
 	int err = 0, send;
 	u32 delta = 0;
-	bool enospc;
 
+	policy = !(flags & MSG_SENDPAGE_NOPOLICY);
 	psock = sk_psock_get(sk);
-	if (!psock)
+	if (!psock || !policy)
 		return tls_push_record(sk, flags, record_type);
 more_data:
 	enospc = sk_msg_full(msg);
@@ -1017,8 +1018,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 	return copied ? copied : ret;
 }
 
-int tls_sw_sendpage(struct sock *sk, struct page *page,
-		    int offset, size_t size, int flags)
+int tls_sw_do_sendpage(struct sock *sk, struct page *page,
+		       int offset, size_t size, int flags)
 {
 	long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
@@ -1033,15 +1034,7 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	int ret = 0;
 	bool eor;
 
-	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
-		      MSG_SENDPAGE_NOTLAST))
-		return -ENOTSUPP;
-
-	/* No MSG_EOR from splice, only look at MSG_MORE */
 	eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
-
-	lock_sock(sk);
-
 	sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
 
 	/* Wait till there is any pending write on socket */
@@ -1145,10 +1138,34 @@ int tls_sw_sendpage(struct sock *sk, struct page *page,
 	}
 sendpage_end:
 	ret = sk_stream_error(sk, flags, ret);
-	release_sock(sk);
 	return copied ? copied : ret;
 }
 
+int tls_sw_sendpage_locked(struct sock *sk, struct page *page,
+			   int offset, size_t size, int flags)
+{
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+		return -ENOTSUPP;
+
+	return tls_sw_do_sendpage(sk, page, offset, size, flags);
+}
+
+int tls_sw_sendpage(struct sock *sk, struct page *page,
+		    int offset, size_t size, int flags)
+{
+	int ret;
+
+	if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
+		      MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
+		return -ENOTSUPP;
+
+	lock_sock(sk);
+	ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
+	release_sock(sk);
+	return ret;
+}
+
 static struct sk_buff *tls_wait_data(struct sock *sk, struct sk_psock *psock,
 				     int flags, long timeo, int *err)
 {
-- 
2.7.4

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ