[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20180630131736.17344.77449.stgit@john-Precision-Tower-5810>
Date: Sat, 30 Jun 2018 06:17:36 -0700
From: John Fastabend <john.fastabend@...il.com>
To: ast@...nel.org, daniel@...earbox.net, kafai@...com
Cc: netdev@...r.kernel.org
Subject: [bpf PATCH v5 1/4] bpf: sockmap, fix crash when ipv6 sock is added
This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.
Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used.
Tested with 'netserver -6' and 'netperf -H [IPv6]' as well as
'netperf -H [IPv4]'. The ESTABLISHED check resolves the previously
crashing case here.
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@...kaller.appspotmail.com
Acked-by: Martin KaFai Lau <kafai@...com>
Signed-off-by: John Fastabend <john.fastabend@...il.com>
Signed-off-by: Wei Wang <weiwan@...gle.com>
---
kernel/bpf/sockmap.c | 58 +++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 48 insertions(+), 10 deletions(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 52a91d8..d7fd17a 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -140,6 +140,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags);
+static void bpf_tcp_close(struct sock *sk, long timeout);
static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
{
@@ -161,7 +162,42 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
return !empty;
}
-static struct proto tcp_bpf_proto;
+enum {
+ SOCKMAP_IPV4,
+ SOCKMAP_IPV6,
+ SOCKMAP_NUM_PROTS,
+};
+
+enum {
+ SOCKMAP_BASE,
+ SOCKMAP_TX,
+ SOCKMAP_NUM_CONFIGS,
+};
+
+static struct proto *saved_tcpv6_prot __read_mostly;
+static DEFINE_SPINLOCK(tcpv6_prot_lock);
+static struct proto bpf_tcp_prots[SOCKMAP_NUM_PROTS][SOCKMAP_NUM_CONFIGS];
+static void build_protos(struct proto prot[SOCKMAP_NUM_CONFIGS],
+ struct proto *base)
+{
+ prot[SOCKMAP_BASE] = *base;
+ prot[SOCKMAP_BASE].close = bpf_tcp_close;
+ prot[SOCKMAP_BASE].recvmsg = bpf_tcp_recvmsg;
+ prot[SOCKMAP_BASE].stream_memory_read = bpf_tcp_stream_read;
+
+ prot[SOCKMAP_TX] = prot[SOCKMAP_BASE];
+ prot[SOCKMAP_TX].sendmsg = bpf_tcp_sendmsg;
+ prot[SOCKMAP_TX].sendpage = bpf_tcp_sendpage;
+}
+
+static void update_sk_prot(struct sock *sk, struct smap_psock *psock)
+{
+ int family = sk->sk_family == AF_INET6 ? SOCKMAP_IPV6 : SOCKMAP_IPV4;
+ int conf = psock->bpf_tx_msg ? SOCKMAP_TX : SOCKMAP_BASE;
+
+ sk->sk_prot = &bpf_tcp_prots[family][conf];
+}
+
static int bpf_tcp_init(struct sock *sk)
{
struct smap_psock *psock;
@@ -181,14 +217,17 @@ static int bpf_tcp_init(struct sock *sk)
psock->save_close = sk->sk_prot->close;
psock->sk_proto = sk->sk_prot;
- if (psock->bpf_tx_msg) {
- tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
- tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
- tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
- tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
+ /* Build IPv6 sockmap whenever the address of tcpv6_prot changes */
+ if (sk->sk_family == AF_INET6 &&
+ unlikely(sk->sk_prot != smp_load_acquire(&saved_tcpv6_prot))) {
+ spin_lock_bh(&tcpv6_prot_lock);
+ if (likely(sk->sk_prot != saved_tcpv6_prot)) {
+ build_protos(bpf_tcp_prots[SOCKMAP_IPV6], sk->sk_prot);
+ smp_store_release(&saved_tcpv6_prot, sk->sk_prot);
+ }
+ spin_unlock_bh(&tcpv6_prot_lock);
}
-
- sk->sk_prot = &tcp_bpf_proto;
+ update_sk_prot(sk, psock);
rcu_read_unlock();
return 0;
}
@@ -1111,8 +1150,7 @@ static void bpf_tcp_msg_add(struct smap_psock *psock,
static int bpf_tcp_ulp_register(void)
{
- tcp_bpf_proto = tcp_prot;
- tcp_bpf_proto.close = bpf_tcp_close;
+ build_protos(bpf_tcp_prots[SOCKMAP_IPV4], &tcp_prot);
/* Once BPF TX ULP is registered it is never unregistered. It
* will be in the ULP list for the lifetime of the system. Doing
* duplicate registers is not a problem.
Powered by blists - more mailing lists