[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <c0647d0d3c7158b96dec4604ba317df311c5012d.1690531142.git.geliang.tang@suse.com>
Date: Fri, 28 Jul 2023 15:59:49 +0800
From: Geliang Tang <geliang.tang@...e.com>
To: Alexei Starovoitov <ast@...nel.org>,
Daniel Borkmann <daniel@...earbox.net>,
Andrii Nakryiko <andrii@...nel.org>,
Martin KaFai Lau <martin.lau@...ux.dev>,
Song Liu <song@...nel.org>,
Yonghong Song <yhs@...com>,
John Fastabend <john.fastabend@...il.com>,
KP Singh <kpsingh@...nel.org>,
Stanislav Fomichev <sdf@...gle.com>,
Hao Luo <haoluo@...gle.com>,
Jiri Olsa <jolsa@...nel.org>,
Florent Revest <revest@...omium.org>,
Brendan Jackman <jackmanb@...omium.org>,
Matthieu Baerts <matthieu.baerts@...sares.net>,
Mat Martineau <martineau@...nel.org>,
"David S. Miller" <davem@...emloft.net>,
Eric Dumazet <edumazet@...gle.com>,
Jakub Kicinski <kuba@...nel.org>,
Paolo Abeni <pabeni@...hat.com>,
John Johansen <john.johansen@...onical.com>,
Paul Moore <paul@...l-moore.com>,
James Morris <jmorris@...ei.org>,
"Serge E. Hallyn" <serge@...lyn.com>,
Stephen Smalley <stephen.smalley.work@...il.com>,
Eric Paris <eparis@...isplace.org>,
Mykola Lysenko <mykolal@...com>,
Shuah Khan <shuah@...nel.org>
Cc: Geliang Tang <geliang.tang@...e.com>,
bpf@...r.kernel.org,
netdev@...r.kernel.org,
mptcp@...ts.linux.dev,
apparmor@...ts.ubuntu.com,
linux-security-module@...r.kernel.org,
selinux@...r.kernel.org,
linux-kselftest@...r.kernel.org
Subject: [RFC bpf-next v6] bpf: Force to MPTCP
As is described in the "How to use MPTCP?" section in MPTCP wiki [1]:
"Your app can create sockets with IPPROTO_MPTCP as the proto:
( socket(AF_INET, SOCK_STREAM, IPPROTO_MPTCP); ). Legacy apps can be
forced to create and use MPTCP sockets instead of TCP ones via the
mptcpize command bundled with the mptcpd daemon."
But the mptcpize (LD_PRELOAD technique) command has some limitations
[2]:
- it doesn't work if the application is not using libc (e.g. GoLang
apps)
- in some envs, it might not be easy to set env vars / change the way
apps are launched, e.g. on Android
- mptcpize needs to be launched with all apps that want MPTCP: we could
have more control from BPF to enable MPTCP only for some apps or all the
ones of a netns or a cgroup, etc.
- it is not in BPF, we cannot talk about it at netdev conf.
So this patchset attempts to use BPF to implement functions similer to
mptcpize.
The main idea is add a hook in sys_socket() to change the protocol id
from IPPROTO_TCP (or 0) to IPPROTO_MPTCP.
[1]
https://github.com/multipath-tcp/mptcp_net-next/wiki
[2]
https://github.com/multipath-tcp/mptcp_net-next/issues/79
v6:
- add update_socket_protocol.
v5:
- add bpf_mptcpify helper.
v4:
- use lsm_cgroup/socket_create
v3:
- patch 8: char cmd[128]; -> char cmd[256];
v2:
- Fix build selftests errors reported by CI
Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/79
Signed-off-by: Geliang Tang <geliang.tang@...e.com>
---
net/mptcp/bpf.c | 17 +++
net/socket.c | 6 +
.../testing/selftests/bpf/prog_tests/mptcp.c | 126 ++++++++++++++++--
tools/testing/selftests/bpf/progs/mptcpify.c | 26 ++++
4 files changed, 166 insertions(+), 9 deletions(-)
create mode 100644 tools/testing/selftests/bpf/progs/mptcpify.c
diff --git a/net/mptcp/bpf.c b/net/mptcp/bpf.c
index 5a0a84ad94af..c43aee31014d 100644
--- a/net/mptcp/bpf.c
+++ b/net/mptcp/bpf.c
@@ -12,6 +12,23 @@
#include <linux/bpf.h>
#include "protocol.h"
+#ifdef CONFIG_BPF_JIT
+BTF_SET8_START(bpf_mptcp_fmodret_ids)
+BTF_ID_FLAGS(func, update_socket_protocol)
+BTF_SET8_END(bpf_mptcp_fmodret_ids)
+
+static const struct btf_kfunc_id_set bpf_mptcp_fmodret_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_mptcp_fmodret_ids,
+};
+
+static int __init bpf_mptcp_kfunc_init(void)
+{
+ return register_btf_fmodret_id_set(&bpf_mptcp_fmodret_set);
+}
+late_initcall(bpf_mptcp_kfunc_init);
+#endif /* CONFIG_BPF_JIT */
+
struct mptcp_sock *bpf_mptcp_sock_from_subflow(struct sock *sk)
{
if (sk && sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP && sk_is_mptcp(sk))
diff --git a/net/socket.c b/net/socket.c
index 2b0e54b2405c..4c7b2ff711f0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1644,11 +1644,17 @@ struct file *__sys_socket_file(int family, int type, int protocol)
return sock_alloc_file(sock, flags, NULL);
}
+noinline int update_socket_protocol(int family, int type, int protocol)
+{
+ return protocol;
+}
+
int __sys_socket(int family, int type, int protocol)
{
struct socket *sock;
int flags;
+ protocol = update_socket_protocol(family, type, protocol);
sock = __sys_socket_create(family, type, protocol);
if (IS_ERR(sock))
return PTR_ERR(sock);
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index cd0c42fff7c0..ffa98d5c46af 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,8 +6,9 @@
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
+#include "mptcpify.skel.h"
-#define NS_TEST "mptcp_ns"
+char NS_TEST[32];
#ifndef TCP_CA_NAME_MAX
#define TCP_CA_NAME_MAX 16
@@ -22,6 +23,26 @@ struct mptcp_storage {
char ca_name[TCP_CA_NAME_MAX];
};
+static struct nstoken *create_netns(void)
+{
+ srand(time(NULL));
+ snprintf(NS_TEST, sizeof(NS_TEST), "mptcp_ns_%d", rand());
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ return open_netns(NS_TEST);
+fail:
+ return NULL;
+}
+
+static void cleanup_netns(struct nstoken *nstoken)
+{
+ if (nstoken)
+ close_netns(nstoken);
+
+ SYS_NOFAIL("ip netns del %s &> /dev/null", NS_TEST);
+}
+
static int verify_tsk(int map_fd, int client_fd)
{
int err, cfd = client_fd;
@@ -147,11 +168,8 @@ static void test_base(void)
if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
return;
- SYS(fail, "ip netns add %s", NS_TEST);
- SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
-
- nstoken = open_netns(NS_TEST);
- if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ nstoken = create_netns();
+ if (!ASSERT_OK_PTR(nstoken, "create_netns"))
goto fail;
/* without MPTCP */
@@ -174,11 +192,99 @@ static void test_base(void)
close(server_fd);
fail:
- if (nstoken)
- close_netns(nstoken);
+ cleanup_netns(nstoken);
- SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ close(cgroup_fd);
+}
+
+static void send_byte(int fd)
+{
+ char b = 0x55;
+
+ ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
+}
+
+static int verify_mptcpify(void)
+{
+ char cmd[256];
+ int err = 0;
+
+ snprintf(cmd, sizeof(cmd),
+ "ip netns exec %s ss -tOni | grep -q '%s'",
+ NS_TEST, "tcp-ulp-mptcp");
+ if (!ASSERT_OK(system(cmd), "No tcp-ulp-mptcp found!"))
+ err++;
+
+ snprintf(cmd, sizeof(cmd),
+ "ip netns exec %s nstat -asz %s | awk '%s' | grep -q '%s'",
+ NS_TEST, "MPTcpExtMPCapableSYNACKRX",
+ "NR==1 {next} {print $2}", "1");
+ if (!ASSERT_OK(system(cmd), "No MPTcpExtMPCapableSYNACKRX found!"))
+ err++;
+
+ return err;
+}
+
+static int run_mptcpify(int cgroup_fd)
+{
+ int server_fd, client_fd, prog_fd, err = 0;
+ struct mptcpify *mptcpify_skel;
+
+ mptcpify_skel = mptcpify__open_and_load();
+ if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load"))
+ return -EIO;
+ err = mptcpify__attach(mptcpify_skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(mptcpify_skel->progs.mptcpify);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) {
+ err = -EIO;
+ goto out;
+ }
+
+ /* without MPTCP */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server")) {
+ err = -EIO;
+ goto out;
+ }
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+ err = -EIO;
+ goto close_server;
+ }
+
+ send_byte(client_fd);
+ err += verify_mptcpify();
+
+ close(client_fd);
+close_server:
+ close(server_fd);
+out:
+ mptcpify__destroy(mptcpify_skel);
+ return err;
+}
+
+static void test_mptcpify(void)
+{
+ struct nstoken *nstoken = NULL;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcpify");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ nstoken = create_netns();
+ if (!ASSERT_OK_PTR(nstoken, "create_netns"))
+ goto fail;
+
+ ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");
+
+fail:
+ cleanup_netns(nstoken);
close(cgroup_fd);
}
@@ -186,4 +292,6 @@ void test_mptcp(void)
{
if (test__start_subtest("base"))
test_base();
+ if (test__start_subtest("mptcpify"))
+ test_mptcpify();
}
diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c
new file mode 100644
index 000000000000..c5e5b27ac3df
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcpify.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023, SUSE. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define AF_INET 2
+#define AF_INET6 10
+#define SOCK_STREAM 1
+#define IPPROTO_TCP 6
+#define IPPROTO_MPTCP 262
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(mptcpify, int family, int type, int protocol)
+{
+ if ((family == AF_INET || family == AF_INET6) &&
+ type == SOCK_STREAM &&
+ (!protocol || protocol == IPPROTO_TCP)) {
+ return IPPROTO_MPTCP;
+ }
+
+ return protocol;
+}
--
2.35.3
Powered by blists - more mailing lists