lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20231009182753.851551-2-toke@redhat.com>
Date: Mon,  9 Oct 2023 20:27:49 +0200
From: Toke Høiland-Jørgensen <toke@...hat.com>
To: David Ahern <dsahern@...il.com>,
	Stephen Hemminger <stephen@...workplumber.org>
Cc: netdev@...r.kernel.org,
	Toke Høiland-Jørgensen <toke@...hat.com>,
	Nicolas Dichtel <nicolas.dichtel@...nd.com>,
	Christian Brauner <brauner@...nel.org>,
	"Eric W . Biederman" <ebiederm@...ssion.com>,
	David Laight <David.Laight@...LAB.COM>
Subject: [RFC PATCH iproute2-next 1/5] ip: Mount netns in child process instead of from inside the new namespace

Refactor the netns creation code so that we offload the mounting of the
namespace file to a child process instead of bind mounting from inside the newly
created namespace.

This is done in preparation for also persisting a mount namespace; the mount
namespace reference cannot be bind-mounted from inside that namespace itself, so
we need to mount that from a child process anyway. The child process
approach (as well as some of the helper functions used for it) is adapted from
the code in the unshare(1) utility that is part of util-linux.

No functional change is intended with this patch.

Signed-off-by: Toke Høiland-Jørgensen <toke@...hat.com>
---
 ip/ipnetns.c | 184 ++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 130 insertions(+), 54 deletions(-)

diff --git a/ip/ipnetns.c b/ip/ipnetns.c
index 9d996832aef8..a05d84514326 100644
--- a/ip/ipnetns.c
+++ b/ip/ipnetns.c
@@ -1,4 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <stdlib.h>
 #define _ATFILE_SOURCE
 #include <sys/file.h>
 #include <sys/types.h>
@@ -7,6 +8,7 @@
 #include <sys/inotify.h>
 #include <sys/mount.h>
 #include <sys/syscall.h>
+#include <sys/eventfd.h>
 #include <stdio.h>
 #include <string.h>
 #include <sched.h>
@@ -25,6 +27,9 @@
 #include "namespace.h"
 #include "json_print.h"
 
+/* synchronize parent and child by pipe */
+#define PIPE_SYNC_BYTE	0x06
+
 static int usage(void)
 {
 	fprintf(stderr,
@@ -46,7 +51,6 @@ static int usage(void)
 static struct rtnl_handle rtnsh = { .fd = -1 };
 
 static int have_rtnl_getnsid = -1;
-static int saved_netns = -1;
 static struct link_filter filter;
 
 static int ipnetns_accept_msg(struct rtnl_ctrl_data *ctrl,
@@ -768,31 +772,131 @@ static int create_netns_dir(void)
 	return 0;
 }
 
-/* Obtain a FD for the current namespace, so we can reenter it later */
-static void netns_save(void)
+/**
+ * waitchild() - Wait for a process to exit successfully
+ * @pid: PID of the process to wait for
+ *
+ * Wait for a process to exit successfully and return its exit status.
+ */
+static int waitchild(int pid)
 {
-	if (saved_netns != -1)
-		return;
+	int rc, status;
+
+	do {
+		rc = waitpid(pid, &status, 0);
+		if (rc < 0) {
+			if (errno == EINTR)
+				continue;
+			return -errno;
+		}
+		if (WIFEXITED(status) &&
+		    WEXITSTATUS(status) != EXIT_SUCCESS)
+			return WEXITSTATUS(status);
+	} while (rc < 0);
 
-	saved_netns = open("/proc/self/ns/net", O_RDONLY | O_CLOEXEC);
-	if (saved_netns == -1) {
-		perror("Cannot open init namespace");
-		exit(1);
+	return 0;
+}
+
+/**
+ * sync_with_child() - Tell our child we're ready and wait for it to exit
+ * @pid: The pid of our child
+ * @fd: A file descriptor created with eventfd()
+ *
+ * This tells a child created with fork_and_wait() that we are ready for it to
+ * continue. Once we have done that, wait for our child to exit.
+ */
+static int sync_with_child(pid_t pid, int fd)
+{
+	uint64_t ch = PIPE_SYNC_BYTE;
+
+	write(fd, &ch, sizeof(ch));
+	close(fd);
+
+	return waitchild(pid);
+}
+
+/**
+ * fork_and_wait() - Fork and wait to be sync'd with
+ * @fd - A file descriptor created with eventfd() which should be passed to
+ *       sync_with_child()
+ *
+ * This creates an eventfd and forks. The parent process returns immediately,
+ * but the child waits for a %PIPE_SYNC_BYTE on the eventfd before returning.
+ * This allows the parent to perform some tasks before the child starts its
+ * work. The parent should call sync_with_child() once it is ready for the
+ * child to continue.
+ *
+ * Return: The pid from fork()
+ */
+static pid_t fork_and_wait(int *fd)
+{
+	uint64_t ch;
+	pid_t pid;
+
+	*fd = eventfd(0, 0);
+	if (*fd < 0) {
+		fprintf(stderr, "eventfd failed: %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	pid = fork();
+	if (pid < 0) {
+		fprintf(stderr, "fork failed: %s\n", strerror(errno));
+		exit(EXIT_FAILURE);
+	}
+
+	if (!pid) {
+		/* wait for the our parent to tell us to continue */
+		if (read(*fd, (char *)&ch, sizeof(ch)) != sizeof(ch) ||
+		    ch != PIPE_SYNC_BYTE) {
+			fprintf(stderr, "failed to read eventfd\n");
+			exit(EXIT_FAILURE);
+		}
+		close(*fd);
 	}
+
+	return pid;
 }
 
-static void netns_restore(void)
+static int bind_ns_file(const char *parent, const char *nsfile,
+			const char *ns_name, pid_t target_pid)
 {
-	if (saved_netns == -1)
-		return;
+	char ns_path[PATH_MAX], proc_path[PATH_MAX];
+	int fd;
 
-	if (setns(saved_netns, CLONE_NEWNET)) {
-		perror("setns");
-		exit(1);
+	snprintf(ns_path, sizeof(ns_path), "%s/%s", parent, ns_name);
+	snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/%s", target_pid, nsfile);
+
+	/* Create the filesystem state */
+	fd = open(ns_path, O_RDONLY|O_CREAT|O_EXCL, 0);
+	if (fd < 0) {
+		fprintf(stderr, "Cannot create namespace file \"%s\": %s\n",
+			ns_path, strerror(errno));
+		return -1;
 	}
+	close(fd);
 
-	close(saved_netns);
-	saved_netns = -1;
+	if (mount(proc_path, ns_path, "none", MS_BIND, NULL) < 0) {
+		fprintf(stderr, "Bind %s -> %s failed: %s\n", proc_path,
+			ns_path, strerror(errno));
+		unlink(ns_path);
+		return -1;
+	}
+	return 0;
+}
+
+static pid_t bind_ns_files_from_child(const char *ns_name, pid_t target_pid,
+				      int *fd)
+{
+	pid_t child;
+
+	child = fork_and_wait(fd);
+	if (child)
+		return child;
+
+	if (bind_ns_file(NETNS_RUN_DIR, "net", ns_name, target_pid))
+		exit(EXIT_FAILURE);
+	exit(EXIT_SUCCESS);
 }
 
 static int netns_add(int argc, char **argv, bool create)
@@ -808,10 +912,9 @@ static int netns_add(int argc, char **argv, bool create)
 	 * userspace tweaks like remounting /sys, or bind mounting
 	 * a new /etc/resolv.conf can be shared between users.
 	 */
-	char netns_path[PATH_MAX], proc_path[PATH_MAX];
 	const char *name;
-	pid_t pid;
-	int fd;
+	pid_t pid, child;
+	int event_fd;
 	int lock;
 	int made_netns_run_dir_mount = 0;
 
@@ -820,6 +923,7 @@ static int netns_add(int argc, char **argv, bool create)
 			fprintf(stderr, "No netns name specified\n");
 			return -1;
 		}
+		pid = getpid();
 	} else {
 		if (argc < 2) {
 			fprintf(stderr, "No netns name and PID specified\n");
@@ -833,8 +937,6 @@ static int netns_add(int argc, char **argv, bool create)
 	}
 	name = argv[0];
 
-	snprintf(netns_path, sizeof(netns_path), "%s/%s", NETNS_RUN_DIR, name);
-
 	if (create_netns_dir())
 		return -1;
 
@@ -894,46 +996,20 @@ static int netns_add(int argc, char **argv, bool create)
 		close(lock);
 	}
 
-	/* Create the filesystem state */
-	fd = open(netns_path, O_RDONLY|O_CREAT|O_EXCL, 0);
-	if (fd < 0) {
-		fprintf(stderr, "Cannot create namespace file \"%s\": %s\n",
-			netns_path, strerror(errno));
-		return -1;
-	}
-	close(fd);
+	child = bind_ns_files_from_child(name, pid, &event_fd);
+	if (child < 0)
+		exit(EXIT_FAILURE);
 
 	if (create) {
-		netns_save();
 		if (unshare(CLONE_NEWNET) < 0) {
 			fprintf(stderr, "Failed to create a new network namespace \"%s\": %s\n",
 				name, strerror(errno));
-			goto out_delete;
+			close(event_fd);
+			exit(EXIT_FAILURE);
 		}
-
-		strcpy(proc_path, "/proc/self/ns/net");
-	} else {
-		snprintf(proc_path, sizeof(proc_path), "/proc/%d/ns/net", pid);
-	}
-
-	/* Bind the netns last so I can watch for it */
-	if (mount(proc_path, netns_path, "none", MS_BIND, NULL) < 0) {
-		fprintf(stderr, "Bind %s -> %s failed: %s\n",
-			proc_path, netns_path, strerror(errno));
-		goto out_delete;
 	}
-	netns_restore();
 
-	return 0;
-out_delete:
-	if (create) {
-		netns_restore();
-		netns_delete(argc, argv);
-	} else if (unlink(netns_path) < 0) {
-		fprintf(stderr, "Cannot remove namespace file \"%s\": %s\n",
-			netns_path, strerror(errno));
-	}
-	return -1;
+	return sync_with_child(child, event_fd);
 }
 
 int set_netnsid_from_name(const char *name, int nsid)
-- 
2.42.0


Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ