[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Message-Id: <1451925136-13327-1-git-send-email-dsa@cumulusnetworks.com>
Date: Mon, 4 Jan 2016 08:32:16 -0800
From: David Ahern <dsa@...ulusnetworks.com>
To: netdev@...r.kernel.org, tj@...nel.org
Cc: cgroups@...r.kernel.org, shm@...ulusnetworks.com,
roopa@...ulusnetworks.com, David Ahern <dsa@...ulusnetworks.com>
Subject: [RFC PATCH net-next] net: Add l3mdev cgroup
Add cgroup to associate tasks with L3 networking domains. AF_INET{6}
sockets opened by tasks associated with an l3mdev cgroup are bound to
the associated master device when the socket is created. This allows a
user to run a command (and its children) within an L3 networking context.
The master-device for an l3mdev cgroup must be an L3 master device
(e.g., VRF), and it must be set before attaching tasks to the cgroup. Once
set the master-device can not change. Nested l3mdev cgroups are not
supported. The root (aka default) l3mdev cgroup can not be bound to a
master device.
Example:
ip link add vrf-red type vrf table vrf-red
ip link set dev vrf-red up
ip link set dev eth1 master vrf-red
cgcreate -g l3mdev:vrf-red
cgset -r l3mdev.master-device=vrf-red vrf-red
cgexec -g l3mdev:vrf-red bash
At this point the current shell and its child processes are attached to
the vrf-red L3 domain. Any AF_INET and AF_INET6 sockets opened by the
tasks are bound to the vrf-red device.
TO-DO:
- how to auto-create the cgroup when a VRF device is created and auto-deleted
when a VRF device is destroyed
Signed-off-by: David Ahern <dsa@...ulusnetworks.com>
---
include/linux/cgroup_subsys.h | 3 +
include/net/l3mdev_cgroup.h | 27 ++++++
net/core/sock.c | 2 +
net/l3mdev/Kconfig | 12 +++
net/l3mdev/Makefile | 1 +
net/l3mdev/l3mdev_cgroup.c | 195 ++++++++++++++++++++++++++++++++++++++++++
6 files changed, 240 insertions(+)
create mode 100644 include/net/l3mdev_cgroup.h
create mode 100644 net/l3mdev/l3mdev_cgroup.c
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 1a96fdaa33d5..507df40f11de 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -58,6 +58,9 @@ SUBSYS(net_prio)
SUBSYS(hugetlb)
#endif
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV)
+SUBSYS(l3mdev)
+#endif
/*
* Subsystems that implement the can_fork() family of callbacks.
*/
diff --git a/include/net/l3mdev_cgroup.h b/include/net/l3mdev_cgroup.h
new file mode 100644
index 000000000000..c20fbb0a7f46
--- /dev/null
+++ b/include/net/l3mdev_cgroup.h
@@ -0,0 +1,27 @@
+/*
+ * l3mdev_cgroup.h Control Group for L3 Master Device
+ *
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2015 David Ahern <dsa@...ulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _L3MDEV_CGROUP_H
+#define _L3MDEV_CGROUP_H
+
+/* Forward declaration so this header does not depend on include order. */
+struct sock;
+
+#if IS_ENABLED(CONFIG_CGROUP_L3MDEV)
+
+/* Defined in net/l3mdev/l3mdev_cgroup.c; called from sk_alloc(). */
+void sock_update_l3mdev(struct sock *sk);
+
+#else /* !CONFIG_CGROUP_L3MDEV */
+
+/* No-op stub so callers need no #ifdef when the cgroup is not built. */
+static inline void sock_update_l3mdev(struct sock *sk)
+{
+}
+
+#endif /* CONFIG_CGROUP_L3MDEV */
+#endif /* _L3MDEV_CGROUP_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 565bab7baca9..19ce06674dd9 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,6 +131,7 @@
#include <linux/ipsec.h>
#include <net/cls_cgroup.h>
#include <net/netprio_cgroup.h>
+#include <net/l3mdev_cgroup.h>
#include <linux/sock_diag.h>
#include <linux/filter.h>
@@ -1424,6 +1425,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sock_update_classid(&sk->sk_cgrp_data);
sock_update_netprioidx(&sk->sk_cgrp_data);
+ sock_update_l3mdev(sk);
}
return sk;
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
index 5d47325037bc..3142d810e222 100644
--- a/net/l3mdev/Kconfig
+++ b/net/l3mdev/Kconfig
@@ -8,3 +8,15 @@ config NET_L3_MASTER_DEV
---help---
This module provides glue between core networking code and device
drivers to support L3 master devices like VRF.
+
+config CGROUP_L3MDEV
+ bool "L3 Master Device cgroup"
+ depends on CGROUPS
+ depends on NET_L3_MASTER_DEV
+ ---help---
+ Cgroup subsystem for assigning processes to an L3 domain.
+ When a process is assigned to an l3mdev domain all AF_INET and
+ AF_INET6 sockets opened by the process are bound to the L3 master
+ device.
+
+
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
index 84a53a6f609a..ae74ebad8db7 100644
--- a/net/l3mdev/Makefile
+++ b/net/l3mdev/Makefile
@@ -3,3 +3,4 @@
#
obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
+obj-$(CONFIG_CGROUP_L3MDEV) += l3mdev_cgroup.o
diff --git a/net/l3mdev/l3mdev_cgroup.c b/net/l3mdev/l3mdev_cgroup.c
new file mode 100644
index 000000000000..0326c06bfe02
--- /dev/null
+++ b/net/l3mdev/l3mdev_cgroup.c
@@ -0,0 +1,195 @@
+/*
+ * net/l3mdev/l3mdev_cgroup.c Control Group for L3 Master Devices
+ *
+ * Copyright (c) 2015 Cumulus Networks. All rights reserved.
+ * Copyright (c) 2015 David Ahern <dsa@...ulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/cgroup.h>
+#include <net/sock.h>
+#include <net/l3mdev_cgroup.h>
+
+/* Per-cgroup state of the l3mdev controller. */
+struct l3mdev_cgroup {
+ struct cgroup_subsys_state css; /* embedded css; css_l3mdev() maps back from it */
+ struct net *net; /* netns of the task that wrote master-device; NULL until set */
+ int dev_idx; /* ifindex of the bound master device; 0 means not set */
+};
+
+/* Map a generic css to the l3mdev_cgroup that embeds it; NULL maps to NULL. */
+static inline struct l3mdev_cgroup *css_l3mdev(struct cgroup_subsys_state *css)
+{
+	if (!css)
+		return NULL;
+
+	return container_of(css, struct l3mdev_cgroup, css);
+}
+
+/*
+ * Copy the device index of the current task's l3mdev cgroup into
+ * sk->sk_bound_dev_if. The socket is left untouched when the task has
+ * no l3mdev css or the cgroup's device index is still zero.
+ */
+static void l3mdev_set_bound_dev(struct sock *sk)
+{
+	struct l3mdev_cgroup *cg;
+
+	/* the css lookup and the use of dev_idx share one RCU read section */
+	rcu_read_lock();
+
+	cg = css_l3mdev(task_css(current, l3mdev_cgrp_id));
+	if (!cg || !cg->dev_idx)
+		goto unlock;
+
+	sk->sk_bound_dev_if = cg->dev_idx;
+unlock:
+	rcu_read_unlock();
+}
+
+/*
+ * Socket-allocation hook: AF_INET and AF_INET6 sockets are handed to
+ * l3mdev_set_bound_dev(); sockets of any other family are ignored.
+ */
+void sock_update_l3mdev(struct sock *sk)
+{
+	const int family = sk->sk_family;
+
+	if (family != AF_INET && family != AF_INET6)
+		return;
+
+	l3mdev_set_bound_dev(sk);
+}
+
+/* A css is treated as the root when it is NULL or has no parent css. */
+static bool is_root_cgroup(struct cgroup_subsys_state *css)
+{
+	if (!css)
+		return true;
+
+	return css->parent == NULL;
+}
+
+/*
+ * Allocate the per-cgroup state. Nested l3mdev domains are not
+ * supported: a parent that is itself a non-root cgroup is rejected with
+ * ERR_PTR(-EINVAL). A failed allocation yields ERR_PTR(-ENOMEM).
+ */
+static struct cgroup_subsys_state *
+l3mdev_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+	struct l3mdev_cgroup *cg;
+
+	if (!is_root_cgroup(parent_css))
+		return ERR_PTR(-EINVAL);
+
+	/* zeroed allocation: net == NULL and dev_idx == 0 mean "not bound" */
+	cg = kzalloc(sizeof(*cg), GFP_KERNEL);
+
+	return cg ? &cg->css : ERR_PTR(-ENOMEM);
+}
+
+/* Nothing to set up when the css comes online; always returns 0. */
+static int l3mdev_css_online(struct cgroup_subsys_state *css)
+{
+ return 0;
+}
+
+/* Release the l3mdev_cgroup that embeds @css. */
+static void l3mdev_css_free(struct cgroup_subsys_state *css)
+{
+	struct l3mdev_cgroup *cg = css_l3mdev(css);
+
+	kfree(cg);
+}
+
+/*
+ * seq_show handler for "master-device". Prints the namespace inode
+ * number, the stored device index and the current name of that device
+ * ("<none>" when the index no longer resolves). Prints nothing until a
+ * master device has been written. Returns -EINVAL if the css cannot be
+ * resolved, 0 otherwise.
+ */
+static int l3mdev_read(struct seq_file *sf, void *v)
+{
+	struct l3mdev_cgroup *cg = css_l3mdev(seq_css(sf));
+	struct net_device *dev;
+
+	if (!cg)
+		return -EINVAL;
+
+	/* net is only assigned by a successful write to master-device */
+	if (!cg->net)
+		return 0;
+
+	/* NOTE(review): cg->net is stored without a visible get_net();
+	 * confirm the namespace cannot go away while the cgroup exists.
+	 */
+	dev = dev_get_by_index(cg->net, cg->dev_idx);
+
+	seq_printf(sf, "net[%u]: device index %d ==> %s\n",
+		   cg->net->ns.inum, cg->dev_idx,
+		   dev ? dev->name : "<none>");
+
+	/* drop the reference taken by dev_get_by_index() */
+	if (dev)
+		dev_put(dev);
+
+	return 0;
+}
+
+/*
+ * Write handler for "master-device": bind the cgroup to the L3 master
+ * device named in @buf, looked up in the writer's network namespace.
+ *
+ * Returns @nbytes on success, -ENODEV when no device has that name, and
+ * -EINVAL when the cgroup is the root, is already bound, the input holds
+ * no usable name, or the device is not an L3 master.
+ */
+static ssize_t l3mdev_write(struct kernfs_open_file *of,
+			    char *buf, size_t nbytes, loff_t off)
+{
+	struct cgroup_subsys_state *css = of_css(of);
+	struct l3mdev_cgroup *l3mdev_cgrp = css_l3mdev(css);
+	struct net *net = current->nsproxy->net_ns;
+	struct net_device *dev;
+	/* the "%<IFNAMSIZ>s" conversion below stores up to IFNAMSIZ
+	 * characters plus a terminating NUL, so leave room for the NUL
+	 */
+	char name[IFNAMSIZ + 1];
+	int rc = -EINVAL;
+
+	/* once the master device is set it can not be undone. Must
+	 * delete the cgroup and reset
+	 */
+	if (l3mdev_cgrp->dev_idx)
+		goto out;
+
+	/* root cgroup does not bind to an L3 domain */
+	if (is_root_cgroup(css))
+		goto out;
+
+	if (sscanf(buf, "%" __stringify(IFNAMSIZ) "s", name) != 1)
+		goto out;
+
+	/* reject names too long to fit an IFNAMSIZ buffer with its NUL */
+	if (strlen(name) >= IFNAMSIZ)
+		goto out;
+
+	dev = dev_get_by_name(net, name);
+	if (!dev) {
+		rc = -ENODEV;
+		goto out;
+	}
+
+	if (netif_is_l3_master(dev)) {
+		/* NOTE(review): net is stored without taking a reference
+		 * (no get_net() here); confirm the namespace is guaranteed
+		 * to outlive this cgroup.
+		 */
+		l3mdev_cgrp->net = net;
+		l3mdev_cgrp->dev_idx = dev->ifindex;
+		rc = 0;
+	}
+
+	/* drop the reference taken by dev_get_by_name() */
+	dev_put(dev);
+out:
+	return rc ? : nbytes;
+}
+
+/*
+ * Make sure the master device is set for non-root cgroups before tasks
+ * can be added: returns -ENODEV if any destination css in @tset is a
+ * non-root cgroup whose dev_idx is still zero, 0 otherwise.
+ */
+static int l3mdev_can_attach(struct cgroup_taskset *tset)
+{
+	struct cgroup_subsys_state *dst_css;
+	struct task_struct *tsk;
+
+	cgroup_taskset_for_each(tsk, dst_css, tset) {
+		/* the root cgroup is never bound, so it always accepts tasks */
+		if (is_root_cgroup(dst_css))
+			continue;
+
+		if (!css_l3mdev(dst_css)->dev_idx)
+			return -ENODEV;
+	}
+
+	return 0;
+}
+
+/* Control files of this controller; referenced by .legacy_cftypes below. */
+static struct cftype ss_files[] = {
+ {
+ .name = "master-device", /* read: l3mdev_read(), write: l3mdev_write() */
+ .seq_show = l3mdev_read,
+ .write = l3mdev_write,
+ },
+ { } /* terminate */
+};
+
+/*
+ * Callback table of the l3mdev cgroup controller.
+ * NOTE(review): the name presumably has to match what the SUBSYS(l3mdev)
+ * entry in cgroup_subsys.h expands to - confirm.
+ */
+struct cgroup_subsys l3mdev_cgrp_subsys = {
+ .css_alloc = l3mdev_css_alloc,
+ .css_online = l3mdev_css_online,
+ .css_free = l3mdev_css_free,
+ .can_attach = l3mdev_can_attach, /* refuses attach until a master device is set */
+ .legacy_cftypes = ss_files,
+};
+
+/* Initcall with no work to do; it only returns 0. */
+static int __init init_cgroup_l3mdev(void)
+{
+ return 0;
+}
+
+subsys_initcall(init_cgroup_l3mdev);
+MODULE_AUTHOR("David Ahern");
+MODULE_DESCRIPTION("Control Group for L3 Networking Domains");
+MODULE_LICENSE("GPL");
--
1.9.1
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Powered by blists - more mailing lists