lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [thread-next>] [day] [month] [year] [list]
Date:	Wed, 23 Jan 2008 10:09:28 +0100 (MET)
From:	Andrea Righi <righiandr@...rs.sourceforge.net>
To:	Balbir Singh <balbir@...ux.vnet.ibm.com>,
	Naveen Gupta <ngupta@...gle.com>,
	Paul Menage <menage@...gle.com>
Cc:	LKML <linux-kernel@...r.kernel.org>,
	David Miller <davem@...emloft.net>
Subject: [RFC] [PATCH] cgroup: limit network bandwidth

Allow limiting the network bandwidth of specific process containers (cgroups)
by imposing additional delays on the sockets' sendmsg()/recvmsg() calls made by
those processes that exceed the limits defined in the control group filesystem.

Example:
  # mkdir /dev/cgroup
  # mount -t cgroup -onet net /dev/cgroup
  # cd /dev/cgroup
  # mkdir foo
  --> the cgroup foo has been created
  # /bin/echo $$ > foo/tasks
  # /bin/echo 1024 > foo/net.tcp
  # /bin/echo 2048 > foo/net.tot
  # sh
  --> the subshell 'sh' is running in cgroup "foo", which limits its TCP
      traffic to a maximum of 1MB/s and its total network traffic to a
      maximum of 2MB/s.

The netlimit approach can be easily extended to support additional network
protocols or different socket families or types (PF_UNIX, PF_BLUETOOTH,
SOCK_SEQPACKET, etc.).

Signed-off-by: Andrea Righi <a.righi@...eca.it>
---

diff -urpN linux-2.6.24-rc8/include/linux/cgroup_netlimit.h linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_netlimit.h
--- linux-2.6.24-rc8/include/linux/cgroup_netlimit.h	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_netlimit.h	2008-01-22 21:36:15.000000000 +0100
@@ -0,0 +1,29 @@
+#ifndef CGROUP_NETLIMIT_H
+#define CGROUP_NETLIMIT_H
+
+enum {
+	CGROUP_NETLIMIT_TOT,
+	CGROUP_NETLIMIT_TCP,
+	CGROUP_NETLIMIT_UDP,
+	CGROUP_NETLIMIT_RAW,
+	/* Number of netlimit types (array size); must remain the last entry */
+	CGROUP_NETLIMIT_END,
+};
+
+#define CGROUP_NETLIMIT_FILE(_x, _y) \
+	{ \
+		.name = _x, \
+		.read = netlimit_read, \
+		.write_uint = netlimit_write_uint, \
+		.private = _y, \
+	}
+
+#ifdef CONFIG_CGROUP_NETLIMIT
+extern void cgroup_nl_acct(int limit_id, size_t bytes);
+extern void cgroup_nl_throttle(int limit_id, int interruptible);
+#else
+static inline void cgroup_nl_acct(int limit_id, size_t bytes) { }
+static inline void cgroup_nl_throttle(int limit_id, int interruptible) { }
+#endif /* CONFIG_CGROUP_NETLIMIT */
+
+#endif
diff -urpN linux-2.6.24-rc8/include/linux/cgroup_subsys.h linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_subsys.h
--- linux-2.6.24-rc8/include/linux/cgroup_subsys.h	2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/include/linux/cgroup_subsys.h	2008-01-22 14:42:17.000000000 +0100
@@ -37,3 +37,9 @@ SUBSYS(cpuacct)
 
 /* */
 
+#ifdef CONFIG_CGROUP_NETLIMIT
+SUBSYS(netlimit)
+#endif
+
+/* */
+
diff -urpN linux-2.6.24-rc8/init/Kconfig linux-2.6.24-rc8-cgroup-netlimit/init/Kconfig
--- linux-2.6.24-rc8/init/Kconfig	2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/init/Kconfig	2008-01-23 00:44:04.000000000 +0100
@@ -313,6 +313,15 @@ config CGROUP_NS
           for instance virtual servers and checkpoint/restart
           jobs.
 
+config CGROUP_NETLIMIT
+        bool "Enable cgroup network bandwidth limiting (EXPERIMENTAL)"
+        depends on EXPERIMENTAL && CGROUPS
+        help
+	  This allows defining network bandwidth limiting/shaping rules for
+	  specific cgroup(s).
+
+	  Say N if unsure.
+
 config CPUSETS
 	bool "Cpuset support"
 	depends on SMP && CGROUPS
diff -urpN linux-2.6.24-rc8/kernel/cgroup_netlimit.c linux-2.6.24-rc8-cgroup-netlimit/kernel/cgroup_netlimit.c
--- linux-2.6.24-rc8/kernel/cgroup_netlimit.c	1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/kernel/cgroup_netlimit.c	2008-01-22 21:35:53.000000000 +0100
@@ -0,0 +1,229 @@
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) 2008 Andrea Righi <a.righi@...eca.it>
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/err.h>
+#include <linux/sched.h>
+#include <linux/fs.h>
+#include <linux/jiffies.h>
+#include <linux/spinlock.h>
+#include <linux/cgroup_netlimit.h>
+
+struct netlimit {
+	struct cgroup_subsys_state css;
+	spinlock_t lock[CGROUP_NETLIMIT_END];		/* one per limit type */
+	unsigned long bandwidth[CGROUP_NETLIMIT_END];	/* KB/s, 0 = no limit */
+	unsigned long req[CGROUP_NETLIMIT_END];		/* bytes charged */
+	unsigned long last_request[CGROUP_NETLIMIT_END]; /* jiffies stamp */
+};
+
+static inline struct netlimit *cgroup_to_netlimit(struct cgroup *cont)
+{
+	return container_of(cgroup_subsys_state(cont, netlimit_subsys_id),
+			    struct netlimit, css);
+}
+
+static inline struct netlimit *task_to_netlimit(struct task_struct *task)
+{
+	return container_of(task_subsys_state(task, netlimit_subsys_id),
+			    struct netlimit, css);
+}
+
+/*
+ * Allocate and initialise the netlimit state of a new cgroup.
+ *
+ * Rules: you can only create a cgroup if:
+ *   1. you are capable(CAP_SYS_ADMIN)
+ *   2. the target cgroup is a descendant of your own cgroup
+ *
+ * Note: called from kernel/cgroup.c with cgroup_lock() held.
+ */
+static struct cgroup_subsys_state *netlimit_create(
+			struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct netlimit *nl;
+	int type;
+
+	if (!capable(CAP_SYS_ADMIN) || !cgroup_is_descendant(cont))
+		return ERR_PTR(-EPERM);
+
+	nl = kzalloc(sizeof(*nl), GFP_KERNEL);
+	if (unlikely(!nl))
+		return ERR_PTR(-ENOMEM);
+
+	/* kzalloc() zeroed bandwidth[] and req[]: no limit is set initially. */
+	for (type = 0; type < CGROUP_NETLIMIT_END; type++) {
+		spin_lock_init(&nl->lock[type]);
+		nl->last_request[type] = jiffies;
+	}
+
+	return &nl->css;
+}
+
+/*
+ * Note: called from kernel/cgroup.c with cgroup_lock() held.
+ */
+static void netlimit_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	kfree(cgroup_to_netlimit(cont));
+}
+
+/*
+ * Report one limit file: the configured bandwidth plus debugging counters
+ * (bytes requested, last-request timestamp and jiffies elapsed since then).
+ */
+static ssize_t netlimit_read(struct cgroup *cont, struct cftype *cft,
+			       struct file *file, char __user *buf,
+			       size_t nbytes, loff_t *ppos)
+{
+	unsigned long elapsed, limit, charged, stamp;
+	int type = cft->private;
+	struct netlimit *nl;
+	ssize_t len, ret = -ENODEV;
+	char *page;
+
+	page = (char *)__get_free_page(GFP_TEMPORARY);
+	if (!page)
+		return -ENOMEM;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		goto out;
+	}
+	nl = cgroup_to_netlimit(cont);
+
+	/* Take a consistent snapshot; format it after dropping the locks. */
+	spin_lock_irq(&nl->lock[type]);
+	elapsed = (long)jiffies - (long)nl->last_request[type];
+	limit = nl->bandwidth[type];
+	charged = nl->req[type];
+	stamp = nl->last_request[type];
+	spin_unlock_irq(&nl->lock[type]);
+	cgroup_unlock();
+
+	len = sprintf(page, "         type: %s\n"
+			    "max-bandwidth: %lu KB/sec\n"
+			    "    requested: %lu KB\n"
+			    " last request: %lu jiffies\n"
+			    "        delta: %lu jiffies\n",
+		      cft->name, limit, charged >> 10, stamp, elapsed);
+	ret = simple_read_from_buffer(buf, nbytes, ppos, page, len);
+
+out:
+	free_page((unsigned long)page);
+	return ret;
+}
+
+/* Set a new limit for the file's traffic type and restart its accounting. */
+static int netlimit_write_uint(struct cgroup *cont, struct cftype *cft,
+				 u64 val)
+{
+	struct netlimit *nl;
+	int type = cft->private;
+
+	cgroup_lock();
+	if (cgroup_is_removed(cont)) {
+		cgroup_unlock();
+		return -ENODEV;
+	}
+
+	nl = cgroup_to_netlimit(cont);
+	/* Whatever was charged against the old limit no longer applies. */
+	spin_lock_irq(&nl->lock[type]);
+	nl->bandwidth[type] = (unsigned long)val;
+	nl->req[type] = 0;
+	nl->last_request[type] = jiffies;
+	spin_unlock_irq(&nl->lock[type]);
+
+	cgroup_unlock();
+	return 0;
+}
+
+static struct cftype files[] = {
+	CGROUP_NETLIMIT_FILE("tot", CGROUP_NETLIMIT_TOT),
+	CGROUP_NETLIMIT_FILE("tcp", CGROUP_NETLIMIT_TCP),
+	CGROUP_NETLIMIT_FILE("udp", CGROUP_NETLIMIT_UDP),
+	CGROUP_NETLIMIT_FILE("raw", CGROUP_NETLIMIT_RAW),
+};
+
+static int netlimit_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	return cgroup_add_files(cont, ss, files, ARRAY_SIZE(files));
+}
+
+struct cgroup_subsys netlimit_subsys = {
+	.name = "net",
+	.create = netlimit_create,
+	.destroy = netlimit_destroy,
+	.populate = netlimit_populate,
+	.subsys_id = netlimit_subsys_id,
+};
+
+void cgroup_nl_acct(int limit_id, size_t bytes)
+{
+	struct netlimit *nl = task_to_netlimit(current);
+
+	if (!nl || !nl->bandwidth[limit_id])
+		return;	/* no limit set: nothing to charge */
+	spin_lock_irq(&nl->lock[limit_id]);	/* req[] is shared cgroup-wide */
+	nl->req[limit_id] += bytes;
+	spin_unlock_irq(&nl->lock[limit_id]);
+}
+EXPORT_SYMBOL(cgroup_nl_acct);
+
+/* Delay the current task while its cgroup is over its @limit_id budget. */
+void cgroup_nl_throttle(int limit_id, int interruptible)
+{
+	struct netlimit *nl = task_to_netlimit(current);
+	unsigned long delta, t, bw;
+	long sleep = 0;
+
+	if (!nl)
+		return;
+	/* Snapshot under the lock: a concurrent write may zero the limit. */
+	spin_lock_irq(&nl->lock[limit_id]);
+	bw = nl->bandwidth[limit_id];
+	delta = jiffies - nl->last_request[limit_id];
+	t = bw ? msecs_to_jiffies(nl->req[limit_id] / bw) : 0;
+	if (delta && t) {
+		sleep = t - delta;
+		if (sleep <= 0) {
+			/* Within budget: start a new accounting window. */
+			nl->req[limit_id] = 0;
+			nl->last_request[limit_id] = jiffies;
+		}
+	}
+	spin_unlock_irq(&nl->lock[limit_id]);
+	if (sleep <= 0)
+		return;
+
+	pr_debug("cgroup-netlimit(%s):"
+		 " task %p (%s) must sleep %ld jiffies\n",
+		 files[limit_id].name, current, current->comm, sleep);
+	if (interruptible)
+		schedule_timeout_interruptible(sleep);
+	else
+		schedule_timeout_uninterruptible(sleep);
+}
+EXPORT_SYMBOL(cgroup_nl_throttle);
diff -urpN linux-2.6.24-rc8/kernel/Makefile linux-2.6.24-rc8-cgroup-netlimit/kernel/Makefile
--- linux-2.6.24-rc8/kernel/Makefile	2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/kernel/Makefile	2008-01-22 11:39:23.000000000 +0100
@@ -41,6 +41,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
 obj-$(CONFIG_CGROUP_DEBUG) += cgroup_debug.o
 obj-$(CONFIG_CPUSETS) += cpuset.o
 obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
+obj-$(CONFIG_CGROUP_NETLIMIT) += cgroup_netlimit.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o
diff -urpN linux-2.6.24-rc8/net/socket.c linux-2.6.24-rc8-cgroup-netlimit/net/socket.c
--- linux-2.6.24-rc8/net/socket.c	2008-01-16 05:22:48.000000000 +0100
+++ linux-2.6.24-rc8-cgroup-netlimit/net/socket.c	2008-01-22 21:33:46.000000000 +0100
@@ -85,6 +85,7 @@
 #include <linux/audit.h>
 #include <linux/wireless.h>
 #include <linux/nsproxy.h>
+#include <linux/cgroup_netlimit.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -551,10 +552,33 @@ static inline int __sock_sendmsg(struct 
 	si->size = size;
 
 	err = security_socket_sendmsg(sock, msg, size);
-	if (err)
-		return err;
+	if (!err)
+		err = sock->ops->sendmsg(iocb, sock, msg, size);
 
-	return sock->ops->sendmsg(iocb, sock, msg, size);
+	if (err >= 0 && sock->sk) {
+		switch (sock->sk->sk_family) {
+		case PF_INET:
+		case PF_INET6:
+			switch (sock->sk->sk_type) {
+			case SOCK_STREAM:
+				cgroup_nl_acct(CGROUP_NETLIMIT_TCP, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_TCP, 1);
+				break;
+			case SOCK_DGRAM:
+				cgroup_nl_acct(CGROUP_NETLIMIT_UDP, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_UDP, 1);
+				break;
+			case SOCK_RAW:
+				cgroup_nl_acct(CGROUP_NETLIMIT_RAW, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_RAW, 1);
+				break;
+			}
+			cgroup_nl_acct(CGROUP_NETLIMIT_TOT, size);
+			cgroup_nl_throttle(CGROUP_NETLIMIT_TOT, 1);
+		}
+	}
+
+	return err;
 }
 
 int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -633,10 +657,33 @@ static inline int __sock_recvmsg(struct 
 	si->flags = flags;
 
 	err = security_socket_recvmsg(sock, msg, size, flags);
-	if (err)
-		return err;
+	if (!err)
+		err = sock->ops->recvmsg(iocb, sock, msg, size, flags);
 
-	return sock->ops->recvmsg(iocb, sock, msg, size, flags);
+	if (err >= 0 && sock->sk) {
+		switch (sock->sk->sk_family) {
+		case PF_INET:
+		case PF_INET6:
+			switch (sock->sk->sk_type) {
+			case SOCK_STREAM:
+				cgroup_nl_acct(CGROUP_NETLIMIT_TCP, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_TCP, 1);
+				break;
+			case SOCK_DGRAM:
+				cgroup_nl_acct(CGROUP_NETLIMIT_UDP, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_UDP, 1);
+				break;
+			case SOCK_RAW:
+				cgroup_nl_acct(CGROUP_NETLIMIT_RAW, size);
+				cgroup_nl_throttle(CGROUP_NETLIMIT_RAW, 1);
+				break;
+			}
+			cgroup_nl_acct(CGROUP_NETLIMIT_TOT, size);
+			cgroup_nl_throttle(CGROUP_NETLIMIT_TOT, 1);
+		}
+	}
+
+	return err;
 }
 
 int sock_recvmsg(struct socket *sock, struct msghdr *msg,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ