[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-ID: <20090702021133.14469.35140.stgit@menage.mtv.corp.google.com>
Date: Wed, 01 Jul 2009 19:11:34 -0700
From: Paul Menage <menage@...gle.com>
To: lizf@...fujitsu.com, balbir@...ux.vnet.ibm.com
Cc: linux-kernel@...r.kernel.org, akpm@...ux-foundation.org,
containers@...ts.linux-foundation.org,
kamezawa.hiroyu@...fujitsu.com
Subject: [PATCH 8/9] [RFC] Example multi-bindable subsystem: a per-cgroup
notes field
[RFC] Example multi-bindable subsystem: a per-cgroup notes field
As an example of a multi-bindable subsystem (one that can be bound to
several hierarchies at once), this patch introduces
the "info" subsystem, which provides a single file, "info.notes", in
which user-space middleware can store an arbitrary (by default up to
one page) binary string representing configuration data about that
cgroup. This reduces the need to keep additional state outside the
cgroup filesystem. The maximum notes size for a hierarchy can be set
by updating the "info.size" file in the root cgroup.
Signed-off-by: Paul Menage <menage@...gle.com>
---
include/linux/cgroup_subsys.h | 6 ++
init/Kconfig | 9 +++
kernel/Makefile | 1
kernel/info_cgroup.c | 133 +++++++++++++++++++++++++++++++++++++++++
4 files changed, 149 insertions(+), 0 deletions(-)
create mode 100644 kernel/info_cgroup.c
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index f78605e..5dfea38 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -60,3 +60,9 @@ SUBSYS(net_cls)
#endif
/* */
+
+#ifdef CONFIG_CGROUP_INFO
+MULTI_SUBSYS(info)
+#endif
+
+/* */
diff --git a/init/Kconfig b/init/Kconfig
index d904d6c..3bd4685 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -604,6 +604,15 @@ config CGROUP_MEM_RES_CTLR_SWAP
Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page
size is 4096bytes, 512k per 1Gbytes of swap.
+config CGROUP_INFO
+ bool "Simple application-specific info cgroup subsystem"
+ depends on CGROUPS
+ help
+ Provides a simple cgroups subsystem with an "info.notes"
+ field, which can be used by middleware to store
+ application-specific configuration data about a cgroup. Can
+ be mounted on multiple hierarchies at once.
+
endif # CGROUPS
config MM_OWNER
diff --git a/kernel/Makefile b/kernel/Makefile
index 7ffdc16..e713a67 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_CGROUPS) += cgroup.o
obj-$(CONFIG_CGROUP_FREEZER) += cgroup_freezer.o
obj-$(CONFIG_CPUSETS) += cpuset.o
obj-$(CONFIG_CGROUP_NS) += ns_cgroup.o
+obj-$(CONFIG_CGROUP_INFO) += info_cgroup.o
obj-$(CONFIG_UTS_NS) += utsname.o
obj-$(CONFIG_USER_NS) += user_namespace.o
obj-$(CONFIG_PID_NS) += pid_namespace.o
diff --git a/kernel/info_cgroup.c b/kernel/info_cgroup.c
new file mode 100644
index 0000000..34cfdb8
--- /dev/null
+++ b/kernel/info_cgroup.c
@@ -0,0 +1,133 @@
+/*
+ * info_cgroup.c - simple cgroup providing a "notes" field
+ */
+
+#include "linux/cgroup.h"
+#include "linux/err.h"
+#include "linux/seq_file.h"
+
+/*
+ * Per-cgroup state for the "info" subsystem: an opaque notes blob, its
+ * length, and the lock taken around accesses to both.
+ */
+struct info_cgroup {
+ struct cgroup_subsys_state css;
+ /*
+ * notes blob for this cgroup, or NULL when nothing is stored.
+ * Arbitrary binary data with no terminator; the byte count is
+ * kept separately in len.
+ */
+ const char *notes;
+ /* number of bytes stored in notes (0 when notes is NULL) */
+ size_t len;
+ /*
+ * size limit for notes in this hierarchy. Only relevant for
+ * the root cgroup (it stays 0 in every other cgroup). Not
+ * synchronized since it's a single word value and writes to
+ * it never depend on previously read values.
+ */
+ size_t max_len;
+ /* held by info_read() and info_write() around notes and len */
+ spinlock_t lock;
+};
+
+/*
+ * Look up the info subsystem state attached to @cg and convert the
+ * embedded cgroup_subsys_state back into its enclosing info_cgroup.
+ */
+static inline struct info_cgroup *cg_info(struct cgroup *cg)
+{
+	struct cgroup_subsys_state *state;
+
+	state = cgroup_subsys_state(cg, info_subsys_id);
+	return container_of(state, struct info_cgroup, css);
+}
+
+/*
+ * Allocate zeroed state for a new cgroup and initialize its lock.
+ * A cgroup without a parent additionally gets the default notes size
+ * limit of one page. Returns the embedded css, or ERR_PTR(-ENOMEM) if
+ * the allocation fails.
+ */
+static struct cgroup_subsys_state *info_create(struct cgroup_subsys *ss,
+					       struct cgroup *cg)
+{
+	struct info_cgroup *icg;
+
+	icg = kzalloc(sizeof(*icg), GFP_KERNEL);
+	if (icg == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/* the size limit lives in the parentless (root) cgroup only */
+	if (cg->parent == NULL)
+		icg->max_len = PAGE_SIZE;
+	spin_lock_init(&icg->lock);
+
+	return &icg->css;
+}
+
+/* Free the notes blob held by @cont, then the per-cgroup state itself. */
+static void info_destroy(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	struct info_cgroup *icg = cg_info(cont);
+
+	/* notes may be NULL; kfree() accepts that */
+	kfree(icg->notes);
+	kfree(icg);
+}
+
+
+/*
+ * Emit this cgroup's notes into @seq. The blob is copied out with
+ * seq_write() using the stored length rather than as a string. The
+ * lock is held across the copy. Always returns 0.
+ */
+static int info_read(struct cgroup *cont, struct cftype *cft,
+		     struct seq_file *seq)
+{
+	struct info_cgroup *icg = cg_info(cont);
+
+	spin_lock(&icg->lock);
+	if (icg->notes != NULL)
+		seq_write(seq, icg->notes, icg->len);
+	spin_unlock(&icg->lock);
+
+	return 0;
+}
+
+/*
+ * Replace this cgroup's notes with the @nbytes of data at @userbuf.
+ *
+ * A custom write function is used so that we can handle binary data.
+ * A zero-length write clears the notes.
+ *
+ * Returns @nbytes on success, -E2BIG if @nbytes exceeds the max_len of
+ * the hierarchy's top cgroup, -ENOMEM if the new buffer cannot be
+ * allocated, or -EFAULT if @userbuf cannot be read. On any failure the
+ * previously stored notes are left untouched.
+ */
+static ssize_t info_write(struct cgroup *cgrp, struct cftype *cft,
+			  struct file *file,
+			  const char __user *userbuf,
+			  size_t nbytes, loff_t *unused_ppos)
+{
+	struct info_cgroup *css = cg_info(cgrp);
+	const char *old;
+	char *notes = NULL;
+
+	if (nbytes > cg_info(cgrp->top_cgroup)->max_len)
+		return -E2BIG;
+	if (nbytes) {
+		notes = kmalloc(nbytes, GFP_USER);
+		if (!notes)
+			return -ENOMEM;
+		if (copy_from_user(notes, userbuf, nbytes)) {
+			/* don't leak the staging buffer on a bad user pointer */
+			kfree(notes);
+			return -EFAULT;
+		}
+	}
+
+	/* swap the blob in under the lock; free the old one outside it */
+	spin_lock(&css->lock);
+	old = css->notes;
+	css->notes = notes;
+	css->len = nbytes;
+	spin_unlock(&css->lock);
+	kfree(old);
+
+	return nbytes;
+}
+
+/* Report the notes size limit stored in @cont's info state. */
+static u64 notes_size_read(struct cgroup *cont, struct cftype *cft)
+{
+	return cg_info(cont)->max_len;
+}
+
+/*
+ * Set the notes size limit stored in @cont's info state.
+ *
+ * max_len is a size_t, which may be narrower than the u64 handed in by
+ * the cgroup file layer; a value that does not fit is rejected with
+ * -EINVAL instead of being silently truncated. Returns 0 on success.
+ */
+static int notes_size_write(struct cgroup *cont, struct cftype *cft, u64 val)
+{
+	struct info_cgroup *css = cg_info(cont);
+
+	if (val != (size_t)val)
+		return -EINVAL;
+	css->max_len = val;
+	return 0;
+}
+
+/*
+ * Control files that info_populate() adds to every cgroup it is called
+ * for: "notes", read through info_read() and written through the
+ * binary-safe info_write().
+ */
+static struct cftype info_files[] = {
+ {
+ .name = "notes",
+ .read_seq_string = info_read,
+ .write = info_write,
+ },
+};
+
+/*
+ * Control files that info_populate() adds only to a cgroup with no
+ * parent: "size", which reads and writes that cgroup's max_len.
+ */
+static struct cftype info_root_files[] = {
+ {
+ .name = "size",
+ .read_u64 = notes_size_read,
+ .write_u64 = notes_size_write,
+ },
+};
+
+/*
+ * Create the subsystem's control files in @cont.
+ *
+ * A cgroup without a parent first gets the root-only files; every
+ * cgroup then gets the common files. Returns 0 on success or the error
+ * from the first cgroup_add_files() call that fails.
+ */
+static int info_populate(struct cgroup_subsys *ss, struct cgroup *cont)
+{
+	int ret;
+
+	if (!cont->parent) {
+		ret = cgroup_add_files(cont, ss, info_root_files,
+				       ARRAY_SIZE(info_root_files));
+		/* don't report success if the root-only files are missing */
+		if (ret)
+			return ret;
+	}
+	return cgroup_add_files(cont, ss, info_files, ARRAY_SIZE(info_files));
+}
+
+/*
+ * Subsystem descriptor for "info": hooks up the create, destroy and
+ * populate callbacks defined in this file under info_subsys_id.
+ */
+struct cgroup_subsys info_subsys = {
+ .name = "info",
+ .create = info_create,
+ .destroy = info_destroy,
+ .populate = info_populate,
+ .subsys_id = info_subsys_id,
+};
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
Powered by blists - more mailing lists