lists.openwall.net   lists  /  announce  owl-users  owl-dev  john-users  john-dev  passwdqc-users  yescrypt  popa3d-users  /  oss-security  kernel-hardening  musl  sabotage  tlsify  passwords  /  crypt-dev  xvendor  /  Bugtraq  Full-Disclosure  linux-kernel  linux-netdev  linux-ext4  linux-hardening  linux-cve-announce  PHC 
Open Source and information security mailing list archives
 
Hash Suite: Windows password security audit tool. GUI, reports in PDF.
[<prev] [next>] [<thread-prev] [thread-next>] [day] [month] [year] [list]
Message-Id: <1193474399.27652.15.camel@twins>
Date:	Sat, 27 Oct 2007 10:39:59 +0200
From:	Peter Zijlstra <peterz@...radead.org>
To:	Greg KH <greg@...ah.com>
Cc:	Kay Sievers <kay.sievers@...y.org>,
	Nick Piggin <nickpiggin@...oo.com.au>,
	Andrew Morton <akpm@...ux-foundation.org>,
	linux-kernel@...r.kernel.org, Jens Axboe <jens.axboe@...cle.com>,
	Fengguang Wu <fengguang.wu@...il.com>,
	Trond Myklebust <trond.myklebust@....uio.no>,
	Miklos Szeredi <miklos@...redi.hu>
Subject: Re: per BDI dirty limit (was Re: -mm merge plans for 2.6.24)

On Fri, 2007-10-26 at 19:40 -0700, Greg KH wrote:
> On Sat, Oct 27, 2007 at 03:18:08AM +0200, Peter Zijlstra wrote:
> > 
> > On Fri, 2007-10-26 at 22:04 +0200, Peter Zijlstra wrote:
> > > This crashes and burns on bootup, but I'm too tired to figure out what I
> > > did wrong... will give it another try tomorrow..
> > 
> > Ok, can't sleep.. took a look. I have several problems here.
> > 
> > The thing that makes it go *boom* is the __ATTR_NULL. Removing that
> > makes it boot. Albeit it then warns me of multiple duplicate sysfs
> > objects, all named "bdi".
> > 
> > For some obscure reason this device interface insists on using the
> > bus_id as name (?!), and further reduces usability by limiting that to
> > 20 odd characters.
> > 
> > This makes it quite useless. I tried fudging around that limit by using
> > device_rename and kobject_rename, but to no avail.
> > 
> > Really, it should not be this hard to use, trying to expose a handfull
> > of simple integers to userspace should not take 8h+ and still not work.
> > 
> > Peter, who thinks sysfs is contorted mess beyond his skill. I'll stick
> > to VM and scheduler code, that actually makes sense.
> 
> Heh, that's funny :)
> 
> I'll look at this and see what I can come up with.  Would you just like
> a whole new patch, or one against this one?

Sorry for the grumpy note, I get that way at 3.30 am. Maybe I ought not
have mailed :-/

This is the code I had at that time.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@...llo.nl>
---
 block/genhd.c               |    2 
 fs/fuse/inode.c             |    2 
 fs/nfs/client.c             |    2 
 include/linux/backing-dev.h |   21 ++++++
 include/linux/string.h      |    4 +
 include/linux/writeback.h   |    3 
 mm/backing-dev.c            |  144 ++++++++++++++++++++++++++++++++++++++++++++
 mm/page-writeback.c         |    2 
 mm/util.c                   |   42 ++++++++++++
 9 files changed, 219 insertions(+), 3 deletions(-)

Index: linux-2.6-2/fs/fuse/inode.c
===================================================================
--- linux-2.6-2.orig/fs/fuse/inode.c
+++ linux-2.6-2/fs/fuse/inode.c
@@ -467,7 +467,7 @@ static struct fuse_conn *new_conn(void)
 		atomic_set(&fc->num_waiting, 0);
 		fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 		fc->bdi.unplug_io_fn = default_unplug_io_fn;
-		err = bdi_init(&fc->bdi);
+		err = bdi_init_fmt(&fc->bdi, "bdi-fuse-%llu", (unsigned long long)fc->id);
 		if (err) {
 			kfree(fc);
 			fc = NULL;
Index: linux-2.6-2/fs/nfs/client.c
===================================================================
--- linux-2.6-2.orig/fs/nfs/client.c
+++ linux-2.6-2/fs/nfs/client.c
@@ -678,7 +678,7 @@ static int nfs_probe_fsinfo(struct nfs_s
 		goto out_error;
 
 	nfs_server_set_fsinfo(server, &fsinfo);
-	error = bdi_init(&server->backing_dev_info);
+	error = bdi_init_fmt(&server->backing_dev_info, "bdi-nfs-%s-%p", clp->cl_hostname, server);
 	if (error)
 		goto out_error;
 
Index: linux-2.6-2/include/linux/backing-dev.h
===================================================================
--- linux-2.6-2.orig/include/linux/backing-dev.h
+++ linux-2.6-2/include/linux/backing-dev.h
@@ -11,6 +11,8 @@
 #include <linux/percpu_counter.h>
 #include <linux/log2.h>
 #include <linux/proportions.h>
+#include <linux/kernel.h>
+#include <linux/device.h>
 #include <asm/atomic.h>
 
 struct page;
@@ -48,11 +50,30 @@ struct backing_dev_info {
 
 	struct prop_local_percpu completions;
 	int dirty_exceeded;
+
+#ifdef CONFIG_SYSFS
+	struct device kdev;
+#endif
 };
 
 int bdi_init(struct backing_dev_info *bdi);
 void bdi_destroy(struct backing_dev_info *bdi);
 
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...);
+void bdi_unregister(struct backing_dev_info *bdi);
+
+#define bdi_init_fmt(bdi, fmt...)				\
+	({							\
+	 	int ret;					\
+	 	ret = bdi_init(bdi);				\
+	 	if (!ret) {					\
+	 		ret = bdi_register(bdi, ##fmt);		\
+	 		if (ret)				\
+	 			bdi_destroy(bdi);		\
+	 	}						\
+	 	ret;						\
+	 })
+
 static inline void __add_bdi_stat(struct backing_dev_info *bdi,
 		enum bdi_stat_item item, s64 amount)
 {
Index: linux-2.6-2/include/linux/writeback.h
===================================================================
--- linux-2.6-2.orig/include/linux/writeback.h
+++ linux-2.6-2/include/linux/writeback.h
@@ -113,6 +113,9 @@ struct file;
 int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *,
 				      void __user *, size_t *, loff_t *);
 
+void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
+		 struct backing_dev_info *bdi);
+
 void page_writeback_init(void);
 void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 					unsigned long nr_pages_dirtied);
Index: linux-2.6-2/mm/backing-dev.c
===================================================================
--- linux-2.6-2.orig/mm/backing-dev.c
+++ linux-2.6-2/mm/backing-dev.c
@@ -4,12 +4,153 @@
 #include <linux/fs.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/writeback.h>
+#include <linux/device.h>
+
+#ifdef CONFIG_SYSFS
+
+static void bdi_release(struct device *dev)
+{
+}
+
+static int bdi_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	return 0;
+}
+
+static struct class bdi_class = {
+	.name = "bdi",
+	.dev_release = bdi_release,
+	.dev_uevent = bdi_uevent,
+};
+
+static ssize_t readahead_store(struct device *dev,
+			       struct device_attribute *attr,
+			       const char *buf, size_t count)
+{
+	struct backing_dev_info *bdi =
+		container_of(dev, struct backing_dev_info, kdev);
+	char *end;
+
+	bdi->ra_pages = simple_strtoul(buf, &end, 10);
+
+	return end - buf;
+}
+
+#define BDI_SHOW(name, expr)						\
+static ssize_t name##_show(struct device *dev,				\
+			   struct device_attribute *attr, char *page)	\
+{									\
+	struct backing_dev_info *bdi =					\
+		container_of(dev, struct backing_dev_info, kdev);	\
+									\
+	return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr);	\
+}
+
+BDI_SHOW(readahead, bdi->ra_pages)
+
+BDI_SHOW(reclaimable, bdi_stat(bdi, BDI_RECLAIMABLE))
+BDI_SHOW(writeback, bdi_stat(bdi, BDI_WRITEBACK))
+
+static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i)
+{
+	unsigned long thresh[3];
+
+	get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi);
+
+	return thresh[i];
+}
+
+BDI_SHOW(dirty, get_dirty(bdi, 1))
+BDI_SHOW(bdi_dirty, get_dirty(bdi, 2))
+
+static struct device_attribute bdi_dev_attrs[] = {
+	__ATTR(readahead, 0644, readahead_show, readahead_store),
+	__ATTR_RO(reclaimable),
+	__ATTR_RO(writeback),
+	__ATTR_RO(dirty),
+	__ATTR_RO(bdi_dirty),
+};
+
+static struct attribute *bdi_attributes[ARRAY_SIZE(bdi_dev_attrs) + 1];
+
+static struct attribute_group bdi_attribute_group = {
+	.attrs = bdi_attributes,
+};
+
+static struct attribute_group *bdi_attribute_groups[] = {
+	&bdi_attribute_group,
+	NULL,
+};
+
+static __init int bdi_class_init(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bdi_dev_attrs); i++)
+		bdi_attributes[i] = &bdi_dev_attrs[i].attr;
+
+	return class_register(&bdi_class);
+}
+
+__initcall(bdi_class_init);
+
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
+{
+	int ret;
+
+	bdi->kdev.class = &bdi_class;
+	bdi->kdev.groups = bdi_attribute_groups;
+	strcpy(bdi->kdev.bus_id, "bdi");
+	ret = device_register(&bdi->kdev);
+	if (!ret) {
+		char *name;
+		va_list args;
+
+		va_start(args, fmt);
+		name = kvprintf(fmt, args);
+		va_end(args);
+
+		if (!name) {
+			device_unregister(&bdi->kdev);
+			return -ENOMEM;
+		}
+
+		ret = device_rename(&bdi->kdev, name);
+		kfree(name);
+
+		if (!ret)
+			device_unregister(&bdi->kdev);
+	}
+
+	return ret;
+}
+
+void bdi_unregister(struct backing_dev_info *bdi)
+{
+	device_unregister(&bdi->kdev);
+}
+#else
+int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...)
+{
+	return 0;
+}
+
+void bdi_unregister(struct backing_dev_info *bdi)
+{
+}
+#endif
+
+EXPORT_SYMBOL(bdi_register);
+EXPORT_SYMBOL(bdi_unregister);
 
 int bdi_init(struct backing_dev_info *bdi)
 {
 	int i, j;
 	int err;
 
+	memset(bdi, 0, sizeof(*bdi));
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++) {
 		err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0);
 		if (err)
@@ -33,6 +174,8 @@ void bdi_destroy(struct backing_dev_info
 {
 	int i;
 
+	bdi_unregister(bdi);
+
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
 		percpu_counter_destroy(&bdi->bdi_stat[i]);
 
@@ -90,3 +233,4 @@ long congestion_wait(int rw, long timeou
 }
 EXPORT_SYMBOL(congestion_wait);
 
+
Index: linux-2.6-2/mm/page-writeback.c
===================================================================
--- linux-2.6-2.orig/mm/page-writeback.c
+++ linux-2.6-2/mm/page-writeback.c
@@ -291,7 +291,7 @@ static unsigned long determine_dirtyable
 	return x + 1;	/* Ensure that we never return 0 */
 }
 
-static void
+void
 get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty,
 		 struct backing_dev_info *bdi)
 {
Index: linux-2.6-2/block/genhd.c
===================================================================
--- linux-2.6-2.orig/block/genhd.c
+++ linux-2.6-2/block/genhd.c
@@ -182,6 +182,7 @@ void add_disk(struct gendisk *disk)
 			    disk->minors, NULL, exact_match, exact_lock, disk);
 	register_disk(disk);
 	blk_register_queue(disk);
+	bdi_register(&disk->queue->backing_dev_info, "bdi-%s", disk->disk_name);
 }
 
 EXPORT_SYMBOL(add_disk);
@@ -190,6 +191,7 @@ EXPORT_SYMBOL(del_gendisk);	/* in partit
 void unlink_gendisk(struct gendisk *disk)
 {
 	blk_unregister_queue(disk);
+	bdi_unregister(&disk->queue->backing_dev_info);
 	blk_unregister_region(MKDEV(disk->major, disk->first_minor),
 			      disk->minors);
 }
Index: linux-2.6-2/include/linux/string.h
===================================================================
--- linux-2.6-2.orig/include/linux/string.h
+++ linux-2.6-2/include/linux/string.h
@@ -8,6 +8,7 @@
 #include <linux/compiler.h>	/* for inline */
 #include <linux/types.h>	/* for size_t */
 #include <linux/stddef.h>	/* for NULL */
+#include <stdarg.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -111,6 +112,9 @@ extern void *kmemdup(const void *src, si
 extern char **argv_split(gfp_t gfp, const char *str, int *argcp);
 extern void argv_free(char **argv);
 
+char *kvprintf(const char *fmt, va_list args);
+char *kprintf(const char *fmt, ...);
+
 #ifdef __cplusplus
 }
 #endif
Index: linux-2.6-2/mm/util.c
===================================================================
--- linux-2.6-2.orig/mm/util.c
+++ linux-2.6-2/mm/util.c
@@ -136,3 +136,45 @@ char *strndup_user(const char __user *s,
 	return p;
 }
 EXPORT_SYMBOL(strndup_user);
+
+char *kvprintf(const char *fmt, va_list args)
+{
+	char c;
+	char *buf;
+	int need;
+	int limit;
+	va_list args1;
+
+	va_copy(args1, args);
+	need = vsnprintf(&c, 1, fmt, args1);
+	va_end(args1);
+
+	/* Allocate the new space and copy the string in */
+	limit = need + 1;
+	buf = kmalloc(limit, GFP_KERNEL);
+	if (!buf)
+		return NULL;
+	need = vsnprintf(buf, limit, fmt, args);
+
+	/* something wrong with the string we copied? */
+	if (need >= limit) {
+		kfree(buf);
+		return NULL;
+	}
+
+	return buf;
+}
+EXPORT_SYMBOL(kvprintf);
+
+char *kprintf(const char *fmt, ...)
+{
+	char *buf;
+	va_list args;
+
+	va_start(args, fmt);
+	buf = kvprintf(fmt, args);
+	va_end(args);
+
+	return buf;
+}
+EXPORT_SYMBOL(kprintf);


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@...r.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Powered by blists - more mailing lists

Powered by Openwall GNU/*/Linux Powered by OpenVZ