Subject: mm: sysfs: expose the BDI object in sysfs Provide a place in sysfs for the backing_dev_info object. This allows us to see and set the various BDI specific variables. In particular this properly exposes the read-ahead window for all relevant users and /sys/block/<block>/queue/read_ahead_kb should be deprecated. With patient help from Kay Sievers and Greg KH Signed-off-by: Peter Zijlstra --- block/genhd.c | 3 + fs/fuse/inode.c | 3 - fs/nfs/client.c | 24 +++++---- fs/nfs/internal.h | 10 ++-- fs/nfs/super.c | 10 ++-- include/linux/backing-dev.h | 19 +++++++ include/linux/writeback.h | 3 + lib/percpu_counter.c | 1 mm/backing-dev.c | 109 ++++++++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 2 10 files changed, 163 insertions(+), 21 deletions(-) Index: linux-2.6-2/block/genhd.c =================================================================== --- linux-2.6-2.orig/block/genhd.c +++ linux-2.6-2/block/genhd.c @@ -182,6 +182,8 @@ void add_disk(struct gendisk *disk) disk->minors, NULL, exact_match, exact_lock, disk); register_disk(disk); blk_register_queue(disk); + bdi_register(&disk->queue->backing_dev_info, NULL, + "%s", disk->disk_name); } EXPORT_SYMBOL(add_disk); @@ -190,6 +192,7 @@ EXPORT_SYMBOL(del_gendisk); /* in partit void unlink_gendisk(struct gendisk *disk) { blk_unregister_queue(disk); + bdi_unregister(&disk->queue->backing_dev_info); blk_unregister_region(MKDEV(disk->major, disk->first_minor), disk->minors); } Index: linux-2.6-2/fs/fuse/inode.c =================================================================== --- linux-2.6-2.orig/fs/fuse/inode.c +++ linux-2.6-2/fs/fuse/inode.c @@ -467,7 +467,8 @@ static struct fuse_conn *new_conn(void) atomic_set(&fc->num_waiting, 0); fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; fc->bdi.unplug_io_fn = default_unplug_io_fn; - err = bdi_init(&fc->bdi); + err = bdi_init_fmt(&fc->bdi, NULL, + "fuse-%llu", (unsigned long long)fc->id); if (err) { kfree(fc); fc = NULL; Index: 
linux-2.6-2/fs/nfs/client.c =================================================================== --- linux-2.6-2.orig/fs/nfs/client.c +++ linux-2.6-2/fs/nfs/client.c @@ -657,7 +657,8 @@ static void nfs_server_set_fsinfo(struct /* * Probe filesystem information, including the FSID on v2/v3 */ -static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs_fattr *fattr) +static int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, + struct nfs_fattr *fattr, const char *dev_name) { struct nfs_fsinfo fsinfo; struct nfs_client *clp = server->nfs_client; @@ -678,7 +679,8 @@ static int nfs_probe_fsinfo(struct nfs_s goto out_error; nfs_server_set_fsinfo(server, &fsinfo); - error = bdi_init(&server->backing_dev_info); + error = bdi_init_fmt(&server->backing_dev_info, NULL, + "nfs-%s", dev_name); if (error) goto out_error; @@ -772,7 +774,7 @@ void nfs_free_server(struct nfs_server * * - keyed on server and FSID */ struct nfs_server *nfs_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, const char *dev_name) { struct nfs_server *server; struct nfs_fattr fattr; @@ -792,7 +794,7 @@ struct nfs_server *nfs_create_server(con BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); /* Probe the root fh to retrieve its FSID */ - error = nfs_probe_fsinfo(server, mntfh, &fattr); + error = nfs_probe_fsinfo(server, mntfh, &fattr, dev_name); if (error < 0) goto error; if (server->nfs_client->rpc_ops->version == 3) { @@ -949,7 +951,7 @@ static int nfs4_init_server(struct nfs_s * - keyed on server and FSID */ struct nfs_server *nfs4_create_server(const struct nfs_parsed_mount_data *data, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, const char *dev_name) { struct nfs_fattr fattr; struct nfs_server *server; @@ -991,7 +993,7 @@ struct nfs_server *nfs4_create_server(co (unsigned long long) server->fsid.minor); dprintk("Mount FH: %d\n", mntfh->size); - error = nfs_probe_fsinfo(server, mntfh, &fattr); + 
error = nfs_probe_fsinfo(server, mntfh, &fattr, dev_name); if (error < 0) goto error; @@ -1021,7 +1023,8 @@ error: * Create an NFS4 referral server record */ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data, - struct nfs_fh *mntfh) + struct nfs_fh *mntfh, + const char *dev_name) { struct nfs_client *parent_client; struct nfs_server *server, *parent_server; @@ -1066,7 +1069,7 @@ struct nfs_server *nfs4_create_referral_ goto error; /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, mntfh, &fattr); + error = nfs_probe_fsinfo(server, mntfh, &fattr, dev_name); if (error < 0) goto error; @@ -1100,7 +1103,8 @@ error: */ struct nfs_server *nfs_clone_server(struct nfs_server *source, struct nfs_fh *fh, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, + const char *dev_name) { struct nfs_server *server; struct nfs_fattr fattr_fsinfo; @@ -1128,7 +1132,7 @@ struct nfs_server *nfs_clone_server(stru nfs_init_server_aclclient(server); /* probe the filesystem info for this server filesystem */ - error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo); + error = nfs_probe_fsinfo(server, fh, &fattr_fsinfo, dev_name); if (error < 0) goto out_free_server; Index: linux-2.6-2/include/linux/backing-dev.h =================================================================== --- linux-2.6-2.orig/include/linux/backing-dev.h +++ linux-2.6-2/include/linux/backing-dev.h @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include struct page; @@ -48,11 +50,28 @@ struct backing_dev_info { struct prop_local_percpu completions; int dirty_exceeded; + + struct device *dev; }; int bdi_init(struct backing_dev_info *bdi); void bdi_destroy(struct backing_dev_info *bdi); +int bdi_register(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, ...); +void bdi_unregister(struct backing_dev_info *bdi); + +#define bdi_init_fmt(bdi, parent, fmt...) 
\ + ({ \ + int ret = bdi_init(bdi); \ + if (!ret) { \ + ret = 0; /* bdi_register(bdi, parent, ##fmt); */ \ + if (ret) \ + bdi_destroy(bdi); \ + } \ + ret; \ + }) + static inline void __add_bdi_stat(struct backing_dev_info *bdi, enum bdi_stat_item item, s64 amount) { Index: linux-2.6-2/include/linux/writeback.h =================================================================== --- linux-2.6-2.orig/include/linux/writeback.h +++ linux-2.6-2/include/linux/writeback.h @@ -113,6 +113,9 @@ struct file; int dirty_writeback_centisecs_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); +void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, + struct backing_dev_info *bdi); + void page_writeback_init(void); void balance_dirty_pages_ratelimited_nr(struct address_space *mapping, unsigned long nr_pages_dirtied); Index: linux-2.6-2/mm/backing-dev.c =================================================================== --- linux-2.6-2.orig/mm/backing-dev.c +++ linux-2.6-2/mm/backing-dev.c @@ -4,12 +4,119 @@ #include #include #include +#include +#include + + +static struct class *bdi_class; + +static ssize_t read_ahead_kb_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct backing_dev_info *bdi = dev_get_drvdata(dev); + char *end; + + bdi->ra_pages = simple_strtoul(buf, &end, 10) >> (PAGE_SHIFT - 10); + + return end - buf; +} + +#define K(pages) ((pages) << (PAGE_SHIFT - 10)) + +#define BDI_SHOW(name, expr) \ +static ssize_t name##_show(struct device *dev, \ + struct device_attribute *attr, char *page) \ +{ \ + struct backing_dev_info *bdi = dev_get_drvdata(dev); \ + \ + return snprintf(page, PAGE_SIZE-1, "%lld\n", (long long)expr); \ +} + +BDI_SHOW(read_ahead_kb, K(bdi->ra_pages)) + +BDI_SHOW(reclaimable_kb, K(bdi_stat(bdi, BDI_RECLAIMABLE))) +BDI_SHOW(writeback_kb, K(bdi_stat(bdi, BDI_WRITEBACK))) + +static inline unsigned long get_dirty(struct backing_dev_info *bdi, int i) +{ + 
unsigned long thresh[3]; + + get_dirty_limits(&thresh[0], &thresh[1], &thresh[2], bdi); + + return thresh[i]; +} + +BDI_SHOW(dirty_kb, K(get_dirty(bdi, 1))) +BDI_SHOW(bdi_dirty_kb, K(get_dirty(bdi, 2))) + +#define __ATTR_RW(attr) __ATTR(attr, 0644, attr##_show, attr##_store) + +static struct device_attribute bdi_dev_attrs[] = { + __ATTR_RW(read_ahead_kb), + __ATTR_RO(reclaimable_kb), + __ATTR_RO(writeback_kb), + __ATTR_RO(dirty_kb), + __ATTR_RO(bdi_dirty_kb), + __ATTR_NULL, +}; + +static __init int bdi_class_init(void) +{ + bdi_class = class_create(THIS_MODULE, "bdi"); + bdi_class->dev_attrs = bdi_dev_attrs; + return 0; +} + +__initcall(bdi_class_init); + +int bdi_register(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, ...) +{ + char *name; + va_list args; + int ret = 0; + struct device *dev; + + va_start(args, fmt); + name = kvasprintf(GFP_KERNEL, fmt, args); + va_end(args); + + if (!name) + return -ENOMEM; + + dev = device_create(bdi_class, parent, MKDEV(0,0), name); + if (IS_ERR(dev)) { + ret = PTR_ERR(dev); + goto exit; + } + + bdi->dev = dev; + dev_set_drvdata(bdi->dev, bdi); + +exit: + kfree(name); + return ret; +} + +void bdi_unregister(struct backing_dev_info *bdi) +{ + if (bdi->dev) { + device_unregister(bdi->dev); + bdi->dev = NULL; + } +} + +EXPORT_SYMBOL(bdi_register); +EXPORT_SYMBOL(bdi_unregister); int bdi_init(struct backing_dev_info *bdi) { int i, j; int err; + bdi->dev = NULL; + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { err = percpu_counter_init_irq(&bdi->bdi_stat[i], 0); if (err) @@ -33,6 +140,8 @@ void bdi_destroy(struct backing_dev_info { int i; + bdi_unregister(bdi); + for (i = 0; i < NR_BDI_STAT_ITEMS; i++) percpu_counter_destroy(&bdi->bdi_stat[i]); Index: linux-2.6-2/mm/page-writeback.c =================================================================== --- linux-2.6-2.orig/mm/page-writeback.c +++ linux-2.6-2/mm/page-writeback.c @@ -295,7 +295,7 @@ static unsigned long determine_dirtyable return x + 1; /* Ensure that 
we never return 0 */ } -static void +void get_dirty_limits(long *pbackground, long *pdirty, long *pbdi_dirty, struct backing_dev_info *bdi) { Index: linux-2.6-2/lib/percpu_counter.c =================================================================== --- linux-2.6-2.orig/lib/percpu_counter.c +++ linux-2.6-2/lib/percpu_counter.c @@ -102,6 +102,7 @@ void percpu_counter_destroy(struct percp return; free_percpu(fbc->counters); + fbc->counters = NULL; #ifdef CONFIG_HOTPLUG_CPU mutex_lock(&percpu_counters_lock); list_del(&fbc->list); Index: linux-2.6-2/fs/nfs/internal.h =================================================================== --- linux-2.6-2.orig/fs/nfs/internal.h +++ linux-2.6-2/fs/nfs/internal.h @@ -65,16 +65,18 @@ extern void nfs_put_client(struct nfs_cl extern struct nfs_client *nfs_find_client(const struct sockaddr_in *, int); extern struct nfs_server *nfs_create_server( const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_fh *, const char *); extern struct nfs_server *nfs4_create_server( const struct nfs_parsed_mount_data *, - struct nfs_fh *); + struct nfs_fh *, const char *); extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, - struct nfs_fh *); + struct nfs_fh *, + const char *); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *); + struct nfs_fattr *, + const char *); #ifdef CONFIG_PROC_FS extern int __init nfs_fs_proc_init(void); extern void nfs_fs_proc_exit(void); Index: linux-2.6-2/fs/nfs/super.c =================================================================== --- linux-2.6-2.orig/fs/nfs/super.c +++ linux-2.6-2/fs/nfs/super.c @@ -1359,7 +1359,7 @@ static int nfs_get_sb(struct file_system goto out; /* Get a volume representation */ - server = nfs_create_server(&data, &mntfh); + server = nfs_create_server(&data, &mntfh, dev_name); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ 
-1442,7 +1442,7 @@ static int nfs_xdev_get_sb(struct file_s dprintk("--> nfs_xdev_get_sb()\n"); /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, dev_name); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver; @@ -1702,7 +1702,7 @@ static int nfs4_get_sb(struct file_syste goto out; /* Get a volume representation */ - server = nfs4_create_server(&data, &mntfh); + server = nfs4_create_server(&data, &mntfh, dev_name); if (IS_ERR(server)) { error = PTR_ERR(server); goto out; @@ -1787,7 +1787,7 @@ static int nfs4_xdev_get_sb(struct file_ dprintk("--> nfs4_xdev_get_sb()\n"); /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, dev_name); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver; @@ -1861,7 +1861,7 @@ static int nfs4_referral_get_sb(struct f dprintk("--> nfs4_referral_get_sb()\n"); /* create a new volume representation */ - server = nfs4_create_referral_server(data, &mntfh); + server = nfs4_create_referral_server(data, &mntfh, dev_name); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver;