From 91bbee69d19b59ec3a08a2c4e7c92747a23b3956 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 26 Jul 2016 18:47:21 +0200 Subject: [PATCH] bcache: read_super: handle architectures with more than 4k page size There is no guarantee that the superblock which __bread returns in a buffer_head starts at offset 0 when an architecture has pages bigger than 4k (the used sector size). This is an attempt to fix this with the minimum amount of change by having a buffer allocated with kmalloc that holds the superblock data as it is on disk. This buffer can then be passed to bch_bio_map, which will set up the bio_vec correctly. Also get rid of __bread, as it does cached IO; the only reason this did not cause weird effects was the direct use of the page cache page that was returned by __bread. Signed-off-by: Stefan Bader --- drivers/md/bcache/bcache.h | 2 ++ drivers/md/bcache/super.c | 76 +++++++++++++++++++++++++++++----------------- 2 files changed, 50 insertions(+), 28 deletions(-) diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6b420a5..3c48927 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -295,6 +295,7 @@ struct cached_dev { struct cache_sb sb; struct bio sb_bio; struct bio_vec sb_bv[1]; + void *sb_disk_data; struct closure sb_write; struct semaphore sb_write_mutex; @@ -382,6 +383,7 @@ struct cache { struct cache_sb sb; struct bio sb_bio; struct bio_vec sb_bv[1]; + void *sb_disk_data; struct kobject kobj; struct block_device *bdev; diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c index e169739..5a2c848 100644 --- a/drivers/md/bcache/super.c +++ b/drivers/md/bcache/super.c @@ -62,17 +62,27 @@ struct workqueue_struct *bcache_wq; /* Superblock */ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, - struct page **res) + void *sb_data) { const char *err; struct cache_sb *s; - struct buffer_head *bh = __bread(bdev, 1, SB_SIZE); + struct bio rs_bio; + struct bio_vec 
rs_bv[1]; unsigned i; - if (!bh) + bio_init(&rs_bio); + rs_bio.bi_bdev = bdev; + rs_bio.bi_rw = READ; + rs_bio.bi_max_vecs = 1; + rs_bio.bi_io_vec = &rs_bv[0]; + rs_bio.bi_iter.bi_sector = SB_SECTOR; + rs_bio.bi_iter.bi_size = SB_SIZE; + bch_bio_map(&rs_bio, sb_data); + + if (submit_bio_wait(READ, &rs_bio)) return "IO error"; - s = (struct cache_sb *) bh->b_data; + s = (struct cache_sb *) sb_data; sb->offset = le64_to_cpu(s->offset); sb->version = le64_to_cpu(s->version); @@ -191,10 +201,7 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev, sb->last_mount = get_seconds(); err = NULL; - get_page(bh->b_page); - *res = bh->b_page; err: - put_bh(bh); return err; } @@ -208,13 +215,13 @@ static void write_bdev_super_endio(struct bio *bio) static void __write_super(struct cache_sb *sb, struct bio *bio) { - struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page); + struct cache_sb *out = page_address(bio->bi_io_vec[0].bv_page) + + bio->bi_io_vec[0].bv_offset; unsigned i; bio->bi_iter.bi_sector = SB_SECTOR; bio->bi_rw = REQ_SYNC|REQ_META; bio->bi_iter.bi_size = SB_SIZE; - bch_bio_map(bio, NULL); out->offset = cpu_to_le64(sb->offset); out->version = cpu_to_le64(sb->version); @@ -238,7 +245,7 @@ static void __write_super(struct cache_sb *sb, struct bio *bio) pr_debug("ver %llu, flags %llu, seq %llu", sb->version, sb->flags, sb->seq); - submit_bio(REQ_WRITE, bio); + submit_bio(WRITE_FLUSH_FUA, bio); } static void bch_write_bdev_super_unlock(struct closure *cl) @@ -1045,6 +1052,8 @@ void bch_cached_dev_release(struct kobject *kobj) { struct cached_dev *dc = container_of(kobj, struct cached_dev, disk.kobj); + + kfree(dc->sb_disk_data); kfree(dc); module_put(THIS_MODULE); } @@ -1138,7 +1147,7 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size) /* Cached device - bcache superblock */ -static void register_bdev(struct cache_sb *sb, struct page *sb_page, +static void register_bdev(struct cache_sb *sb, void *sb_disk_data, 
struct block_device *bdev, struct cached_dev *dc) { @@ -1152,9 +1161,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, bio_init(&dc->sb_bio); dc->sb_bio.bi_max_vecs = 1; - dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs; - dc->sb_bio.bi_io_vec[0].bv_page = sb_page; - get_page(sb_page); + dc->sb_bio.bi_io_vec = &dc->sb_bv[0]; if (cached_dev_init(dc, sb->block_size << 9)) goto err; @@ -1168,6 +1175,11 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, pr_info("registered backing device %s", bdevname(bdev, name)); + /* Do assignment and mapping late, cannot error after this */ + dc->sb_disk_data = sb_disk_data; + dc->sb_bio.bi_iter.bi_size = SB_SIZE; + bch_bio_map(&dc->sb_bio, sb_disk_data); + list_add(&dc->list, &uncached_devices); list_for_each_entry(c, &bch_cache_sets, list) bch_cached_dev_attach(dc, c); @@ -1179,6 +1191,7 @@ static void register_bdev(struct cache_sb *sb, struct page *sb_page, return; err: pr_notice("error opening %s: %s", bdevname(bdev, name), err); + kfree(sb_disk_data); bcache_device_stop(&dc->disk); } @@ -1793,8 +1806,7 @@ void bch_cache_release(struct kobject *kobj) for (i = 0; i < RESERVE_NR; i++) free_fifo(&ca->free[i]); - if (ca->sb_bio.bi_inline_vecs[0].bv_page) - put_page(ca->sb_bio.bi_io_vec[0].bv_page); + kfree(ca->sb_disk_data); if (!IS_ERR_OR_NULL(ca->bdev)) blkdev_put(ca->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); @@ -1838,7 +1850,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) return 0; } -static int register_cache(struct cache_sb *sb, struct page *sb_page, +static int register_cache(struct cache_sb *sb, void *sb_disk_data, struct block_device *bdev, struct cache *ca) { char name[BDEVNAME_SIZE]; @@ -1851,16 +1863,16 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, bio_init(&ca->sb_bio); ca->sb_bio.bi_max_vecs = 1; - ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs; - ca->sb_bio.bi_io_vec[0].bv_page = sb_page; - get_page(sb_page); + 
ca->sb_bio.bi_io_vec = &ca->sb_bv[0]; if (blk_queue_discard(bdev_get_queue(ca->bdev))) ca->discard = CACHE_DISCARD(&ca->sb); ret = cache_alloc(sb, ca); - if (ret != 0) + if (ret != 0) { + err = "error calling cache_alloc"; goto err; + } if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache")) { err = "error calling kobject_add"; @@ -1868,11 +1880,17 @@ static int register_cache(struct cache_sb *sb, struct page *sb_page, goto out; } + /* Do assignment and mapping late */ + ca->sb_disk_data = sb_disk_data; + ca->sb_bio.bi_iter.bi_size = SB_SIZE; + bch_bio_map(&ca->sb_bio, sb_disk_data); + mutex_lock(&bch_register_lock); err = register_cache_set(ca); mutex_unlock(&bch_register_lock); if (err) { + ca->sb_disk_data = NULL; ret = -ENODEV; goto out; } @@ -1935,13 +1953,14 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, char *path = NULL; struct cache_sb *sb = NULL; struct block_device *bdev = NULL; - struct page *sb_page = NULL; + void *sb_disk_data = NULL; if (!try_module_get(THIS_MODULE)) return -EBUSY; if (!(path = kstrndup(buffer, size, GFP_KERNEL)) || - !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL))) + !(sb = kmalloc(sizeof(struct cache_sb), GFP_KERNEL)) || + !(sb_disk_data = kmalloc(SB_SIZE, GFP_KERNEL))) goto err; err = "failed to open device"; @@ -1967,7 +1986,7 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, if (set_blocksize(bdev, 4096)) goto err_close; - err = read_super(sb, bdev, &sb_page); + err = read_super(sb, bdev, sb_disk_data); if (err) goto err_close; @@ -1977,19 +1996,20 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, goto err_close; mutex_lock(&bch_register_lock); - register_bdev(sb, sb_page, bdev, dc); + register_bdev(sb, sb_disk_data, bdev, dc); + sb_disk_data = NULL; /* Consumed or freed in register call */ mutex_unlock(&bch_register_lock); } else { struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL); if (!ca) goto err_close; 
- if (register_cache(sb, sb_page, bdev, ca) != 0) + if (register_cache(sb, sb_disk_data, bdev, ca) != 0) goto err_close; + sb_disk_data = NULL; } out: - if (sb_page) - put_page(sb_page); + kfree(sb_disk_data); kfree(sb); kfree(path); module_put(THIS_MODULE); -- 1.9.1